mirror of
				https://github.com/searxng/searxng
				synced 2024-01-01 19:24:07 +01:00 
			
		
		
		
	
						commit
						f4df27fa59
					
				
					 4 changed files with 142 additions and 85 deletions
				
			
		|  | @ -1,7 +1,7 @@ | |||
| """ | ||||
|  Nyaa.se (Anime Bittorrent tracker) | ||||
|  Nyaa.si (Anime Bittorrent tracker) | ||||
| 
 | ||||
|  @website      http://www.nyaa.se/ | ||||
|  @website      http://www.nyaa.si/ | ||||
|  @provide-api  no | ||||
|  @using-api    no | ||||
|  @results      HTML | ||||
|  | @ -12,50 +12,25 @@ | |||
| from lxml import html | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.url_utils import urlencode | ||||
| from searx.utils import get_torrent_size, int_or_zero | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['files', 'images', 'videos', 'music'] | ||||
| paging = True | ||||
| 
 | ||||
| # search-url | ||||
| base_url = 'http://www.nyaa.se/' | ||||
| base_url = 'http://www.nyaa.si/' | ||||
| search_url = base_url + '?page=search&{query}&offset={offset}' | ||||
| 
 | ||||
| # xpath queries | ||||
| xpath_results = '//table[@class="tlist"]//tr[contains(@class, "tlistrow")]' | ||||
| xpath_category = './/td[@class="tlisticon"]/a' | ||||
| xpath_title = './/td[@class="tlistname"]/a' | ||||
| xpath_torrent_file = './/td[@class="tlistdownload"]/a' | ||||
| xpath_filesize = './/td[@class="tlistsize"]/text()' | ||||
| xpath_seeds = './/td[@class="tlistsn"]/text()' | ||||
| xpath_leeches = './/td[@class="tlistln"]/text()' | ||||
| xpath_downloads = './/td[@class="tlistdn"]/text()' | ||||
| 
 | ||||
| 
 | ||||
| # convert a variable to integer or return 0 if it's not a number | ||||
| def int_or_zero(num): | ||||
|     if isinstance(num, list): | ||||
|         if len(num) < 1: | ||||
|             return 0 | ||||
|         num = num[0] | ||||
|     if num.isdigit(): | ||||
|         return int(num) | ||||
|     return 0 | ||||
| 
 | ||||
| 
 | ||||
| # get multiplier to convert torrent size to bytes | ||||
| def get_filesize_mul(suffix): | ||||
|     return { | ||||
|         'KB': 1024, | ||||
|         'MB': 1024 ** 2, | ||||
|         'GB': 1024 ** 3, | ||||
|         'TB': 1024 ** 4, | ||||
| 
 | ||||
|         'KIB': 1024, | ||||
|         'MIB': 1024 ** 2, | ||||
|         'GIB': 1024 ** 3, | ||||
|         'TIB': 1024 ** 4 | ||||
|     }[str(suffix).upper()] | ||||
| xpath_results = '//table[contains(@class, "torrent-list")]//tr[not(th)]' | ||||
| xpath_category = './/td[1]/a[1]' | ||||
| xpath_title = './/td[2]/a[last()]' | ||||
| xpath_torrent_links = './/td[3]/a' | ||||
| xpath_filesize = './/td[4]/text()' | ||||
| xpath_seeds = './/td[6]/text()' | ||||
| xpath_leeches = './/td[7]/text()' | ||||
| xpath_downloads = './/td[8]/text()' | ||||
| 
 | ||||
| 
 | ||||
| # do search-request | ||||
|  | @ -72,25 +47,32 @@ def response(resp): | |||
|     dom = html.fromstring(resp.text) | ||||
| 
 | ||||
|     for result in dom.xpath(xpath_results): | ||||
|         # defaults | ||||
|         filesize = 0 | ||||
|         magnet_link = "" | ||||
|         torrent_link = "" | ||||
| 
 | ||||
|         # category in which our torrent belongs | ||||
|         category = result.xpath(xpath_category)[0].attrib.get('title') | ||||
|         try: | ||||
|             category = result.xpath(xpath_category)[0].attrib.get('title') | ||||
|         except: | ||||
|             pass | ||||
| 
 | ||||
|         # torrent title | ||||
|         page_a = result.xpath(xpath_title)[0] | ||||
|         title = extract_text(page_a) | ||||
| 
 | ||||
|         # link to the page | ||||
|         href = page_a.attrib.get('href') | ||||
|         href = base_url + page_a.attrib.get('href') | ||||
| 
 | ||||
|         # link to the torrent file | ||||
|         torrent_link = result.xpath(xpath_torrent_file)[0].attrib.get('href') | ||||
| 
 | ||||
|         # torrent size | ||||
|         try: | ||||
|             file_size, suffix = result.xpath(xpath_filesize)[0].split(' ') | ||||
|             file_size = int(float(file_size) * get_filesize_mul(suffix)) | ||||
|         except: | ||||
|             file_size = None | ||||
|         for link in result.xpath(xpath_torrent_links): | ||||
|             url = link.attrib.get('href') | ||||
|             if 'magnet' in url: | ||||
|                 # link to the magnet | ||||
|                 magnet_link = url | ||||
|             else: | ||||
|                 # link to the torrent file | ||||
|                 torrent_link = url | ||||
| 
 | ||||
|         # seed count | ||||
|         seed = int_or_zero(result.xpath(xpath_seeds)) | ||||
|  | @ -101,6 +83,14 @@ def response(resp): | |||
|         # torrent downloads count | ||||
|         downloads = int_or_zero(result.xpath(xpath_downloads)) | ||||
| 
 | ||||
|         # let's try to calculate the torrent size | ||||
|         try: | ||||
|             filesize_info = result.xpath(xpath_filesize)[0] | ||||
|             filesize, filesize_multiplier = filesize_info.split() | ||||
|             filesize = get_torrent_size(filesize, filesize_multiplier) | ||||
|         except: | ||||
|             pass | ||||
| 
 | ||||
|         # content string contains all information not included into template | ||||
|         content = 'Category: "{category}". Downloaded {downloads} times.' | ||||
|         content = content.format(category=category, downloads=downloads) | ||||
|  | @ -110,8 +100,9 @@ def response(resp): | |||
|                         'content': content, | ||||
|                         'seed': seed, | ||||
|                         'leech': leech, | ||||
|                         'filesize': file_size, | ||||
|                         'filesize': filesize, | ||||
|                         'torrentfile': torrent_link, | ||||
|                         'magnetlink': magnet_link, | ||||
|                         'template': 'torrent.html'}) | ||||
| 
 | ||||
|     return results | ||||
|  |  | |||
|  | @ -14,8 +14,8 @@ import re | |||
| from lxml import html | ||||
| from searx.engines.xpath import extract_text | ||||
| from datetime import datetime | ||||
| from searx.engines.nyaa import int_or_zero, get_filesize_mul | ||||
| from searx.url_utils import urlencode | ||||
| from searx.utils import get_torrent_size, int_or_zero | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['files', 'videos', 'music'] | ||||
|  | @ -76,8 +76,7 @@ def response(resp): | |||
|                 try: | ||||
|                     # ('1.228', 'GB') | ||||
|                     groups = size_re.match(item).groups() | ||||
|                     multiplier = get_filesize_mul(groups[1]) | ||||
|                     params['filesize'] = int(multiplier * float(groups[0])) | ||||
|                     params['filesize'] = get_torrent_size(groups[0], groups[1]) | ||||
|                 except: | ||||
|                     pass | ||||
|             elif item.startswith('Date:'): | ||||
|  |  | |||
|  | @ -290,6 +290,15 @@ def convert_str_to_int(number_str): | |||
|         return 0 | ||||
| 
 | ||||
| 
 | ||||
| # convert a variable to integer or return 0 if it's not a number | ||||
| def int_or_zero(num): | ||||
|     if isinstance(num, list): | ||||
|         if len(num) < 1: | ||||
|             return 0 | ||||
|         num = num[0] | ||||
|     return convert_str_to_int(num) | ||||
| 
 | ||||
| 
 | ||||
| def is_valid_lang(lang): | ||||
|     is_abbr = (len(lang) == 2) | ||||
|     if is_abbr: | ||||
|  |  | |||
|  | @ -13,38 +13,92 @@ class TestNyaaEngine(SearxTestCase): | |||
|         params = nyaa.request(query, dic) | ||||
|         self.assertTrue('url' in params) | ||||
|         self.assertTrue(query in params['url']) | ||||
|         self.assertTrue('nyaa.se' in params['url']) | ||||
|         self.assertTrue('nyaa.si' in params['url']) | ||||
| 
 | ||||
|     def test_response(self): | ||||
|         resp = mock.Mock(text='<html></html>') | ||||
|         self.assertEqual(nyaa.response(resp), []) | ||||
| 
 | ||||
|         html = """ | ||||
|         <table class="tlist"> | ||||
|           <tbody> | ||||
|             <tr class="trusted tlistrow"> | ||||
|               <td class="tlisticon"> | ||||
|                 <a href="//www.nyaa.se" title="English-translated Anime"> | ||||
|                    <img src="//files.nyaa.se" alt="English-translated Anime"> | ||||
|                 </a> | ||||
|               </td> | ||||
|               <td class="tlistname"> | ||||
|                 <a href="//www.nyaa.se/?page3"> | ||||
|                   Sample torrent title | ||||
|                 </a> | ||||
|               </td> | ||||
|               <td class="tlistdownload"> | ||||
|                 <a href="//www.nyaa.se/?page_dl" title="Download"> | ||||
|                   <img src="//files.nyaa.se/www-dl.png" alt="DL"> | ||||
|                 </a> | ||||
|               </td> | ||||
|               <td class="tlistsize">10 MiB</td> | ||||
|               <td class="tlistsn">1</td> | ||||
|               <td class="tlistln">3</td> | ||||
|               <td class="tlistdn">666</td> | ||||
|               <td class="tlistmn">0</td> | ||||
|             </tr> | ||||
|           </tbody> | ||||
|         <table class="table table-bordered table-hover table-striped torrent-list"> | ||||
|         <thead> | ||||
|         <tr> | ||||
|         <th class="hdr-category text-center" style="width:80px;"> | ||||
|         <div>Category</div> | ||||
|         </th> | ||||
|         <th class="hdr-name" style="width:auto;"> | ||||
|         <div>Name</div> | ||||
|         </th> | ||||
|         <th class="hdr-comments sorting text-center" title="Comments" style="width:50px;"> | ||||
|         <a href="/?f=0&c=0_0&q=Death+Parade&s=comments&o=desc"></a> | ||||
|         <i class="fa fa-comments-o"></i> | ||||
|         </th> | ||||
|         <th class="hdr-link text-center" style="width:70px;"> | ||||
|         <div>Link</div> | ||||
|         </th> | ||||
|         <th class="hdr-size sorting text-center" style="width:100px;"> | ||||
|         <a href="/?f=0&c=0_0&q=Death+Parade&s=size&o=desc"></a> | ||||
|         <div>Size</div> | ||||
|         </th> | ||||
|         <th class="hdr-date sorting_desc text-center" title="In local time" style="width:140px;"> | ||||
|         <a href="/?f=0&c=0_0&q=Death+Parade&s=id&o=asc"></a> | ||||
|         <div>Date</div> | ||||
|         </th> | ||||
|         <th class="hdr-seeders sorting text-center" title="Seeders" style="width:50px;"> | ||||
|         <a href="/?f=0&c=0_0&q=Death+Parade&s=seeders&o=desc"></a> | ||||
|         <i class="fa fa-arrow-up" aria-hidden="true"></i> | ||||
|         </th> | ||||
|         <th class="hdr-leechers sorting text-center" title="Leechers" style="width:50px;"> | ||||
|         <a href="/?f=0&c=0_0&q=Death+Parade&s=leechers&o=desc"></a> | ||||
|         <i class="fa fa-arrow-down" aria-hidden="true"></i> | ||||
|         </th> | ||||
|         <th class="hdr-downloads sorting text-center" title="Completed downloads" style="width:50px;"> | ||||
|         <a href="/?f=0&c=0_0&q=Death+Parade&s=downloads&o=desc"></a> | ||||
|         <i class="fa fa-check" aria-hidden="true"></i> | ||||
|         </th> | ||||
|         </tr> | ||||
|         </thead> | ||||
|         <tbody> | ||||
|         <tr class="default"> | ||||
|         <td style="padding:0 4px;"> | ||||
|         <a href="/?c=1_2" title="Anime - English-translated"> | ||||
|         <img src="/static/img/icons/nyaa/1_2.png" alt="Anime - English-translated"> | ||||
|         </a> | ||||
|         </td> | ||||
|         <td colspan="2"> | ||||
|         <a href="/view/1" title="Sample title 1">Sample title 1</a> | ||||
|         </td> | ||||
|         <td class="text-center" style="white-space: nowrap;"> | ||||
|         <a href="/download/1.torrent"><i class="fa fa-fw fa-download"></i></a> | ||||
|         <a href="magnet:?xt=urn:btih:2"><i class="fa fa-fw fa-magnet"></i></a> | ||||
|         </td> | ||||
|         <td class="text-center">723.7 MiB</td> | ||||
|         <td class="text-center" data-timestamp="1503307456" title="1 week 3 | ||||
|         days 9 hours 44 minutes 39 seconds ago">2017-08-21 11:24</td> | ||||
|         <td class="text-center" style="color: green;">1</td> | ||||
|         <td class="text-center" style="color: red;">3</td> | ||||
|         <td class="text-center">12</td> | ||||
|         </tr> | ||||
|         <tr class="default"> | ||||
|         <td style="padding:0 4px;"> | ||||
|         <a href="/?c=1_2" title="Anime - English-translated"> | ||||
|         <img src="/static/img/icons/nyaa/1_2.png" alt="Anime - English-translated"> | ||||
|         </a> | ||||
|         </td> | ||||
|         <td colspan="2"> | ||||
|         <a href="/view/2" title="Sample title 2">Sample title 2</a> | ||||
|         </td> | ||||
|         <td class="text-center" style="white-space: nowrap;"> | ||||
|         <a href="magnet:?xt=urn:btih:2"><i class="fa fa-fw fa-magnet"></i></a> | ||||
|         </td> | ||||
|         <td class="text-center">8.2 GiB</td> | ||||
|         <td class="text-center" data-timestamp="1491608400" title="4 months 3 | ||||
|         weeks 4 days 19 hours 28 minutes 55 seconds ago">2017-04-08 01:40</td> | ||||
|         <td class="text-center" style="color: green;">10</td> | ||||
|         <td class="text-center" style="color: red;">1</td> | ||||
|         <td class="text-center">206</td> | ||||
|         </tr> | ||||
|         </tbody> | ||||
|         </table> | ||||
|         """ | ||||
| 
 | ||||
|  | @ -52,15 +106,19 @@ class TestNyaaEngine(SearxTestCase): | |||
|         results = nyaa.response(resp) | ||||
| 
 | ||||
|         self.assertEqual(type(results), list) | ||||
|         self.assertEqual(len(results), 1) | ||||
|         self.assertEqual(len(results), 2) | ||||
| 
 | ||||
|         r = results[0] | ||||
|         self.assertTrue(r['url'].find('www.nyaa.se/?page3') >= 0) | ||||
|         self.assertTrue(r['torrentfile'].find('www.nyaa.se/?page_dl') >= 0) | ||||
|         self.assertTrue(r['content'].find('English-translated Anime') >= 0) | ||||
|         self.assertTrue(r['content'].find('Downloaded 666 times.') >= 0) | ||||
|         self.assertTrue(r['url'].find('1') >= 0) | ||||
|         self.assertTrue(r['torrentfile'].find('1.torrent') >= 0) | ||||
|         self.assertTrue(r['content'].find('Anime - English-translated') >= 0) | ||||
|         self.assertTrue(r['content'].find('Downloaded 12 times.') >= 0) | ||||
| 
 | ||||
|         self.assertEqual(r['title'], 'Sample torrent title') | ||||
|         self.assertEqual(r['title'], 'Sample title 1') | ||||
|         self.assertEqual(r['seed'], 1) | ||||
|         self.assertEqual(r['leech'], 3) | ||||
|         self.assertEqual(r['filesize'], 10 * 1024 * 1024) | ||||
|         self.assertEqual(r['filesize'], 723700000) | ||||
| 
 | ||||
|         r = results[1] | ||||
|         self.assertTrue(r['url'].find('2') >= 0) | ||||
|         self.assertTrue(r['magnetlink'].find('magnet:') >= 0) | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Adam Tauber
						Adam Tauber