mirror of
				https://github.com/searxng/searxng
				synced 2024-01-01 19:24:07 +01:00 
			
		
		
		
	
						commit
						f4df27fa59
					
				
					 4 changed files with 142 additions and 85 deletions
				
			
		|  | @ -1,7 +1,7 @@ | ||||||
| """ | """ | ||||||
|  Nyaa.se (Anime Bittorrent tracker) |  Nyaa.si (Anime Bittorrent tracker) | ||||||
| 
 | 
 | ||||||
|  @website      http://www.nyaa.se/ |  @website      http://www.nyaa.si/ | ||||||
|  @provide-api  no |  @provide-api  no | ||||||
|  @using-api    no |  @using-api    no | ||||||
|  @results      HTML |  @results      HTML | ||||||
|  | @ -12,50 +12,25 @@ | ||||||
| from lxml import html | from lxml import html | ||||||
| from searx.engines.xpath import extract_text | from searx.engines.xpath import extract_text | ||||||
| from searx.url_utils import urlencode | from searx.url_utils import urlencode | ||||||
|  | from searx.utils import get_torrent_size, int_or_zero | ||||||
| 
 | 
 | ||||||
| # engine dependent config | # engine dependent config | ||||||
| categories = ['files', 'images', 'videos', 'music'] | categories = ['files', 'images', 'videos', 'music'] | ||||||
| paging = True | paging = True | ||||||
| 
 | 
 | ||||||
| # search-url | # search-url | ||||||
| base_url = 'http://www.nyaa.se/' | base_url = 'http://www.nyaa.si/' | ||||||
| search_url = base_url + '?page=search&{query}&offset={offset}' | search_url = base_url + '?page=search&{query}&offset={offset}' | ||||||
| 
 | 
 | ||||||
| # xpath queries | # xpath queries | ||||||
| xpath_results = '//table[@class="tlist"]//tr[contains(@class, "tlistrow")]' | xpath_results = '//table[contains(@class, "torrent-list")]//tr[not(th)]' | ||||||
| xpath_category = './/td[@class="tlisticon"]/a' | xpath_category = './/td[1]/a[1]' | ||||||
| xpath_title = './/td[@class="tlistname"]/a' | xpath_title = './/td[2]/a[last()]' | ||||||
| xpath_torrent_file = './/td[@class="tlistdownload"]/a' | xpath_torrent_links = './/td[3]/a' | ||||||
| xpath_filesize = './/td[@class="tlistsize"]/text()' | xpath_filesize = './/td[4]/text()' | ||||||
| xpath_seeds = './/td[@class="tlistsn"]/text()' | xpath_seeds = './/td[6]/text()' | ||||||
| xpath_leeches = './/td[@class="tlistln"]/text()' | xpath_leeches = './/td[7]/text()' | ||||||
| xpath_downloads = './/td[@class="tlistdn"]/text()' | xpath_downloads = './/td[8]/text()' | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| # convert a variable to integer or return 0 if it's not a number |  | ||||||
| def int_or_zero(num): |  | ||||||
|     if isinstance(num, list): |  | ||||||
|         if len(num) < 1: |  | ||||||
|             return 0 |  | ||||||
|         num = num[0] |  | ||||||
|     if num.isdigit(): |  | ||||||
|         return int(num) |  | ||||||
|     return 0 |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| # get multiplier to convert torrent size to bytes |  | ||||||
| def get_filesize_mul(suffix): |  | ||||||
|     return { |  | ||||||
|         'KB': 1024, |  | ||||||
|         'MB': 1024 ** 2, |  | ||||||
|         'GB': 1024 ** 3, |  | ||||||
|         'TB': 1024 ** 4, |  | ||||||
| 
 |  | ||||||
|         'KIB': 1024, |  | ||||||
|         'MIB': 1024 ** 2, |  | ||||||
|         'GIB': 1024 ** 3, |  | ||||||
|         'TIB': 1024 ** 4 |  | ||||||
|     }[str(suffix).upper()] |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| # do search-request | # do search-request | ||||||
|  | @ -72,25 +47,32 @@ def response(resp): | ||||||
|     dom = html.fromstring(resp.text) |     dom = html.fromstring(resp.text) | ||||||
| 
 | 
 | ||||||
|     for result in dom.xpath(xpath_results): |     for result in dom.xpath(xpath_results): | ||||||
|  |         # defaults | ||||||
|  |         filesize = 0 | ||||||
|  |         magnet_link = "" | ||||||
|  |         torrent_link = "" | ||||||
|  | 
 | ||||||
|         # category in which our torrent belongs |         # category in which our torrent belongs | ||||||
|         category = result.xpath(xpath_category)[0].attrib.get('title') |         try: | ||||||
|  |             category = result.xpath(xpath_category)[0].attrib.get('title') | ||||||
|  |         except: | ||||||
|  |             pass | ||||||
| 
 | 
 | ||||||
|         # torrent title |         # torrent title | ||||||
|         page_a = result.xpath(xpath_title)[0] |         page_a = result.xpath(xpath_title)[0] | ||||||
|         title = extract_text(page_a) |         title = extract_text(page_a) | ||||||
| 
 | 
 | ||||||
|         # link to the page |         # link to the page | ||||||
|         href = page_a.attrib.get('href') |         href = base_url + page_a.attrib.get('href') | ||||||
| 
 | 
 | ||||||
|         # link to the torrent file |         for link in result.xpath(xpath_torrent_links): | ||||||
|         torrent_link = result.xpath(xpath_torrent_file)[0].attrib.get('href') |             url = link.attrib.get('href') | ||||||
| 
 |             if 'magnet' in url: | ||||||
|         # torrent size |                 # link to the magnet | ||||||
|         try: |                 magnet_link = url | ||||||
|             file_size, suffix = result.xpath(xpath_filesize)[0].split(' ') |             else: | ||||||
|             file_size = int(float(file_size) * get_filesize_mul(suffix)) |                 # link to the torrent file | ||||||
|         except: |                 torrent_link = url | ||||||
|             file_size = None |  | ||||||
| 
 | 
 | ||||||
|         # seed count |         # seed count | ||||||
|         seed = int_or_zero(result.xpath(xpath_seeds)) |         seed = int_or_zero(result.xpath(xpath_seeds)) | ||||||
|  | @ -101,6 +83,14 @@ def response(resp): | ||||||
|         # torrent downloads count |         # torrent downloads count | ||||||
|         downloads = int_or_zero(result.xpath(xpath_downloads)) |         downloads = int_or_zero(result.xpath(xpath_downloads)) | ||||||
| 
 | 
 | ||||||
|  |         # let's try to calculate the torrent size | ||||||
|  |         try: | ||||||
|  |             filesize_info = result.xpath(xpath_filesize)[0] | ||||||
|  |             filesize, filesize_multiplier = filesize_info.split() | ||||||
|  |             filesize = get_torrent_size(filesize, filesize_multiplier) | ||||||
|  |         except: | ||||||
|  |             pass | ||||||
|  | 
 | ||||||
|         # content string contains all information not included into template |         # content string contains all information not included into template | ||||||
|         content = 'Category: "{category}". Downloaded {downloads} times.' |         content = 'Category: "{category}". Downloaded {downloads} times.' | ||||||
|         content = content.format(category=category, downloads=downloads) |         content = content.format(category=category, downloads=downloads) | ||||||
|  | @ -110,8 +100,9 @@ def response(resp): | ||||||
|                         'content': content, |                         'content': content, | ||||||
|                         'seed': seed, |                         'seed': seed, | ||||||
|                         'leech': leech, |                         'leech': leech, | ||||||
|                         'filesize': file_size, |                         'filesize': filesize, | ||||||
|                         'torrentfile': torrent_link, |                         'torrentfile': torrent_link, | ||||||
|  |                         'magnetlink': magnet_link, | ||||||
|                         'template': 'torrent.html'}) |                         'template': 'torrent.html'}) | ||||||
| 
 | 
 | ||||||
|     return results |     return results | ||||||
|  |  | ||||||
|  | @ -14,8 +14,8 @@ import re | ||||||
| from lxml import html | from lxml import html | ||||||
| from searx.engines.xpath import extract_text | from searx.engines.xpath import extract_text | ||||||
| from datetime import datetime | from datetime import datetime | ||||||
| from searx.engines.nyaa import int_or_zero, get_filesize_mul |  | ||||||
| from searx.url_utils import urlencode | from searx.url_utils import urlencode | ||||||
|  | from searx.utils import get_torrent_size, int_or_zero | ||||||
| 
 | 
 | ||||||
| # engine dependent config | # engine dependent config | ||||||
| categories = ['files', 'videos', 'music'] | categories = ['files', 'videos', 'music'] | ||||||
|  | @ -76,8 +76,7 @@ def response(resp): | ||||||
|                 try: |                 try: | ||||||
|                     # ('1.228', 'GB') |                     # ('1.228', 'GB') | ||||||
|                     groups = size_re.match(item).groups() |                     groups = size_re.match(item).groups() | ||||||
|                     multiplier = get_filesize_mul(groups[1]) |                     params['filesize'] = get_torrent_size(groups[0], groups[1]) | ||||||
|                     params['filesize'] = int(multiplier * float(groups[0])) |  | ||||||
|                 except: |                 except: | ||||||
|                     pass |                     pass | ||||||
|             elif item.startswith('Date:'): |             elif item.startswith('Date:'): | ||||||
|  |  | ||||||
|  | @ -290,6 +290,15 @@ def convert_str_to_int(number_str): | ||||||
|         return 0 |         return 0 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | # convert a variable to integer or return 0 if it's not a number | ||||||
|  | def int_or_zero(num): | ||||||
|  |     if isinstance(num, list): | ||||||
|  |         if len(num) < 1: | ||||||
|  |             return 0 | ||||||
|  |         num = num[0] | ||||||
|  |     return convert_str_to_int(num) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| def is_valid_lang(lang): | def is_valid_lang(lang): | ||||||
|     is_abbr = (len(lang) == 2) |     is_abbr = (len(lang) == 2) | ||||||
|     if is_abbr: |     if is_abbr: | ||||||
|  |  | ||||||
|  | @ -13,38 +13,92 @@ class TestNyaaEngine(SearxTestCase): | ||||||
|         params = nyaa.request(query, dic) |         params = nyaa.request(query, dic) | ||||||
|         self.assertTrue('url' in params) |         self.assertTrue('url' in params) | ||||||
|         self.assertTrue(query in params['url']) |         self.assertTrue(query in params['url']) | ||||||
|         self.assertTrue('nyaa.se' in params['url']) |         self.assertTrue('nyaa.si' in params['url']) | ||||||
| 
 | 
 | ||||||
|     def test_response(self): |     def test_response(self): | ||||||
|         resp = mock.Mock(text='<html></html>') |         resp = mock.Mock(text='<html></html>') | ||||||
|         self.assertEqual(nyaa.response(resp), []) |         self.assertEqual(nyaa.response(resp), []) | ||||||
| 
 | 
 | ||||||
|         html = """ |         html = """ | ||||||
|         <table class="tlist"> |         <table class="table table-bordered table-hover table-striped torrent-list"> | ||||||
|           <tbody> |         <thead> | ||||||
|             <tr class="trusted tlistrow"> |         <tr> | ||||||
|               <td class="tlisticon"> |         <th class="hdr-category text-center" style="width:80px;"> | ||||||
|                 <a href="//www.nyaa.se" title="English-translated Anime"> |         <div>Category</div> | ||||||
|                    <img src="//files.nyaa.se" alt="English-translated Anime"> |         </th> | ||||||
|                 </a> |         <th class="hdr-name" style="width:auto;"> | ||||||
|               </td> |         <div>Name</div> | ||||||
|               <td class="tlistname"> |         </th> | ||||||
|                 <a href="//www.nyaa.se/?page3"> |         <th class="hdr-comments sorting text-center" title="Comments" style="width:50px;"> | ||||||
|                   Sample torrent title |         <a href="/?f=0&c=0_0&q=Death+Parade&s=comments&o=desc"></a> | ||||||
|                 </a> |         <i class="fa fa-comments-o"></i> | ||||||
|               </td> |         </th> | ||||||
|               <td class="tlistdownload"> |         <th class="hdr-link text-center" style="width:70px;"> | ||||||
|                 <a href="//www.nyaa.se/?page_dl" title="Download"> |         <div>Link</div> | ||||||
|                   <img src="//files.nyaa.se/www-dl.png" alt="DL"> |         </th> | ||||||
|                 </a> |         <th class="hdr-size sorting text-center" style="width:100px;"> | ||||||
|               </td> |         <a href="/?f=0&c=0_0&q=Death+Parade&s=size&o=desc"></a> | ||||||
|               <td class="tlistsize">10 MiB</td> |         <div>Size</div> | ||||||
|               <td class="tlistsn">1</td> |         </th> | ||||||
|               <td class="tlistln">3</td> |         <th class="hdr-date sorting_desc text-center" title="In local time" style="width:140px;"> | ||||||
|               <td class="tlistdn">666</td> |         <a href="/?f=0&c=0_0&q=Death+Parade&s=id&o=asc"></a> | ||||||
|               <td class="tlistmn">0</td> |         <div>Date</div> | ||||||
|             </tr> |         </th> | ||||||
|           </tbody> |         <th class="hdr-seeders sorting text-center" title="Seeders" style="width:50px;"> | ||||||
|  |         <a href="/?f=0&c=0_0&q=Death+Parade&s=seeders&o=desc"></a> | ||||||
|  |         <i class="fa fa-arrow-up" aria-hidden="true"></i> | ||||||
|  |         </th> | ||||||
|  |         <th class="hdr-leechers sorting text-center" title="Leechers" style="width:50px;"> | ||||||
|  |         <a href="/?f=0&c=0_0&q=Death+Parade&s=leechers&o=desc"></a> | ||||||
|  |         <i class="fa fa-arrow-down" aria-hidden="true"></i> | ||||||
|  |         </th> | ||||||
|  |         <th class="hdr-downloads sorting text-center" title="Completed downloads" style="width:50px;"> | ||||||
|  |         <a href="/?f=0&c=0_0&q=Death+Parade&s=downloads&o=desc"></a> | ||||||
|  |         <i class="fa fa-check" aria-hidden="true"></i> | ||||||
|  |         </th> | ||||||
|  |         </tr> | ||||||
|  |         </thead> | ||||||
|  |         <tbody> | ||||||
|  |         <tr class="default"> | ||||||
|  |         <td style="padding:0 4px;"> | ||||||
|  |         <a href="/?c=1_2" title="Anime - English-translated"> | ||||||
|  |         <img src="/static/img/icons/nyaa/1_2.png" alt="Anime - English-translated"> | ||||||
|  |         </a> | ||||||
|  |         </td> | ||||||
|  |         <td colspan="2"> | ||||||
|  |         <a href="/view/1" title="Sample title 1">Sample title 1</a> | ||||||
|  |         </td> | ||||||
|  |         <td class="text-center" style="white-space: nowrap;"> | ||||||
|  |         <a href="/download/1.torrent"><i class="fa fa-fw fa-download"></i></a> | ||||||
|  |         <a href="magnet:?xt=urn:btih:2"><i class="fa fa-fw fa-magnet"></i></a> | ||||||
|  |         </td> | ||||||
|  |         <td class="text-center">723.7 MiB</td> | ||||||
|  |         <td class="text-center" data-timestamp="1503307456" title="1 week 3 | ||||||
|  |         days 9 hours 44 minutes 39 seconds ago">2017-08-21 11:24</td> | ||||||
|  |         <td class="text-center" style="color: green;">1</td> | ||||||
|  |         <td class="text-center" style="color: red;">3</td> | ||||||
|  |         <td class="text-center">12</td> | ||||||
|  |         </tr> | ||||||
|  |         <tr class="default"> | ||||||
|  |         <td style="padding:0 4px;"> | ||||||
|  |         <a href="/?c=1_2" title="Anime - English-translated"> | ||||||
|  |         <img src="/static/img/icons/nyaa/1_2.png" alt="Anime - English-translated"> | ||||||
|  |         </a> | ||||||
|  |         </td> | ||||||
|  |         <td colspan="2"> | ||||||
|  |         <a href="/view/2" title="Sample title 2">Sample title 2</a> | ||||||
|  |         </td> | ||||||
|  |         <td class="text-center" style="white-space: nowrap;"> | ||||||
|  |         <a href="magnet:?xt=urn:btih:2"><i class="fa fa-fw fa-magnet"></i></a> | ||||||
|  |         </td> | ||||||
|  |         <td class="text-center">8.2 GiB</td> | ||||||
|  |         <td class="text-center" data-timestamp="1491608400" title="4 months 3 | ||||||
|  |         weeks 4 days 19 hours 28 minutes 55 seconds ago">2017-04-08 01:40</td> | ||||||
|  |         <td class="text-center" style="color: green;">10</td> | ||||||
|  |         <td class="text-center" style="color: red;">1</td> | ||||||
|  |         <td class="text-center">206</td> | ||||||
|  |         </tr> | ||||||
|  |         </tbody> | ||||||
|         </table> |         </table> | ||||||
|         """ |         """ | ||||||
| 
 | 
 | ||||||
|  | @ -52,15 +106,19 @@ class TestNyaaEngine(SearxTestCase): | ||||||
|         results = nyaa.response(resp) |         results = nyaa.response(resp) | ||||||
| 
 | 
 | ||||||
|         self.assertEqual(type(results), list) |         self.assertEqual(type(results), list) | ||||||
|         self.assertEqual(len(results), 1) |         self.assertEqual(len(results), 2) | ||||||
| 
 | 
 | ||||||
|         r = results[0] |         r = results[0] | ||||||
|         self.assertTrue(r['url'].find('www.nyaa.se/?page3') >= 0) |         self.assertTrue(r['url'].find('1') >= 0) | ||||||
|         self.assertTrue(r['torrentfile'].find('www.nyaa.se/?page_dl') >= 0) |         self.assertTrue(r['torrentfile'].find('1.torrent') >= 0) | ||||||
|         self.assertTrue(r['content'].find('English-translated Anime') >= 0) |         self.assertTrue(r['content'].find('Anime - English-translated') >= 0) | ||||||
|         self.assertTrue(r['content'].find('Downloaded 666 times.') >= 0) |         self.assertTrue(r['content'].find('Downloaded 12 times.') >= 0) | ||||||
| 
 | 
 | ||||||
|         self.assertEqual(r['title'], 'Sample torrent title') |         self.assertEqual(r['title'], 'Sample title 1') | ||||||
|         self.assertEqual(r['seed'], 1) |         self.assertEqual(r['seed'], 1) | ||||||
|         self.assertEqual(r['leech'], 3) |         self.assertEqual(r['leech'], 3) | ||||||
|         self.assertEqual(r['filesize'], 10 * 1024 * 1024) |         self.assertEqual(r['filesize'], 723700000) | ||||||
|  | 
 | ||||||
|  |         r = results[1] | ||||||
|  |         self.assertTrue(r['url'].find('2') >= 0) | ||||||
|  |         self.assertTrue(r['magnetlink'].find('magnet:') >= 0) | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Adam Tauber
						Adam Tauber