mirror of
				https://github.com/searxng/searxng
				synced 2024-01-01 19:24:07 +01:00 
			
		
		
		
	Add Torrentz.eu search engine
This commit is contained in:
		
							parent
							
								
									d026a97e42
								
							
						
					
					
						commit
						7fbc12ee4e
					
				
					 4 changed files with 203 additions and 12 deletions
				
			
		|  | @ -43,6 +43,19 @@ def int_or_zero(num): | |||
|         return int(num) | ||||
|     return 0 | ||||
| 
 | ||||
| # get multiplier to convert torrent size to bytes | ||||
def get_filesize_mul(suffix):
    """Return the number of bytes in one unit of the given size suffix.

    Recognised suffixes are KB/MB/GB/TB and KiB/MiB/GiB/TiB, compared
    case-insensitively; both spellings are treated as powers of 1024.
    Any other suffix raises KeyError, which callers catch in order to
    discard sizes they cannot parse.
    """
    exponents = {}
    for power, prefix in enumerate('KMGT', 1):
        # 'KB' and 'KIB' (and so on) deliberately share one multiplier
        exponents[prefix + 'B'] = power
        exponents[prefix + 'IB'] = power
    return 1024 ** exponents[str(suffix).upper()]
| 
 | ||||
| # do search-request | ||||
| def request(query, params): | ||||
|  | @ -74,18 +87,7 @@ def response(resp): | |||
|         # torrent size | ||||
|         try: | ||||
|             file_size, suffix = result.xpath(xpath_filesize)[0].split(' ') | ||||
| 
 | ||||
|             # convert torrent size to bytes. | ||||
|             # if there is no correct index in this dictionary, | ||||
|             # the try block fails as it should | ||||
|             multiplier = { | ||||
|                 'KIB': 1024, | ||||
|                 'MIB': 1024 ** 2, | ||||
|                 'GIB': 1024 ** 3, | ||||
|                 'TIB': 1024 ** 4 | ||||
|             }[suffix.upper()] | ||||
| 
 | ||||
|             file_size = int(float(file_size) * multiplier) | ||||
|             file_size = int(float(file_size) * get_filesize_mul(suffix)) | ||||
|         except Exception as e: | ||||
|             file_size = None | ||||
| 
 | ||||
|  |  | |||
							
								
								
									
										93
									
								
								searx/engines/torrentz.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										93
									
								
								searx/engines/torrentz.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,93 @@ | |||
| """ | ||||
|  Torrentz.eu (BitTorrent meta-search engine) | ||||
| 
 | ||||
|  @website      https://torrentz.eu/ | ||||
|  @provide-api  no | ||||
| 
 | ||||
|  @using-api    no | ||||
|  @results      HTML | ||||
|  @stable       no (HTML can change, although unlikely, | ||||
|                    see https://torrentz.eu/torrentz.btsearch) | ||||
|  @parse        url, title, publishedDate, seed, leech, filesize, magnetlink | ||||
| """ | ||||
| 
 | ||||
| import re | ||||
| from cgi import escape | ||||
| from urllib import urlencode | ||||
| from lxml import html | ||||
| from searx.engines.xpath import extract_text | ||||
| from datetime import datetime | ||||
| from searx.engines.nyaa import int_or_zero, get_filesize_mul | ||||
| 
 | ||||
# engine dependent config
# categories under which this engine's results are listed
categories = ['files', 'videos', 'music']
# the engine supports paging; request() reads params['pageno']
paging = True

# search-url
# https://torrentz.eu/search?f=EXAMPLE&p=6
# NOTE(review): the example above uses the "f" parameter while request()
# sends the search terms as "q" -- confirm which one the site expects.
base_url = 'https://torrentz.eu/'
# "{query}" is replaced by the url-encoded query string built in request()
search_url = base_url + 'search?{query}'
| 
 | ||||
| 
 | ||||
| # do search-request | ||||
def request(query, params):
    """Fill in ``params['url']`` for a Torrentz search and return ``params``.

    query  -- the search terms
    params -- request parameters; ``params['pageno']`` is read as the
              1-based page number and ``params['url']`` is written
    """
    # searx counts pages from 1, the "p" url argument is sent zero-based
    zero_based_page = params['pageno'] - 1
    encoded_args = urlencode({'q': query, 'p': zero_based_page})
    params['url'] = search_url.format(query=encoded_args)
    return params
| 
 | ||||
| 
 | ||||
| # get response from search-request | ||||
def response(resp):
    """Parse a Torrentz result page into a list of result dicts.

    Every ``<dl>`` inside ``<div class="results">`` whose ``<dt>`` holds
    exactly one link yields one dict with ``url``, ``title``, ``seed``,
    ``leech`` and ``template``.  ``filesize``, ``magnetlink`` and
    ``publishedDate`` are added only when they can be parsed from the row;
    a malformed row never aborts parsing of the remaining rows.

    resp -- response object; only its ``text`` attribute (the HTML) is read
    """
    results = []

    dom = html.fromstring(resp.text)

    # compiled once for all rows; \Z anchors the end so that only a link
    # made of exactly 40 hex digits is accepted as a SHA1 info hash
    info_hash_re = re.compile(r'[0-9a-fA-F]{40}\Z')

    for result in dom.xpath('//div[@class="results"]/dl'):
        name_cells = result.xpath('./dt')
        if not name_cells:
            continue
        name_cell = name_cells[0]

        # skip rows that do not contain a link to a torrent
        links = name_cell.xpath('./a')
        if len(links) != 1:
            continue

        href = links[0].attrib.get('href')
        if href is None:
            continue

        # extract url and remove a slash in the beginning
        link = href.lstrip('/')

        title = extract_text(name_cell)

        # seeder / leecher counts use "," as thousands separator;
        # a row without these cells is reported with a count of 0
        seed_texts = result.xpath('./dd/span[@class="u"]/text()')
        leech_texts = result.xpath('./dd/span[@class="d"]/text()')
        seed = int_or_zero(seed_texts[0].replace(',', '')) if seed_texts else 0
        leech = int_or_zero(leech_texts[0].replace(',', '')) if leech_texts else 0

        params = {
            'url': base_url + link,
            'title': title,
            'seed': seed,
            'leech': leech,
            'template': 'torrent.html'
        }

        # let's try to calculate the torrent size
        try:
            size_str = result.xpath('./dd/span[@class="s"]/text()')[0]
            size, suffix = size_str.split()
            # float() also accepts fractional sizes such as "1.5 GB"
            params['filesize'] = int(float(size) * get_filesize_mul(suffix))
        except (IndexError, KeyError, ValueError, OverflowError):
            # missing cell, unknown suffix or unparsable number:
            # leave 'filesize' out of the result
            pass

        # does our link consist of a valid SHA1 sum?
        if info_hash_re.match(link):
            # add a magnet link to the result
            params['magnetlink'] = 'magnet:?xt=urn:btih:' + link

        # extract and convert creation date
        try:
            date_str = result.xpath('./dd/span[@class="a"]/span')[0].attrib.get('title')
            # Fri, 25 Mar 2016 16:29:01
            params['publishedDate'] = datetime.strptime(date_str, '%a, %d %b %Y %H:%M:%S')
        except (IndexError, TypeError, ValueError):
            # missing cell, missing title attribute or unexpected format:
            # leave 'publishedDate' out of the result
            pass

        results.append(params)

    return results
|  | @ -271,6 +271,11 @@ engines: | |||
|     shortcut : sw | ||||
|     disabled : True | ||||
| 
 | ||||
|   - name : torrentz | ||||
|     engine : torrentz | ||||
|     timeout : 5.0 | ||||
|     shortcut : to | ||||
| 
 | ||||
|   - name : twitter | ||||
|     engine : twitter | ||||
|     shortcut : tw | ||||
|  |  | |||
							
								
								
									
										91
									
								
								tests/unit/engines/test_torrentz.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										91
									
								
								tests/unit/engines/test_torrentz.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,91 @@ | |||
| import mock | ||||
| from collections import defaultdict | ||||
| from searx.engines import torrentz | ||||
| from searx.testing import SearxTestCase | ||||
| from datetime import datetime | ||||
| 
 | ||||
| 
 | ||||
class TestTorrentzEngine(SearxTestCase):
    """Unit tests for the torrentz engine's request() and response()."""

    def test_request(self):
        """request() must build a torrentz.eu url that contains the query."""
        query = 'test_query'
        dic = defaultdict(dict)
        dic['pageno'] = 1
        params = torrentz.request(query, dic)
        self.assertIn('url', params)
        self.assertIn(query, params['url'])
        self.assertIn('torrentz.eu', params['url'])

    def test_response(self):
        """response() must parse valid rows and tolerate malformed fields."""
        # a page without any result rows yields an empty list
        resp = mock.Mock(text='<html></html>')
        self.assertEqual(torrentz.response(resp), [])

        html = """
        <div class="results">
          <dl>
            <dt>
              <a href="/4362e08b1d80e1820fb2550b752f9f3126fe76d6">
                Completely valid info
              </a>
              books ebooks
            </dt>
            <dd>
              <span class="v">1</span>
              <span class="a">
                <span title="Sun, 22 Nov 2015 03:01:42">4 months</span>
              </span>
              <span class="s">30 MB</span>
              <span class="u">14</span>
              <span class="d">1</span>
            </dd>
          </dl>

          <dl>
            <dt>
              <a href="/poaskdpokaspod">
                Invalid hash and date and filesize
              </a>
              books ebooks
            </dt>
            <dd>
              <span class="v">1</span>
              <span class="a">
                <span title="Sun, 2124091j0j190gm42">4 months</span>
              </span>
              <span class="s">30MB</span>
              <span class="u">5,555</span>
              <span class="d">1,234,567</span>
            </dd>
          </dl>
        </div>
        """

        resp = mock.Mock(text=html)
        results = torrentz.response(resp)

        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 2)

        # testing against the first result
        r = results[0]
        self.assertEqual(r['url'], 'https://torrentz.eu/4362e08b1d80e1820fb2550b752f9f3126fe76d6')
        self.assertEqual(r['title'], 'Completely valid info books ebooks')
        # 22 Nov 2015 03:01:42
        self.assertEqual(r['publishedDate'], datetime(2015, 11, 22, 3, 1, 42))
        self.assertEqual(r['seed'], 14)
        self.assertEqual(r['leech'], 1)
        self.assertEqual(r['filesize'], 30 * 1024 * 1024)
        self.assertEqual(r['magnetlink'], 'magnet:?xt=urn:btih:4362e08b1d80e1820fb2550b752f9f3126fe76d6')

        # testing against the second result
        r = results[1]
        self.assertEqual(r['url'], 'https://torrentz.eu/poaskdpokaspod')
        self.assertEqual(r['title'], 'Invalid hash and date and filesize books ebooks')
        self.assertEqual(r['seed'], 5555)
        self.assertEqual(r['leech'], 1234567)

        # the second result has an invalid hash, creation date and torrent
        # size, so the corresponding keys must be absent from the result
        self.assertNotIn('magnetlink', r)
        self.assertNotIn('filesize', r)
        self.assertNotIn('publishedDate', r)
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Kirill Isakov
						Kirill Isakov