mirror of
				https://github.com/searxng/searxng
				synced 2024-01-01 19:24:07 +01:00 
			
		
		
		
	Merge pull request #1283 from rinpatch/acgsou-engine
[Feature] Acgsou as a searchable engine
This commit is contained in:
		
						commit
						1a1f9852f1
					
				
					 3 changed files with 158 additions and 0 deletions
				
			
		
							
								
								
									
										75
									
								
								searx/engines/acgsou.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										75
									
								
								searx/engines/acgsou.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,75 @@ | |||
| """ | ||||
|  Acgsou (Japanese Animation/Music/Comics Bittorrent tracker) | ||||
| 
 | ||||
|  @website      https://www.acgsou.com/ | ||||
|  @provide-api  no | ||||
|  @using-api    no | ||||
|  @results      HTML | ||||
|  @stable       no (HTML can change) | ||||
|  @parse        url, title, content, filesize, magnetlink | ||||
| """ | ||||
| 
 | ||||
| from lxml import html | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.url_utils import urlencode | ||||
| from searx.utils import get_torrent_size, int_or_zero | ||||
| 
 | ||||
# engine dependent config
paging = True
categories = ['files', 'images', 'videos', 'music']

# search-url: ``{query}`` and ``{offset}`` are placeholders filled in later
# through ``str.format``
base_url = 'https://www.acgsou.com/'
search_url = ''.join((base_url, 'search.php?{query}&page={offset}'))

# xpath queries: ``xpath_results`` selects the table rows that contain no
# <th> cell; the remaining queries start with ``.//`` and are therefore
# relative to the node they are evaluated on
xpath_results = '//table[contains(@class, "list_style table_fixed")]//tr[not(th)]'
xpath_filesize = './/td[4]/text()'
xpath_torrent_links = './/td[3]/a'
xpath_title = './/td[3]/a[last()]'
xpath_category = './/td[2]/a[1]'
| 
 | ||||
| 
 | ||||
def request(query, params):
    """Store the Acgsou search URL for ``query`` in ``params['url']``.

    The keyword is url-encoded under the ``keyword`` parameter and the page
    number is read from ``params['pageno']``.

    :param query: search terms to look up.
    :param params: request parameters; must contain ``pageno``. It is
        modified in place.
    :returns: the same ``params`` mapping, with ``url`` set.
    """
    encoded_keyword = urlencode({'keyword': query})
    page_number = params['pageno']
    params['url'] = search_url.format(query=encoded_keyword, offset=page_number)
    return params
| 
 | ||||
| 
 | ||||
def response(resp):
    """Parse an Acgsou search result page into torrent results.

    Every row selected by ``xpath_results`` yields one dict with the keys
    ``url``, ``title``, ``content``, ``filesize``, ``magnetlink`` and
    ``template``.  Rows that have no title link, or whose title link has no
    ``href``, are skipped instead of aborting the whole page.

    :param resp: response object; only its ``text`` attribute is read.
    :returns: list of result dicts, empty when no row matches.
    """
    # The template is identical for every row, so build it only once.
    magnet_template = "magnet:?xt=urn:btih:{}&tr=http://tracker.acgsou.com:2710/announce"

    results = []
    dom = html.fromstring(resp.text)
    for row in dom.xpath(xpath_results):
        # A row without a usable title link cannot produce a result.
        title_links = row.xpath(xpath_title)
        if not title_links:
            continue
        page_a = title_links[0]
        href_path = page_a.attrib.get('href')
        if not href_path:
            continue

        title = extract_text(page_a)
        href = base_url + href_path

        # Category is reset for every row, so a row lacking one can neither
        # raise NameError nor inherit the previous row's category.
        category_nodes = row.xpath(xpath_category)
        category = extract_text(category_nodes[0]) if category_nodes else ''

        # Drops the first and last five characters of the link target, e.g.
        # 'show-<id>.html' -> '<id>'.
        # NOTE(review): presumably <id> is the torrent info hash - confirm.
        magnet_link = magnet_template.format(href_path[5:-5])

        # The size cell holds a number followed by a two-character unit
        # (e.g. '1MB'); 0 is reported when the cell is missing.
        filesize = 0
        size_nodes = row.xpath(xpath_filesize)
        if size_nodes:
            size_text = size_nodes[0]
            try:
                filesize = get_torrent_size(size_text[:-2], size_text[-2:])
            except (ValueError, TypeError):
                # Best effort: an unparsable size must not drop the result.
                filesize = 0

        # Download/seed/leech counts are deliberately not reported: according
        # to the original author they are generated randomly on every request.
        content = 'Category: "{category}".'.format(category=category)

        results.append({'url': href,
                        'title': title,
                        'content': content,
                        'filesize': filesize,
                        'magnetlink': magnet_link,
                        'template': 'torrent.html'})
    return results
|  | @ -433,6 +433,12 @@ engines: | |||
|     engine : nyaa | ||||
|     shortcut : nt | ||||
|     disabled : True | ||||
|    | ||||
|   - name : acgsou | ||||
|     engine : acgsou | ||||
|     shortcut : acg | ||||
|     disabled : True | ||||
|     timeout: 5.0 | ||||
| 
 | ||||
|   - name : openairedatasets | ||||
|     engine : json_engine | ||||
|  |  | |||
							
								
								
									
										77
									
								
								tests/unit/engines/test_acgsou.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										77
									
								
								tests/unit/engines/test_acgsou.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,77 @@ | |||
| from collections import defaultdict | ||||
| import mock | ||||
| from searx.engines import acgsou | ||||
| from searx.testing import SearxTestCase | ||||
| 
 | ||||
| 
 | ||||
class TestAcgsouEngine(SearxTestCase):
    """Unit tests for the request and response hooks of the acgsou engine."""

    def test_request(self):
        """The built URL carries the query and points at acgsou.com."""
        keyword = 'test_query'
        request_params = defaultdict(dict)
        request_params['pageno'] = 1

        built = acgsou.request(keyword, request_params)

        self.assertIn('url', built)
        self.assertIn(keyword, built['url'])
        self.assertIn('acgsou.com', built['url'])

    def test_response(self):
        """An empty page gives no results; a one-row table gives one."""
        empty_page = mock.Mock(text='<html></html>')
        self.assertEqual(acgsou.response(empty_page), [])

        # Single result row preceded by a header row that must be ignored.
        page_markup = """
        <html>
<table id="listTable" class="list_style table_fixed">
  <thead class="tcat">
      <tr>
        <th axis="string" class="l1 tableHeaderOver">test</th>
        <th axis="string" class="l2 tableHeaderOver">test</th>
        <th axis="string" class="l3 tableHeaderOver">test</th>
        <th axis="size" class="l4 tableHeaderOver">test</th>
        <th axis="number" class="l5 tableHeaderOver">test</th>
        <th axis="number" class="l6 tableHeaderOver">test</th>
        <th axis="number" class="l7 tableHeaderOver">test</th>
        <th axis="string" class="l8 tableHeaderOver">test</th>
      </tr>
  </thead>
  <tbody class="tbody" id="data_list">
 <tr class="alt1 ">
        <td nowrap="nowrap">date</td>
        <td><a href="category.html">testcategory</a></td>
        <td style="text-align:left;">
            <a href="show-torrentid.html" target="_blank">torrentname</a>
        </td>
        <td>1MB</td>
        <td nowrap="nowrap">
            <span class="bts_1">
            29
            </span>
        </td>
        <td nowrap="nowrap">
            <span class="btl_1">
            211
        </span>
        </td>
        <td nowrap="nowrap">
        <span class="btc_">
            168
        </span>
        </td>
        <td><a href="random.html">user</a></td>
      </tr>
      </tbody>
</table>
</html>
        """

        filled_page = mock.Mock(text=page_markup)
        results = acgsou.response(filled_page)

        self.assertIs(type(results), list)
        self.assertEqual(len(results), 1)

        first = results[0]
        self.assertEqual(first['url'], 'https://www.acgsou.com/show-torrentid.html')
        self.assertEqual(first['content'], 'Category: "testcategory".')
        self.assertEqual(first['title'], 'torrentname')
        self.assertEqual(first['filesize'], 1048576)
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Adam Tauber
						Adam Tauber