mirror of
				https://github.com/searxng/searxng
				synced 2024-01-01 19:24:07 +01:00 
			
		
		
		
	Merge branch 'master' into patch-2
This commit is contained in:
		
						commit
						c2da901afa
					
				
					 4 changed files with 143 additions and 3 deletions
				
			
		|  | @ -1,6 +1,8 @@ | |||
| import hashlib | ||||
| import random | ||||
| import string | ||||
| import sys | ||||
| import uuid | ||||
| from flask_babel import gettext | ||||
| 
 | ||||
| # required answerer attribute | ||||
|  | @ -16,9 +18,13 @@ else: | |||
|     random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase | ||||
| 
 | ||||
| 
 | ||||
def random_characters():
    """Return a list of 8 to 32 characters picked from ``random_string_letters``."""
    # Draw the length first, then one character per position.
    length = random.randint(8, 32)
    picked = []
    for _ in range(length):
        picked.append(random.choice(random_string_letters))
    return picked
| 
 | ||||
| 
 | ||||
def random_string():
    """Return the characters produced by ``random_characters()`` joined into one text string."""
    # Single source of truth for the character generation: the previous
    # inline copy of the loop made the delegating return unreachable.
    return u''.join(random_characters())
| 
 | ||||
| 
 | ||||
| def random_float(): | ||||
|  | @ -29,9 +35,21 @@ def random_int(): | |||
|     return unicode(random.randint(-random_int_max, random_int_max)) | ||||
| 
 | ||||
| 
 | ||||
def random_sha256():
    """Return the SHA-256 hex digest of a freshly generated random string."""
    m = hashlib.sha256()
    seed = ''.join(random_characters())
    # hashlib only accepts bytes. On Python 3 the characters are text, so
    # ``b''.join(...)`` raises TypeError; encode explicitly instead. When the
    # joined value is already a byte string (Python 2 ``str``) it is used as is.
    if not isinstance(seed, bytes):
        seed = seed.encode('utf-8')
    m.update(seed)
    return unicode(m.hexdigest())
| 
 | ||||
| 
 | ||||
def random_uuid():
    """Return a newly generated ``uuid.uuid4()`` value converted to text."""
    generated = uuid.uuid4()
    return unicode(generated)
| 
 | ||||
| 
 | ||||
# Lookup table from type name (bytes key) to the function that generates a
# random value of that type.
random_types = {
    b'string': random_string,
    b'int': random_int,
    b'float': random_float,
    b'sha256': random_sha256,
    b'uuid': random_uuid,
}
| 
 | ||||
| 
 | ||||
| # required answerer function | ||||
|  |  | |||
							
								
								
									
										76
									
								
								searx/engines/duden.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										76
									
								
								searx/engines/duden.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,76 @@ | |||
| """ | ||||
|  Duden | ||||
|  @website     https://www.duden.de | ||||
|  @provide-api no | ||||
|  @using-api   no | ||||
|  @results     HTML (using search portal) | ||||
|  @stable      no (HTML can change) | ||||
|  @parse       url, title, content | ||||
| """ | ||||
| 
 | ||||
| from lxml import html, etree | ||||
| import re | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.url_utils import quote | ||||
| from searx import logger | ||||
| 
 | ||||
| categories = ['general'] | ||||
| paging = True | ||||
| language_support = False | ||||
| 
 | ||||
| # search-url | ||||
| base_url = 'https://www.duden.de/' | ||||
| search_url = base_url + 'suchen/dudenonline/{query}?page={offset}' | ||||
| 
 | ||||
| 
 | ||||
def request(query, params):
    '''pre-request callback: store the search URL in params and return it

    params<dict>:
      pageno  : 1  # number of the requested page (read here)
      url     : '' # overwritten with the formatted search_url
    '''
    # pageno starts at 1, the page parameter in the URL is one less
    page_index = params['pageno'] - 1
    quoted_query = quote(query)
    params['url'] = search_url.format(query=quoted_query, offset=page_index)
    return params
| 
 | ||||
| 
 | ||||
def response(resp):
    '''post-response callback

    resp: requests response object

    Returns a list of dicts. If the total hit count can be read from the
    page, the first entry is {'number_of_results': <int>}; each parsed
    result section adds an entry with 'url', 'title' and 'content'.
    Sections that cannot be parsed are logged and skipped.
    '''
    results = []

    dom = html.fromstring(resp.text)

    try:
        number_of_results_string = re.sub(r'[^0-9]', '', dom.xpath(
            '//a[@class="active" and contains(@href,"/suchen/dudenonline")]/span/text()')[0]
        )

        results.append({'number_of_results': int(number_of_results_string)})

    except (IndexError, ValueError):
        # IndexError: the counter element is missing from the page.
        # ValueError: the element holds no digits, so int('') fails.
        logger.debug("Couldn't read number of results.")

    for result in dom.xpath('//section[@class="wide" and not(contains(@style,"overflow:hidden"))]'):
        try:
            # let the logger format lazily; nothing is built when debug is off
            logger.debug("running for %s", result)
            link = result.xpath('.//h2/a')[0]
            url = link.attrib.get('href')
            title = result.xpath('string(.//h2/a)')
            content = extract_text(result.xpath('.//p'))
            # append result
            results.append({'url': url,
                            'title': title,
                            'content': content})
        except Exception:
            # Best effort: one malformed section must not drop the others.
            # Unlike a bare except, this lets KeyboardInterrupt/SystemExit through.
            logger.debug('result parse error in:\n%s', etree.tostring(result, pretty_print=True))

    return results
|  | @ -714,6 +714,11 @@ engines: | |||
|     shortcut : 1337x | ||||
|     disabled : True | ||||
| 
 | ||||
|   - name : Duden | ||||
|     engine : duden | ||||
|     shortcut : du | ||||
|     disabled : True | ||||
| 
 | ||||
| #  - name : yacy | ||||
| #    engine : yacy | ||||
| #    shortcut : ya | ||||
|  |  | |||
							
								
								
									
										41
									
								
								tests/unit/engines/test_duden.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										41
									
								
								tests/unit/engines/test_duden.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,41 @@ | |||
| from collections import defaultdict | ||||
| import mock | ||||
| from searx.engines import duden | ||||
| from searx.testing import SearxTestCase | ||||
| from datetime import datetime | ||||
| 
 | ||||
| 
 | ||||
class TestDudenEngine(SearxTestCase):
    """Unit tests for the request/response callbacks of the duden engine."""

    def test_request(self):
        """request() stores a duden.de URL containing the query in params."""
        query = 'Haus'
        dic = defaultdict(dict)
        dic['pageno'] = 1
        params = duden.request(query, dic)
        # assertIn reports both operands on failure, unlike assertTrue(x in y)
        self.assertIn('url', params)
        self.assertIn(query, params['url'])
        self.assertIn('duden.de', params['url'])

    def test_response(self):
        """response() returns [] for an empty page and one dict per result section."""
        resp = mock.Mock(text='<html></html>')
        self.assertEqual(duden.response(resp), [])

        html = """
        <section class="wide">
        <h2><a href="https://this.is.the.url/" class="hidden-link"><strong>This is the title</strong> also here</a></h2>
        <p>This is the <strong>content</strong></p>
        <a href="https://this.is.the.url/">Zum vollständigen Artikel</a>
        </section>
        """

        resp = mock.Mock(text=html)
        results = duden.response(resp)

        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 1)

        # testing result (dictionary entry)
        r = results[0]
        self.assertEqual(r['url'], 'https://this.is.the.url/')
        self.assertEqual(r['title'], 'This is the title also here')
        self.assertEqual(r['content'], 'This is the content')
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Angristan
						Angristan