mirror of
				https://github.com/searxng/searxng
				synced 2024-01-01 19:24:07 +01:00 
			
		
		
		
	[feat] engine: implementation of ask.com
This commit is contained in:
		
							parent
							
								
									c197c0e35e
								
							
						
					
					
						commit
						3df53d6e50
					
				
					 2 changed files with 61 additions and 0 deletions
				
			
		
							
								
								
									
										56
									
								
								searx/engines/ask.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										56
									
								
								searx/engines/ask.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,56 @@ | |||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| # lint: pylint | ||||
| """Ask.com""" | ||||
| 
 | ||||
| from urllib.parse import urlencode | ||||
| import re | ||||
| from lxml import html | ||||
| 
 | ||||
# About-metadata for the Ask.com engine (standard searx engine fields).
about = {
    "website": "https://www.ask.com/",
    "wikidata_id": "Q847564",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
}

# Engine configuration: Ask.com is a general-purpose engine with
# page-based pagination.
categories = ["general"]
paging = True

# Endpoint queried by request(); the urlencoded query string is appended.
base_url = "https://www.ask.com/web"
| 
 | ||||
| 
 | ||||
def request(query, params):
    """Build the Ask.com search URL for *query* and store it in *params*.

    ``params["pageno"]`` (1-based, supplied by searx) selects the result
    page.  The updated *params* dict is returned, as the engine protocol
    requires.
    """
    args = urlencode({"q": query, "page": params["pageno"]})
    params["url"] = f"{base_url}?{args}"
    return params
| 
 | ||||
| 
 | ||||
def response(resp):
    """Extract search results from an Ask.com HTML response.

    The page embeds its result records as JSON fragments inside the
    markup; they are pulled out of the flattened page text with regular
    expressions rather than a proper JSON parse.  Returns a list of
    url/title/content dicts.
    """
    page_text = html.fromstring(resp.text).text_content()

    urls = re.findall(r'"url":"(.*?)"', page_text)
    # Drop the first three "title" hits — presumably page chrome rather
    # than result titles; TODO(review) confirm against a live page.
    titles = re.findall(r'"title":"(.*?)"', page_text)[3:]
    contents = re.findall(r'"abstract":"(.*?)"', page_text)

    results = []
    for url, title, content in zip(urls, titles, contents):
        # Related searches shouldn't be in the search results: www.ask.com/web&q=related
        if "&qo=relatedSearchNarrow" in url:
            continue
        results.append({"url": url, "title": title, "content": content})

    return results
|  | @ -364,6 +364,11 @@ engines: | |||
|     shortcut: arx | ||||
|     timeout: 4.0 | ||||
| 
 | ||||
  # Ask.com engine (searx/engines/ask.py); shipped disabled by default.
  - name: ask
    engine: ask
    shortcut: ask
    disabled: true
| 
 | ||||
|   # tmp suspended:  dh key too small | ||||
|   # - name: base | ||||
|   #   engine: base | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Hackurei
						Hackurei