forked from zaclys/searxng
		
	
						commit
						2b60d0d243
					
				
					 2 changed files with 65 additions and 17 deletions
				
			
		
							
								
								
									
										64
									
								
								searx/engines/seznam.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										64
									
								
								searx/engines/seznam.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,64 @@ | |||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Seznam | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode, urlparse | ||||
| from lxml import html | ||||
| from searx.poolrequests import get | ||||
| from searx.exceptions import SearxEngineAccessDeniedException | ||||
| from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex | ||||
| 
 | ||||
# about
# Engine metadata: the site's home page, its Wikidata item and the official
# API documentation.  The engine does not use the official API
# ("use_official_api" is False), needs no API key, and reads HTML results.
about = {
    "website": "https://www.seznam.cz/",
    "wikidata_id": "Q3490485",
    "official_api_documentation": "https://api.sklik.cz/",
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
}

# Search front page.  request() fetches it to collect the hidden form inputs
# and cookies, then appends the encoded query string to it.
base_url = 'https://search.seznam.cz/'
| 
 | ||||
| 
 | ||||
def request(query, params):
    """Build the Seznam search request for *query*.

    The landing page at ``base_url`` is fetched first.  Its hidden form
    inputs are copied into the query string next to ``q``, and the cookies
    it returned are attached to the outgoing search request.

    Args:
        query: the search terms, sent as the ``q`` parameter.
        params: the request parameters dict.  ``params['headers']`` is read;
            ``params['url']`` and ``params['cookies']`` are set.

    Returns:
        The same *params* dict, updated in place.

    Note:
        The landing page is fetched with ``raise_for_httperror=True``, so an
        HTTP error on that preliminary request is expected to propagate to
        the caller (exact exception type depends on ``get`` - not visible
        here).
    """
    response_index = get(base_url, headers=params['headers'], raise_for_httperror=True)
    dom = html.fromstring(response_index.text)

    url_params = {'q': query}
    for hidden_input in eval_xpath_list(dom, '//input[@type="hidden"]'):
        name = hidden_input.get('name')
        # Inputs without a name are not part of a form submission, and
        # urlencode() would serialise the missing name as the literal "None".
        # A hidden field must also never replace the user's query.
        if not name or name == 'q':
            continue
        # A missing value attribute is submitted as an empty string rather
        # than the literal "None".
        url_params[name] = hidden_input.get('value', '')

    params['url'] = base_url + '?' + urlencode(url_params)
    params['cookies'] = response_index.cookies
    return params
| 
 | ||||
| 
 | ||||
def response(resp):
    """Turn a Seznam result page into a list of searx result dicts.

    Each direct child of the ``data-dot="results"`` container is inspected:

    * a child without a ``data-dot-data`` marker is an ordinary hit and
      yields a dict with ``url``, ``title`` and ``content``;
    * a child marked as the "hint/related/relates" reporter yields one
      ``{'suggestion': ...}`` dict per list item of its main box;
    * any other marked child is ignored.

    Args:
        resp: the HTTP response of the search request; ``resp.url`` and
            ``resp.content`` are read.

    Returns:
        A list of result and suggestion dicts, in page order.

    Raises:
        SearxEngineAccessDeniedException: if the final URL path starts with
            ``/verify``.
    """
    if urlparse(resp.url).path.startswith('/verify'):
        raise SearxEngineAccessDeniedException()

    page = html.fromstring(resp.content.decode())
    found = []

    for block in eval_xpath_list(page, '//div[@id="searchpage-root"]//div[@data-dot="results"]/div'):
        marker = eval_xpath_getindex(block, './div/div[@data-dot-data]/@data-dot-data', 0, default=None)

        if marker is None:
            # Ordinary hit: the link inside the heading carries URL and title,
            # the description is looked up relative to that link.
            link = eval_xpath_getindex(block, './/h3/a', 0)
            snippet = eval_xpath_getindex(link, '../../div[2]', 0)
            found.append({
                'url': link.get('href'),
                'title': extract_text(link),
                'content': extract_text(snippet),
            })
            continue

        if marker != '{"reporter_name":"hint/related/relates"}':
            continue

        # Related-searches box: every list item is one suggestion.
        box = eval_xpath_getindex(block, './div/div[@data-dot="main-box"]', 0, default=None)
        if box is None:
            continue
        found.extend({'suggestion': extract_text(item)} for item in eval_xpath_list(box, './/ul/li'))

    return found
|  | @ -1170,24 +1170,8 @@ engines: | |||
| 
 | ||||
|   - name : seznam | ||||
|     shortcut: szn | ||||
|     engine: xpath | ||||
|     paging : True | ||||
|     search_url : https://search.seznam.cz/?q={query}&count=10&from={pageno} | ||||
|     results_xpath: //div[@class="Page-content"]//div[contains(@class, "Result ")] | ||||
|     url_xpath : ./h3/a/@href | ||||
|     title_xpath : ./h3 | ||||
|     content_xpath : .//p[@class="Result-description"] | ||||
|     suggestion_xpath: //div[@class="Related-container"]//div[@class="RelatedItem"]/div/span/a | ||||
|     first_page_num : 0 | ||||
|     page_size : 10 | ||||
|     engine: seznam | ||||
|     disabled : True | ||||
|     about: | ||||
|       website: https://www.seznam.cz/ | ||||
|       wikidata_id: Q3490485 | ||||
|       official_api_documentation: https://api.sklik.cz/ | ||||
|       use_official_api: false | ||||
|       require_api_key: false | ||||
|       results: HTML | ||||
| 
 | ||||
|   - name : mojeek | ||||
|     shortcut: mjk | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Alexandre Flament
						Alexandre Flament