mirror of
				https://github.com/searxng/searxng
				synced 2024-01-01 19:24:07 +01:00 
			
		
		
		
	[fix] startpage engine - avoid captcha
Startpage has introduced new anti-scraping measures that make SearXNG instances run into captchas: 1. some arguments have been removed and a new `sc` argument has been added; 2. the search path changed from `do/search` to `sp/search`; 3. a POST request is no longer needed. Closes: https://github.com/searxng/searxng/issues/692 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
		
							parent
							
								
									79e0aa2645
								
							
						
					
					
						commit
						f1f5e69c42
					
				
					 1 changed file with 9 additions and 8 deletions
				
			
		|  | @ -3,6 +3,8 @@ | |||
|  Startpage (Web) | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode | ||||
| 
 | ||||
| from lxml import html | ||||
| from dateutil import parser | ||||
| from datetime import datetime, timedelta | ||||
|  | @ -33,7 +35,7 @@ supported_languages_url = 'https://www.startpage.com/do/settings' | |||
| 
 | ||||
| # search-url | ||||
| base_url = 'https://startpage.com/' | ||||
| search_url = base_url + 'do/search' | ||||
| search_url = base_url + 'sp/search?' | ||||
| 
 | ||||
| # specific xpath variables | ||||
| # ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"] | ||||
|  | @ -46,14 +48,12 @@ content_xpath = './/p[@class="w-gl__description"]' | |||
| # do search-request | ||||
| def request(query, params): | ||||
| 
 | ||||
|     params['url'] = search_url | ||||
|     params['method'] = 'POST' | ||||
|     params['data'] = { | ||||
|     args = { | ||||
|         'query': query, | ||||
|         'page': params['pageno'], | ||||
|         'cat': 'web', | ||||
|         'cmd': 'process_search', | ||||
|         'engine0': 'v1all', | ||||
|         # 'abp': "-1", | ||||
|         'sc': 'Mj4jZy61QETj20', | ||||
|     } | ||||
| 
 | ||||
|     # set language if specified | ||||
|  | @ -61,9 +61,10 @@ def request(query, params): | |||
|         lang_code = match_language(params['language'], supported_languages, fallback=None) | ||||
|         if lang_code: | ||||
|             language_name = supported_languages[lang_code]['alias'] | ||||
|             params['data']['language'] = language_name | ||||
|             params['data']['lui'] = language_name | ||||
|             args['language'] = language_name | ||||
|             args['lui'] = language_name | ||||
| 
 | ||||
|     params['url'] = search_url + urlencode(args) | ||||
|     return params | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Markus Heiser
						Markus Heiser