mirror of
				https://github.com/searxng/searxng
				synced 2024-01-01 19:24:07 +01:00 
			
		
		
		
	[fix] startpage engine - avoid captcha
Startpage has introduced new anti-scraping measures that make SearXNG instances run into captchas:

1. Some arguments have been removed and a new `sc` argument has been added.
2. The search path changed from `do/search` to `sp/search`.
3. A POST request is no longer needed.

Closes: https://github.com/searxng/searxng/issues/692
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
		
							parent
							
								
									79e0aa2645
								
							
						
					
					
						commit
						f1f5e69c42
					
				
					 1 changed files with 9 additions and 8 deletions
				
			
		|  | @ -3,6 +3,8 @@ | ||||||
|  Startpage (Web) |  Startpage (Web) | ||||||
| """ | """ | ||||||
| 
 | 
 | ||||||
|  | from urllib.parse import urlencode | ||||||
|  | 
 | ||||||
| from lxml import html | from lxml import html | ||||||
| from dateutil import parser | from dateutil import parser | ||||||
| from datetime import datetime, timedelta | from datetime import datetime, timedelta | ||||||
|  | @ -33,7 +35,7 @@ supported_languages_url = 'https://www.startpage.com/do/settings' | ||||||
| 
 | 
 | ||||||
| # search-url | # search-url | ||||||
| base_url = 'https://startpage.com/' | base_url = 'https://startpage.com/' | ||||||
| search_url = base_url + 'do/search' | search_url = base_url + 'sp/search?' | ||||||
| 
 | 
 | ||||||
| # specific xpath variables | # specific xpath variables | ||||||
| # ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"] | # ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"] | ||||||
|  | @ -46,14 +48,12 @@ content_xpath = './/p[@class="w-gl__description"]' | ||||||
| # do search-request | # do search-request | ||||||
| def request(query, params): | def request(query, params): | ||||||
| 
 | 
 | ||||||
|     params['url'] = search_url |     args = { | ||||||
|     params['method'] = 'POST' |  | ||||||
|     params['data'] = { |  | ||||||
|         'query': query, |         'query': query, | ||||||
|         'page': params['pageno'], |         'page': params['pageno'], | ||||||
|         'cat': 'web', |         'cat': 'web', | ||||||
|         'cmd': 'process_search', |         # 'abp': "-1", | ||||||
|         'engine0': 'v1all', |         'sc': 'Mj4jZy61QETj20', | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     # set language if specified |     # set language if specified | ||||||
|  | @ -61,9 +61,10 @@ def request(query, params): | ||||||
|         lang_code = match_language(params['language'], supported_languages, fallback=None) |         lang_code = match_language(params['language'], supported_languages, fallback=None) | ||||||
|         if lang_code: |         if lang_code: | ||||||
|             language_name = supported_languages[lang_code]['alias'] |             language_name = supported_languages[lang_code]['alias'] | ||||||
|             params['data']['language'] = language_name |             args['language'] = language_name | ||||||
|             params['data']['lui'] = language_name |             args['lui'] = language_name | ||||||
| 
 | 
 | ||||||
|  |     params['url'] = search_url + urlencode(args) | ||||||
|     return params |     return params | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Markus Heiser
						Markus Heiser