forked from zaclys/searxng
		
	[fix] startpage engine: fetch CAPTCHA & issues related to PR-695
In case of a CAPTCHA, raise a SearxEngineCaptchaException and suspend the engine for 7 days. When get_sc_code() fails, raise a SearxEngineResponseException and suspend the engine for 7 days; see PR-695 [1]. [1] https://github.com/searxng/searxng/pull/695 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
		
							parent
							
								
									2f4e567e90
								
							
						
					
					
						commit
						21e884f369
					
				
					 1 changed file with 21 additions and 2 deletions
				
			
		| 
						 | 
					@ -18,6 +18,11 @@ from babel.localedata import locale_identifiers
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from searx import network
 | 
					from searx import network
 | 
				
			||||||
from searx.utils import extract_text, eval_xpath, match_language
 | 
					from searx.utils import extract_text, eval_xpath, match_language
 | 
				
			||||||
 | 
					from searx.exceptions import (
 | 
				
			||||||
 | 
					    SearxEngineResponseException,
 | 
				
			||||||
 | 
					    SearxEngineCaptchaException,
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# about
 | 
					# about
 | 
				
			||||||
about = {
 | 
					about = {
 | 
				
			||||||
| 
						 | 
					@ -54,6 +59,13 @@ sc_code_ts = 0
 | 
				
			||||||
sc_code = ''
 | 
					sc_code = ''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def raise_captcha(resp):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if str(resp.url).startswith('https://www.startpage.com/sp/captcha'):
 | 
				
			||||||
 | 
					        # suspend CAPTCHA for 7 days
 | 
				
			||||||
 | 
					        raise SearxEngineCaptchaException(suspended_time=7 * 24 * 3600)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_sc_code(headers):
 | 
					def get_sc_code(headers):
 | 
				
			||||||
    """Get an actual `sc` argument from startpage's home page.
 | 
					    """Get an actual `sc` argument from startpage's home page.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -73,10 +85,17 @@ def get_sc_code(headers):
 | 
				
			||||||
        logger.debug("query new sc time-stamp ...")
 | 
					        logger.debug("query new sc time-stamp ...")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        resp = network.get(base_url, headers=headers)
 | 
					        resp = network.get(base_url, headers=headers)
 | 
				
			||||||
 | 
					        raise_captcha(resp)
 | 
				
			||||||
        dom = html.fromstring(resp.text)
 | 
					        dom = html.fromstring(resp.text)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # href --> '/?sc=adrKJMgF8xwp20'
 | 
					        try:
 | 
				
			||||||
        href = eval_xpath(dom, '//a[@class="footer-home__logo"]')[0].get('href')
 | 
					            # href --> '/?sc=adrKJMgF8xwp20'
 | 
				
			||||||
 | 
					            href = eval_xpath(dom, '//a[@class="footer-home__logo"]')[0].get('href')
 | 
				
			||||||
 | 
					        except IndexError as exc:
 | 
				
			||||||
 | 
					            # suspend startpage API --> https://github.com/searxng/searxng/pull/695
 | 
				
			||||||
 | 
					            raise SearxEngineResponseException(
 | 
				
			||||||
 | 
					                suspended_time=7 * 24 * 3600, message="PR-695: query new sc time-stamp failed!"
 | 
				
			||||||
 | 
					            )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        sc_code = href[5:]
 | 
					        sc_code = href[5:]
 | 
				
			||||||
        sc_code_ts = time()
 | 
					        sc_code_ts = time()
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		
		Reference in a new issue