From 37addec69e4211aac6b2302c82cb5c5c7b1d5e04 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Sat, 28 Jan 2023 10:24:14 +0000 Subject: [PATCH] search.suspended_time settings: bug fixes * fix typo in settings.yml: replace suspend_times by suspended_times * always use delay defined in settings.yml: * HTTP status 402 and 403: read the value from settings.yml instead of using the hardcoded value of 1 day. * startpage engine: a CAPTCHA suspends the engine for one day instead of one week --- searx/engines/startpage.py | 3 +-- searx/exceptions.py | 9 ++++++++- searx/network/raise_for_httperror.py | 4 +--- searx/settings.yml | 2 +- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py index 24aa59d03..f857f7b6d 100644 --- a/searx/engines/startpage.py +++ b/searx/engines/startpage.py @@ -62,8 +62,7 @@ sc_code = '' def raise_captcha(resp): if str(resp.url).startswith('https://www.startpage.com/sp/captcha'): - # suspend CAPTCHA for 7 days - raise SearxEngineCaptchaException(suspended_time=7 * 24 * 3600) + raise SearxEngineCaptchaException() def get_sc_code(headers): diff --git a/searx/exceptions.py b/searx/exceptions.py index af81bfb23..b11821b17 100644 --- a/searx/exceptions.py +++ b/searx/exceptions.py @@ -70,8 +70,15 @@ class SearxEngineAccessDeniedException(SearxEngineResponseException): """The website is blocking the access""" SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineAccessDenied" + """This setting contains the default suspended time""" - def __init__(self, suspended_time=None, message='Access denied'): + def __init__(self, suspended_time: int = None, message: str = 'Access denied'): + """Generic exception to raise when an engine denies access to the results + + Args: + suspended_time (int, optional): How long the engine is going to be suspended in seconds. Defaults to None. + message (str, optional): Internal message. Defaults to 'Access denied'. 
+ """ suspended_time = suspended_time or self._get_default_suspended_time() super().__init__(message + ', suspended_time=' + str(suspended_time)) self.suspended_time = suspended_time diff --git a/searx/network/raise_for_httperror.py b/searx/network/raise_for_httperror.py index 7fc2b7877..9f847d436 100644 --- a/searx/network/raise_for_httperror.py +++ b/searx/network/raise_for_httperror.py @@ -72,9 +72,7 @@ def raise_for_httperror(resp): if resp.status_code and resp.status_code >= 400: raise_for_captcha(resp) if resp.status_code in (402, 403): - raise SearxEngineAccessDeniedException( - message='HTTP error ' + str(resp.status_code), suspended_time=3600 * 24 - ) + raise SearxEngineAccessDeniedException(message='HTTP error ' + str(resp.status_code)) if resp.status_code == 429: raise SearxEngineTooManyRequestsException() resp.raise_for_status() diff --git a/searx/settings.yml b/searx/settings.yml index 81025d653..216cb3c82 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -45,7 +45,7 @@ search: ban_time_on_fail: 5 # max ban time in seconds after engine errors max_ban_time_on_fail: 120 - suspend_times: + suspended_times: # Engine suspension time after error (in seconds; set to 0 to disable) # For error "Access denied" and "HTTP error [402, 403]" SearxEngineAccessDenied: 86400