Merge pull request #1967 from tiekoetter/suspended_time-settings

settings.yml: add search.suspended_times
This commit is contained in:
Alexandre Flament 2023-01-15 10:07:44 +01:00 committed by GitHub
commit 52d4155997
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 89 additions and 6 deletions

View File

@ -69,11 +69,19 @@ class SearxEngineAPIException(SearxEngineResponseException):
class SearxEngineAccessDeniedException(SearxEngineResponseException): class SearxEngineAccessDeniedException(SearxEngineResponseException):
"""The website is blocking the access""" """The website is blocking the access"""
def __init__(self, suspended_time=24 * 3600, message='Access denied'): SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineAccessDenied"
def __init__(self, suspended_time=None, message='Access denied'):
suspended_time = self._get_default_suspended_time() if suspended_time is None else suspended_time
super().__init__(message + ', suspended_time=' + str(suspended_time)) super().__init__(message + ', suspended_time=' + str(suspended_time))
self.suspended_time = suspended_time self.suspended_time = suspended_time
self.message = message self.message = message
def _get_default_suspended_time(self):
from searx import get_setting
return get_setting(self.SUSPEND_TIME_SETTING)
class SearxEngineCaptchaException(SearxEngineAccessDeniedException): class SearxEngineCaptchaException(SearxEngineAccessDeniedException):
"""The website has returned a CAPTCHA """The website has returned a CAPTCHA
@ -81,7 +89,9 @@ class SearxEngineCaptchaException(SearxEngineAccessDeniedException):
By default, searx stops sending requests to this engine for 1 day. By default, searx stops sending requests to this engine for 1 day.
""" """
def __init__(self, suspended_time=24 * 3600, message='CAPTCHA'): SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineCaptcha"
def __init__(self, suspended_time=None, message='CAPTCHA'):
super().__init__(message=message, suspended_time=suspended_time) super().__init__(message=message, suspended_time=suspended_time)
@ -91,7 +101,9 @@ class SearxEngineTooManyRequestsException(SearxEngineAccessDeniedException):
By default, searx stops sending requests to this engine for 1 hour. By default, searx stops sending requests to this engine for 1 hour.
""" """
def __init__(self, suspended_time=3600, message='Too many request'): SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineTooManyRequests"
def __init__(self, suspended_time=None, message='Too many request'):
super().__init__(message=message, suspended_time=suspended_time) super().__init__(message=message, suspended_time=suspended_time)

View File

@ -9,6 +9,7 @@ from searx.exceptions import (
SearxEngineTooManyRequestsException, SearxEngineTooManyRequestsException,
SearxEngineAccessDeniedException, SearxEngineAccessDeniedException,
) )
from searx import get_setting
def is_cloudflare_challenge(resp): def is_cloudflare_challenge(resp):
@ -33,15 +34,22 @@ def raise_for_cloudflare_captcha(resp):
if is_cloudflare_challenge(resp): if is_cloudflare_challenge(resp):
# https://support.cloudflare.com/hc/en-us/articles/200170136-Understanding-Cloudflare-Challenge-Passage-Captcha- # https://support.cloudflare.com/hc/en-us/articles/200170136-Understanding-Cloudflare-Challenge-Passage-Captcha-
# suspend for 15 days # suspend for 15 days
raise SearxEngineCaptchaException(message='Cloudflare CAPTCHA', suspended_time=3600 * 24 * 15) raise SearxEngineCaptchaException(
message='Cloudflare CAPTCHA', suspended_time=get_setting('search.suspended_times.cf_SearxEngineCaptcha')
)
if is_cloudflare_firewall(resp): if is_cloudflare_firewall(resp):
raise SearxEngineAccessDeniedException(message='Cloudflare Firewall', suspended_time=3600 * 24) raise SearxEngineAccessDeniedException(
message='Cloudflare Firewall',
suspended_time=get_setting('search.suspended_times.cf_SearxEngineAccessDenied'),
)
def raise_for_recaptcha(resp): def raise_for_recaptcha(resp):
if resp.status_code == 503 and '"https://www.google.com/recaptcha/' in resp.text: if resp.status_code == 503 and '"https://www.google.com/recaptcha/' in resp.text:
raise SearxEngineCaptchaException(message='ReCAPTCHA', suspended_time=3600 * 24 * 7) raise SearxEngineCaptchaException(
message='ReCAPTCHA', suspended_time=get_setting('search.suspended_times.recaptcha_SearxEngineCaptcha')
)
def raise_for_captcha(resp): def raise_for_captcha(resp):

View File

@ -45,6 +45,20 @@ search:
ban_time_on_fail: 5 ban_time_on_fail: 5
# max ban time in seconds after engine errors # max ban time in seconds after engine errors
max_ban_time_on_fail: 120 max_ban_time_on_fail: 120
suspended_times:
# Engine suspension time after error (in seconds; set to 0 to disable)
# For error "Access denied" and "HTTP error [402, 403]"
SearxEngineAccessDenied: 86400
# For error "CAPTCHA"
SearxEngineCaptcha: 86400
# For error "Too many request" and "HTTP error 429"
SearxEngineTooManyRequests: 3600
# Cloudflare CAPTCHA
cf_SearxEngineCaptcha: 1296000
cf_SearxEngineAccessDenied: 86400
# ReCAPTCHA
recaptcha_SearxEngineCaptcha: 604800
# remove format to deny access, use lower case. # remove format to deny access, use lower case.
# formats: [html, csv, json, rss] # formats: [html, csv, json, rss]
formats: formats:

View File

@ -160,6 +160,14 @@ SCHEMA = {
'languages': SettingSublistValue(LANGUAGE_CODES, LANGUAGE_CODES), 'languages': SettingSublistValue(LANGUAGE_CODES, LANGUAGE_CODES),
'ban_time_on_fail': SettingsValue(numbers.Real, 5), 'ban_time_on_fail': SettingsValue(numbers.Real, 5),
'max_ban_time_on_fail': SettingsValue(numbers.Real, 120), 'max_ban_time_on_fail': SettingsValue(numbers.Real, 120),
'suspended_times': {
'SearxEngineAccessDenied': SettingsValue(numbers.Real, 86400),
'SearxEngineCaptcha': SettingsValue(numbers.Real, 86400),
'SearxEngineTooManyRequests': SettingsValue(numbers.Real, 3600),
'cf_SearxEngineCaptcha': SettingsValue(numbers.Real, 1296000),
'cf_SearxEngineAccessDenied': SettingsValue(numbers.Real, 86400),
'recaptcha_SearxEngineCaptcha': SettingsValue(numbers.Real, 604800),
},
'formats': SettingsValue(list, OUTPUT_FORMATS), 'formats': SettingsValue(list, OUTPUT_FORMATS),
}, },
'server': { 'server': {

View File

@ -0,0 +1,41 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
from tests import SearxTestCase
import searx.exceptions
from searx import get_setting
class TestExceptions(SearxTestCase):
    """Check the ``suspended_time`` attribute of the engine access exceptions."""

    def test_default_suspend_time(self):
        """Raised without arguments, each exception reports the value of its own setting.

        The expected value is read through ``get_setting`` using the
        ``SUSPEND_TIME_SETTING`` path declared on the exception class.
        """
        exception_classes = (
            searx.exceptions.SearxEngineAccessDeniedException,
            searx.exceptions.SearxEngineCaptchaException,
            searx.exceptions.SearxEngineTooManyRequestsException,
        )
        for exc_cls in exception_classes:
            with self.assertRaises(exc_cls) as caught:
                raise exc_cls()
            expected = get_setting(exc_cls.SUSPEND_TIME_SETTING)
            self.assertEqual(caught.exception.suspended_time, expected)

    def test_custom_suspend_time(self):
        """An explicit ``suspended_time`` argument is stored unchanged on the exception."""
        # Distinct values per class so a mix-up between classes would be visible.
        cases = (
            (searx.exceptions.SearxEngineAccessDeniedException, 1337),
            (searx.exceptions.SearxEngineCaptchaException, 1409),
            (searx.exceptions.SearxEngineTooManyRequestsException, 1543),
        )
        for exc_cls, custom_time in cases:
            with self.assertRaises(exc_cls) as caught:
                raise exc_cls(suspended_time=custom_time)
            self.assertEqual(caught.exception.suspended_time, custom_time)