mirror of https://github.com/searxng/searxng.git
Add search.suspended_times settings
Make suspended_time changeable in settings.yml. Allow different values to be set for different exceptions. Co-authored-by: Alexandre Flament <alex@al-f.net>
This commit is contained in:
parent
b720a495f0
commit
0cedb1c6d8
|
@ -69,11 +69,19 @@ class SearxEngineAPIException(SearxEngineResponseException):
|
|||
class SearxEngineAccessDeniedException(SearxEngineResponseException):
    """The website is blocking the access"""

    # Dotted settings path holding the default suspension time (in seconds)
    # for this exception type.
    SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineAccessDenied"

    def __init__(self, suspended_time=None, message='Access denied'):
        """Build the exception and record how long the engine is suspended.

        :param suspended_time: suspension time in seconds.  ``None`` (the
            default) means "use the value configured under
            ``SUSPEND_TIME_SETTING``".  An explicit ``0`` is kept as is, so
            callers forwarding a setting that was set to 0 do not silently
            get the class default instead.
        :param message: short description, stored in ``self.message`` and
            used as the prefix of the exception text.
        """
        # Compare against None rather than using ``or``: 0 is a legitimate
        # value and must not be replaced by the configured default.
        if suspended_time is None:
            suspended_time = self._get_default_suspended_time()
        super().__init__(message + ', suspended_time=' + str(suspended_time))
        self.suspended_time = suspended_time
        self.message = message

    def _get_default_suspended_time(self):
        """Return the value stored in the settings under ``SUSPEND_TIME_SETTING``."""
        # NOTE(review): imported locally, presumably to avoid a circular
        # import between ``searx`` and this module -- confirm.
        from searx import get_setting

        return get_setting(self.SUSPEND_TIME_SETTING)
|
||||
|
||||
|
||||
class SearxEngineCaptchaException(SearxEngineAccessDeniedException):
|
||||
"""The website has returned a CAPTCHA
|
||||
|
@ -81,7 +89,9 @@ class SearxEngineCaptchaException(SearxEngineAccessDeniedException):
|
|||
By default, searx stops sending requests to this engine for 1 day.
|
||||
"""
|
||||
|
||||
def __init__(self, suspended_time=24 * 3600, message='CAPTCHA'):
|
||||
SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineCaptcha"
|
||||
|
||||
def __init__(self, suspended_time=None, message='CAPTCHA'):
|
||||
super().__init__(message=message, suspended_time=suspended_time)
|
||||
|
||||
|
||||
|
@ -91,7 +101,9 @@ class SearxEngineTooManyRequestsException(SearxEngineAccessDeniedException):
|
|||
By default, searx stops sending requests to this engine for 1 hour.
|
||||
"""
|
||||
|
||||
def __init__(self, suspended_time=3600, message='Too many request'):
|
||||
SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineTooManyRequests"
|
||||
|
||||
def __init__(self, suspended_time=None, message='Too many request'):
|
||||
super().__init__(message=message, suspended_time=suspended_time)
|
||||
|
||||
|
||||
|
|
|
@ -9,6 +9,7 @@ from searx.exceptions import (
|
|||
SearxEngineTooManyRequestsException,
|
||||
SearxEngineAccessDeniedException,
|
||||
)
|
||||
from searx import get_setting
|
||||
|
||||
|
||||
def is_cloudflare_challenge(resp):
|
||||
|
@ -33,15 +34,22 @@ def raise_for_cloudflare_captcha(resp):
|
|||
if is_cloudflare_challenge(resp):
|
||||
# https://support.cloudflare.com/hc/en-us/articles/200170136-Understanding-Cloudflare-Challenge-Passage-Captcha-
|
||||
# suspend for 15 days by default (configurable in settings.yml)
|
||||
raise SearxEngineCaptchaException(message='Cloudflare CAPTCHA', suspended_time=3600 * 24 * 15)
|
||||
raise SearxEngineCaptchaException(
|
||||
message='Cloudflare CAPTCHA', suspended_time=get_setting('search.suspended_times.cf_SearxEngineCaptcha')
|
||||
)
|
||||
|
||||
if is_cloudflare_firewall(resp):
|
||||
raise SearxEngineAccessDeniedException(message='Cloudflare Firewall', suspended_time=3600 * 24)
|
||||
raise SearxEngineAccessDeniedException(
|
||||
message='Cloudflare Firewall',
|
||||
suspended_time=get_setting('search.suspended_times.cf_SearxEngineAccessDenied'),
|
||||
)
|
||||
|
||||
|
||||
def raise_for_recaptcha(resp):
    """Raise SearxEngineCaptchaException if ``resp`` is a 503 page embedding Google reCAPTCHA.

    The suspension time is read from the
    ``search.suspended_times.recaptcha_SearxEngineCaptcha`` setting.
    Returns ``None`` when the response does not match.
    """
    # Check the status first so ``resp.text`` is only read for 503 responses.
    if resp.status_code != 503:
        return
    if '"https://www.google.com/recaptcha/' not in resp.text:
        return
    suspended_time = get_setting('search.suspended_times.recaptcha_SearxEngineCaptcha')
    raise SearxEngineCaptchaException(message='ReCAPTCHA', suspended_time=suspended_time)
|
||||
|
||||
|
||||
def raise_for_captcha(resp):
|
||||
|
|
|
@ -45,6 +45,20 @@ search:
|
|||
ban_time_on_fail: 5
|
||||
# max ban time in seconds after engine errors
|
||||
max_ban_time_on_fail: 120
|
||||
suspended_times:
  # Engine suspension time after error (in seconds; set to 0 to disable)
  # For error "Access denied" and "HTTP error [402, 403]"
  SearxEngineAccessDenied: 86400
  # For error "CAPTCHA"
  SearxEngineCaptcha: 86400
  # For error "Too many request" and "HTTP error 429"
  SearxEngineTooManyRequests: 3600
  # Cloudflare CAPTCHA (15 days)
  cf_SearxEngineCaptcha: 1296000
  # Cloudflare Firewall
  cf_SearxEngineAccessDenied: 86400
  # ReCAPTCHA (7 days)
  recaptcha_SearxEngineCaptcha: 604800
|
||||
|
||||
# remove format to deny access, use lower case.
|
||||
# formats: [html, csv, json, rss]
|
||||
formats:
|
||||
|
|
|
@ -160,6 +160,14 @@ SCHEMA = {
|
|||
'languages': SettingSublistValue(LANGUAGE_CODES, LANGUAGE_CODES),
|
||||
'ban_time_on_fail': SettingsValue(numbers.Real, 5),
|
||||
'max_ban_time_on_fail': SettingsValue(numbers.Real, 120),
|
||||
'suspended_times': {
|
||||
'SearxEngineAccessDenied': SettingsValue(numbers.Real, 86400),
|
||||
'SearxEngineCaptcha': SettingsValue(numbers.Real, 86400),
|
||||
'SearxEngineTooManyRequests': SettingsValue(numbers.Real, 3600),
|
||||
'cf_SearxEngineCaptcha': SettingsValue(numbers.Real, 1296000),
|
||||
'cf_SearxEngineAccessDenied': SettingsValue(numbers.Real, 86400),
|
||||
'recaptcha_SearxEngineCaptcha': SettingsValue(numbers.Real, 604800),
|
||||
},
|
||||
'formats': SettingsValue(list, OUTPUT_FORMATS),
|
||||
},
|
||||
'server': {
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
|
||||
from tests import SearxTestCase
|
||||
import searx.exceptions
|
||||
from searx import get_setting
|
||||
|
||||
|
||||
class TestExceptions(SearxTestCase):
    """Check the ``suspended_time`` attribute of the engine access exceptions."""

    def test_default_suspend_time(self):
        """Without an argument, ``suspended_time`` equals the configured setting."""
        exception_classes = (
            searx.exceptions.SearxEngineAccessDeniedException,
            searx.exceptions.SearxEngineCaptchaException,
            searx.exceptions.SearxEngineTooManyRequestsException,
        )
        for exc_class in exception_classes:
            with self.assertRaises(exc_class) as e:
                raise exc_class()
            expected = get_setting(exc_class.SUSPEND_TIME_SETTING)
            self.assertEqual(e.exception.suspended_time, expected)

    def test_custom_suspend_time(self):
        """An explicit ``suspended_time`` argument is stored unchanged."""
        cases = (
            (searx.exceptions.SearxEngineAccessDeniedException, 1337),
            (searx.exceptions.SearxEngineCaptchaException, 1409),
            (searx.exceptions.SearxEngineTooManyRequestsException, 1543),
        )
        for exc_class, custom_time in cases:
            with self.assertRaises(exc_class) as e:
                raise exc_class(suspended_time=custom_time)
            self.assertEqual(e.exception.suspended_time, custom_time)
|
Loading…
Reference in New Issue