mirror of https://github.com/searxng/searxng.git
Add search.suspended_times settings
Make suspended_time changeable in settings.yml. Allow different values to be set for different exceptions. Co-authored-by: Alexandre Flament <alex@al-f.net>
This commit is contained in:
parent
b720a495f0
commit
0cedb1c6d8
|
@ -69,11 +69,19 @@ class SearxEngineAPIException(SearxEngineResponseException):
|
||||||
class SearxEngineAccessDeniedException(SearxEngineResponseException):
|
class SearxEngineAccessDeniedException(SearxEngineResponseException):
|
||||||
"""The website is blocking the access"""
|
"""The website is blocking the access"""
|
||||||
|
|
||||||
def __init__(self, suspended_time=24 * 3600, message='Access denied'):
|
SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineAccessDenied"
|
||||||
|
|
||||||
|
def __init__(self, suspended_time=None, message='Access denied'):
|
||||||
|
suspended_time = suspended_time or self._get_default_suspended_time()
|
||||||
super().__init__(message + ', suspended_time=' + str(suspended_time))
|
super().__init__(message + ', suspended_time=' + str(suspended_time))
|
||||||
self.suspended_time = suspended_time
|
self.suspended_time = suspended_time
|
||||||
self.message = message
|
self.message = message
|
||||||
|
|
||||||
|
def _get_default_suspended_time(self):
|
||||||
|
from searx import get_setting
|
||||||
|
|
||||||
|
return get_setting(self.SUSPEND_TIME_SETTING)
|
||||||
|
|
||||||
|
|
||||||
class SearxEngineCaptchaException(SearxEngineAccessDeniedException):
|
class SearxEngineCaptchaException(SearxEngineAccessDeniedException):
|
||||||
"""The website has returned a CAPTCHA
|
"""The website has returned a CAPTCHA
|
||||||
|
@ -81,7 +89,9 @@ class SearxEngineCaptchaException(SearxEngineAccessDeniedException):
|
||||||
By default, searx stops sending requests to this engine for 1 day.
|
By default, searx stops sending requests to this engine for 1 day.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, suspended_time=24 * 3600, message='CAPTCHA'):
|
SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineCaptcha"
|
||||||
|
|
||||||
|
def __init__(self, suspended_time=None, message='CAPTCHA'):
|
||||||
super().__init__(message=message, suspended_time=suspended_time)
|
super().__init__(message=message, suspended_time=suspended_time)
|
||||||
|
|
||||||
|
|
||||||
|
@ -91,7 +101,9 @@ class SearxEngineTooManyRequestsException(SearxEngineAccessDeniedException):
|
||||||
By default, searx stops sending requests to this engine for 1 hour.
|
By default, searx stops sending requests to this engine for 1 hour.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, suspended_time=3600, message='Too many request'):
|
SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineTooManyRequests"
|
||||||
|
|
||||||
|
def __init__(self, suspended_time=None, message='Too many request'):
|
||||||
super().__init__(message=message, suspended_time=suspended_time)
|
super().__init__(message=message, suspended_time=suspended_time)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,7 @@ from searx.exceptions import (
|
||||||
SearxEngineTooManyRequestsException,
|
SearxEngineTooManyRequestsException,
|
||||||
SearxEngineAccessDeniedException,
|
SearxEngineAccessDeniedException,
|
||||||
)
|
)
|
||||||
|
from searx import get_setting
|
||||||
|
|
||||||
|
|
||||||
def is_cloudflare_challenge(resp):
|
def is_cloudflare_challenge(resp):
|
||||||
|
@ -33,15 +34,22 @@ def raise_for_cloudflare_captcha(resp):
|
||||||
if is_cloudflare_challenge(resp):
|
if is_cloudflare_challenge(resp):
|
||||||
# https://support.cloudflare.com/hc/en-us/articles/200170136-Understanding-Cloudflare-Challenge-Passage-Captcha-
|
# https://support.cloudflare.com/hc/en-us/articles/200170136-Understanding-Cloudflare-Challenge-Passage-Captcha-
|
||||||
# suspend for 15 days
|
# suspend for 15 days
|
||||||
raise SearxEngineCaptchaException(message='Cloudflare CAPTCHA', suspended_time=3600 * 24 * 15)
|
raise SearxEngineCaptchaException(
|
||||||
|
message='Cloudflare CAPTCHA', suspended_time=get_setting('search.suspended_times.cf_SearxEngineCaptcha')
|
||||||
|
)
|
||||||
|
|
||||||
if is_cloudflare_firewall(resp):
|
if is_cloudflare_firewall(resp):
|
||||||
raise SearxEngineAccessDeniedException(message='Cloudflare Firewall', suspended_time=3600 * 24)
|
raise SearxEngineAccessDeniedException(
|
||||||
|
message='Cloudflare Firewall',
|
||||||
|
suspended_time=get_setting('search.suspended_times.cf_SearxEngineAccessDenied'),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def raise_for_recaptcha(resp):
|
def raise_for_recaptcha(resp):
|
||||||
if resp.status_code == 503 and '"https://www.google.com/recaptcha/' in resp.text:
|
if resp.status_code == 503 and '"https://www.google.com/recaptcha/' in resp.text:
|
||||||
raise SearxEngineCaptchaException(message='ReCAPTCHA', suspended_time=3600 * 24 * 7)
|
raise SearxEngineCaptchaException(
|
||||||
|
message='ReCAPTCHA', suspended_time=get_setting('search.suspended_times.recaptcha_SearxEngineCaptcha')
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def raise_for_captcha(resp):
|
def raise_for_captcha(resp):
|
||||||
|
|
|
@ -45,6 +45,20 @@ search:
|
||||||
ban_time_on_fail: 5
|
ban_time_on_fail: 5
|
||||||
# max ban time in seconds after engine errors
|
# max ban time in seconds after engine errors
|
||||||
max_ban_time_on_fail: 120
|
max_ban_time_on_fail: 120
|
||||||
|
suspended_times:
|
||||||
|
# Engine suspension time after error (in seconds; set to 0 to disable)
|
||||||
|
# For error "Access denied" and "HTTP error [402, 403]"
|
||||||
|
SearxEngineAccessDenied: 86400
|
||||||
|
# For error "CAPTCHA"
|
||||||
|
SearxEngineCaptcha: 86400
|
||||||
|
# For error "Too many request" and "HTTP error 429"
|
||||||
|
SearxEngineTooManyRequests: 3600
|
||||||
|
# Cloudflare CAPTCHA
|
||||||
|
cf_SearxEngineCaptcha: 1296000
|
||||||
|
cf_SearxEngineAccessDenied: 86400
|
||||||
|
# ReCAPTCHA
|
||||||
|
recaptcha_SearxEngineCaptcha: 604800
|
||||||
|
|
||||||
# remove format to deny access, use lower case.
|
# remove format to deny access, use lower case.
|
||||||
# formats: [html, csv, json, rss]
|
# formats: [html, csv, json, rss]
|
||||||
formats:
|
formats:
|
||||||
|
|
|
@ -160,6 +160,14 @@ SCHEMA = {
|
||||||
'languages': SettingSublistValue(LANGUAGE_CODES, LANGUAGE_CODES),
|
'languages': SettingSublistValue(LANGUAGE_CODES, LANGUAGE_CODES),
|
||||||
'ban_time_on_fail': SettingsValue(numbers.Real, 5),
|
'ban_time_on_fail': SettingsValue(numbers.Real, 5),
|
||||||
'max_ban_time_on_fail': SettingsValue(numbers.Real, 120),
|
'max_ban_time_on_fail': SettingsValue(numbers.Real, 120),
|
||||||
|
'suspended_times': {
|
||||||
|
'SearxEngineAccessDenied': SettingsValue(numbers.Real, 86400),
|
||||||
|
'SearxEngineCaptcha': SettingsValue(numbers.Real, 86400),
|
||||||
|
'SearxEngineTooManyRequests': SettingsValue(numbers.Real, 3600),
|
||||||
|
'cf_SearxEngineCaptcha': SettingsValue(numbers.Real, 1296000),
|
||||||
|
'cf_SearxEngineAccessDenied': SettingsValue(numbers.Real, 86400),
|
||||||
|
'recaptcha_SearxEngineCaptcha': SettingsValue(numbers.Real, 604800),
|
||||||
|
},
|
||||||
'formats': SettingsValue(list, OUTPUT_FORMATS),
|
'formats': SettingsValue(list, OUTPUT_FORMATS),
|
||||||
},
|
},
|
||||||
'server': {
|
'server': {
|
||||||
|
|
|
@ -0,0 +1,41 @@
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
|
||||||
|
from tests import SearxTestCase
|
||||||
|
import searx.exceptions
|
||||||
|
from searx import get_setting
|
||||||
|
|
||||||
|
|
||||||
|
class TestExceptions(SearxTestCase):
|
||||||
|
def test_default_suspend_time(self):
|
||||||
|
with self.assertRaises(searx.exceptions.SearxEngineAccessDeniedException) as e:
|
||||||
|
raise searx.exceptions.SearxEngineAccessDeniedException()
|
||||||
|
self.assertEqual(
|
||||||
|
e.exception.suspended_time,
|
||||||
|
get_setting(searx.exceptions.SearxEngineAccessDeniedException.SUSPEND_TIME_SETTING),
|
||||||
|
)
|
||||||
|
|
||||||
|
with self.assertRaises(searx.exceptions.SearxEngineCaptchaException) as e:
|
||||||
|
raise searx.exceptions.SearxEngineCaptchaException()
|
||||||
|
self.assertEqual(
|
||||||
|
e.exception.suspended_time, get_setting(searx.exceptions.SearxEngineCaptchaException.SUSPEND_TIME_SETTING)
|
||||||
|
)
|
||||||
|
|
||||||
|
with self.assertRaises(searx.exceptions.SearxEngineTooManyRequestsException) as e:
|
||||||
|
raise searx.exceptions.SearxEngineTooManyRequestsException()
|
||||||
|
self.assertEqual(
|
||||||
|
e.exception.suspended_time,
|
||||||
|
get_setting(searx.exceptions.SearxEngineTooManyRequestsException.SUSPEND_TIME_SETTING),
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_custom_suspend_time(self):
|
||||||
|
with self.assertRaises(searx.exceptions.SearxEngineAccessDeniedException) as e:
|
||||||
|
raise searx.exceptions.SearxEngineAccessDeniedException(suspended_time=1337)
|
||||||
|
self.assertEqual(e.exception.suspended_time, 1337)
|
||||||
|
|
||||||
|
with self.assertRaises(searx.exceptions.SearxEngineCaptchaException) as e:
|
||||||
|
raise searx.exceptions.SearxEngineCaptchaException(suspended_time=1409)
|
||||||
|
self.assertEqual(e.exception.suspended_time, 1409)
|
||||||
|
|
||||||
|
with self.assertRaises(searx.exceptions.SearxEngineTooManyRequestsException) as e:
|
||||||
|
raise searx.exceptions.SearxEngineTooManyRequestsException(suspended_time=1543)
|
||||||
|
self.assertEqual(e.exception.suspended_time, 1543)
|
Loading…
Reference in New Issue