From 93982f6d8e2730b1ee4853c6cdcd0d44895cba6d Mon Sep 17 00:00:00 2001 From: Marc Abonce Seguin Date: Sun, 20 Mar 2022 19:22:44 -0600 Subject: [PATCH] add rate limiting per engine --- docs/admin/engines/settings.rst | 12 +++++ searx/engines/__init__.py | 6 +++ searx/plugins/engine_rate_limiter.py | 79 ++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+) create mode 100644 searx/plugins/engine_rate_limiter.py diff --git a/docs/admin/engines/settings.rst b/docs/admin/engines/settings.rst index 85feddd6a..6a5e6b4db 100644 --- a/docs/admin/engines/settings.rst +++ b/docs/admin/engines/settings.rst @@ -422,6 +422,9 @@ engine is shown. Most of the options have a default value or even are optional. max_connections: 100 max_keepalive_connections: 10 keepalive_expiry: 5.0 + rate_limit: + max_requests: 200 + interval: 60 proxies: http: - http://proxy1:8080 @@ -487,6 +490,15 @@ engine is shown. Most of the options have a default value or even are optional. - ``ipv4`` set ``local_addresses`` to ``0.0.0.0`` (use only IPv4 local addresses) - ``ipv6`` set ``local_addresses`` to ``::`` (use only IPv6 local addresses) +``rate_limit``: optional + Limit how many outgoing requests SearXNG sends to this engine. + Requires :ref:`Redis <settings redis>`. + + - ``max_requests`` is the maximum number of requests that will be sent to this + engine per interval. + - ``interval`` (optional) is the number of seconds before this engine's rate + limiter is reset. Defaults to 1 second if unspecified. + .. 
note:: A few more options are possible, but they are pretty specific to some diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index ae132f48d..24f9087f0 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -44,6 +44,7 @@ ENGINE_DEFAULT_ARGS = { "enable_http": False, "using_tor_proxy": False, "display_error_messages": True, + "rate_limit": { "max_requests": float('inf'), "interval": 1 }, "tokens": [], "about": {}, } @@ -168,6 +169,11 @@ def update_engine_attributes(engine: Engine, engine_data): engine.categories = param_value elif hasattr(engine, 'about') and param_name == 'about': engine.about = {**engine.about, **engine_data['about']} + elif hasattr(engine, 'rate_limit') and param_name == 'rate_limit': + engine.rate_limit = { + 'max_requests': int(param_value['max_requests']) if 'max_requests' in param_value else float('inf'), # missing max_requests keeps the unlimited default instead of raising on int(None) + 'interval': int(param_value.get('interval', 1)) + } else: setattr(engine, param_name, param_value) diff --git a/searx/plugins/engine_rate_limiter.py b/searx/plugins/engine_rate_limiter.py new file mode 100644 index 000000000..cff1f688a --- /dev/null +++ b/searx/plugins/engine_rate_limiter.py @@ -0,0 +1,79 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +# pyright: basic +"""Rate limit outgoing requests per engine + +Enable ``settings.yml``: + +- ``redis.url: ...`` check the value, see :ref:`settings redis` +- ``rate_limit: ...`` max_requests and interval, as specified below +- ``max_requests: ...`` max number of requests for that engine per interval +- ``interval: ...`` number of seconds before rate limiting resets (optional, by default 1 second) +""" + +import hmac + +from searx import settings +from searx.engines import engines +from searx.shared import redisdb + +name = "Engine rate limiter" +description = "Limit the number of outgoing requests per engine" +default_on = True +preference_section = 'service' + + +def check_rate_limiter(engine_name, limit, interval): + redis_client = redisdb.client() + lua_script = """ + local 
engine = KEYS[1] + local limit = tonumber(ARGV[1]) -- ARGV entries arrive as strings; convert so the comparison below is numeric + local interval = ARGV[2] + + local count = tonumber(redis.call('GET', engine)) -- GET yields a string, or false (tonumber gives nil) when the key does not exist + if count and count > limit then + return count + else + local newCount = redis.call('INCR', engine) + if newCount == 1 then + redis.call('EXPIRE', engine, interval) + end + return newCount + end + """ + script_sha = redis_client.script_load(lua_script) + + secret_key_bytes = bytes(settings['server']['secret_key'], encoding='utf-8') + m = hmac.new(secret_key_bytes, digestmod='sha256') + m.update(bytes(engine_name, encoding='utf-8')) + key = m.digest() + + requests_counter = redis_client.evalsha(script_sha, 1, key, limit, interval) + return int(requests_counter) + + +def below_rate_limit(engine_name): + engine = engines[engine_name] + max_requests = engine.rate_limit['max_requests'] + interval = engine.rate_limit['interval'] + + if max_requests == float('inf'): + return True + if max_requests >= check_rate_limiter(engine_name, max_requests, interval): + return True + logger.debug(f"{engine_name} exceeded rate limit of {max_requests} requests per {interval} seconds") # pylint: disable=undefined-variable + return False + + +def pre_search(_, search): + allowed_engines = list(filter(lambda e: below_rate_limit(e.name), search.search_query.engineref_list)) + search.search_query.engineref_list = allowed_engines + return bool(allowed_engines) + + +def init(*args, **kwargs): # pylint: disable=unused-argument + logger.debug("init engine rate limiter DB") # pylint: disable=undefined-variable + if not redisdb.init(): + logger.error("init engine rate limiter DB failed!!!") # pylint: disable=undefined-variable + return False + return True