From 93982f6d8e2730b1ee4853c6cdcd0d44895cba6d Mon Sep 17 00:00:00 2001
From: Marc Abonce Seguin <marc-abonce@mailbox.org>
Date: Sun, 20 Mar 2022 19:22:44 -0600
Subject: [PATCH] add rate limiting per engine

---
 docs/admin/engines/settings.rst      | 12 +++++
 searx/engines/__init__.py            |  6 +++
 searx/plugins/engine_rate_limiter.py | 79 ++++++++++++++++++++++++++++
 3 files changed, 97 insertions(+)
 create mode 100644 searx/plugins/engine_rate_limiter.py
diff --git a/docs/admin/engines/settings.rst b/docs/admin/engines/settings.rst
index 85feddd6a..6a5e6b4db 100644
--- a/docs/admin/engines/settings.rst
+++ b/docs/admin/engines/settings.rst
@@ -422,6 +422,9 @@ engine is shown.  Most of the options have a default value or even are optional.
      max_connections: 100
      max_keepalive_connections: 10
      keepalive_expiry: 5.0
+     rate_limit:
+       max_requests: 200
+       interval: 60
      proxies:
        http:
          - http://proxy1:8080
@@ -487,6 +490,15 @@ engine is shown.  Most of the options have a default value or even are optional.
   - ``ipv4`` set ``local_addresses`` to ``0.0.0.0`` (use only IPv4 local addresses)
   - ``ipv6`` set ``local_addresses`` to ``::`` (use only IPv6 local addresses)
 
+``rate_limit``: optional
+  Limit how many outgoing requests SearXNG sends to this engine.
+  Requires :ref:`Redis <settings redis>`.
+
+  - ``max_requests`` is the maximum number of requests that will be sent to this
+    engine per interval.
+  - ``interval`` (optional) is the number of seconds before this engine's rate
+    limiter is reset. Defaults to 1 second if unspecified.
+
 .. note::
 
    A few more options are possible, but they are pretty specific to some
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index ae132f48d..24f9087f0 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -44,6 +44,7 @@ ENGINE_DEFAULT_ARGS = {
     "enable_http": False,
     "using_tor_proxy": False,
     "display_error_messages": True,
+    "rate_limit": { "max_requests": float('inf'), "interval": 1 },
     "tokens": [],
     "about": {},
 }
@@ -168,6 +169,11 @@ def update_engine_attributes(engine: Engine, engine_data):
             engine.categories = param_value
         elif hasattr(engine, 'about') and param_name == 'about':
             engine.about = {**engine.about, **engine_data['about']}
+        elif hasattr(engine, 'rate_limit') and param_name == 'rate_limit':
+            engine.rate_limit = {
+                'max_requests': int(param_value.get('max_requests')),
+                'interval': int(param_value.get('interval', 1))
+            }
         else:
             setattr(engine, param_name, param_value)
 
diff --git a/searx/plugins/engine_rate_limiter.py b/searx/plugins/engine_rate_limiter.py
new file mode 100644
index 000000000..cff1f688a
--- /dev/null
+++ b/searx/plugins/engine_rate_limiter.py
@@ -0,0 +1,79 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+# pyright: basic
+"""Rate limit outgoing requests per engine
+
+Enable in ``settings.yml``:
+
+- ``redis.url: ...`` check the value, see :ref:`settings redis`
+- ``rate_limit: ...`` per-engine option holding the two keys below
+- ``max_requests: ...`` max number of requests for that engine per interval
+- ``interval: ...`` number of seconds before rate limiting resets (optional, by default 1 second)
+"""
+
+import hmac
+
+from searx import settings
+from searx.engines import engines
+from searx.shared import redisdb
+
+name = "Engine rate limiter"  # plugin metadata; presumably read by the plugin loader — verify
+description = "Limit the number of outgoing requests per engine"
+default_on = True  # NOTE(review): on by default, although init() returns False when redisdb.init() fails
+preference_section = 'service'
+
+
+def check_rate_limiter(engine_name, limit, interval):
+    """Count one outgoing request for ``engine_name`` and return the counter.
+
+    The counter lives in Redis under an HMAC of the engine name and expires
+    ``interval`` seconds after its first increment.  Once it exceeds ``limit``
+    it is returned unchanged, so a request is allowed iff the result <= limit.
+    """
+    redis_client = redisdb.client()
+    # GET replies and ARGV are strings in Lua: convert them with tonumber(),
+    # otherwise ``>`` compares lexicographically ("3" > "200" is true).
+    lua_script = """
+    local limit = tonumber(ARGV[1])
+    local count = redis.call('GET', KEYS[1])
+    if count and tonumber(count) > limit then
+        return tonumber(count)
+    end
+    local new_count = redis.call('INCR', KEYS[1])
+    if new_count == 1 then
+        redis.call('EXPIRE', KEYS[1], ARGV[2])
+    end
+    return new_count
+    """
+    script_sha = redis_client.script_load(lua_script)
+    secret_key = bytes(settings['server']['secret_key'], encoding='utf-8')
+    key = hmac.new(secret_key, bytes(engine_name, encoding='utf-8'), digestmod='sha256').digest()
+    requests_counter = redis_client.evalsha(script_sha, 1, key, limit, interval)
+    return int(requests_counter)
+
+
+def below_rate_limit(engine_name):
+    """Tell whether ``engine_name`` is still below its rate limit (this counts one request)."""
+    rate_limit = engines[engine_name].rate_limit
+    max_requests, interval = rate_limit['max_requests'], rate_limit['interval']
+    # an infinite max_requests means "no limit": answer without calling check_rate_limiter
+    if max_requests == float('inf'):
+        return True
+    if check_rate_limiter(engine_name, max_requests, interval) <= max_requests:
+        return True
+    logger.debug(f"{engine_name} exceeded rate limit of {max_requests} requests per {interval} seconds")  # pylint: disable=undefined-variable
+    return False
+
+
+def pre_search(_, search):
+    query = search.search_query  # only engines below their rate limit stay in the query
+    query.engineref_list = allowed = [ref for ref in query.engineref_list if below_rate_limit(ref.name)]
+    return bool(allowed)  # False when every requested engine has been filtered out
+
+
+def init(*args, **kwargs):  # pylint: disable=unused-argument
+    logger.debug("init engine rate limiter DB")  # pylint: disable=undefined-variable
+    if redisdb.init():
+        return True  # redisdb.init() succeeded
+    logger.error("init engine rate limiter DB failed!!!")  # pylint: disable=undefined-variable
+    return False