mirror of https://github.com/searxng/searxng.git
Merge pull request #2357 / limiter -> botdetection
The monolithic implementation of the limiter was divided into methods and implemented in the Python package searx.botdetection. Detailed documentation on the methods has been added. The methods are divided into two groups: 1. Probe HTTP headers - Method http_accept - Method http_accept_encoding - Method http_accept_language - Method http_connection - Method http_user_agent 2. Rate limit: - Method ip_limit - Method link_token (new) The (reduced) implementation of the limiter is now in the module searx.botdetection.limiter. The first group was transferred unchanged to this module. The ip_limit contains the sliding windows implemented by the limiter so far. This merge also fixes some long outstanding issues: - limiter does not evaluate the Accept-Language header correctly [1] - limiter needs an IPv6 prefix to block networks instead of IPs [2] Without additional configuration the limiter works as before (apart from the bugfixes). For the commissioning of additional methods (link_token), a configuration must be made in an additional configuration file. Without this configuration, the limiter runs as before (zero configuration). The ip_limit method implements the sliding windows of the vanilla limiter; additionally the link_token method can be used in this method. The link_token method can be used to investigate whether a request is suspicious. To activate the link_token method in the ip_limit method add the following to your /etc/searxng/limiter.toml:: [botdetection.ip_limit] link_token = true [1] https://github.com/searxng/searxng/issues/2455 [2] https://github.com/searxng/searxng/issues/2477
This commit is contained in:
commit
80aaef6c95
|
@ -235,7 +235,7 @@ Global Settings
|
||||||
|
|
||||||
``limiter`` :
|
``limiter`` :
|
||||||
Rate limit the number of request on the instance, block some bots. The
|
Rate limit the number of request on the instance, block some bots. The
|
||||||
:ref:`limiter plugin` requires a :ref:`settings redis` database.
|
:ref:`limiter src` requires a :ref:`settings redis` database.
|
||||||
|
|
||||||
.. _image_proxy:
|
.. _image_proxy:
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,45 @@
|
||||||
|
.. _botdetection:
|
||||||
|
|
||||||
|
=============
|
||||||
|
Bot Detection
|
||||||
|
=============
|
||||||
|
|
||||||
|
.. contents:: Contents
|
||||||
|
:depth: 2
|
||||||
|
:local:
|
||||||
|
:backlinks: entry
|
||||||
|
|
||||||
|
.. automodule:: searx.botdetection
|
||||||
|
:members:
|
||||||
|
|
||||||
|
.. automodule:: searx.botdetection.limiter
|
||||||
|
:members:
|
||||||
|
|
||||||
|
|
||||||
|
Rate limit
|
||||||
|
==========
|
||||||
|
|
||||||
|
.. automodule:: searx.botdetection.ip_limit
|
||||||
|
:members:
|
||||||
|
|
||||||
|
.. automodule:: searx.botdetection.link_token
|
||||||
|
:members:
|
||||||
|
|
||||||
|
|
||||||
|
Probe HTTP headers
|
||||||
|
==================
|
||||||
|
|
||||||
|
.. automodule:: searx.botdetection.http_accept
|
||||||
|
:members:
|
||||||
|
|
||||||
|
.. automodule:: searx.botdetection.http_accept_encoding
|
||||||
|
:members:
|
||||||
|
|
||||||
|
.. automodule:: searx.botdetection.http_accept_language
|
||||||
|
:members:
|
||||||
|
|
||||||
|
.. automodule:: searx.botdetection.http_connection
|
||||||
|
:members:
|
||||||
|
|
||||||
|
.. automodule:: searx.botdetection.http_user_agent
|
||||||
|
:members:
|
|
@ -1,13 +0,0 @@
|
||||||
.. _limiter plugin:
|
|
||||||
|
|
||||||
==============
|
|
||||||
Limiter Plugin
|
|
||||||
==============
|
|
||||||
|
|
||||||
.. sidebar:: info
|
|
||||||
|
|
||||||
The :ref:`limiter plugin` requires a :ref:`Redis <settings redis>` database.
|
|
||||||
|
|
||||||
.. automodule:: searx.plugins.limiter
|
|
||||||
:members:
|
|
||||||
|
|
|
@ -16,3 +16,4 @@ redis==4.5.5
|
||||||
markdown-it-py==2.2.0
|
markdown-it-py==2.2.0
|
||||||
typing_extensions==4.6.3
|
typing_extensions==4.6.3
|
||||||
fasttext-predict==0.9.2.1
|
fasttext-predict==0.9.2.1
|
||||||
|
pytomlpp==1.0.13
|
||||||
|
|
|
@ -0,0 +1,27 @@
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
""".. _botdetection src:
|
||||||
|
|
||||||
|
X-Forwarded-For
|
||||||
|
===============
|
||||||
|
|
||||||
|
.. attention::
|
||||||
|
|
||||||
|
A correct setup of the HTTP request headers ``X-Forwarded-For`` and
|
||||||
|
``X-Real-IP`` is essential to be able to assign a request to an IP correctly:
|
||||||
|
|
||||||
|
- `NGINX RequestHeader`_
|
||||||
|
- `Apache RequestHeader`_
|
||||||
|
|
||||||
|
.. _NGINX RequestHeader:
|
||||||
|
https://docs.searxng.org/admin/installation-nginx.html#nginx-s-searxng-site
|
||||||
|
.. _Apache RequestHeader:
|
||||||
|
https://docs.searxng.org/admin/installation-apache.html#apache-s-searxng-site
|
||||||
|
|
||||||
|
.. autofunction:: searx.botdetection.get_real_ip
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from ._helpers import dump_request
|
||||||
|
from ._helpers import get_real_ip
|
||||||
|
from ._helpers import too_many_requests
|
|
@ -0,0 +1,121 @@
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
# pylint: disable=missing-module-docstring, invalid-name
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from ipaddress import (
|
||||||
|
IPv4Network,
|
||||||
|
IPv6Network,
|
||||||
|
IPv6Address,
|
||||||
|
ip_address,
|
||||||
|
ip_network,
|
||||||
|
)
|
||||||
|
import flask
|
||||||
|
import werkzeug
|
||||||
|
|
||||||
|
from searx.tools import config
|
||||||
|
from searx import logger
|
||||||
|
|
||||||
|
logger = logger.getChild('botdetection')
|
||||||
|
|
||||||
|
|
||||||
|
def dump_request(request: flask.Request):
    """Returns a one-line string with the request's path, form data and the
    HTTP headers relevant for bot detection (intended for log messages)."""
    parts = [request.path]
    parts.append("X-Forwarded-For: %s" % request.headers.get('X-Forwarded-For'))
    parts.append("X-Real-IP: %s" % request.headers.get('X-Real-IP'))
    parts.append("form: %s" % request.form)
    # remaining fields are plain header lookups, rendered in a fixed order
    for field in (
        'Accept',
        'Accept-Language',
        'Accept-Encoding',
        'Content-Type',
        'Content-Length',
        'Connection',
        'User-Agent',
    ):
        parts.append("%s: %s" % (field, request.headers.get(field)))
    return " || ".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
def too_many_requests(network: IPv4Network | IPv6Network, log_msg: str) -> werkzeug.Response | None:
    """Returns a HTTP 429 (``Too Many Requests``) response object and logs the
    blocked network via the 'botdetection' logger (debug level).  Used by the
    filter methods to build the default ``Too Many Requests`` response.

    """
    response = flask.make_response(('Too Many Requests', 429))
    logger.debug("BLOCK %s: %s", network.compressed, log_msg)
    return response
|
||||||
|
|
||||||
|
|
||||||
|
def get_network(real_ip: str, cfg: config.Config) -> IPv4Network | IPv6Network:
    """Returns the (client) network the given ``real_ip`` belongs to.

    The prefix length is taken from option ``real_ip.ipv6_prefix`` for IPv6
    addresses and from ``real_ip.ipv4_prefix`` otherwise.
    """
    if isinstance(ip_address(real_ip), IPv6Address):
        prefix_opt = 'real_ip.ipv6_prefix'
    else:
        prefix_opt = 'real_ip.ipv4_prefix'
    return ip_network(f"{real_ip}/{cfg[prefix_opt]}", strict=False)
|
||||||
|
|
||||||
|
|
||||||
|
def get_real_ip(request: flask.Request) -> str:
    """Returns real IP of the request. Since not all proxies set all the HTTP
    headers and incoming headers can be faked it may happen that the IP cannot
    be determined correctly.

    .. sidebar:: :py:obj:`flask.Request.remote_addr`

       SearXNG uses Werkzeug's ProxyFix_ (with its default ``x_for=1``).

    This function tries to get the remote IP in the order listed below,
    additional some tests are done and if inconsistencies or errors are
    detected, they are logged.

    The remote IP of the request is taken from (first match):

    - X-Forwarded-For_ header
    - `X-real-IP header <https://github.com/searxng/searxng/issues/1237#issuecomment-1147564516>`__
    - :py:obj:`flask.Request.remote_addr`

    .. _ProxyFix:
       https://werkzeug.palletsprojects.com/middleware/proxy_fix/

    .. _X-Forwarded-For:
       https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For

    """

    forwarded_for = request.headers.get("X-Forwarded-For")
    real_ip = request.headers.get('X-Real-IP')
    remote_addr = request.remote_addr
    # logger.debug(
    #     "X-Forwarded-For: %s || X-Real-IP: %s || request.remote_addr: %s", forwarded_for, real_ip, remote_addr
    # )

    if not forwarded_for:
        logger.error("X-Forwarded-For header is not set!")
    else:
        # imported lazily to avoid a circular import with .limiter
        from .limiter import get_cfg  # pylint: disable=import-outside-toplevel, cyclic-import

        forwarded_for = [x.strip() for x in forwarded_for.split(',')]
        # number of trusted proxy hops (option ``real_ip.x_for``)
        x_for: int = get_cfg()['real_ip.x_for']
        # take the entry ``x_for`` positions from the right: the rightmost
        # entries were appended by our own (trusted) proxies
        forwarded_for = forwarded_for[-min(len(forwarded_for), x_for)]

    if not real_ip:
        logger.error("X-Real-IP header is not set!")

    # cross-check the three sources; mismatches are only logged, the
    # precedence below is applied regardless
    if forwarded_for and real_ip and forwarded_for != real_ip:
        logger.warning("IP from X-Real-IP (%s) is not equal to IP from X-Forwarded-For (%s)", real_ip, forwarded_for)

    if forwarded_for and remote_addr and forwarded_for != remote_addr:
        logger.warning(
            "IP from WSGI environment (%s) is not equal to IP from X-Forwarded-For (%s)", remote_addr, forwarded_for
        )

    if real_ip and remote_addr and real_ip != remote_addr:
        logger.warning("IP from WSGI environment (%s) is not equal to IP from X-Real-IP (%s)", remote_addr, real_ip)

    # first match wins; fall back to the null address if nothing is available
    request_ip = forwarded_for or real_ip or remote_addr or '0.0.0.0'
    # logger.debug("get_real_ip() -> %s", request_ip)
    return request_ip
|
|
@ -0,0 +1,39 @@
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""
|
||||||
|
Method ``http_accept``
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
The ``http_accept`` method evaluates a request as the request of a bot if the
|
||||||
|
Accept_ header ..
|
||||||
|
|
||||||
|
- did not contain ``text/html``
|
||||||
|
|
||||||
|
.. _Accept:
|
||||||
|
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept
|
||||||
|
|
||||||
|
"""
|
||||||
|
# pylint: disable=unused-argument
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
from ipaddress import (
|
||||||
|
IPv4Network,
|
||||||
|
IPv6Network,
|
||||||
|
)
|
||||||
|
|
||||||
|
import flask
|
||||||
|
import werkzeug
|
||||||
|
|
||||||
|
from searx.tools import config
|
||||||
|
from ._helpers import too_many_requests
|
||||||
|
|
||||||
|
|
||||||
|
def filter_request(
    network: IPv4Network | IPv6Network,
    request: flask.Request,
    cfg: config.Config,
) -> werkzeug.Response | None:
    """Returns a 429 response when the Accept_ header does not accept
    ``text/html``, otherwise ``None``.

    .. _Accept:
       https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept
    """
    if 'text/html' in request.accept_mimetypes:
        return None
    return too_many_requests(network, "HTTP header Accept did not contain text/html")
|
|
@ -0,0 +1,41 @@
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""
|
||||||
|
Method ``http_accept_encoding``
|
||||||
|
-------------------------------
|
||||||
|
|
||||||
|
The ``http_accept_encoding`` method evaluates a request as the request of a
|
||||||
|
bot if the Accept-Encoding_ header ..
|
||||||
|
|
||||||
|
- did not contain ``gzip`` AND ``deflate`` (if both values are missed)
|
||||||
|
- did not contain ``text/html``
|
||||||
|
|
||||||
|
.. _Accept-Encoding:
|
||||||
|
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding
|
||||||
|
|
||||||
|
"""
|
||||||
|
# pylint: disable=unused-argument
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
from ipaddress import (
|
||||||
|
IPv4Network,
|
||||||
|
IPv6Network,
|
||||||
|
)
|
||||||
|
|
||||||
|
import flask
|
||||||
|
import werkzeug
|
||||||
|
|
||||||
|
from searx.tools import config
|
||||||
|
from ._helpers import too_many_requests
|
||||||
|
|
||||||
|
|
||||||
|
def filter_request(
    network: IPv4Network | IPv6Network,
    request: flask.Request,
    cfg: config.Config,
) -> werkzeug.Response | None:
    """Returns a 429 response when the Accept-Encoding_ header lists neither
    ``gzip`` nor ``deflate``, otherwise ``None``.

    .. _Accept-Encoding:
       https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding
    """
    encodings = {value.strip() for value in request.headers.get('Accept-Encoding', '').split(',')}
    if encodings.intersection({'gzip', 'deflate'}):
        return None
    return too_many_requests(network, "HTTP header Accept-Encoding did not contain gzip nor deflate")
|
|
@ -0,0 +1,35 @@
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""
|
||||||
|
Method ``http_accept_language``
|
||||||
|
-------------------------------
|
||||||
|
|
||||||
|
The ``http_accept_language`` method evaluates a request as the request of a bot
|
||||||
|
if the Accept-Language_ header is unset.
|
||||||
|
|
||||||
|
.. _Accept-Language:
|
||||||
|
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language
|
||||||
|
|
||||||
|
"""
|
||||||
|
# pylint: disable=unused-argument
|
||||||
|
from __future__ import annotations
|
||||||
|
from ipaddress import (
|
||||||
|
IPv4Network,
|
||||||
|
IPv6Network,
|
||||||
|
)
|
||||||
|
|
||||||
|
import flask
|
||||||
|
import werkzeug
|
||||||
|
|
||||||
|
from searx.tools import config
|
||||||
|
from ._helpers import too_many_requests
|
||||||
|
|
||||||
|
|
||||||
|
def filter_request(
    network: IPv4Network | IPv6Network,
    request: flask.Request,
    cfg: config.Config,
) -> werkzeug.Response | None:
    """Returns a 429 response when the Accept-Language_ header is unset or
    empty, otherwise ``None``.

    .. _Accept-Language:
       https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language
    """
    accept_language = request.headers.get('Accept-Language', '').strip()
    if accept_language == '':
        return too_many_requests(network, "missing HTTP header Accept-Language")
    return None
|
|
@ -0,0 +1,37 @@
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""
|
||||||
|
Method ``http_connection``
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
The ``http_connection`` method evaluates a request as the request of a bot if
|
||||||
|
the Connection_ header is set to ``close``.
|
||||||
|
|
||||||
|
.. _Connection:
|
||||||
|
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Connection
|
||||||
|
|
||||||
|
"""
|
||||||
|
# pylint: disable=unused-argument
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
from ipaddress import (
|
||||||
|
IPv4Network,
|
||||||
|
IPv6Network,
|
||||||
|
)
|
||||||
|
|
||||||
|
import flask
|
||||||
|
import werkzeug
|
||||||
|
|
||||||
|
from searx.tools import config
|
||||||
|
from ._helpers import too_many_requests
|
||||||
|
|
||||||
|
|
||||||
|
def filter_request(
    network: IPv4Network | IPv6Network,
    request: flask.Request,
    cfg: config.Config,
) -> werkzeug.Response | None:
    """Evaluates the request as a bot request when the Connection_ header is
    set to ``close``.

    :param network: (client) network the request was made from
    :param request: the request to filter
    :param cfg: limiter configuration (unused by this method)
    :returns: a 429 response when the request is rated as a bot request,
        otherwise ``None``

    .. _Connection:
       https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Connection
    """
    if request.headers.get('Connection', '').strip() == 'close':
        # BUGFIX: the log message contained a stray single quote
        # ("HTTP header 'Connection=close")
        return too_many_requests(network, "HTTP header Connection=close")
    return None
|
|
@ -0,0 +1,67 @@
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""
|
||||||
|
Method ``http_user_agent``
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
The ``http_user_agent`` method evaluates a request as the request of a bot if
|
||||||
|
the User-Agent_ header is unset or matches the regular expression
|
||||||
|
:py:obj:`USER_AGENT`.
|
||||||
|
|
||||||
|
.. _User-Agent:
|
||||||
|
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent
|
||||||
|
|
||||||
|
"""
|
||||||
|
# pylint: disable=unused-argument
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
import re
|
||||||
|
from ipaddress import (
|
||||||
|
IPv4Network,
|
||||||
|
IPv6Network,
|
||||||
|
)
|
||||||
|
|
||||||
|
import flask
|
||||||
|
import werkzeug
|
||||||
|
|
||||||
|
from searx.tools import config
|
||||||
|
from ._helpers import too_many_requests
|
||||||
|
|
||||||
|
|
||||||
|
USER_AGENT = (
|
||||||
|
r'('
|
||||||
|
+ r'unknown'
|
||||||
|
+ r'|[Cc][Uu][Rr][Ll]|[wW]get|Scrapy|splash|JavaFX|FeedFetcher|python-requests|Go-http-client|Java|Jakarta|okhttp'
|
||||||
|
+ r'|HttpClient|Jersey|Python|libwww-perl|Ruby|SynHttpClient|UniversalFeedParser|Googlebot|GoogleImageProxy'
|
||||||
|
+ r'|bingbot|Baiduspider|yacybot|YandexMobileBot|YandexBot|Yahoo! Slurp|MJ12bot|AhrefsBot|archive.org_bot|msnbot'
|
||||||
|
+ r'|MJ12bot|SeznamBot|linkdexbot|Netvibes|SMTBot|zgrab|James BOT|Sogou|Abonti|Pixray|Spinn3r|SemrushBot|Exabot'
|
||||||
|
+ r'|ZmEu|BLEXBot|bitlybot'
|
||||||
|
# unmaintained Farside instances
|
||||||
|
+ r'|'
|
||||||
|
+ re.escape(r'Mozilla/5.0 (compatible; Farside/0.1.0; +https://farside.link)')
|
||||||
|
# other bots and client to block
|
||||||
|
+ '|.*PetalBot.*'
|
||||||
|
+ r')'
|
||||||
|
)
|
||||||
|
"""Regular expression that matches to User-Agent_ from known *bots*"""
|
||||||
|
|
||||||
|
_regexp = None
|
||||||
|
|
||||||
|
|
||||||
|
def regexp_user_agent():
    """Returns the compiled regular expression for :py:obj:`USER_AGENT`.

    The pattern is compiled lazily on first use and cached in a module-level
    variable, so repeated calls are cheap.
    """
    global _regexp  # pylint: disable=global-statement
    if _regexp is None:
        _regexp = re.compile(USER_AGENT)
    return _regexp
|
||||||
|
|
||||||
|
|
||||||
|
def filter_request(
    network: IPv4Network | IPv6Network,
    request: flask.Request,
    cfg: config.Config,
) -> werkzeug.Response | None:
    """Returns a 429 response when the User-Agent_ header matches the
    :py:obj:`USER_AGENT` bot pattern (an unset header counts as ``unknown``
    and matches), otherwise ``None``.

    .. _User-Agent:
       https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent
    """
    user_agent = request.headers.get('User-Agent', 'unknown')
    if not regexp_user_agent().match(user_agent):
        return None
    return too_many_requests(network, f"bot detected, HTTP header User-Agent: {user_agent}")
|
|
@ -0,0 +1,146 @@
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
""".. _botdetection.ip_limit:
|
||||||
|
|
||||||
|
Method ``ip_limit``
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
The ``ip_limit`` method counts request from an IP in *sliding windows*. If
|
||||||
|
there are too many requests in a sliding window, the request is evaluated as a
|
||||||
|
bot request. This method requires a redis DB and needs a HTTP X-Forwarded-For_
|
||||||
|
header. To preserve privacy, only the hash value of an IP is stored in the redis DB
|
||||||
|
and at least for a maximum of 10 minutes.
|
||||||
|
|
||||||
|
The :py:obj:`.link_token` method can be used to investigate whether a request is
|
||||||
|
*suspicious*. To activate the :py:obj:`.link_token` method in the
|
||||||
|
:py:obj:`.ip_limit` method add the following to your
|
||||||
|
``/etc/searxng/limiter.toml``:
|
||||||
|
|
||||||
|
.. code:: toml
|
||||||
|
|
||||||
|
[botdetection.ip_limit]
|
||||||
|
link_token = true
|
||||||
|
|
||||||
|
If the :py:obj:`.link_token` method is activated and a request is *suspicious*
|
||||||
|
the request rates are reduced:
|
||||||
|
|
||||||
|
- :py:obj:`BURST_MAX` -> :py:obj:`BURST_MAX_SUSPICIOUS`
|
||||||
|
- :py:obj:`LONG_MAX` -> :py:obj:`LONG_MAX_SUSPICIOUS`
|
||||||
|
|
||||||
|
To intercept bots that get their IPs from a range of IPs, there is a
|
||||||
|
:py:obj:`SUSPICIOUS_IP_WINDOW`. In this window the suspicious IPs are stored
|
||||||
|
for a longer time. IPs stored in this sliding window have a maximum of
|
||||||
|
:py:obj:`SUSPICIOUS_IP_MAX` accesses before they are blocked. As soon as the IP
|
||||||
|
makes a request that is not suspicious, the sliding window for this IP is
|
||||||
|
dropped.
|
||||||
|
|
||||||
|
.. _X-Forwarded-For:
|
||||||
|
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
|
||||||
|
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
from ipaddress import (
|
||||||
|
IPv4Network,
|
||||||
|
IPv6Network,
|
||||||
|
)
|
||||||
|
|
||||||
|
import flask
|
||||||
|
import werkzeug
|
||||||
|
from searx.tools import config
|
||||||
|
|
||||||
|
from searx import redisdb
|
||||||
|
from searx import logger
|
||||||
|
from searx.redislib import incr_sliding_window, drop_counter
|
||||||
|
|
||||||
|
from . import link_token
|
||||||
|
from ._helpers import too_many_requests
|
||||||
|
|
||||||
|
|
||||||
|
logger = logger.getChild('botdetection.ip_limit')
|
||||||
|
|
||||||
|
BURST_WINDOW = 20
|
||||||
|
"""Time (sec) before sliding window for *burst* requests expires."""
|
||||||
|
|
||||||
|
BURST_MAX = 15
|
||||||
|
"""Maximum requests from one IP in the :py:obj:`BURST_WINDOW`"""
|
||||||
|
|
||||||
|
BURST_MAX_SUSPICIOUS = 2
|
||||||
|
"""Maximum of suspicious requests from one IP in the :py:obj:`BURST_WINDOW`"""
|
||||||
|
|
||||||
|
LONG_WINDOW = 600
|
||||||
|
"""Time (sec) before the longer sliding window expires."""
|
||||||
|
|
||||||
|
LONG_MAX = 150
|
||||||
|
"""Maximum requests from one IP in the :py:obj:`LONG_WINDOW`"""
|
||||||
|
|
||||||
|
LONG_MAX_SUSPICIOUS = 10
|
||||||
|
"""Maximum suspicious requests from one IP in the :py:obj:`LONG_WINDOW`"""
|
||||||
|
|
||||||
|
API_WONDOW = 3600
|
||||||
|
"""Time (sec) before sliding window for API requests (format != html) expires."""
|
||||||
|
|
||||||
|
API_MAX = 4
|
||||||
|
"""Maximum requests from one IP in the :py:obj:`API_WONDOW`"""
|
||||||
|
|
||||||
|
SUSPICIOUS_IP_WINDOW = 3600 * 24 * 30
|
||||||
|
"""Time (sec) before sliding window for one suspicious IP expires."""
|
||||||
|
|
||||||
|
SUSPICIOUS_IP_MAX = 3
|
||||||
|
"""Maximum requests from one suspicious IP in the :py:obj:`SUSPICIOUS_IP_WINDOW`."""
|
||||||
|
|
||||||
|
|
||||||
|
def filter_request(
    network: IPv4Network | IPv6Network,
    request: flask.Request,
    cfg: config.Config,
) -> werkzeug.Response | None:
    """Rate limits the request by counting requests from the (client) network
    in redis-backed sliding windows.  Returns a response object to send back
    (a 429 or, for heavily suspicious IPs, a redirect to ``/``) when a limit
    is exceeded, otherwise ``None``.
    """
    # pylint: disable=too-many-return-statements
    # NOTE(review): redisdb.client() may be unset when no redis DB is
    # configured, yet incr_sliding_window is called unconditionally below --
    # confirm callers guarantee a configured redis DB.
    redis_client = redisdb.client()

    # link-local (client) networks are not monitored unless explicitly enabled
    if network.is_link_local and not cfg['botdetection.ip_limit.filter_link_local']:
        logger.debug("network %s is link-local -> not monitored by ip_limit method", network.compressed)
        return None

    # non-HTML (API) requests get their own, much stricter window
    if request.args.get('format', 'html') != 'html':
        # NOTE(review): key/constant name 'API_WONDOW' carries a typo
        # ('WINDOW'); renaming would invalidate existing redis keys -- TODO
        # confirm before fixing.
        c = incr_sliding_window(redis_client, 'ip_limit.API_WONDOW:' + network.compressed, API_WONDOW)
        if c > API_MAX:
            return too_many_requests(network, "too many request in API_WINDOW")

    if cfg['botdetection.ip_limit.link_token']:

        # link_token decides whether this request looks suspicious; renew=True
        # refreshes the client's ping on a valid (non-suspicious) request
        suspicious = link_token.is_suspicious(network, request, True)

        if not suspicious:
            # this IP is no longer suspicious: release ip again / delete the counter of this IP
            # NOTE(review): unlike the API key above, the keys below have no
            # ':' separator before the network -- looks inconsistent; changing
            # it would reset live counters, confirm before fixing.
            drop_counter(redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + network.compressed)
            return None

        # this IP is suspicious: count requests from this IP
        c = incr_sliding_window(
            redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + network.compressed, SUSPICIOUS_IP_WINDOW
        )
        if c > SUSPICIOUS_IP_MAX:
            logger.error("BLOCK: too many request from %s in SUSPICIOUS_IP_WINDOW (redirect to /)", network)
            return flask.redirect(flask.url_for('index'), code=302)

        # suspicious requests get the reduced burst/long limits
        c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + network.compressed, BURST_WINDOW)
        if c > BURST_MAX_SUSPICIOUS:
            return too_many_requests(network, "too many request in BURST_WINDOW (BURST_MAX_SUSPICIOUS)")

        c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + network.compressed, LONG_WINDOW)
        if c > LONG_MAX_SUSPICIOUS:
            return too_many_requests(network, "too many request in LONG_WINDOW (LONG_MAX_SUSPICIOUS)")

        return None

    # vanilla limiter without extensions counts BURST_MAX and LONG_MAX
    c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + network.compressed, BURST_WINDOW)
    if c > BURST_MAX:
        return too_many_requests(network, "too many request in BURST_WINDOW (BURST_MAX)")

    c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + network.compressed, LONG_WINDOW)
    if c > LONG_MAX:
        return too_many_requests(network, "too many request in LONG_WINDOW (LONG_MAX)")

    return None
|
|
@ -0,0 +1,118 @@
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
""".. _limiter src:
|
||||||
|
|
||||||
|
Limiter
|
||||||
|
=======
|
||||||
|
|
||||||
|
.. sidebar:: info
|
||||||
|
|
||||||
|
The limiter requires a :ref:`Redis <settings redis>` database.
|
||||||
|
|
||||||
|
Bot protection / IP rate limitation. The intention of rate limitation is to
|
||||||
|
limit suspicious requests from an IP. The motivation behind this is the fact
|
||||||
|
that SearXNG passes through requests from bots and is thus classified as a bot
|
||||||
|
itself. As a result, the SearXNG engine then receives a CAPTCHA or is blocked
|
||||||
|
by the search engine (the origin) in some other way.
|
||||||
|
|
||||||
|
To avoid blocking, the requests from bots to SearXNG must also be blocked, this
|
||||||
|
is the task of the limiter. To perform this task, the limiter uses the methods
|
||||||
|
from the :py:obj:`searx.botdetection`.
|
||||||
|
|
||||||
|
To enable the limiter activate:
|
||||||
|
|
||||||
|
.. code:: yaml
|
||||||
|
|
||||||
|
server:
|
||||||
|
...
|
||||||
|
limiter: true # rate limit the number of request on the instance, block some bots
|
||||||
|
|
||||||
|
and set the redis-url connection. Check the value, it depends on your redis DB
|
||||||
|
(see :ref:`settings redis`), by example:
|
||||||
|
|
||||||
|
.. code:: yaml
|
||||||
|
|
||||||
|
redis:
|
||||||
|
url: unix:///usr/local/searxng-redis/run/redis.sock?db=0
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
import flask
|
||||||
|
import werkzeug
|
||||||
|
|
||||||
|
from searx.tools import config
|
||||||
|
from searx import logger
|
||||||
|
|
||||||
|
from . import (
|
||||||
|
http_accept,
|
||||||
|
http_accept_encoding,
|
||||||
|
http_accept_language,
|
||||||
|
http_connection,
|
||||||
|
http_user_agent,
|
||||||
|
ip_limit,
|
||||||
|
)
|
||||||
|
|
||||||
|
from ._helpers import (
|
||||||
|
get_network,
|
||||||
|
get_real_ip,
|
||||||
|
dump_request,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logger.getChild('botdetection.limiter')
|
||||||
|
|
||||||
|
CFG: config.Config = None # type: ignore
|
||||||
|
|
||||||
|
LIMITER_CFG_SCHEMA = Path(__file__).parent / "limiter.toml"
|
||||||
|
"""Base configuration (schema) of the botdetection."""
|
||||||
|
|
||||||
|
LIMITER_CFG = Path('/etc/searxng/limiter.toml')
|
||||||
|
"""Lokal Limiter configuration."""
|
||||||
|
|
||||||
|
CFG_DEPRECATED = {
|
||||||
|
# "dummy.old.foo": "config 'dummy.old.foo' exists only for tests. Don't use it in your real project config."
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def get_cfg() -> config.Config:
    """Returns the limiter configuration.

    On first access the configuration is built from the base schema
    :py:obj:`LIMITER_CFG_SCHEMA` and the local :py:obj:`LIMITER_CFG`, then
    cached in the module-level ``CFG``.
    """
    global CFG  # pylint: disable=global-statement
    if CFG is not None:
        return CFG
    CFG = config.Config.from_toml(LIMITER_CFG_SCHEMA, LIMITER_CFG, CFG_DEPRECATED)
    return CFG
|
||||||
|
|
||||||
|
|
||||||
|
def filter_request(request: flask.Request) -> werkzeug.Response | None:
    """Runs the bot-detection methods on the request.

    :param request: the request to filter
    :returns: the response to send back (e.g. a 429) when one of the methods
        rated the request as a bot request, otherwise ``None``
    """
    cfg = get_cfg()
    real_ip = get_real_ip(request)
    network = get_network(real_ip, cfg)
    if network.is_link_local:
        return None

    # the health-check endpoint is never rate limited
    if request.path == '/healthz':
        return None

    # methods applied to every request
    for func in [
        http_user_agent,
    ]:
        val = func.filter_request(network, request, cfg)
        if val is not None:
            return val

    # methods applied to search requests only
    if request.path == '/search':

        for func in [
            http_accept,
            http_accept_encoding,
            http_accept_language,
            http_connection,
            http_user_agent,
            ip_limit,
        ]:
            val = func.filter_request(network, request, cfg)
            if val is not None:
                return val

    # BUGFIX: the original mixed an f-string with lazy %-arguments
    # (f"OK {network}: %s") and read the global flask.request proxy instead of
    # the request passed as argument.
    logger.debug("OK %s: %s", network, dump_request(request))
    return None
|
|
@ -0,0 +1,22 @@
|
||||||
|
[real_ip]
|
||||||
|
|
||||||
|
# Number of values to trust for X-Forwarded-For.
|
||||||
|
|
||||||
|
x_for = 1
|
||||||
|
|
||||||
|
# The prefix defines the number of leading bits in an address that are compared
|
||||||
|
# to determine whether or not an address is part of a (client) network.
|
||||||
|
|
||||||
|
ipv4_prefix = 32
|
||||||
|
ipv6_prefix = 48
|
||||||
|
|
||||||
|
[botdetection.ip_limit]
|
||||||
|
|
||||||
|
# To get unlimited access in a local network, by default link-local addresses
|
||||||
|
# (networks) are not monitored by the ip_limit
|
||||||
|
filter_link_local = false
|
||||||
|
|
||||||
|
# activate the link_token method in the ip_limit method
|
||||||
|
link_token = false
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,156 @@
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""
|
||||||
|
Method ``link_token``
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
The ``link_token`` method evaluates a request as :py:obj:`suspicious
|
||||||
|
<is_suspicious>` if the URL ``/client<token>.css`` is not requested by the
|
||||||
|
client. By adding a random component (the token) in the URL, a bot can not send
|
||||||
|
a ping by requesting a static URL.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
This method requires a redis DB and needs a HTTP X-Forwarded-For_ header.
|
||||||
|
|
||||||
|
To get in use of this method a flask URL route needs to be added:
|
||||||
|
|
||||||
|
.. code:: python
|
||||||
|
|
||||||
|
@app.route('/client<token>.css', methods=['GET', 'POST'])
|
||||||
|
def client_token(token=None):
|
||||||
|
link_token.ping(request, token)
|
||||||
|
return Response('', mimetype='text/css')
|
||||||
|
|
||||||
|
And in the HTML template from flask a stylesheet link is needed (the value of
|
||||||
|
``link_token`` comes from :py:obj:`get_token`):
|
||||||
|
|
||||||
|
.. code:: html
|
||||||
|
|
||||||
|
<link rel="stylesheet"
|
||||||
|
href="{{ url_for('client_token', token=link_token) }}"
|
||||||
|
type="text/css" />
|
||||||
|
|
||||||
|
.. _X-Forwarded-For:
|
||||||
|
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
|
||||||
|
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
from ipaddress import (
|
||||||
|
IPv4Network,
|
||||||
|
IPv6Network,
|
||||||
|
)
|
||||||
|
|
||||||
|
import string
|
||||||
|
import random
|
||||||
|
import flask
|
||||||
|
|
||||||
|
from searx import logger
|
||||||
|
from searx import redisdb
|
||||||
|
from searx.redislib import secret_hash
|
||||||
|
|
||||||
|
from ._helpers import (
|
||||||
|
get_network,
|
||||||
|
get_real_ip,
|
||||||
|
)
|
||||||
|
|
||||||
|
TOKEN_LIVE_TIME = 600
|
||||||
|
"""Livetime (sec) of limiter's CSS token."""
|
||||||
|
|
||||||
|
PING_LIVE_TIME = 3600
|
||||||
|
"""Livetime (sec) of the ping-key from a client (request)"""
|
||||||
|
|
||||||
|
PING_KEY = 'SearXNG_limiter.ping'
|
||||||
|
"""Prefix of all ping-keys generated by :py:obj:`get_ping_key`"""
|
||||||
|
|
||||||
|
TOKEN_KEY = 'SearXNG_limiter.token'
|
||||||
|
"""Key for which the current token is stored in the DB"""
|
||||||
|
|
||||||
|
logger = logger.getChild('botdetection.link_token')
|
||||||
|
|
||||||
|
|
||||||
|
def is_suspicious(network: IPv4Network | IPv6Network, request: flask.Request, renew: bool = False):
|
||||||
|
"""Checks whether a valid ping is exists for this (client) network, if not
|
||||||
|
this request is rated as *suspicious*. If a valid ping exists and argument
|
||||||
|
``renew`` is ``True`` the expire time of this ping is reset to
|
||||||
|
:py:obj:`PING_LIVE_TIME`.
|
||||||
|
|
||||||
|
"""
|
||||||
|
redis_client = redisdb.client()
|
||||||
|
if not redis_client:
|
||||||
|
return False
|
||||||
|
|
||||||
|
ping_key = get_ping_key(network, request)
|
||||||
|
if not redis_client.get(ping_key):
|
||||||
|
logger.warning("missing ping (IP: %s) / request: %s", network.compressed, ping_key)
|
||||||
|
return True
|
||||||
|
|
||||||
|
if renew:
|
||||||
|
redis_client.set(ping_key, 1, ex=PING_LIVE_TIME)
|
||||||
|
|
||||||
|
logger.debug("found ping for (client) network %s -> %s", network.compressed, ping_key)
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def ping(request: flask.Request, token: str):
|
||||||
|
"""This function is called by a request to URL ``/client<token>.css``. If
|
||||||
|
``token`` is valid a :py:obj:`PING_KEY` for the client is stored in the DB.
|
||||||
|
The expire time of this ping-key is :py:obj:`PING_LIVE_TIME`.
|
||||||
|
|
||||||
|
"""
|
||||||
|
from . import limiter # pylint: disable=import-outside-toplevel, cyclic-import
|
||||||
|
|
||||||
|
redis_client = redisdb.client()
|
||||||
|
if not redis_client:
|
||||||
|
return
|
||||||
|
if not token_is_valid(token):
|
||||||
|
return
|
||||||
|
|
||||||
|
cfg = limiter.get_cfg()
|
||||||
|
real_ip = get_real_ip(request)
|
||||||
|
network = get_network(real_ip, cfg)
|
||||||
|
|
||||||
|
ping_key = get_ping_key(network, request)
|
||||||
|
logger.debug("store ping_key for (client) network %s (IP %s) -> %s", network.compressed, real_ip, ping_key)
|
||||||
|
redis_client.set(ping_key, 1, ex=PING_LIVE_TIME)
|
||||||
|
|
||||||
|
|
||||||
|
def get_ping_key(network: IPv4Network | IPv6Network, request: flask.Request) -> str:
|
||||||
|
"""Generates a hashed key that fits (more or less) to a *WEB-browser
|
||||||
|
session* in a network."""
|
||||||
|
return (
|
||||||
|
PING_KEY
|
||||||
|
+ "["
|
||||||
|
+ secret_hash(
|
||||||
|
network.compressed + request.headers.get('Accept-Language', '') + request.headers.get('User-Agent', '')
|
||||||
|
)
|
||||||
|
+ "]"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def token_is_valid(token) -> bool:
|
||||||
|
valid = token == get_token()
|
||||||
|
logger.debug("token is valid --> %s", valid)
|
||||||
|
return valid
|
||||||
|
|
||||||
|
|
||||||
|
def get_token() -> str:
|
||||||
|
"""Returns current token. If there is no currently active token a new token
|
||||||
|
is generated randomly and stored in the redis DB.
|
||||||
|
|
||||||
|
- :py:obj:`TOKEN_LIVE_TIME`
|
||||||
|
- :py:obj:`TOKEN_KEY`
|
||||||
|
|
||||||
|
"""
|
||||||
|
redis_client = redisdb.client()
|
||||||
|
if not redis_client:
|
||||||
|
# This function is also called when limiter is inactive / no redis DB
|
||||||
|
# (see render function in webapp.py)
|
||||||
|
return '12345678'
|
||||||
|
token = redis_client.get(TOKEN_KEY)
|
||||||
|
if token:
|
||||||
|
token = token.decode('UTF-8')
|
||||||
|
else:
|
||||||
|
token = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(16))
|
||||||
|
redis_client.set(TOKEN_KEY, token, ex=TOKEN_LIVE_TIME)
|
||||||
|
return token
|
|
@ -1,119 +1,32 @@
|
||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
# lint: pylint
|
# lint: pylint
|
||||||
# pyright: basic
|
# pyright: basic
|
||||||
"""Some bot protection / rate limitation
|
"""see :ref:`limiter src`"""
|
||||||
|
|
||||||
To monitor rate limits and protect privacy the IP addresses are getting stored
|
import flask
|
||||||
with a hash so the limiter plugin knows who to block. A redis database is
|
|
||||||
needed to store the hash values.
|
|
||||||
|
|
||||||
Enable the plugin in ``settings.yml``:
|
|
||||||
|
|
||||||
- ``server.limiter: true``
|
|
||||||
- ``redis.url: ...`` check the value, see :ref:`settings redis`
|
|
||||||
"""
|
|
||||||
|
|
||||||
import re
|
|
||||||
from flask import request
|
|
||||||
|
|
||||||
from searx import redisdb
|
from searx import redisdb
|
||||||
from searx.plugins import logger
|
from searx.plugins import logger
|
||||||
from searx.redislib import incr_sliding_window
|
from searx.botdetection import limiter
|
||||||
|
|
||||||
name = "Request limiter"
|
name = "Request limiter"
|
||||||
description = "Limit the number of request"
|
description = "Limit the number of request"
|
||||||
default_on = False
|
default_on = False
|
||||||
preference_section = 'service'
|
preference_section = 'service'
|
||||||
|
|
||||||
logger = logger.getChild('limiter')
|
logger = logger.getChild('limiter')
|
||||||
|
|
||||||
block_user_agent = re.compile(
|
|
||||||
r'('
|
|
||||||
+ r'unknown'
|
|
||||||
+ r'|[Cc][Uu][Rr][Ll]|[wW]get|Scrapy|splash|JavaFX|FeedFetcher|python-requests|Go-http-client|Java|Jakarta|okhttp'
|
|
||||||
+ r'|HttpClient|Jersey|Python|libwww-perl|Ruby|SynHttpClient|UniversalFeedParser|Googlebot|GoogleImageProxy'
|
|
||||||
+ r'|bingbot|Baiduspider|yacybot|YandexMobileBot|YandexBot|Yahoo! Slurp|MJ12bot|AhrefsBot|archive.org_bot|msnbot'
|
|
||||||
+ r'|MJ12bot|SeznamBot|linkdexbot|Netvibes|SMTBot|zgrab|James BOT|Sogou|Abonti|Pixray|Spinn3r|SemrushBot|Exabot'
|
|
||||||
+ r'|ZmEu|BLEXBot|bitlybot'
|
|
||||||
# unmaintained Farside instances
|
|
||||||
+ r'|'
|
|
||||||
+ re.escape(r'Mozilla/5.0 (compatible; Farside/0.1.0; +https://farside.link)')
|
|
||||||
+ '|.*PetalBot.*'
|
|
||||||
+ r')'
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def is_accepted_request() -> bool:
|
|
||||||
# pylint: disable=too-many-return-statements
|
|
||||||
redis_client = redisdb.client()
|
|
||||||
user_agent = request.headers.get('User-Agent', 'unknown')
|
|
||||||
x_forwarded_for = request.headers.get('X-Forwarded-For', '')
|
|
||||||
|
|
||||||
if request.path == '/healthz':
|
|
||||||
return True
|
|
||||||
|
|
||||||
if block_user_agent.match(user_agent):
|
|
||||||
logger.debug("BLOCK %s: %s --> detected User-Agent: %s" % (x_forwarded_for, request.path, user_agent))
|
|
||||||
return False
|
|
||||||
|
|
||||||
if request.path == '/search':
|
|
||||||
|
|
||||||
c_burst = incr_sliding_window(redis_client, 'IP limit, burst' + x_forwarded_for, 20)
|
|
||||||
c_10min = incr_sliding_window(redis_client, 'IP limit, 10 minutes' + x_forwarded_for, 600)
|
|
||||||
if c_burst > 15 or c_10min > 150:
|
|
||||||
logger.debug("BLOCK %s: to many request", x_forwarded_for)
|
|
||||||
return False
|
|
||||||
|
|
||||||
if len(request.headers.get('Accept-Language', '').strip()) == '':
|
|
||||||
logger.debug("BLOCK %s: missing Accept-Language", x_forwarded_for)
|
|
||||||
return False
|
|
||||||
|
|
||||||
if request.headers.get('Connection') == 'close':
|
|
||||||
logger.debug("BLOCK %s: got Connection=close", x_forwarded_for)
|
|
||||||
return False
|
|
||||||
|
|
||||||
accept_encoding_list = [l.strip() for l in request.headers.get('Accept-Encoding', '').split(',')]
|
|
||||||
if 'gzip' not in accept_encoding_list and 'deflate' not in accept_encoding_list:
|
|
||||||
logger.debug("BLOCK %s: suspicious Accept-Encoding", x_forwarded_for)
|
|
||||||
return False
|
|
||||||
|
|
||||||
if 'text/html' not in request.accept_mimetypes:
|
|
||||||
logger.debug("BLOCK %s: Accept-Encoding misses text/html", x_forwarded_for)
|
|
||||||
return False
|
|
||||||
|
|
||||||
if request.args.get('format', 'html') != 'html':
|
|
||||||
c = incr_sliding_window(redis_client, 'API limit' + x_forwarded_for, 3600)
|
|
||||||
if c > 4:
|
|
||||||
logger.debug("BLOCK %s: API limit exceeded", x_forwarded_for)
|
|
||||||
return False
|
|
||||||
|
|
||||||
logger.debug(
|
|
||||||
"OK %s: '%s'" % (x_forwarded_for, request.path)
|
|
||||||
+ " || form: %s" % request.form
|
|
||||||
+ " || Accept: %s" % request.headers.get('Accept', '')
|
|
||||||
+ " || Accept-Language: %s" % request.headers.get('Accept-Language', '')
|
|
||||||
+ " || Accept-Encoding: %s" % request.headers.get('Accept-Encoding', '')
|
|
||||||
+ " || Content-Type: %s" % request.headers.get('Content-Type', '')
|
|
||||||
+ " || Content-Length: %s" % request.headers.get('Content-Length', '')
|
|
||||||
+ " || Connection: %s" % request.headers.get('Connection', '')
|
|
||||||
+ " || User-Agent: %s" % user_agent
|
|
||||||
)
|
|
||||||
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
def pre_request():
|
def pre_request():
|
||||||
if not is_accepted_request():
|
"""See :ref:`flask.Flask.before_request`"""
|
||||||
return 'Too Many Requests', 429
|
return limiter.filter_request(flask.request)
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def init(app, settings):
|
def init(app: flask.Flask, settings) -> bool:
|
||||||
if not settings['server']['limiter']:
|
if not settings['server']['limiter']:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if not redisdb.client():
|
if not redisdb.client():
|
||||||
logger.error("The limiter requires Redis") # pylint: disable=undefined-variable
|
logger.error("The limiter requires Redis")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
app.before_request(pre_request)
|
app.before_request(pre_request)
|
||||||
return True
|
return True
|
||||||
|
|
|
@ -1,21 +1,11 @@
|
||||||
'''
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
searx is free software: you can redistribute it and/or modify
|
# lint: pylint
|
||||||
it under the terms of the GNU Affero General Public License as published by
|
# pylint: disable=missing-module-docstring,invalid-name
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
searx is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU Affero General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU Affero General Public License
|
|
||||||
along with searx. If not, see < http://www.gnu.org/licenses/ >.
|
|
||||||
|
|
||||||
(C) 2015 by Adam Tauber, <asciimoo@gmail.com>
|
|
||||||
'''
|
|
||||||
from flask_babel import gettext
|
|
||||||
import re
|
import re
|
||||||
|
from flask_babel import gettext
|
||||||
|
|
||||||
|
from searx.botdetection._helpers import get_real_ip
|
||||||
|
|
||||||
name = gettext('Self Information')
|
name = gettext('Self Information')
|
||||||
description = gettext('Displays your IP if the query is "ip" and your user agent if the query contains "user agent".')
|
description = gettext('Displays your IP if the query is "ip" and your user agent if the query contains "user agent".')
|
||||||
|
@ -28,18 +18,11 @@ query_examples = ''
|
||||||
p = re.compile('.*user[ -]agent.*', re.IGNORECASE)
|
p = re.compile('.*user[ -]agent.*', re.IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
# attach callback to the post search hook
|
|
||||||
# request: flask request object
|
|
||||||
# ctx: the whole local context of the pre search hook
|
|
||||||
def post_search(request, search):
|
def post_search(request, search):
|
||||||
if search.search_query.pageno > 1:
|
if search.search_query.pageno > 1:
|
||||||
return True
|
return True
|
||||||
if search.search_query.query == 'ip':
|
if search.search_query.query == 'ip':
|
||||||
x_forwarded_for = request.headers.getlist("X-Forwarded-For")
|
ip = get_real_ip(request)
|
||||||
if x_forwarded_for:
|
|
||||||
ip = x_forwarded_for[0]
|
|
||||||
else:
|
|
||||||
ip = request.remote_addr
|
|
||||||
search.result_container.answers['ip'] = {'answer': ip}
|
search.result_container.answers['ip'] = {'answer': ip}
|
||||||
elif p.match(search.search_query.query):
|
elif p.match(search.search_query.query):
|
||||||
ua = request.user_agent
|
ua = request.user_agent
|
||||||
|
|
|
@ -17,6 +17,9 @@
|
||||||
{% else %}
|
{% else %}
|
||||||
<link rel="stylesheet" href="{{ url_for('static', filename='css/searxng.min.css') }}" type="text/css" media="screen" />
|
<link rel="stylesheet" href="{{ url_for('static', filename='css/searxng.min.css') }}" type="text/css" media="screen" />
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
{% if get_setting('server.limiter') %}
|
||||||
|
<link rel="stylesheet" href="{{ url_for('client_token', token=link_token) }}" type="text/css" />
|
||||||
|
{% endif %}
|
||||||
{% block styles %}{% endblock %}
|
{% block styles %}{% endblock %}
|
||||||
<!--[if gte IE 9]>-->
|
<!--[if gte IE 9]>-->
|
||||||
<script src="{{ url_for('static', filename='js/searxng.head.min.js') }}" client_settings="{{ client_settings }}"></script>
|
<script src="{{ url_for('static', filename='js/searxng.head.min.js') }}" client_settings="{{ client_settings }}"></script>
|
||||||
|
|
|
@ -0,0 +1,8 @@
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
""".. _tools src:
|
||||||
|
|
||||||
|
A collection of *utilities* used by SearXNG, but without SearXNG specific
|
||||||
|
peculiarities.
|
||||||
|
|
||||||
|
"""
|
|
@ -0,0 +1,376 @@
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""Configuration class :py:class:`Config` with deep-update, schema validation
|
||||||
|
and deprecated names.
|
||||||
|
|
||||||
|
The :py:class:`Config` class implements a configuration that is based on
|
||||||
|
structured dictionaries. The configuration schema is defined in a dictionary
|
||||||
|
structure and the configuration data is given in a dictionary structure.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import copy
|
||||||
|
import typing
|
||||||
|
import logging
|
||||||
|
import pathlib
|
||||||
|
import pytomlpp as toml
|
||||||
|
|
||||||
|
__all__ = ['Config', 'UNSET', 'SchemaIssue']
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class FALSE:
|
||||||
|
"""Class of ``False`` singelton"""
|
||||||
|
|
||||||
|
# pylint: disable=multiple-statements
|
||||||
|
def __init__(self, msg):
|
||||||
|
self.msg = msg
|
||||||
|
|
||||||
|
def __bool__(self):
|
||||||
|
return False
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return self.msg
|
||||||
|
|
||||||
|
__repr__ = __str__
|
||||||
|
|
||||||
|
|
||||||
|
UNSET = FALSE('<UNSET>')
|
||||||
|
|
||||||
|
|
||||||
|
class SchemaIssue(ValueError):
|
||||||
|
"""Exception to store and/or raise a message from a schema issue."""
|
||||||
|
|
||||||
|
def __init__(self, level: typing.Literal['warn', 'invalid'], msg: str):
|
||||||
|
self.level = level
|
||||||
|
super().__init__(msg)
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return f"[cfg schema {self.level}] {self.args[0]}"
|
||||||
|
|
||||||
|
|
||||||
|
class Config:
|
||||||
|
"""Base class used for configuration"""
|
||||||
|
|
||||||
|
UNSET = UNSET
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_toml(cls, schema_file: pathlib.Path, cfg_file: pathlib.Path, deprecated: dict) -> Config:
|
||||||
|
|
||||||
|
# init schema
|
||||||
|
|
||||||
|
log.debug("load schema file: %s", schema_file)
|
||||||
|
cfg = cls(cfg_schema=toml.load(schema_file), deprecated=deprecated)
|
||||||
|
if not cfg_file.exists():
|
||||||
|
log.warning("missing config file: %s", cfg_file)
|
||||||
|
return cfg
|
||||||
|
|
||||||
|
# load configuration
|
||||||
|
|
||||||
|
log.debug("load config file: %s", cfg_file)
|
||||||
|
try:
|
||||||
|
upd_cfg = toml.load(cfg_file)
|
||||||
|
except toml.DecodeError as exc:
|
||||||
|
msg = str(exc).replace('\t', '').replace('\n', ' ')
|
||||||
|
log.error("%s: %s", cfg_file, msg)
|
||||||
|
raise
|
||||||
|
|
||||||
|
is_valid, issue_list = cfg.validate(upd_cfg)
|
||||||
|
for msg in issue_list:
|
||||||
|
log.error(str(msg))
|
||||||
|
if not is_valid:
|
||||||
|
raise TypeError(f"schema of {cfg_file} is invalid!")
|
||||||
|
cfg.update(upd_cfg)
|
||||||
|
return cfg
|
||||||
|
|
||||||
|
def __init__(self, cfg_schema: typing.Dict, deprecated: typing.Dict[str, str]):
|
||||||
|
"""Construtor of class Config.
|
||||||
|
|
||||||
|
:param cfg_schema: Schema of the configuration
|
||||||
|
:param deprecated: dictionary that maps deprecated configuration names to a messages
|
||||||
|
|
||||||
|
These values are needed for validation, see :py:obj:`validate`.
|
||||||
|
|
||||||
|
"""
|
||||||
|
self.cfg_schema = cfg_schema
|
||||||
|
self.deprecated = deprecated
|
||||||
|
self.cfg = copy.deepcopy(cfg_schema)
|
||||||
|
|
||||||
|
def __getitem__(self, key: str):
|
||||||
|
return self.get(key)
|
||||||
|
|
||||||
|
def validate(self, cfg: dict):
|
||||||
|
"""Validation of dictionary ``cfg`` on :py:obj:`Config.SCHEMA`.
|
||||||
|
Validation is done by :py:obj:`validate`."""
|
||||||
|
|
||||||
|
return validate(self.cfg_schema, cfg, self.deprecated)
|
||||||
|
|
||||||
|
def update(self, upd_cfg: dict):
|
||||||
|
"""Update this configuration by ``upd_cfg``."""
|
||||||
|
|
||||||
|
dict_deepupdate(self.cfg, upd_cfg)
|
||||||
|
|
||||||
|
def default(self, name: str):
|
||||||
|
"""Returns default value of field ``name`` in ``self.cfg_schema``."""
|
||||||
|
return value(name, self.cfg_schema)
|
||||||
|
|
||||||
|
def get(self, name: str, default=UNSET, replace=True):
|
||||||
|
"""Returns the value to which ``name`` points in the configuration.
|
||||||
|
|
||||||
|
If there is no such ``name`` in the config and the ``default`` is
|
||||||
|
:py:obj:`UNSET`, a :py:obj:`KeyError` is raised.
|
||||||
|
"""
|
||||||
|
|
||||||
|
parent = self._get_parent_dict(name)
|
||||||
|
val = parent.get(name.split('.')[-1], UNSET)
|
||||||
|
if val is UNSET:
|
||||||
|
if default is UNSET:
|
||||||
|
raise KeyError(name)
|
||||||
|
val = default
|
||||||
|
|
||||||
|
if replace and isinstance(val, str):
|
||||||
|
val = val % self
|
||||||
|
return val
|
||||||
|
|
||||||
|
def set(self, name: str, val):
|
||||||
|
"""Set the value to which ``name`` points in the configuration.
|
||||||
|
|
||||||
|
If there is no such ``name`` in the config, a :py:obj:`KeyError` is
|
||||||
|
raised.
|
||||||
|
"""
|
||||||
|
parent = self._get_parent_dict(name)
|
||||||
|
parent[name.split('.')[-1]] = val
|
||||||
|
|
||||||
|
def _get_parent_dict(self, name):
|
||||||
|
parent_name = '.'.join(name.split('.')[:-1])
|
||||||
|
if parent_name:
|
||||||
|
parent = value(parent_name, self.cfg)
|
||||||
|
else:
|
||||||
|
parent = self.cfg
|
||||||
|
if (parent is UNSET) or (not isinstance(parent, dict)):
|
||||||
|
raise KeyError(parent_name)
|
||||||
|
return parent
|
||||||
|
|
||||||
|
def path(self, name: str, default=UNSET):
|
||||||
|
"""Get a :py:class:`pathlib.Path` object from a config string."""
|
||||||
|
|
||||||
|
val = self.get(name, default)
|
||||||
|
if val is UNSET:
|
||||||
|
if default is UNSET:
|
||||||
|
raise KeyError(name)
|
||||||
|
return default
|
||||||
|
return pathlib.Path(str(val))
|
||||||
|
|
||||||
|
def pyobj(self, name, default=UNSET):
|
||||||
|
"""Get python object refered by full qualiffied name (FQN) in the config
|
||||||
|
string."""
|
||||||
|
|
||||||
|
fqn = self.get(name, default)
|
||||||
|
if fqn is UNSET:
|
||||||
|
if default is UNSET:
|
||||||
|
raise KeyError(name)
|
||||||
|
return default
|
||||||
|
(modulename, name) = str(fqn).rsplit('.', 1)
|
||||||
|
m = __import__(modulename, {}, {}, [name], 0)
|
||||||
|
return getattr(m, name)
|
||||||
|
|
||||||
|
|
||||||
|
# working with dictionaries
|
||||||
|
|
||||||
|
|
||||||
|
def value(name: str, data_dict: dict):
|
||||||
|
"""Returns the value to which ``name`` points in the ``dat_dict``.
|
||||||
|
|
||||||
|
.. code: python
|
||||||
|
|
||||||
|
>>> data_dict = {
|
||||||
|
"foo": {"bar": 1 },
|
||||||
|
"bar": {"foo": 2 },
|
||||||
|
"foobar": [1, 2, 3],
|
||||||
|
}
|
||||||
|
>>> value('foobar', data_dict)
|
||||||
|
[1, 2, 3]
|
||||||
|
>>> value('foo.bar', data_dict)
|
||||||
|
1
|
||||||
|
>>> value('foo.bar.xxx', data_dict)
|
||||||
|
<UNSET>
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
ret_val = data_dict
|
||||||
|
for part in name.split('.'):
|
||||||
|
if isinstance(ret_val, dict):
|
||||||
|
ret_val = ret_val.get(part, UNSET)
|
||||||
|
if ret_val is UNSET:
|
||||||
|
break
|
||||||
|
return ret_val
|
||||||
|
|
||||||
|
|
||||||
|
def validate(
|
||||||
|
schema_dict: typing.Dict, data_dict: typing.Dict, deprecated: typing.Dict[str, str]
|
||||||
|
) -> typing.Tuple[bool, list]:
|
||||||
|
|
||||||
|
"""Deep validation of dictionary in ``data_dict`` against dictionary in
|
||||||
|
``schema_dict``. Argument deprecated is a dictionary that maps deprecated
|
||||||
|
configuration names to a messages::
|
||||||
|
|
||||||
|
deprecated = {
|
||||||
|
"foo.bar" : "config 'foo.bar' is deprecated, use 'bar.foo'",
|
||||||
|
"..." : "..."
|
||||||
|
}
|
||||||
|
|
||||||
|
The function returns a python tuple ``(is_valid, issue_list)``:
|
||||||
|
|
||||||
|
``is_valid``:
|
||||||
|
A bool value indicating ``data_dict`` is valid or not.
|
||||||
|
|
||||||
|
``issue_list``:
|
||||||
|
A list of messages (:py:obj:`SchemaIssue`) from the validation::
|
||||||
|
|
||||||
|
[schema warn] data_dict: deprecated 'fontlib.foo': <DEPRECATED['foo.bar']>
|
||||||
|
[schema invalid] data_dict: key unknown 'fontlib.foo'
|
||||||
|
[schema invalid] data_dict: type mismatch 'fontlib.foo': expected ..., is ...
|
||||||
|
|
||||||
|
If ``schema_dict`` or ``data_dict`` is not a dictionary type a
|
||||||
|
:py:obj:`SchemaIssue` is raised.
|
||||||
|
|
||||||
|
"""
|
||||||
|
names = []
|
||||||
|
is_valid = True
|
||||||
|
issue_list = []
|
||||||
|
|
||||||
|
if not isinstance(schema_dict, dict):
|
||||||
|
raise SchemaIssue('invalid', "schema_dict is not a dict type")
|
||||||
|
if not isinstance(data_dict, dict):
|
||||||
|
raise SchemaIssue('invalid', f"data_dict issue{'.'.join(names)} is not a dict type")
|
||||||
|
|
||||||
|
is_valid, issue_list = _validate(names, issue_list, schema_dict, data_dict, deprecated)
|
||||||
|
return is_valid, issue_list
|
||||||
|
|
||||||
|
|
||||||
|
def _validate(
|
||||||
|
names: typing.List,
|
||||||
|
issue_list: typing.List,
|
||||||
|
schema_dict: typing.Dict,
|
||||||
|
data_dict: typing.Dict,
|
||||||
|
deprecated: typing.Dict[str, str],
|
||||||
|
) -> typing.Tuple[bool, typing.List]:
|
||||||
|
|
||||||
|
is_valid = True
|
||||||
|
|
||||||
|
for key, data_value in data_dict.items():
|
||||||
|
|
||||||
|
names.append(key)
|
||||||
|
name = '.'.join(names)
|
||||||
|
|
||||||
|
deprecated_msg = deprecated.get(name)
|
||||||
|
# print("XXX %s: key %s // data_value: %s" % (name, key, data_value))
|
||||||
|
if deprecated_msg:
|
||||||
|
issue_list.append(SchemaIssue('warn', f"data_dict '{name}': deprecated - {deprecated_msg}"))
|
||||||
|
|
||||||
|
schema_value = value(name, schema_dict)
|
||||||
|
# print("YYY %s: key %s // schema_value: %s" % (name, key, schema_value))
|
||||||
|
if schema_value is UNSET:
|
||||||
|
if not deprecated_msg:
|
||||||
|
issue_list.append(SchemaIssue('invalid', f"data_dict '{name}': key unknown in schema_dict"))
|
||||||
|
is_valid = False
|
||||||
|
|
||||||
|
elif type(schema_value) != type(data_value): # pylint: disable=unidiomatic-typecheck
|
||||||
|
issue_list.append(
|
||||||
|
SchemaIssue(
|
||||||
|
'invalid',
|
||||||
|
(f"data_dict: type mismatch '{name}':" f" expected {type(schema_value)}, is: {type(data_value)}"),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
is_valid = False
|
||||||
|
|
||||||
|
elif isinstance(data_value, dict):
|
||||||
|
_valid, _ = _validate(names, issue_list, schema_dict, data_value, deprecated)
|
||||||
|
is_valid = is_valid and _valid
|
||||||
|
names.pop()
|
||||||
|
|
||||||
|
return is_valid, issue_list
|
||||||
|
|
||||||
|
|
||||||
|
def dict_deepupdate(base_dict: dict, upd_dict: dict, names=None):
|
||||||
|
"""Deep-update of dictionary in ``base_dict`` by dictionary in ``upd_dict``.
|
||||||
|
|
||||||
|
For each ``upd_key`` & ``upd_val`` pair in ``upd_dict``:
|
||||||
|
|
||||||
|
0. If types of ``base_dict[upd_key]`` and ``upd_val`` do not match raise a
|
||||||
|
:py:obj:`TypeError`.
|
||||||
|
|
||||||
|
1. If ``base_dict[upd_key]`` is a dict: recursively deep-update it by ``upd_val``.
|
||||||
|
|
||||||
|
2. If ``base_dict[upd_key]`` not exist: set ``base_dict[upd_key]`` from a
|
||||||
|
(deep-) copy of ``upd_val``.
|
||||||
|
|
||||||
|
3. If ``upd_val`` is a list, extend list in ``base_dict[upd_key]`` by the
|
||||||
|
list in ``upd_val``.
|
||||||
|
|
||||||
|
4. If ``upd_val`` is a set, update set in ``base_dict[upd_key]`` by set in
|
||||||
|
``upd_val``.
|
||||||
|
"""
|
||||||
|
# pylint: disable=too-many-branches
|
||||||
|
if not isinstance(base_dict, dict):
|
||||||
|
raise TypeError("argument 'base_dict' is not a ditionary type")
|
||||||
|
if not isinstance(upd_dict, dict):
|
||||||
|
raise TypeError("argument 'upd_dict' is not a ditionary type")
|
||||||
|
|
||||||
|
if names is None:
|
||||||
|
names = []
|
||||||
|
|
||||||
|
for upd_key, upd_val in upd_dict.items():
|
||||||
|
# For each upd_key & upd_val pair in upd_dict:
|
||||||
|
|
||||||
|
if isinstance(upd_val, dict):
|
||||||
|
|
||||||
|
if upd_key in base_dict:
|
||||||
|
# if base_dict[upd_key] exists, recursively deep-update it
|
||||||
|
if not isinstance(base_dict[upd_key], dict):
|
||||||
|
raise TypeError(f"type mismatch {'.'.join(names)}: is not a dict type in base_dict")
|
||||||
|
dict_deepupdate(
|
||||||
|
base_dict[upd_key],
|
||||||
|
upd_val,
|
||||||
|
names
|
||||||
|
+ [
|
||||||
|
upd_key,
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
else:
|
||||||
|
# if base_dict[upd_key] not exist, set base_dict[upd_key] from deepcopy of upd_val
|
||||||
|
base_dict[upd_key] = copy.deepcopy(upd_val)
|
||||||
|
|
||||||
|
elif isinstance(upd_val, list):
|
||||||
|
|
||||||
|
if upd_key in base_dict:
|
||||||
|
# if base_dict[upd_key] exists, base_dict[up_key] is extended by
|
||||||
|
# the list from upd_val
|
||||||
|
if not isinstance(base_dict[upd_key], list):
|
||||||
|
raise TypeError(f"type mismatch {'.'.join(names)}: is not a list type in base_dict")
|
||||||
|
base_dict[upd_key].extend(upd_val)
|
||||||
|
|
||||||
|
else:
|
||||||
|
# if base_dict[upd_key] doesn't exists, set base_dict[key] from a deepcopy of the
|
||||||
|
# list in upd_val.
|
||||||
|
base_dict[upd_key] = copy.deepcopy(upd_val)
|
||||||
|
|
||||||
|
elif isinstance(upd_val, set):
|
||||||
|
|
||||||
|
if upd_key in base_dict:
|
||||||
|
# if base_dict[upd_key] exists, base_dict[up_key] is updated by the set in upd_val
|
||||||
|
if not isinstance(base_dict[upd_key], set):
|
||||||
|
raise TypeError(f"type mismatch {'.'.join(names)}: is not a set type in base_dict")
|
||||||
|
base_dict[upd_key].update(upd_val.copy())
|
||||||
|
|
||||||
|
else:
|
||||||
|
# if base_dict[upd_key] doesn't exists, set base_dict[upd_key] from a copy of the
|
||||||
|
# set in upd_val
|
||||||
|
base_dict[upd_key] = upd_val.copy()
|
||||||
|
|
||||||
|
else:
|
||||||
|
# for any other type of upd_val replace or add base_dict[upd_key] by a copy
|
||||||
|
# of upd_val
|
||||||
|
base_dict[upd_key] = copy.copy(upd_val)
|
|
@ -94,6 +94,7 @@ from searx.utils import (
|
||||||
from searx.version import VERSION_STRING, GIT_URL, GIT_BRANCH
|
from searx.version import VERSION_STRING, GIT_URL, GIT_BRANCH
|
||||||
from searx.query import RawTextQuery
|
from searx.query import RawTextQuery
|
||||||
from searx.plugins import Plugin, plugins, initialize as plugin_initialize
|
from searx.plugins import Plugin, plugins, initialize as plugin_initialize
|
||||||
|
from searx.botdetection import link_token
|
||||||
from searx.plugins.oa_doi_rewrite import get_doi_resolver
|
from searx.plugins.oa_doi_rewrite import get_doi_resolver
|
||||||
from searx.preferences import (
|
from searx.preferences import (
|
||||||
Preferences,
|
Preferences,
|
||||||
|
@ -416,6 +417,7 @@ def render(template_name: str, **kwargs):
|
||||||
kwargs['endpoint'] = 'results' if 'q' in kwargs else request.endpoint
|
kwargs['endpoint'] = 'results' if 'q' in kwargs else request.endpoint
|
||||||
kwargs['cookies'] = request.cookies
|
kwargs['cookies'] = request.cookies
|
||||||
kwargs['errors'] = request.errors
|
kwargs['errors'] = request.errors
|
||||||
|
kwargs['link_token'] = link_token.get_token()
|
||||||
|
|
||||||
# values from the preferences
|
# values from the preferences
|
||||||
kwargs['preferences'] = request.preferences
|
kwargs['preferences'] = request.preferences
|
||||||
|
@ -642,6 +644,12 @@ def health():
|
||||||
return Response('OK', mimetype='text/plain')
|
return Response('OK', mimetype='text/plain')
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/client<token>.css', methods=['GET', 'POST'])
|
||||||
|
def client_token(token=None):
|
||||||
|
link_token.ping(request, token)
|
||||||
|
return Response('', mimetype='text/css')
|
||||||
|
|
||||||
|
|
||||||
@app.route('/search', methods=['GET', 'POST'])
|
@app.route('/search', methods=['GET', 'POST'])
|
||||||
def search():
|
def search():
|
||||||
"""Search query in q and return results.
|
"""Search query in q and return results.
|
||||||
|
|
|
@ -50,9 +50,13 @@ class SelfIPTest(SearxTestCase):
|
||||||
self.assertTrue(len(store.plugins) == 1)
|
self.assertTrue(len(store.plugins) == 1)
|
||||||
|
|
||||||
# IP test
|
# IP test
|
||||||
request = Mock(remote_addr='127.0.0.1')
|
request = Mock()
|
||||||
request.headers.getlist.return_value = []
|
request.remote_addr = '127.0.0.1'
|
||||||
search = get_search_mock(query='ip', pageno=1)
|
request.headers = {'X-Forwarded-For': '1.2.3.4, 127.0.0.1', 'X-Real-IP': '127.0.0.1'}
|
||||||
|
search = get_search_mock(
|
||||||
|
query='ip',
|
||||||
|
pageno=1,
|
||||||
|
)
|
||||||
store.call(store.plugins, 'post_search', request, search)
|
store.call(store.plugins, 'post_search', request, search)
|
||||||
self.assertTrue('127.0.0.1' in search.result_container.answers["ip"]["answer"])
|
self.assertTrue('127.0.0.1' in search.result_container.answers["ip"]["answer"])
|
||||||
|
|
||||||
|
@ -62,7 +66,6 @@ class SelfIPTest(SearxTestCase):
|
||||||
|
|
||||||
# User agent test
|
# User agent test
|
||||||
request = Mock(user_agent='Mock')
|
request = Mock(user_agent='Mock')
|
||||||
request.headers.getlist.return_value = []
|
|
||||||
|
|
||||||
search = get_search_mock(query='user-agent', pageno=1)
|
search = get_search_mock(query='user-agent', pageno=1)
|
||||||
store.call(store.plugins, 'post_search', request, search)
|
store.call(store.plugins, 'post_search', request, search)
|
||||||
|
@ -98,7 +101,6 @@ class HashPluginTest(SearxTestCase):
|
||||||
self.assertTrue(len(store.plugins) == 1)
|
self.assertTrue(len(store.plugins) == 1)
|
||||||
|
|
||||||
request = Mock(remote_addr='127.0.0.1')
|
request = Mock(remote_addr='127.0.0.1')
|
||||||
request.headers.getlist.return_value = []
|
|
||||||
|
|
||||||
# MD5
|
# MD5
|
||||||
search = get_search_mock(query='md5 test', pageno=1)
|
search = get_search_mock(query='md5 test', pageno=1)
|
||||||
|
|
Loading…
Reference in New Issue