[mod] isolation of botdetection from SearXNG core

In PR-2894 [1] we isolated the botdetection from the limiter; this PR isolates
the botdetection from the SearXNG core code.

This PR also fixes issue [2]: the ``server.public_instance`` option now
activates the limiter.

- [1] https://github.com/searxng/searxng/pull/2894
- [2] https://github.com/searxng/searxng/issues/2975

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 2023-11-01 14:18:44 +01:00
parent c0b97c6543
commit 523a875f1e
20 changed files with 555 additions and 373 deletions

View file

@ -11,8 +11,9 @@
port: 8888
bind_address: "127.0.0.1"
secret_key: "ultrasecretkey" # change this!
limiter: false
public_instance: false
limiter: false
pass_searxng_org: false
image_proxy: false
default_http_headers:
X-Content-Type-Options : nosniff
@ -31,10 +32,6 @@
``secret_key`` : ``$SEARXNG_SECRET``
Used for cryptography purpose.
``limiter`` :
Rate limit the number of request on the instance, block some bots. The
:ref:`limiter` requires a :ref:`settings redis` database.
.. _public_instance:
``public_instance`` :
@ -43,8 +40,22 @@
needed for local usage). When set to ``true`` the following features are
activated:
- ``server: limiter`` option :ref:`see below <activate limiter>`
- ``server: pass_searxng_org`` option :ref:`see below <pass_searxng_org>`
- :py:obj:`searx.botdetection.link_token` in the :ref:`limiter`
.. _activate limiter:
``limiter`` :
Rate limit the number of requests on the instance, block some bots. The
:ref:`limiter` requires a :ref:`settings redis` database.
.. _pass_searxng_org:
``pass_searxng_org`` :
Activates, in the limiter, the passlist of (hardcoded) IPs of the SearXNG
organization, e.g. ``check.searx.space``.
.. _image_proxy:
``image_proxy`` :

View file

@ -104,10 +104,3 @@ if max_request_timeout is None:
logger.info('max_request_timeout=%s', repr(max_request_timeout))
else:
logger.info('max_request_timeout=%i second(s)', max_request_timeout)
if settings['server']['public_instance']:
logger.warning(
"Be aware you have activated features intended only for public instances. "
"This force the usage of the limiter and link_token / "
"see https://docs.searxng.org/admin/searx.limiter.html"
)

View file

@ -5,19 +5,44 @@
Implementations used for bot detection.
"""
from __future__ import annotations
from dataclasses import dataclass
import pathlib
import redis
from .config import Config
from ._helpers import logger
from ._helpers import dump_request
from ._helpers import get_real_ip
from ._helpers import get_network
from ._helpers import too_many_requests
logger = logger.getChild('init')
__all__ = ['dump_request', 'get_network', 'get_real_ip', 'too_many_requests']
redis_client = None
cfg = None
CFG_SCHEMA = pathlib.Path(__file__).parent / "schema.toml"
"""Base configuration (schema) of the botdetection."""
CFG_DEPRECATED = {
# "dummy.old.foo": "config 'dummy.old.foo' exists only for tests. Don't use it in your real project config."
}
def init(_cfg, _redis_client):
global redis_client, cfg # pylint: disable=global-statement
redis_client = _redis_client
cfg = _cfg
@dataclass
class Context:
"""A global context of the botdetection"""
# pylint: disable=too-few-public-methods
redis_client: redis.Redis | None = None
cfg: Config = Config.from_toml(schema_file=CFG_SCHEMA, cfg_file=None, deprecated=CFG_DEPRECATED)
def init(self, toml_cfg: pathlib.Path, redis_client: redis.Redis | None):
self.redis_client = redis_client
self.cfg.load_toml(toml_cfg)
ctx = Context()
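For orientation, a minimal sketch of how a host application wires up this new context; the TOML path and the Redis URL below are placeholders, not part of this diff:

.. code:: python

    # minimal sketch -- path and Redis URL are assumptions, not part of this diff
    import pathlib
    import redis
    from searx import botdetection

    redis_client = redis.Redis.from_url("redis://localhost:6379/0")
    botdetection.ctx.init(pathlib.Path("/etc/searxng/limiter.toml"), redis_client)

    # the detection methods now read their settings from the shared context
    print(botdetection.ctx.cfg.get("botdetection.ip_limit.link_token"))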

View file

@ -3,6 +3,7 @@
# pylint: disable=missing-module-docstring, invalid-name
from __future__ import annotations
import logging
from ipaddress import (
IPv4Network,
IPv6Network,
@ -13,10 +14,9 @@ from ipaddress import (
import flask
import werkzeug
from searx import logger
from . import config
logger = logger.getChild('botdetection')
logger = logging.getLogger('botdetection')
def dump_request(request: flask.Request):
@ -104,10 +104,10 @@ def get_real_ip(request: flask.Request) -> str:
if not forwarded_for:
_log_error_only_once("X-Forwarded-For header is not set!")
else:
from . import cfg # pylint: disable=import-outside-toplevel, cyclic-import
from . import ctx # pylint: disable=import-outside-toplevel, cyclic-import
forwarded_for = [x.strip() for x in forwarded_for.split(',')]
x_for: int = cfg['real_ip.x_for'] # type: ignore
x_for: int = ctx.cfg['real_ip.x_for'] # type: ignore
forwarded_for = forwarded_for[-min(len(forwarded_for), x_for)]
if not real_ip:

View file

@ -57,18 +57,20 @@ class Config:
UNSET = UNSET
@classmethod
def from_toml(cls, schema_file: pathlib.Path, cfg_file: pathlib.Path, deprecated: dict) -> Config:
def from_toml(cls, schema_file: pathlib.Path, cfg_file: pathlib.Path | None, deprecated: dict) -> Config:
# init schema
log.debug("load schema file: %s", schema_file)
cfg = cls(cfg_schema=toml.load(schema_file), deprecated=deprecated)
if cfg_file is None:
return cfg
if not cfg_file.exists():
log.warning("missing config file: %s", cfg_file)
return cfg
# load configuration from toml file
cfg.load_toml(cfg_file)
return cfg
# load configuration
def load_toml(self, cfg_file: pathlib.Path):
log.debug("load config file: %s", cfg_file)
try:
upd_cfg = toml.load(cfg_file)
@ -77,13 +79,12 @@ class Config:
log.error("%s: %s", cfg_file, msg)
raise
is_valid, issue_list = cfg.validate(upd_cfg)
is_valid, issue_list = self.validate(upd_cfg)
for msg in issue_list:
log.error(str(msg))
if not is_valid:
raise TypeError(f"schema of {cfg_file} is invalid!")
cfg.update(upd_cfg)
return cfg
self.update(upd_cfg)
def __init__(self, cfg_schema: typing.Dict, deprecated: typing.Dict[str, str]):
"""Construtor of class Config.
@ -153,7 +154,7 @@ class Config:
raise KeyError(parent_name)
return parent
def path(self, name: str, default=UNSET):
def path(self, name: str, default: Any = UNSET):
"""Get a :py:class:`pathlib.Path` object from a config string."""
val = self.get(name, default)
@ -163,7 +164,7 @@ class Config:
return default
return pathlib.Path(str(val))
def pyobj(self, name, default=UNSET):
def pyobj(self, name, default: Any = UNSET):
"""Get python object refered by full qualiffied name (FQN) in the config
string."""

View file

@ -36,6 +36,24 @@ dropped.
.. _X-Forwarded-For:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
Config
~~~~~~
.. code:: toml
[botdetection.ip_limit]
# To get unlimited access in a local network, by default link-local addresses
# (networks) are not monitored by the ip_limit
filter_link_local = false
# activate link_token method in the ip_limit method
link_token = false
Implementations
~~~~~~~~~~~~~~~
"""
from __future__ import annotations
from ipaddress import (
@ -46,9 +64,8 @@ from ipaddress import (
import flask
import werkzeug
from searx import redisdb
from searx.redislib import incr_sliding_window, drop_counter
from . import ctx
from .redislib import incr_sliding_window, drop_counter
from . import link_token
from . import config
from ._helpers import (
@ -77,11 +94,11 @@ LONG_MAX = 150
LONG_MAX_SUSPICIOUS = 10
"""Maximum suspicious requests from one IP in the :py:obj:`LONG_WINDOW`"""
API_WONDOW = 3600
API_WINDOW = 3600
"""Time (sec) before sliding window for API requests (format != html) expires."""
API_MAX = 4
"""Maximum requests from one IP in the :py:obj:`API_WONDOW`"""
"""Maximum requests from one IP in the :py:obj:`API_WINDOW`"""
SUSPICIOUS_IP_WINDOW = 3600 * 24 * 30
"""Time (sec) before sliding window for one suspicious IP expires."""
@ -97,14 +114,13 @@ def filter_request(
) -> werkzeug.Response | None:
# pylint: disable=too-many-return-statements
redis_client = redisdb.client()
if network.is_link_local and not cfg['botdetection.ip_limit.filter_link_local']:
logger.debug("network %s is link-local -> not monitored by ip_limit method", network.compressed)
return None
if request.args.get('format', 'html') != 'html':
c = incr_sliding_window(redis_client, 'ip_limit.API_WONDOW:' + network.compressed, API_WONDOW)
c = incr_sliding_window(ctx.redis_client, 'ip_limit.API_WINDOW:' + network.compressed, API_WINDOW)
if c > API_MAX:
return too_many_requests(network, "too many request in API_WINDOW")
@ -114,33 +130,33 @@ def filter_request(
if not suspicious:
# this IP is no longer suspicious: release ip again / delete the counter of this IP
drop_counter(redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + network.compressed)
drop_counter(ctx.redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + network.compressed)
return None
# this IP is suspicious: count requests from this IP
c = incr_sliding_window(
redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + network.compressed, SUSPICIOUS_IP_WINDOW
ctx.redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + network.compressed, SUSPICIOUS_IP_WINDOW
)
if c > SUSPICIOUS_IP_MAX:
logger.error("BLOCK: too many request from %s in SUSPICIOUS_IP_WINDOW (redirect to /)", network)
return flask.redirect(flask.url_for('index'), code=302)
c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + network.compressed, BURST_WINDOW)
c = incr_sliding_window(ctx.redis_client, 'ip_limit.BURST_WINDOW' + network.compressed, BURST_WINDOW)
if c > BURST_MAX_SUSPICIOUS:
return too_many_requests(network, "too many request in BURST_WINDOW (BURST_MAX_SUSPICIOUS)")
c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + network.compressed, LONG_WINDOW)
c = incr_sliding_window(ctx.redis_client, 'ip_limit.LONG_WINDOW' + network.compressed, LONG_WINDOW)
if c > LONG_MAX_SUSPICIOUS:
return too_many_requests(network, "too many request in LONG_WINDOW (LONG_MAX_SUSPICIOUS)")
return None
# vanilla limiter without extensions counts BURST_MAX and LONG_MAX
c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + network.compressed, BURST_WINDOW)
c = incr_sliding_window(ctx.redis_client, 'ip_limit.BURST_WINDOW' + network.compressed, BURST_WINDOW)
if c > BURST_MAX:
return too_many_requests(network, "too many request in BURST_WINDOW (BURST_MAX)")
c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + network.compressed, LONG_WINDOW)
c = incr_sliding_window(ctx.redis_client, 'ip_limit.LONG_WINDOW' + network.compressed, LONG_WINDOW)
if c > LONG_MAX:
return too_many_requests(network, "too many request in LONG_WINDOW (LONG_MAX)")

View file

@ -8,6 +8,10 @@ Method ``ip_lists``
The ``ip_lists`` method implements IP :py:obj:`block- <block_ip>` and
:py:obj:`pass-lists <pass_ip>`.
Config
~~~~~~
.. code:: toml
[botdetection.ip_lists]
@ -22,6 +26,10 @@ The ``ip_lists`` method implements IP :py:obj:`block- <block_ip>` and
'257.1.1.1', # invalid IP --> will be ignored, logged in ERROR class
]
Implementations
~~~~~~~~~~~~~~~
"""
# pylint: disable=unused-argument
@ -38,24 +46,11 @@ from ._helpers import logger
logger = logger.getChild('ip_limit')
SEARXNG_ORG = [
# https://github.com/searxng/searxng/pull/2484#issuecomment-1576639195
'167.235.158.251', # IPv4 check.searx.space
'2a01:04f8:1c1c:8fc2::/64', # IPv6 check.searx.space
]
"""Passlist of IPs from the SearXNG organization, e.g. `check.searx.space`."""
def pass_ip(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> Tuple[bool, str]:
"""Checks if the IP on the subnet is in one of the members of the
``botdetection.ip_lists.pass_ip`` list.
"""
if cfg.get('botdetection.ip_lists.pass_searxng_org', default=True):
for net in SEARXNG_ORG:
net = ip_network(net, strict=False)
if real_ip.version == net.version and real_ip in net:
return True, f"IP matches {net.compressed} in SEARXNG_ORG list."
return ip_is_subnet_of_member_in_list(real_ip, 'botdetection.ip_lists.pass_ip', cfg)
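A hedged usage sketch of the remaining ``pass_ip`` check (the IP value is a placeholder):

.. code:: python

    # sketch only -- the client IP is an assumption
    from ipaddress import ip_address
    from searx.botdetection import ctx, ip_lists

    passed, msg = ip_lists.pass_ip(ip_address('192.168.0.10'), ctx.cfg)
    if passed:
        print("unrestricted access:", msg)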

View file

@ -13,6 +13,9 @@ a ping by request a static URL.
This method requires a redis DB and needs a HTTP X-Forwarded-For_ header.
.. _X-Forwarded-For:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
To use this method, a flask URL route needs to be added:
.. code:: python
@ -31,10 +34,31 @@ And in the HTML template from flask a stylesheet link is needed (the value of
href="{{ url_for('client_token', token=link_token) }}"
type="text/css" />
.. _X-Forwarded-For:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
Config
~~~~~~
.. code:: toml
[botdetection.link_token]
# Lifetime (sec) of limiter's CSS token.
TOKEN_LIVE_TIME = 600
# Lifetime (sec) of the ping-key from a client (request)
PING_LIVE_TIME = 3600
# Prefix of all ping-keys generated by link_token.get_ping_key
PING_KEY = 'botdetection.link_token.PING_KEY'
# Key for which the current token is stored in the DB
TOKEN_KEY = 'botdetection.link_token.TOKEN_KEY'
Implementations
~~~~~~~~~~~~~~~
"""
from __future__ import annotations
from ipaddress import (
IPv4Network,
@ -46,48 +70,47 @@ import string
import random
import flask
from searx import logger
from searx import redisdb
from searx.redislib import secret_hash
from . import ctx
from .redislib import secret_hash
from ._helpers import (
logger,
get_network,
get_real_ip,
)
TOKEN_LIVE_TIME = 600
"""Livetime (sec) of limiter's CSS token."""
PING_LIVE_TIME = 3600
"""Livetime (sec) of the ping-key from a client (request)"""
logger = logger.getChild('link_token')
PING_KEY = 'SearXNG_limiter.ping'
PING_KEY = 'botdetection.link_token.PING_KEY'
"""Prefix of all ping-keys generated by :py:obj:`get_ping_key`"""
TOKEN_KEY = 'SearXNG_limiter.token'
TOKEN_KEY = 'botdetection.link_token.TOKEN_KEY'
"""Key for which the current token is stored in the DB"""
logger = logger.getChild('botdetection.link_token')
def _cfg(name):
return ctx.cfg.get(f'botdetection.link_token.{name}')
def is_suspicious(network: IPv4Network | IPv6Network, request: flask.Request, renew: bool = False):
"""Checks whether a valid ping is exists for this (client) network, if not
this request is rated as *suspicious*. If a valid ping exists and argument
``renew`` is ``True`` the expire time of this ping is reset to
:py:obj:`PING_LIVE_TIME`.
``PING_LIVE_TIME``.
"""
redis_client = redisdb.client()
if not redis_client:
if not ctx.redis_client:
return False
ping_key = get_ping_key(network, request)
if not redis_client.get(ping_key):
if not ctx.redis_client.get(ping_key):
logger.info("missing ping (IP: %s) / request: %s", network.compressed, ping_key)
return True
if renew:
redis_client.set(ping_key, 1, ex=PING_LIVE_TIME)
ctx.redis_client.set(ping_key, 1, ex=_cfg('PING_LIVE_TIME'))
logger.debug("found ping for (client) network %s -> %s", network.compressed, ping_key)
return False
@ -96,22 +119,21 @@ def is_suspicious(network: IPv4Network | IPv6Network, request: flask.Request, re
def ping(request: flask.Request, token: str):
"""This function is called by a request to URL ``/client<token>.css``. If
``token`` is valid a :py:obj:`PING_KEY` for the client is stored in the DB.
The expire time of this ping-key is :py:obj:`PING_LIVE_TIME`.
The expire time of this ping-key is ``PING_LIVE_TIME``.
"""
from . import redis_client, cfg # pylint: disable=import-outside-toplevel, cyclic-import
if not redis_client:
if not ctx.redis_client:
return
if not token_is_valid(token):
return
real_ip = ip_address(get_real_ip(request))
network = get_network(real_ip, cfg)
network = get_network(real_ip, ctx.cfg)
ping_key = get_ping_key(network, request)
logger.debug("store ping_key for (client) network %s (IP %s) -> %s", network.compressed, real_ip, ping_key)
redis_client.set(ping_key, 1, ex=PING_LIVE_TIME)
ctx.redis_client.set(ping_key, 1, ex=_cfg('PING_LIVE_TIME'))
def get_ping_key(network: IPv4Network | IPv6Network, request: flask.Request) -> str:
@ -137,19 +159,21 @@ def get_token() -> str:
"""Returns current token. If there is no currently active token a new token
is generated randomly and stored in the redis DB.
- :py:obj:`TOKEN_LIVE_TIME`
- :py:obj:`TOKEN_KEY`
Config:
- ``TOKEN_LIVE_TIME``
- ``TOKEN_KEY``
"""
redis_client = redisdb.client()
if not redis_client:
if not ctx.redis_client:
# This function is also called when limiter is inactive / no redis DB
# (see render function in webapp.py)
return '12345678'
token = redis_client.get(TOKEN_KEY)
token_key = _cfg('TOKEN_KEY')
token = ctx.redis_client.get(token_key)
if token:
token = token.decode('UTF-8')
else:
token = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(16))
redis_client.set(TOKEN_KEY, token, ex=TOKEN_LIVE_TIME)
ctx.redis_client.set(token_key, token, ex=_cfg('TOKEN_LIVE_TIME'))
return token
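To make the wiring described in the module docstring concrete, a minimal sketch of the flask side; it mirrors the ``/client<token>.css`` route that webapp.py registers further down in this diff:

.. code:: python

    # minimal sketch -- mirrors the route registered in webapp.py
    import flask
    from searx.botdetection import link_token

    app = flask.Flask(__name__)

    @app.route('/client<token>.css', methods=['GET', 'POST'])
    def client_token(token=None):
        # a valid token stores a ping-key for the client's network in Redis
        link_token.ping(flask.request, token)
        return flask.Response('', mimetype='text/css')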

View file

@ -0,0 +1,263 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""A collection of convenient functions and redis/lua scripts.
This code was partially inspired by the `Bullet-Proofing Lua Scripts in RedisPy`_
article.
.. _Bullet-Proofing Lua Scripts in RedisPy:
https://redis.com/blog/bullet-proofing-lua-scripts-in-redispy/
Config
~~~~~~
.. code:: toml
[botdetection.redis]
# FQN of a function with which the DB keys of the Redis DB are to be
# anonymized.
secret_hash = ''
# A prefix to all keys stored by the botdetection in the redis DB
REDIS_KEY_PREFIX = 'botdetection_'
Implementations
~~~~~~~~~~~~~~~
"""
from __future__ import annotations
from . import ctx
REDIS_KEY_PREFIX = 'botdetection'
"""A prefix applied to all keys store by the botdetection in the redis DB."""
LUA_SCRIPT_STORAGE = {}
"""A global dictionary to cache client's ``Script`` objects, used by
:py:obj:`lua_script_storage`"""
def secret_hash(name: str) -> str:
"""Returns a annonymized name if ``secret_hash`` is configured, otherwise
the ``name`` is returned unchanged."""
func = ctx.cfg.pyobj('botdetection.redis.secret_hash', default=None) # type: ignore
if not func:
return name
return func(name)
def _prefix(val: str | None = None) -> str:
if val is None:
val = ctx.cfg.get('botdetection.redis.REDIS_KEY_PREFIX', default=REDIS_KEY_PREFIX) # type: ignore
return str(val)
def lua_script_storage(client, script):
"""Returns a redis :py:obj:`Script
<redis.commands.core.CoreCommands.register_script>` instance.
Due to performance reason the ``Script`` object is instantiated only once
for a client (``client.register_script(..)``) and is cached in
:py:obj:`LUA_SCRIPT_STORAGE`.
"""
# redis connection can be closed, lets use the id() of the redis connector
# as key in the script-storage:
client_id = id(client)
if LUA_SCRIPT_STORAGE.get(client_id) is None:
LUA_SCRIPT_STORAGE[client_id] = {}
if LUA_SCRIPT_STORAGE[client_id].get(script) is None:
LUA_SCRIPT_STORAGE[client_id][script] = client.register_script(script)
return LUA_SCRIPT_STORAGE[client_id][script]
PURGE_BY_PREFIX = """
local prefix = tostring(ARGV[1])
for i, name in ipairs(redis.call('KEYS', prefix .. '*')) do
redis.call('EXPIRE', name, 0)
end
"""
def purge_by_prefix(client, prefix: str | None):
"""Purge all keys with ``prefix`` from database.
Queries all keys in the database by the given prefix and sets their expire time
to zero. The default prefix will drop all keys which have been set by
:py:obj:`REDIS_KEY_PREFIX`.
The implementation is the lua script from string :py:obj:`PURGE_BY_PREFIX`.
The lua script uses EXPIRE_ instead of DEL_: if there are a lot of keys to
delete and/or their values are big, `DEL` could take more time and block
the command loop while `EXPIRE` returns immediately.
:param prefix: prefix of the key to delete (default: :py:obj:`REDIS_KEY_PREFIX`)
:type name: str
.. _EXPIRE: https://redis.io/commands/expire/
.. _DEL: https://redis.io/commands/del/
"""
script = lua_script_storage(client, PURGE_BY_PREFIX)
script(args=[_prefix(prefix)])
INCR_COUNTER = """
local limit = tonumber(ARGV[1])
local expire = tonumber(ARGV[2])
local c_name = KEYS[1]
local c = redis.call('GET', c_name)
if not c then
c = redis.call('INCR', c_name)
if expire > 0 then
redis.call('EXPIRE', c_name, expire)
end
else
c = tonumber(c)
if limit == 0 or c < limit then
c = redis.call('INCR', c_name)
end
end
return c
"""
def incr_counter(client, name: str, limit: int = 0, expire: int = 0):
"""Increment a counter and return the new value.
If counter with redis key :py:obj:`REDIS_KEY_PREFIX` + ``counter_<name>``
does not exist, it is created with an initial value of 1 and returned. The
replacement ``<name>`` is a *secret hash* of the value from argument
``name`` (see :py:func:`secret_hash`).
The implementation of the redis counter is the lua script from string
:py:obj:`INCR_COUNTER`.
:param name: name of the counter
:type name: str
:param expire: live-time of the counter in seconds (default ``None`` means
infinite).
:type expire: int / see EXPIRE_
:param limit: limit where the counter stops to increment (default ``None``)
:type limit: int / limit is 2^64 see INCR_
:return: value of the incremented counter
:type return: int
.. _EXPIRE: https://redis.io/commands/expire/
.. _INCR: https://redis.io/commands/incr/
A simple demo of a counter with expire time and limit::
>>> for i in range(6):
... i, incr_counter(client, "foo", 3, 5) # max 3, duration 5 sec
... time.sleep(1) # from the third call on max has been reached
...
(0, 1)
(1, 2)
(2, 3)
(3, 3)
(4, 3)
(5, 1)
"""
script = lua_script_storage(client, INCR_COUNTER)
name = _prefix() + "counter_" + secret_hash(name)
c = script(args=[limit, expire], keys=[name])
return c
def drop_counter(client, name):
"""Drop counter with redis key :py:obj:`REDIS_KEY_PREFIX` +
``counter_<name>``
The replacement ``<name>`` is a *secret hash* of the value from argument
``name`` (see :py:func:`incr_counter` and :py:func:`incr_sliding_window`).
"""
name = _prefix() + "counter_" + secret_hash(name)
client.delete(name)
INCR_SLIDING_WINDOW = """
local expire = tonumber(ARGV[1])
local name = KEYS[1]
local current_time = redis.call('TIME')
redis.call('ZREMRANGEBYSCORE', name, 0, current_time[1] - expire)
redis.call('ZADD', name, current_time[1], current_time[1] .. current_time[2])
local result = redis.call('ZCOUNT', name, 0, current_time[1] + 1)
redis.call('EXPIRE', name, expire)
return result
"""
def incr_sliding_window(client, name: str, duration: int):
"""Increment a sliding-window counter and return the new value.
If counter with redis key :py:obj:`REDIS_KEY_PREFIX` + ``counter_<name>``
does not exist, it is created with an initial value of 1 and returned. The
replacement ``<name>`` is a *secret hash* of the value from argument
``name`` (see :py:func:`secret_hash`).
:param name: name of the counter
:type name: str
:param duration: live-time of the sliding window in seconds
:type duration: int
:return: value of the incremented counter
:type return: int
The implementation of the redis counter is the lua script from string
:py:obj:`INCR_SLIDING_WINDOW`. The lua script uses `sorted sets in Redis`_
to implement a sliding window for the redis key :py:obj:`REDIS_KEY_PREFIX` +
``counter_<name>`` (ZADD_). The current TIME_ is used to score the items in
the sorted set and the time window is moved by removing items with a score
lower than the current time minus the *duration* (ZREMRANGEBYSCORE_).
The EXPIRE_ time (the duration of the sliding window) is refreshed on each
call (increment) and if there is no call in this duration, the sorted
set expires from the redis DB.
The return value is the number of items in the sorted set (ZCOUNT_), which
corresponds to the number of calls within the sliding window.
.. _Sorted sets in Redis:
https://redis.com/ebook/part-1-getting-started/chapter-1-getting-to-know-redis/1-2-what-redis-data-structures-look-like/1-2-5-sorted-sets-in-redis/
.. _TIME: https://redis.io/commands/time/
.. _ZADD: https://redis.io/commands/zadd/
.. _EXPIRE: https://redis.io/commands/expire/
.. _ZREMRANGEBYSCORE: https://redis.io/commands/zremrangebyscore/
.. _ZCOUNT: https://redis.io/commands/zcount/
A simple demo of the sliding window::
>>> for i in range(5):
... incr_sliding_window(client, "foo", 3) # duration 3 sec
... time.sleep(1) # from the third call (second) on the window is moved
...
1
2
3
3
3
>>> time.sleep(3) # wait until expire
>>> incr_sliding_window(client, "foo", 3)
1
"""
script = lua_script_storage(client, INCR_SLIDING_WINDOW)
name = _prefix() + "counter_" + secret_hash(name)
c = script(args=[duration], keys=[name])
return c

View file

@ -0,0 +1,58 @@
[real_ip]
# Number of values to trust for X-Forwarded-For.
x_for = 1
# The prefix defines the number of leading bits in an address that are compared
# to determine whether or not an address is part of a (client) network.
ipv4_prefix = 32
ipv6_prefix = 48
[botdetection.redis]
# FQN of a function with which the DB keys of the Redis DB are to be
# anonymized.
secret_hash = ''
# A prefix to all keys stored by the botdetection in the redis DB
REDIS_KEY_PREFIX = 'botdetection_'
[botdetection.ip_limit]
# To get unlimited access in a local network, by default link-local addresses
# (networks) are not monitored by the ip_limit
filter_link_local = false
# activate link_token method in the ip_limit method
link_token = false
[botdetection.link_token]
# Lifetime (sec) of limiter's CSS token.
TOKEN_LIVE_TIME = 600
# Lifetime (sec) of the ping-key from a client (request)
PING_LIVE_TIME = 3600
# Prefix of all ping-keys generated by link_token.get_ping_key
PING_KEY = 'botdetection.link_token.PING_KEY'
# Key for which the current token is stored in the DB
TOKEN_KEY = 'botdetection.link_token.TOKEN_KEY'
[botdetection.ip_lists]
# In the limiter, the ip_lists method has priority over all other methods -> if
# an IP is in the pass_ip list, it has unrestricted access and it is also not
# checked if e.g. the "user agent" suggests a bot (e.g. curl).
block_ip = [
# '93.184.216.34', # IPv4 of example.org
# '257.1.1.1', # invalid IP --> will be ignored, logged in ERROR class
]
pass_ip = [
# '192.168.0.0/16', # IPv4 private network
# 'fe80::/10' # IPv6 linklocal / wins over botdetection.ip_limit.filter_link_local
]
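Presumably this is the ``schema.toml`` referenced by ``CFG_SCHEMA`` above: these values are the built-in defaults that ``ctx.cfg`` is pre-populated with before any site configuration is merged in. A hedged illustration:

.. code:: python

    # hedged illustration -- defaults come from the schema, no site config loaded
    from searx.botdetection import ctx

    assert ctx.cfg.get('real_ip.x_for') == 1
    assert ctx.cfg.get('botdetection.redis.REDIS_KEY_PREFIX') == 'botdetection_'
    assert ctx.cfg.get('botdetection.ip_limit.link_token') is False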

View file

@ -107,7 +107,6 @@ from searx import (
)
from searx import botdetection
from searx.botdetection import (
config,
http_accept,
http_accept_encoding,
http_accept_language,
@ -123,31 +122,26 @@ from searx.botdetection import (
# coherency, the logger is "limiter"
logger = logger.getChild('limiter')
CFG: config.Config = None # type: ignore
_INSTALLED = False
_FULLY_INSTALLED = False
LIMITER_CFG_SCHEMA = Path(__file__).parent / "limiter.toml"
DEFAULT_CFG = Path(__file__).parent / "limiter.toml"
"""Base configuration (schema) of the botdetection."""
LIMITER_CFG = Path('/etc/searxng/limiter.toml')
"""Local Limiter configuration."""
CFG_DEPRECATED = {
# "dummy.old.foo": "config 'dummy.old.foo' exists only for tests. Don't use it in your real project config."
}
def get_cfg() -> config.Config:
global CFG # pylint: disable=global-statement
if CFG is None:
CFG = config.Config.from_toml(LIMITER_CFG_SCHEMA, LIMITER_CFG, CFG_DEPRECATED)
return CFG
SEARXNG_ORG = [
# https://github.com/searxng/searxng/pull/2484#issuecomment-1576639195
'167.235.158.251', # IPv4 check.searx.space
'2a01:04f8:1c1c:8fc2::/64', # IPv6 check.searx.space
]
"""Passlist of IPs from the SearXNG organization, e.g. `check.searx.space`."""
def filter_request(request: flask.Request) -> werkzeug.Response | None:
# pylint: disable=too-many-return-statements
cfg = get_cfg()
cfg = botdetection.ctx.cfg
real_ip = ip_address(get_real_ip(request))
network = get_network(real_ip, cfg)
@ -210,34 +204,42 @@ def pre_request():
return filter_request(flask.request)
def is_installed():
def is_fully_installed():
"""Returns ``True`` if limiter is active and a redis DB is available."""
return _INSTALLED
return _FULLY_INSTALLED
def initialize(app: flask.Flask, settings):
"""Install the limiter"""
global _INSTALLED # pylint: disable=global-statement
global _FULLY_INSTALLED # pylint: disable=global-statement
if not (settings['server']['limiter'] or settings['server']['public_instance']):
return
# even if the limiter is not activated, the botdetection must be activated
# (e.g. the self_info plugin uses the botdetection to get client IP)
redis_client = redisdb.client()
if not redis_client:
botdetection.ctx.init(DEFAULT_CFG, redis_client)
cfg = botdetection.ctx.cfg
if settings['server']['public_instance']:
# overwrite SearXNG and limiter.toml settings
settings['server']['limiter'] = True
settings['server']['pass_searxng_org'] = True
cfg.set('botdetection.ip_limit.link_token', True)
if settings['server']['pass_searxng_org']:
cfg.get('botdetection.ip_lists.pass_ip').extend(SEARXNG_ORG)
if settings['server']['limiter']:
app.before_request(pre_request)
if redis_client:
_FULLY_INSTALLED = True
else:
logger.error(
"The limiter requires Redis, please consult the documentation: "
"https://docs.searxng.org/admin/searx.limiter.html"
)
if settings['server']['public_instance']:
sys.exit(1)
return
_INSTALLED = True
cfg = get_cfg()
if settings['server']['public_instance']:
# overwrite limiter.toml setting
cfg.set('botdetection.ip_limit.link_token', True)
botdetection.init(cfg, redis_client)
app.before_request(pre_request)
logger.error('server:public_instance activated but redis DB is missed')
sys.exit()
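Taken together, the host application only has to call ``initialize`` once; a minimal sketch, assuming a Flask app and already-loaded SearXNG settings:

.. code:: python

    # minimal sketch -- assumes SearXNG's settings have already been loaded
    import flask
    from searx import settings, limiter

    app = flask.Flask(__name__)
    limiter.initialize(app, settings)  # sets up botdetection.ctx and, if enabled, pre_request

    if limiter.is_fully_installed():
        print("limiter active and Redis available")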

View file

@ -10,31 +10,25 @@ x_for = 1
ipv4_prefix = 32
ipv6_prefix = 48
[botdetection.ip_limit]
[botdetection.redis]
# To get unlimited access in a local network, by default link-lokal addresses
# (networks) are not monitored by the ip_limit
filter_link_local = false
# FQN of a function with which the DB keys of the Redis DB are to be
# anonymized.
secret_hash = 'searx.redislib.secret_hash'
# activate link_token method in the ip_limit method
link_token = false
# A prefix to all keys stored by the botdetection in the redis DB
REDIS_KEY_PREFIX = 'SearXNG_'
[botdetection.ip_lists]
[botdetection.link_token]
# In the limiter, the ip_lists method has priority over all other methods -> if
# an IP is in the pass_ip list, it has unrestricted access and it is also not
# checked if e.g. the "user agent" suggests a bot (e.g. curl).
# Lifetime (sec) of limiter's CSS token.
TOKEN_LIVE_TIME = 600
block_ip = [
# '93.184.216.34', # IPv4 of example.org
# '257.1.1.1', # invalid IP --> will be ignored, logged in ERROR class
]
# Lifetime (sec) of the ping-key from a client (request)
PING_LIVE_TIME = 3600
pass_ip = [
# '192.168.0.0/16', # IPv4 private network
# 'fe80::/10' # IPv6 linklocal / wins over botdetection.ip_limit.filter_link_local
]
# Prefix of all ping-keys generated by link_token.get_ping_key
PING_KEY = 'SearXNG_limiter.ping'
# Activate passlist of (hardcoded) IPs from the SearXNG organization,
# e.g. `check.searx.space`.
pass_searxng_org = true
# Key for which the current token is stored in the DB
TOKEN_KEY = 'SearXNG_limiter.token'

View file

@ -1,77 +1,12 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""A collection of convenient functions and redis/lua scripts.
This code was partial inspired by the `Bullet-Proofing Lua Scripts in RedisPy`_
article.
.. _Bullet-Proofing Lua Scripts in RedisPy:
https://redis.com/blog/bullet-proofing-lua-scripts-in-redispy/
"""
import hmac
from searx import get_setting
LUA_SCRIPT_STORAGE = {}
"""A global dictionary to cache client's ``Script`` objects, used by
:py:obj:`lua_script_storage`"""
def lua_script_storage(client, script):
"""Returns a redis :py:obj:`Script
<redis.commands.core.CoreCommands.register_script>` instance.
Due to performance reason the ``Script`` object is instantiated only once
for a client (``client.register_script(..)``) and is cached in
:py:obj:`LUA_SCRIPT_STORAGE`.
"""
# redis connection can be closed, lets use the id() of the redis connector
# as key in the script-storage:
client_id = id(client)
if LUA_SCRIPT_STORAGE.get(client_id) is None:
LUA_SCRIPT_STORAGE[client_id] = {}
if LUA_SCRIPT_STORAGE[client_id].get(script) is None:
LUA_SCRIPT_STORAGE[client_id][script] = client.register_script(script)
return LUA_SCRIPT_STORAGE[client_id][script]
PURGE_BY_PREFIX = """
local prefix = tostring(ARGV[1])
for i, name in ipairs(redis.call('KEYS', prefix .. '*')) do
redis.call('EXPIRE', name, 0)
end
"""
def purge_by_prefix(client, prefix: str = "SearXNG_"):
"""Purge all keys with ``prefix`` from database.
Queries all keys in the database by the given prefix and set expire time to
zero. The default prefix will drop all keys which has been set by SearXNG
(drops SearXNG schema entirely from database).
The implementation is the lua script from string :py:obj:`PURGE_BY_PREFIX`.
The lua script uses EXPIRE_ instead of DEL_: if there are a lot keys to
delete and/or their values are big, `DEL` could take more time and blocks
the command loop while `EXPIRE` turns back immediate.
:param prefix: prefix of the key to delete (default: ``SearXNG_``)
:type name: str
.. _EXPIRE: https://redis.io/commands/expire/
.. _DEL: https://redis.io/commands/del/
"""
script = lua_script_storage(client, PURGE_BY_PREFIX)
script(args=[prefix])
def secret_hash(name: str):
"""Creates a hash of the ``name``.
@ -86,156 +21,3 @@ def secret_hash(name: str):
m = hmac.new(bytes(name, encoding='utf-8'), digestmod='sha256')
m.update(bytes(get_setting('server.secret_key'), encoding='utf-8'))
return m.hexdigest()
INCR_COUNTER = """
local limit = tonumber(ARGV[1])
local expire = tonumber(ARGV[2])
local c_name = KEYS[1]
local c = redis.call('GET', c_name)
if not c then
c = redis.call('INCR', c_name)
if expire > 0 then
redis.call('EXPIRE', c_name, expire)
end
else
c = tonumber(c)
if limit == 0 or c < limit then
c = redis.call('INCR', c_name)
end
end
return c
"""
def incr_counter(client, name: str, limit: int = 0, expire: int = 0):
"""Increment a counter and return the new value.
If counter with redis key ``SearXNG_counter_<name>`` does not exists it is
created with initial value 1 returned. The replacement ``<name>`` is a
*secret hash* of the value from argument ``name`` (see
:py:func:`secret_hash`).
The implementation of the redis counter is the lua script from string
:py:obj:`INCR_COUNTER`.
:param name: name of the counter
:type name: str
:param expire: live-time of the counter in seconds (default ``None`` means
infinite).
:type expire: int / see EXPIRE_
:param limit: limit where the counter stops to increment (default ``None``)
:type limit: int / limit is 2^64 see INCR_
:return: value of the incremented counter
:type return: int
.. _EXPIRE: https://redis.io/commands/expire/
.. _INCR: https://redis.io/commands/incr/
A simple demo of a counter with expire time and limit::
>>> for i in range(6):
... i, incr_counter(client, "foo", 3, 5) # max 3, duration 5 sec
... time.sleep(1) # from the third call on max has been reached
...
(0, 1)
(1, 2)
(2, 3)
(3, 3)
(4, 3)
(5, 1)
"""
script = lua_script_storage(client, INCR_COUNTER)
name = "SearXNG_counter_" + secret_hash(name)
c = script(args=[limit, expire], keys=[name])
return c
def drop_counter(client, name):
"""Drop counter with redis key ``SearXNG_counter_<name>``
The replacement ``<name>`` is a *secret hash* of the value from argument
``name`` (see :py:func:`incr_counter` and :py:func:`incr_sliding_window`).
"""
name = "SearXNG_counter_" + secret_hash(name)
client.delete(name)
INCR_SLIDING_WINDOW = """
local expire = tonumber(ARGV[1])
local name = KEYS[1]
local current_time = redis.call('TIME')
redis.call('ZREMRANGEBYSCORE', name, 0, current_time[1] - expire)
redis.call('ZADD', name, current_time[1], current_time[1] .. current_time[2])
local result = redis.call('ZCOUNT', name, 0, current_time[1] + 1)
redis.call('EXPIRE', name, expire)
return result
"""
def incr_sliding_window(client, name: str, duration: int):
"""Increment a sliding-window counter and return the new value.
If counter with redis key ``SearXNG_counter_<name>`` does not exists it is
created with initial value 1 returned. The replacement ``<name>`` is a
*secret hash* of the value from argument ``name`` (see
:py:func:`secret_hash`).
:param name: name of the counter
:type name: str
:param duration: live-time of the sliding window in seconds
:typeduration: int
:return: value of the incremented counter
:type return: int
The implementation of the redis counter is the lua script from string
:py:obj:`INCR_SLIDING_WINDOW`. The lua script uses `sorted sets in Redis`_
to implement a sliding window for the redis key ``SearXNG_counter_<name>``
(ZADD_). The current TIME_ is used to score the items in the sorted set and
the time window is moved by removing items with a score lower current time
minus *duration* time (ZREMRANGEBYSCORE_).
The EXPIRE_ time (the duration of the sliding window) is refreshed on each
call (increment) and if there is no call in this duration, the sorted
set expires from the redis DB.
The return value is the amount of items in the sorted set (ZCOUNT_), what
means the number of calls in the sliding window.
.. _Sorted sets in Redis:
https://redis.com/ebook/part-1-getting-started/chapter-1-getting-to-know-redis/1-2-what-redis-data-structures-look-like/1-2-5-sorted-sets-in-redis/
.. _TIME: https://redis.io/commands/time/
.. _ZADD: https://redis.io/commands/zadd/
.. _EXPIRE: https://redis.io/commands/expire/
.. _ZREMRANGEBYSCORE: https://redis.io/commands/zremrangebyscore/
.. _ZCOUNT: https://redis.io/commands/zcount/
A simple demo of the sliding window::
>>> for i in range(5):
... incr_sliding_window(client, "foo", 3) # duration 3 sec
... time.sleep(1) # from the third call (second) on the window is moved
...
1
2
3
3
3
>>> time.sleep(3) # wait until expire
>>> incr_sliding_window(client, "foo", 3)
1
"""
script = lua_script_storage(client, INCR_SLIDING_WINDOW)
name = "SearXNG_counter_" + secret_hash(name)
c = script(args=[duration], keys=[name])
return c

View file

@ -18,7 +18,7 @@ import importlib
from typing import Callable
from searx.redisdb import client as get_redis_client
from searx.redislib import lua_script_storage
from searx.botdetection.redislib import lua_script_storage
logger = logging.getLogger('searx.search.checker')

View file

@ -78,8 +78,9 @@ server:
# public URL of the instance, to ensure correct inbound links. Is overwritten
# by ${SEARXNG_URL}.
base_url: false # "http://example.com/location"
public_instance: false # enable best defaults designed for public instances
limiter: false # rate limit the number of request on the instance, block some bots
public_instance: false # enable features designed only for public instances
pass_searxng_org: false # pass IPs from the SearXNG org (check.searx.space)
# If your instance owns a /etc/searxng/settings.yml file, then set the following
# values there.

View file

@ -176,6 +176,7 @@ SCHEMA = {
'port': SettingsValue((int, str), 8888, 'SEARXNG_PORT'),
'bind_address': SettingsValue(str, '127.0.0.1', 'SEARXNG_BIND_ADDRESS'),
'limiter': SettingsValue(bool, False),
'pass_searxng_org': SettingsValue(bool, False),
'public_instance': SettingsValue(bool, False),
'secret_key': SettingsValue(str, environ_name='SEARXNG_SECRET'),
'base_url': SettingsValue((False, str), False, 'SEARXNG_BASE_URL'),
@ -247,4 +248,16 @@ SCHEMA = {
def settings_set_defaults(settings):
apply_schema(settings, SCHEMA, [])
public_instance(settings)
return settings
def public_instance(settings):
if settings['server']['public_instance']:
logger.warning(
"Be aware you have activated features intended only for public instances. "
"This force the usage of the limiter and link_token / "
"see https://docs.searxng.org/admin/searx.limiter.html"
)
# public_instance activates the limiter by default
settings['server']['limiter'] = True
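A hedged example of the new helper; the settings dict below is illustrative, not a full SearXNG configuration:

.. code:: python

    # hedged example -- minimal, illustrative settings dict
    from searx.settings_defaults import public_instance

    settings = {'server': {'public_instance': True, 'limiter': False}}
    public_instance(settings)
    assert settings['server']['limiter'] is True  # forced on for public instances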

View file

@ -17,7 +17,7 @@
{% else %}
<link rel="stylesheet" href="{{ url_for('static', filename='css/searxng.min.css') }}" type="text/css" media="screen" />
{% endif %}
{% if get_setting('server.limiter') or get_setting('server.public_instance') %}
{% if get_setting('server.limiter') %}
<link rel="stylesheet" href="{{ url_for('client_token', token=link_token) }}" type="text/css" />
{% endif %}
{% block styles %}{% endblock %}

View file

@ -58,7 +58,7 @@ from searx import (
from searx import infopage
from searx import limiter
from searx.botdetection import link_token
from searx import botdetection
from searx.data import ENGINE_DESCRIPTIONS
from searx.results import Timing
@ -385,7 +385,7 @@ def render(template_name: str, **kwargs):
kwargs['endpoint'] = 'results' if 'q' in kwargs else request.endpoint
kwargs['cookies'] = request.cookies
kwargs['errors'] = request.errors
kwargs['link_token'] = link_token.get_token()
kwargs['link_token'] = botdetection.link_token.get_token()
# values from the preferences
kwargs['preferences'] = request.preferences
@ -617,7 +617,7 @@ def health():
@app.route('/client<token>.css', methods=['GET', 'POST'])
def client_token(token=None):
link_token.ping(request, token)
botdetection.link_token.ping(request, token)
return Response('', mimetype='text/css')
@ -1267,8 +1267,6 @@ def config():
for _ in plugins:
_plugins.append({'name': _.name, 'enabled': _.default_on})
_limiter_cfg = limiter.get_cfg()
return jsonify(
{
'categories': list(categories.keys()),
@ -1289,9 +1287,11 @@ def config():
'DOCS_URL': get_setting('brand.docs_url'),
},
'limiter': {
'enabled': limiter.is_installed(),
'botdetection.ip_limit.link_token': _limiter_cfg.get('botdetection.ip_limit.link_token'),
'botdetection.ip_lists.pass_searxng_org': _limiter_cfg.get('botdetection.ip_lists.pass_searxng_org'),
'enabled': limiter.is_fully_installed(),
'pass_searxng_org': settings['server']['pass_searxng_org'],
'botdetection.ip_limit.link_token': botdetection.ctx.cfg.get('botdetection.ip_limit.link_token'),
# deprecated .. replaced by 'pass_searxng_org' from above
'botdetection.ip_lists.pass_searxng_org': settings['server']['pass_searxng_org'],
},
'doi_resolvers': list(settings['doi_resolvers'].keys()),
'default_doi_resolver': settings['default_doi_resolver'],

View file

@ -3,7 +3,6 @@
from searx import (
plugins,
limiter,
botdetection,
)
from mock import Mock
@ -51,8 +50,10 @@ class SelfIPTest(SearxTestCase):
plugin = plugins.load_and_initialize_plugin('searx.plugins.self_info', False, (None, {}))
store = plugins.PluginStore()
store.register(plugin)
cfg = limiter.get_cfg()
botdetection.init(cfg, None)
from searx import webapp # pylint disable=import-outside-toplevel
limiter.initialize(webapp.app, webapp.settings)
self.assertTrue(len(store.plugins) == 1)

View file

@ -6,6 +6,7 @@ from mock import Mock
from searx.results import Timing
import searx.search.processors
from searx import limiter
from searx.search import Search
from searx.preferences import Preferences
from tests import SearxTestCase
@ -21,6 +22,8 @@ class ViewsTestCase(SearxTestCase):
from searx import webapp # pylint disable=import-outside-toplevel
limiter.initialize(webapp.app, webapp.settings)
webapp.app.config['TESTING'] = True # to get better error messages
self.app = webapp.app.test_client()