mirror of
https://github.com/searxng/searxng
synced 2024-01-01 19:24:07 +01:00
[mod] isolation of botdetection from SearXNG core
In PR-2894[1] we isolated botdetection from the limiter, this PR isolates the botdetection from the SearXNG core code. This PR also fixes the issue [2] that the ``server.public_instance`` option needs to activate the limiter. - [1] https://github.com/searxng/searxng/pull/2894 - [2] https://github.com/searxng/searxng/issues/2975 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
c0b97c6543
commit
523a875f1e
20 changed files with 555 additions and 373 deletions
|
@ -11,8 +11,9 @@
|
|||
port: 8888
|
||||
bind_address: "127.0.0.1"
|
||||
secret_key: "ultrasecretkey" # change this!
|
||||
limiter: false
|
||||
public_instance: false
|
||||
limiter: false
|
||||
pass_searxng_org: false
|
||||
image_proxy: false
|
||||
default_http_headers:
|
||||
X-Content-Type-Options : nosniff
|
||||
|
@ -31,10 +32,6 @@
|
|||
``secret_key`` : ``$SEARXNG_SECRET``
|
||||
Used for cryptography purpose.
|
||||
|
||||
``limiter`` :
|
||||
Rate limit the number of request on the instance, block some bots. The
|
||||
:ref:`limiter` requires a :ref:`settings redis` database.
|
||||
|
||||
.. _public_instance:
|
||||
|
||||
``public_instance`` :
|
||||
|
@ -43,8 +40,22 @@
|
|||
needed for local usage). When set to ``true``, the following features are
|
||||
activated:
|
||||
|
||||
- ``server: limiter`` option :ref:`see below <activate limiter>`
|
||||
- ``server: pass_searxng_org`` option :ref:`see below <pass_searxng_org>`
|
||||
- :py:obj:`searx.botdetection.link_token` in the :ref:`limiter`
|
||||
|
||||
.. _activate limiter:
|
||||
|
||||
``limiter`` :
|
||||
Rate limit the number of requests on the instance, block some bots. The
|
||||
:ref:`limiter` requires a :ref:`settings redis` database.
|
||||
|
||||
.. _pass_searxng_org:
|
||||
|
||||
``pass_searxng_org`` :
|
||||
Activates, in the limiter, the passlist of (hardcoded) IPs of the SearXNG
|
||||
organization, e.g. ``check.searx.space``.
|
||||
|
||||
.. _image_proxy:
|
||||
|
||||
``image_proxy`` :
|
||||
|
|
|
@ -104,10 +104,3 @@ if max_request_timeout is None:
|
|||
logger.info('max_request_timeout=%s', repr(max_request_timeout))
|
||||
else:
|
||||
logger.info('max_request_timeout=%i second(s)', max_request_timeout)
|
||||
|
||||
if settings['server']['public_instance']:
|
||||
logger.warning(
|
||||
"Be aware you have activated features intended only for public instances. "
|
||||
"This force the usage of the limiter and link_token / "
|
||||
"see https://docs.searxng.org/admin/searx.limiter.html"
|
||||
)
|
||||
|
|
|
@ -5,19 +5,44 @@
|
|||
Implementations used for bot detection.
|
||||
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
import pathlib
|
||||
|
||||
import redis
|
||||
from .config import Config
|
||||
|
||||
from ._helpers import logger
|
||||
from ._helpers import dump_request
|
||||
from ._helpers import get_real_ip
|
||||
from ._helpers import get_network
|
||||
from ._helpers import too_many_requests
|
||||
|
||||
logger = logger.getChild('init')
|
||||
|
||||
__all__ = ['dump_request', 'get_network', 'get_real_ip', 'too_many_requests']
|
||||
|
||||
redis_client = None
|
||||
cfg = None
|
||||
CFG_SCHEMA = pathlib.Path(__file__).parent / "schema.toml"
|
||||
"""Base configuration (schema) of the botdetection."""
|
||||
|
||||
CFG_DEPRECATED = {
|
||||
# "dummy.old.foo": "config 'dummy.old.foo' exists only for tests. Don't use it in your real project config."
|
||||
}
|
||||
|
||||
|
||||
def init(_cfg, _redis_client):
|
||||
global redis_client, cfg # pylint: disable=global-statement
|
||||
redis_client = _redis_client
|
||||
cfg = _cfg
|
||||
@dataclass
|
||||
class Context:
|
||||
"""A global context of the botdetection"""
|
||||
|
||||
# pylint: disable=too-few-public-methods
|
||||
|
||||
redis_client: redis.Redis | None = None
|
||||
cfg: Config = Config.from_toml(schema_file=CFG_SCHEMA, cfg_file=None, deprecated=CFG_DEPRECATED)
|
||||
|
||||
def init(self, toml_cfg: pathlib.Path, redis_client: redis.Redis | None):
|
||||
self.redis_client = redis_client
|
||||
self.cfg.load_toml(toml_cfg)
|
||||
|
||||
|
||||
ctx = Context()
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# pylint: disable=missing-module-docstring, invalid-name
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from ipaddress import (
|
||||
IPv4Network,
|
||||
IPv6Network,
|
||||
|
@ -13,10 +14,9 @@ from ipaddress import (
|
|||
import flask
|
||||
import werkzeug
|
||||
|
||||
from searx import logger
|
||||
from . import config
|
||||
|
||||
logger = logger.getChild('botdetection')
|
||||
logger = logging.getLogger('botdetection')
|
||||
|
||||
|
||||
def dump_request(request: flask.Request):
|
||||
|
@ -104,10 +104,10 @@ def get_real_ip(request: flask.Request) -> str:
|
|||
if not forwarded_for:
|
||||
_log_error_only_once("X-Forwarded-For header is not set!")
|
||||
else:
|
||||
from . import cfg # pylint: disable=import-outside-toplevel, cyclic-import
|
||||
from . import ctx # pylint: disable=import-outside-toplevel, cyclic-import
|
||||
|
||||
forwarded_for = [x.strip() for x in forwarded_for.split(',')]
|
||||
x_for: int = cfg['real_ip.x_for'] # type: ignore
|
||||
x_for: int = ctx.cfg['real_ip.x_for'] # type: ignore
|
||||
forwarded_for = forwarded_for[-min(len(forwarded_for), x_for)]
|
||||
|
||||
if not real_ip:
|
||||
|
|
|
@ -57,18 +57,20 @@ class Config:
|
|||
UNSET = UNSET
|
||||
|
||||
@classmethod
|
||||
def from_toml(cls, schema_file: pathlib.Path, cfg_file: pathlib.Path, deprecated: dict) -> Config:
|
||||
|
||||
def from_toml(cls, schema_file: pathlib.Path, cfg_file: pathlib.Path | None, deprecated: dict) -> Config:
|
||||
# init schema
|
||||
|
||||
log.debug("load schema file: %s", schema_file)
|
||||
cfg = cls(cfg_schema=toml.load(schema_file), deprecated=deprecated)
|
||||
if cfg_file is None:
|
||||
return cfg
|
||||
if not cfg_file.exists():
|
||||
log.warning("missing config file: %s", cfg_file)
|
||||
return cfg
|
||||
# load configuration from toml file
|
||||
cfg.load_toml(cfg_file)
|
||||
return cfg
|
||||
|
||||
# load configuration
|
||||
|
||||
def load_toml(self, cfg_file: pathlib.Path):
|
||||
log.debug("load config file: %s", cfg_file)
|
||||
try:
|
||||
upd_cfg = toml.load(cfg_file)
|
||||
|
@ -77,13 +79,12 @@ class Config:
|
|||
log.error("%s: %s", cfg_file, msg)
|
||||
raise
|
||||
|
||||
is_valid, issue_list = cfg.validate(upd_cfg)
|
||||
is_valid, issue_list = self.validate(upd_cfg)
|
||||
for msg in issue_list:
|
||||
log.error(str(msg))
|
||||
if not is_valid:
|
||||
raise TypeError(f"schema of {cfg_file} is invalid!")
|
||||
cfg.update(upd_cfg)
|
||||
return cfg
|
||||
self.update(upd_cfg)
|
||||
|
||||
def __init__(self, cfg_schema: typing.Dict, deprecated: typing.Dict[str, str]):
|
||||
"""Construtor of class Config.
|
||||
|
@ -153,7 +154,7 @@ class Config:
|
|||
raise KeyError(parent_name)
|
||||
return parent
|
||||
|
||||
def path(self, name: str, default=UNSET):
|
||||
def path(self, name: str, default: Any = UNSET):
|
||||
"""Get a :py:class:`pathlib.Path` object from a config string."""
|
||||
|
||||
val = self.get(name, default)
|
||||
|
@ -163,7 +164,7 @@ class Config:
|
|||
return default
|
||||
return pathlib.Path(str(val))
|
||||
|
||||
def pyobj(self, name, default=UNSET):
|
||||
def pyobj(self, name, default: Any = UNSET):
|
||||
"""Get python object refered by full qualiffied name (FQN) in the config
|
||||
string."""
|
||||
|
||||
|
|
|
@ -36,6 +36,24 @@ dropped.
|
|||
.. _X-Forwarded-For:
|
||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
|
||||
|
||||
|
||||
Config
|
||||
~~~~~~
|
||||
|
||||
.. code:: toml
|
||||
|
||||
[botdetection.ip_limit]
|
||||
|
||||
# To get unlimited access in a local network, by default link-local addresses
|
||||
# (networks) are not monitored by the ip_limit
|
||||
filter_link_local = false
|
||||
|
||||
# activate link_token method in the ip_limit method
|
||||
link_token = false
|
||||
|
||||
Implementations
|
||||
~~~~~~~~~~~~~~~
|
||||
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from ipaddress import (
|
||||
|
@ -46,9 +64,8 @@ from ipaddress import (
|
|||
import flask
|
||||
import werkzeug
|
||||
|
||||
from searx import redisdb
|
||||
from searx.redislib import incr_sliding_window, drop_counter
|
||||
|
||||
from . import ctx
|
||||
from .redislib import incr_sliding_window, drop_counter
|
||||
from . import link_token
|
||||
from . import config
|
||||
from ._helpers import (
|
||||
|
@ -77,11 +94,11 @@ LONG_MAX = 150
|
|||
LONG_MAX_SUSPICIOUS = 10
|
||||
"""Maximum suspicious requests from one IP in the :py:obj:`LONG_WINDOW`"""
|
||||
|
||||
API_WONDOW = 3600
|
||||
API_WINDOW = 3600
|
||||
"""Time (sec) before sliding window for API requests (format != html) expires."""
|
||||
|
||||
API_MAX = 4
|
||||
"""Maximum requests from one IP in the :py:obj:`API_WONDOW`"""
|
||||
"""Maximum requests from one IP in the :py:obj:`API_WINDOW`"""
|
||||
|
||||
SUSPICIOUS_IP_WINDOW = 3600 * 24 * 30
|
||||
"""Time (sec) before sliding window for one suspicious IP expires."""
|
||||
|
@ -97,14 +114,13 @@ def filter_request(
|
|||
) -> werkzeug.Response | None:
|
||||
|
||||
# pylint: disable=too-many-return-statements
|
||||
redis_client = redisdb.client()
|
||||
|
||||
if network.is_link_local and not cfg['botdetection.ip_limit.filter_link_local']:
|
||||
logger.debug("network %s is link-local -> not monitored by ip_limit method", network.compressed)
|
||||
return None
|
||||
|
||||
if request.args.get('format', 'html') != 'html':
|
||||
c = incr_sliding_window(redis_client, 'ip_limit.API_WONDOW:' + network.compressed, API_WONDOW)
|
||||
c = incr_sliding_window(ctx.redis_client, 'ip_limit.API_WINDOW:' + network.compressed, API_WINDOW)
|
||||
if c > API_MAX:
|
||||
return too_many_requests(network, "too many request in API_WINDOW")
|
||||
|
||||
|
@ -114,33 +130,33 @@ def filter_request(
|
|||
|
||||
if not suspicious:
|
||||
# this IP is no longer suspicious: release ip again / delete the counter of this IP
|
||||
drop_counter(redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + network.compressed)
|
||||
drop_counter(ctx.redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + network.compressed)
|
||||
return None
|
||||
|
||||
# this IP is suspicious: count requests from this IP
|
||||
c = incr_sliding_window(
|
||||
redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + network.compressed, SUSPICIOUS_IP_WINDOW
|
||||
ctx.redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + network.compressed, SUSPICIOUS_IP_WINDOW
|
||||
)
|
||||
if c > SUSPICIOUS_IP_MAX:
|
||||
logger.error("BLOCK: too many request from %s in SUSPICIOUS_IP_WINDOW (redirect to /)", network)
|
||||
return flask.redirect(flask.url_for('index'), code=302)
|
||||
|
||||
c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + network.compressed, BURST_WINDOW)
|
||||
c = incr_sliding_window(ctx.redis_client, 'ip_limit.BURST_WINDOW' + network.compressed, BURST_WINDOW)
|
||||
if c > BURST_MAX_SUSPICIOUS:
|
||||
return too_many_requests(network, "too many request in BURST_WINDOW (BURST_MAX_SUSPICIOUS)")
|
||||
|
||||
c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + network.compressed, LONG_WINDOW)
|
||||
c = incr_sliding_window(ctx.redis_client, 'ip_limit.LONG_WINDOW' + network.compressed, LONG_WINDOW)
|
||||
if c > LONG_MAX_SUSPICIOUS:
|
||||
return too_many_requests(network, "too many request in LONG_WINDOW (LONG_MAX_SUSPICIOUS)")
|
||||
|
||||
return None
|
||||
|
||||
# vanilla limiter without extensions counts BURST_MAX and LONG_MAX
|
||||
c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + network.compressed, BURST_WINDOW)
|
||||
c = incr_sliding_window(ctx.redis_client, 'ip_limit.BURST_WINDOW' + network.compressed, BURST_WINDOW)
|
||||
if c > BURST_MAX:
|
||||
return too_many_requests(network, "too many request in BURST_WINDOW (BURST_MAX)")
|
||||
|
||||
c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + network.compressed, LONG_WINDOW)
|
||||
c = incr_sliding_window(ctx.redis_client, 'ip_limit.LONG_WINDOW' + network.compressed, LONG_WINDOW)
|
||||
if c > LONG_MAX:
|
||||
return too_many_requests(network, "too many request in LONG_WINDOW (LONG_MAX)")
|
||||
|
||||
|
|
|
@ -8,6 +8,10 @@ Method ``ip_lists``
|
|||
The ``ip_lists`` method implements IP :py:obj:`block- <block_ip>` and
|
||||
:py:obj:`pass-lists <pass_ip>`.
|
||||
|
||||
|
||||
Config
|
||||
~~~~~~
|
||||
|
||||
.. code:: toml
|
||||
|
||||
[botdetection.ip_lists]
|
||||
|
@ -22,6 +26,10 @@ The ``ip_lists`` method implements IP :py:obj:`block- <block_ip>` and
|
|||
'257.1.1.1', # invalid IP --> will be ignored, logged in ERROR class
|
||||
]
|
||||
|
||||
|
||||
Implementations
|
||||
~~~~~~~~~~~~~~~
|
||||
|
||||
"""
|
||||
# pylint: disable=unused-argument
|
||||
|
||||
|
@ -38,24 +46,11 @@ from ._helpers import logger
|
|||
|
||||
logger = logger.getChild('ip_limit')
|
||||
|
||||
SEARXNG_ORG = [
|
||||
# https://github.com/searxng/searxng/pull/2484#issuecomment-1576639195
|
||||
'167.235.158.251', # IPv4 check.searx.space
|
||||
'2a01:04f8:1c1c:8fc2::/64', # IPv6 check.searx.space
|
||||
]
|
||||
"""Passlist of IPs from the SearXNG organization, e.g. `check.searx.space`."""
|
||||
|
||||
|
||||
def pass_ip(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> Tuple[bool, str]:
|
||||
"""Checks if the IP on the subnet is in one of the members of the
|
||||
``botdetection.ip_lists.pass_ip`` list.
|
||||
"""
|
||||
|
||||
if cfg.get('botdetection.ip_lists.pass_searxng_org', default=True):
|
||||
for net in SEARXNG_ORG:
|
||||
net = ip_network(net, strict=False)
|
||||
if real_ip.version == net.version and real_ip in net:
|
||||
return True, f"IP matches {net.compressed} in SEARXNG_ORG list."
|
||||
return ip_is_subnet_of_member_in_list(real_ip, 'botdetection.ip_lists.pass_ip', cfg)
|
||||
|
||||
|
||||
|
|
|
@ -13,6 +13,9 @@ a ping by request a static URL.
|
|||
|
||||
This method requires a redis DB and needs a HTTP X-Forwarded-For_ header.
|
||||
|
||||
.. _X-Forwarded-For:
|
||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
|
||||
|
||||
To make use of this method, a flask URL route needs to be added:
|
||||
|
||||
.. code:: python
|
||||
|
@ -31,10 +34,31 @@ And in the HTML template from flask a stylesheet link is needed (the value of
|
|||
href="{{ url_for('client_token', token=link_token) }}"
|
||||
type="text/css" />
|
||||
|
||||
.. _X-Forwarded-For:
|
||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
|
||||
|
||||
Config
|
||||
~~~~~~
|
||||
|
||||
.. code:: toml
|
||||
|
||||
[botdetection.link_token]
|
||||
# Lifetime (sec) of limiter's CSS token.
|
||||
TOKEN_LIVE_TIME = 600
|
||||
|
||||
# Lifetime (sec) of the ping-key from a client (request)
|
||||
PING_LIVE_TIME = 3600
|
||||
|
||||
# Prefix of all ping-keys generated by link_token.get_ping_key
|
||||
PING_KEY = 'botdetection.link_token.PING_KEY'
|
||||
|
||||
# Key for which the current token is stored in the DB
|
||||
TOKEN_KEY = 'botdetection.link_token.TOKEN_KEY'
|
||||
|
||||
|
||||
Implementations
|
||||
~~~~~~~~~~~~~~~
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
from ipaddress import (
|
||||
IPv4Network,
|
||||
|
@ -46,48 +70,47 @@ import string
|
|||
import random
|
||||
import flask
|
||||
|
||||
from searx import logger
|
||||
from searx import redisdb
|
||||
from searx.redislib import secret_hash
|
||||
from . import ctx
|
||||
from .redislib import secret_hash
|
||||
|
||||
from ._helpers import (
|
||||
logger,
|
||||
get_network,
|
||||
get_real_ip,
|
||||
)
|
||||
|
||||
TOKEN_LIVE_TIME = 600
|
||||
"""Livetime (sec) of limiter's CSS token."""
|
||||
|
||||
PING_LIVE_TIME = 3600
|
||||
"""Livetime (sec) of the ping-key from a client (request)"""
|
||||
logger = logger.getChild('link_token')
|
||||
|
||||
PING_KEY = 'SearXNG_limiter.ping'
|
||||
|
||||
PING_KEY = 'botdetection.link_token.PING_KEY'
|
||||
"""Prefix of all ping-keys generated by :py:obj:`get_ping_key`"""
|
||||
|
||||
TOKEN_KEY = 'SearXNG_limiter.token'
|
||||
TOKEN_KEY = 'botdetection.link_token.TOKEN_KEY'
|
||||
"""Key for which the current token is stored in the DB"""
|
||||
|
||||
logger = logger.getChild('botdetection.link_token')
|
||||
|
||||
def _cfg(name):
    """Return the value of option ``name`` from the
    ``botdetection.link_token`` section of the botdetection config."""
    option = 'botdetection.link_token.{}'.format(name)
    return ctx.cfg.get(option)
|
||||
|
||||
|
||||
def is_suspicious(network: IPv4Network | IPv6Network, request: flask.Request, renew: bool = False):
|
||||
"""Checks whether a valid ping is exists for this (client) network, if not
|
||||
this request is rated as *suspicious*. If a valid ping exists and argument
|
||||
``renew`` is ``True`` the expire time of this ping is reset to
|
||||
:py:obj:`PING_LIVE_TIME`.
|
||||
``PING_LIVE_TIME``.
|
||||
|
||||
"""
|
||||
redis_client = redisdb.client()
|
||||
if not redis_client:
|
||||
if not ctx.redis_client:
|
||||
return False
|
||||
|
||||
ping_key = get_ping_key(network, request)
|
||||
if not redis_client.get(ping_key):
|
||||
if not ctx.redis_client.get(ping_key):
|
||||
logger.info("missing ping (IP: %s) / request: %s", network.compressed, ping_key)
|
||||
return True
|
||||
|
||||
if renew:
|
||||
redis_client.set(ping_key, 1, ex=PING_LIVE_TIME)
|
||||
ctx.redis_client.set(ping_key, 1, ex=_cfg('PING_LIVE_TIME'))
|
||||
|
||||
logger.debug("found ping for (client) network %s -> %s", network.compressed, ping_key)
|
||||
return False
|
||||
|
@ -96,22 +119,21 @@ def is_suspicious(network: IPv4Network | IPv6Network, request: flask.Request, re
|
|||
def ping(request: flask.Request, token: str):
|
||||
"""This function is called by a request to URL ``/client<token>.css``. If
|
||||
``token`` is valid a :py:obj:`PING_KEY` for the client is stored in the DB.
|
||||
The expire time of this ping-key is :py:obj:`PING_LIVE_TIME`.
|
||||
The expire time of this ping-key is ``PING_LIVE_TIME``.
|
||||
|
||||
"""
|
||||
from . import redis_client, cfg # pylint: disable=import-outside-toplevel, cyclic-import
|
||||
|
||||
if not redis_client:
|
||||
if not ctx.redis_client:
|
||||
return
|
||||
if not token_is_valid(token):
|
||||
return
|
||||
|
||||
real_ip = ip_address(get_real_ip(request))
|
||||
network = get_network(real_ip, cfg)
|
||||
network = get_network(real_ip, ctx.cfg)
|
||||
|
||||
ping_key = get_ping_key(network, request)
|
||||
logger.debug("store ping_key for (client) network %s (IP %s) -> %s", network.compressed, real_ip, ping_key)
|
||||
redis_client.set(ping_key, 1, ex=PING_LIVE_TIME)
|
||||
|
||||
ctx.redis_client.set(ping_key, 1, ex=_cfg('PING_LIVE_TIME'))
|
||||
|
||||
|
||||
def get_ping_key(network: IPv4Network | IPv6Network, request: flask.Request) -> str:
|
||||
|
@ -137,19 +159,21 @@ def get_token() -> str:
|
|||
"""Returns current token. If there is no currently active token a new token
|
||||
is generated randomly and stored in the redis DB.
|
||||
|
||||
- :py:obj:`TOKEN_LIVE_TIME`
|
||||
- :py:obj:`TOKEN_KEY`
|
||||
Config:
|
||||
|
||||
- ``TOKEN_LIVE_TIME``
|
||||
- ``TOKEN_KEY``
|
||||
|
||||
"""
|
||||
redis_client = redisdb.client()
|
||||
if not redis_client:
|
||||
if not ctx.redis_client:
|
||||
# This function is also called when limiter is inactive / no redis DB
|
||||
# (see render function in webapp.py)
|
||||
return '12345678'
|
||||
token = redis_client.get(TOKEN_KEY)
|
||||
token_key = _cfg('TOKEN_KEY')
|
||||
token = ctx.redis_client.get(token_key)
|
||||
if token:
|
||||
token = token.decode('UTF-8')
|
||||
else:
|
||||
token = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(16))
|
||||
redis_client.set(TOKEN_KEY, token, ex=TOKEN_LIVE_TIME)
|
||||
ctx.redis_client.set(token_key, token, ex=_cfg('TOKEN_LIVE_TIME'))
|
||||
return token
|
||||
|
|
263
searx/botdetection/redislib.py
Normal file
263
searx/botdetection/redislib.py
Normal file
|
@ -0,0 +1,263 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# lint: pylint
|
||||
"""A collection of convenient functions and redis/lua scripts.
|
||||
|
||||
This code was partially inspired by the `Bullet-Proofing Lua Scripts in RedisPy`_
|
||||
article.
|
||||
|
||||
.. _Bullet-Proofing Lua Scripts in RedisPy:
|
||||
https://redis.com/blog/bullet-proofing-lua-scripts-in-redispy/
|
||||
|
||||
Config
|
||||
~~~~~~
|
||||
|
||||
.. code:: toml
|
||||
|
||||
[botdetection.redis]
|
||||
|
||||
# FQDN of a function definition. A function with which the DB keys of the Redis
|
||||
# DB are to be anonymized.
|
||||
secret_hash = ''
|
||||
|
||||
# A prefix for all keys stored by the botdetection in the redis DB
|
||||
REDIS_KEY_PREFIX = 'botdetection_'
|
||||
|
||||
|
||||
Implementations
|
||||
~~~~~~~~~~~~~~~
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from . import ctx
|
||||
|
||||
REDIS_KEY_PREFIX = 'botdetection'
|
||||
"""A prefix applied to all keys store by the botdetection in the redis DB."""
|
||||
|
||||
LUA_SCRIPT_STORAGE = {}
|
||||
"""A global dictionary to cache client's ``Script`` objects, used by
|
||||
:py:obj:`lua_script_storage`"""
|
||||
|
||||
|
||||
def secret_hash(name: str) -> str:
    """Anonymize ``name`` with the function configured in
    ``botdetection.redis.secret_hash``.

    If no function is configured, ``name`` is returned unchanged.
    """
    hash_func = ctx.cfg.pyobj('botdetection.redis.secret_hash', default=None)  # type: ignore
    return hash_func(name) if hash_func else name
|
||||
|
||||
|
||||
def _prefix(val: str | None = None) -> str:
|
||||
if val is None:
|
||||
val = ctx.cfg.get('botdetection.redis.REDIS_KEY_PREFIX', default=REDIS_KEY_PREFIX) # type: ignore
|
||||
return str(val)
|
||||
|
||||
|
||||
def lua_script_storage(client, script):
    """Return the redis ``Script`` instance of the lua ``script`` for ``client``
    (see :py:obj:`Script <redis.commands.core.CoreCommands.register_script>`).

    For performance reasons ``client.register_script(script)`` is called only
    once per client and script; the resulting object is cached in
    :py:obj:`LUA_SCRIPT_STORAGE` and returned on every further call.

    """

    # The redis connection can be closed, therefore the id() of the redis
    # connector (not the connection) is the key in the script storage.
    client_id = id(client)

    client_scripts = LUA_SCRIPT_STORAGE.get(client_id)
    if client_scripts is None:
        client_scripts = LUA_SCRIPT_STORAGE[client_id] = {}

    registered = client_scripts.get(script)
    if registered is None:
        registered = client_scripts[script] = client.register_script(script)

    return registered
|
||||
|
||||
|
||||
PURGE_BY_PREFIX = """
|
||||
local prefix = tostring(ARGV[1])
|
||||
for i, name in ipairs(redis.call('KEYS', prefix .. '*')) do
|
||||
redis.call('EXPIRE', name, 0)
|
||||
end
|
||||
"""
|
||||
|
||||
|
||||
def purge_by_prefix(client, prefix: str | None):
    """Purge all keys starting with ``prefix`` from the database.

    All keys matching ``<prefix>*`` are queried and their expire time is set
    to zero.  With ``prefix=None`` the configured key prefix of the
    botdetection is used (see :py:obj:`REDIS_KEY_PREFIX`).

    The work is done by the lua script :py:obj:`PURGE_BY_PREFIX`.  The script
    uses EXPIRE_ instead of DEL_: if there are a lot of keys to delete and/or
    their values are big, `DEL` could take more time and block the command
    loop, while `EXPIRE` returns immediately.

    :param prefix: prefix of the keys to delete (default: :py:obj:`REDIS_KEY_PREFIX`)
    :type prefix: str

    .. _EXPIRE: https://redis.io/commands/expire/
    .. _DEL: https://redis.io/commands/del/

    """
    purge = lua_script_storage(client, PURGE_BY_PREFIX)
    purge(args=[_prefix(prefix)])
|
||||
|
||||
|
||||
INCR_COUNTER = """
|
||||
local limit = tonumber(ARGV[1])
|
||||
local expire = tonumber(ARGV[2])
|
||||
local c_name = KEYS[1]
|
||||
|
||||
local c = redis.call('GET', c_name)
|
||||
|
||||
if not c then
|
||||
c = redis.call('INCR', c_name)
|
||||
if expire > 0 then
|
||||
redis.call('EXPIRE', c_name, expire)
|
||||
end
|
||||
else
|
||||
c = tonumber(c)
|
||||
if limit == 0 or c < limit then
|
||||
c = redis.call('INCR', c_name)
|
||||
end
|
||||
end
|
||||
return c
|
||||
"""
|
||||
|
||||
|
||||
def incr_counter(client, name: str, limit: int = 0, expire: int = 0):
    """Increment a counter and return its new value.

    The counter lives under the redis key *prefix* + ``counter_<name>``, where
    *prefix* is the configured key prefix (see :py:obj:`REDIS_KEY_PREFIX`) and
    ``<name>`` is the *secret hash* of argument ``name`` (see
    :py:func:`secret_hash`).  A counter that does not yet exist is created and
    its initial value 1 is returned.

    The counter is implemented by the lua script :py:obj:`INCR_COUNTER`.

    :param name: name of the counter
    :type name: str

    :param limit: value at which the counter stops to increment; the default
      ``0`` means *no limit*.
    :type limit: int / limit is 2^64 see INCR_

    :param expire: live-time of the counter in seconds, applied when the
      counter is created; the default ``0`` means *no expire time*.
    :type expire: int / see EXPIRE_

    :return: value of the counter after the call
    :type return: int

    .. _EXPIRE: https://redis.io/commands/expire/
    .. _INCR: https://redis.io/commands/incr/

    A simple demo of a counter with expire time and limit::

      >>> for i in range(6):
      ...   i, incr_counter(client, "foo", 3, 5) # max 3, duration 5 sec
      ...   time.sleep(1) # from the third call on max has been reached
      ...
      (0, 1)
      (1, 2)
      (2, 3)
      (3, 3)
      (4, 3)
      (5, 1)

    """
    counter_script = lua_script_storage(client, INCR_COUNTER)
    key = "".join((_prefix(), "counter_", secret_hash(name)))
    return counter_script(args=[limit, expire], keys=[key])
|
||||
|
||||
|
||||
def drop_counter(client, name):
    """Delete the counter stored under the redis key *prefix* +
    ``counter_<name>``.

    *prefix* is the configured key prefix (see :py:obj:`REDIS_KEY_PREFIX`) and
    ``<name>`` is the *secret hash* of argument ``name``, the same key layout
    that :py:func:`incr_counter` and :py:func:`incr_sliding_window` use.

    """
    key = "".join((_prefix(), "counter_", secret_hash(name)))
    client.delete(key)
|
||||
|
||||
|
||||
INCR_SLIDING_WINDOW = """
|
||||
local expire = tonumber(ARGV[1])
|
||||
local name = KEYS[1]
|
||||
local current_time = redis.call('TIME')
|
||||
|
||||
redis.call('ZREMRANGEBYSCORE', name, 0, current_time[1] - expire)
|
||||
redis.call('ZADD', name, current_time[1], current_time[1] .. current_time[2])
|
||||
local result = redis.call('ZCOUNT', name, 0, current_time[1] + 1)
|
||||
redis.call('EXPIRE', name, expire)
|
||||
return result
|
||||
"""
|
||||
|
||||
|
||||
def incr_sliding_window(client, name: str, duration: int):
    """Increment a sliding-window counter and return its new value.

    The counter lives under the redis key *prefix* + ``counter_<name>``, where
    *prefix* is the configured key prefix (see :py:obj:`REDIS_KEY_PREFIX`) and
    ``<name>`` is the *secret hash* of argument ``name`` (see
    :py:func:`secret_hash`).  A counter that does not yet exist is created and
    its initial value 1 is returned.

    :param name: name of the counter
    :type name: str

    :param duration: live-time of the sliding window in seconds
    :type duration: int

    :return: value of the incremented counter
    :type return: int

    The counter is implemented by the lua script
    :py:obj:`INCR_SLIDING_WINDOW`, which uses `sorted sets in Redis`_ to build
    the sliding window (ZADD_).  The items of the sorted set are scored with
    the current TIME_; the window is moved by removing the items whose score
    is lower than the current time minus *duration* (ZREMRANGEBYSCORE_).

    The EXPIRE_ time (the duration of the sliding window) is refreshed on each
    call (increment); if there is no call within this duration, the sorted set
    expires from the redis DB.

    The return value is the number of items in the sorted set (ZCOUNT_), i.e.
    the number of calls in the sliding window.

    .. _Sorted sets in Redis:
       https://redis.com/ebook/part-1-getting-started/chapter-1-getting-to-know-redis/1-2-what-redis-data-structures-look-like/1-2-5-sorted-sets-in-redis/
    .. _TIME: https://redis.io/commands/time/
    .. _ZADD: https://redis.io/commands/zadd/
    .. _EXPIRE: https://redis.io/commands/expire/
    .. _ZREMRANGEBYSCORE: https://redis.io/commands/zremrangebyscore/
    .. _ZCOUNT: https://redis.io/commands/zcount/

    A simple demo of the sliding window::

      >>> for i in range(5):
      ...   incr_sliding_window(client, "foo", 3) # duration 3 sec
      ...   time.sleep(1) # from the third call (second) on the window is moved
      ...
      1
      2
      3
      3
      3
      >>> time.sleep(3)  # wait until expire
      >>> incr_sliding_window(client, "foo", 3)
      1

    """
    window_script = lua_script_storage(client, INCR_SLIDING_WINDOW)
    key = "".join((_prefix(), "counter_", secret_hash(name)))
    return window_script(args=[duration], keys=[key])
|
58
searx/botdetection/schema.toml
Normal file
58
searx/botdetection/schema.toml
Normal file
|
@ -0,0 +1,58 @@
|
|||
[real_ip]
|
||||
|
||||
# Number of values to trust for X-Forwarded-For.
|
||||
|
||||
x_for = 1
|
||||
|
||||
# The prefix defines the number of leading bits in an address that are compared
|
||||
# to determine whether or not an address is part of a (client) network.
|
||||
|
||||
ipv4_prefix = 32
|
||||
ipv6_prefix = 48
|
||||
|
||||
[botdetection.redis]
|
||||
|
||||
# FQDN of a function definition. A function with which the DB keys of the Redis
|
||||
# DB are to be anonymized.
|
||||
secret_hash = ''
|
||||
|
||||
# A prefix for all keys stored by the botdetection in the redis DB
|
||||
REDIS_KEY_PREFIX = 'botdetection_'
|
||||
|
||||
[botdetection.ip_limit]
|
||||
|
||||
# To get unlimited access in a local network, by default link-local addresses
|
||||
# (networks) are not monitored by the ip_limit
|
||||
filter_link_local = false
|
||||
|
||||
# activate link_token method in the ip_limit method
|
||||
link_token = false
|
||||
|
||||
[botdetection.link_token]
|
||||
# Lifetime (sec) of limiter's CSS token.
|
||||
TOKEN_LIVE_TIME = 600
|
||||
|
||||
# Lifetime (sec) of the ping-key from a client (request)
|
||||
PING_LIVE_TIME = 3600
|
||||
|
||||
# Prefix of all ping-keys generated by link_token.get_ping_key
|
||||
PING_KEY = 'botdetection.link_token.PING_KEY'
|
||||
|
||||
# Key for which the current token is stored in the DB
|
||||
TOKEN_KEY = 'botdetection.link_token.TOKEN_KEY'
|
||||
|
||||
[botdetection.ip_lists]
|
||||
|
||||
# In the limiter, the ip_lists method has priority over all other methods -> if
|
||||
# an IP is in the pass_ip list, it has unrestricted access and it is also not
|
||||
# checked if e.g. the "user agent" suggests a bot (e.g. curl).
|
||||
|
||||
block_ip = [
|
||||
# '93.184.216.34', # IPv4 of example.org
|
||||
# '257.1.1.1', # invalid IP --> will be ignored, logged in ERROR class
|
||||
]
|
||||
|
||||
pass_ip = [
|
||||
# '192.168.0.0/16', # IPv4 private network
|
||||
# 'fe80::/10' # IPv6 linklocal / wins over botdetection.ip_limit.filter_link_local
|
||||
]
|
|
@ -107,7 +107,6 @@ from searx import (
|
|||
)
|
||||
from searx import botdetection
|
||||
from searx.botdetection import (
|
||||
config,
|
||||
http_accept,
|
||||
http_accept_encoding,
|
||||
http_accept_language,
|
||||
|
@ -123,31 +122,26 @@ from searx.botdetection import (
|
|||
# coherency, the logger is "limiter"
|
||||
logger = logger.getChild('limiter')
|
||||
|
||||
CFG: config.Config = None # type: ignore
|
||||
_INSTALLED = False
|
||||
_FULLY_INSTALLED = False
|
||||
|
||||
LIMITER_CFG_SCHEMA = Path(__file__).parent / "limiter.toml"
|
||||
DEFAULT_CFG = Path(__file__).parent / "limiter.toml"
|
||||
"""Base configuration (schema) of the botdetection."""
|
||||
|
||||
LIMITER_CFG = Path('/etc/searxng/limiter.toml')
|
||||
"""Local Limiter configuration."""
|
||||
|
||||
CFG_DEPRECATED = {
|
||||
# "dummy.old.foo": "config 'dummy.old.foo' exists only for tests. Don't use it in your real project config."
|
||||
}
|
||||
|
||||
|
||||
def get_cfg() -> config.Config:
|
||||
global CFG # pylint: disable=global-statement
|
||||
if CFG is None:
|
||||
CFG = config.Config.from_toml(LIMITER_CFG_SCHEMA, LIMITER_CFG, CFG_DEPRECATED)
|
||||
return CFG
|
||||
SEARXNG_ORG = [
|
||||
# https://github.com/searxng/searxng/pull/2484#issuecomment-1576639195
|
||||
'167.235.158.251', # IPv4 check.searx.space
|
||||
'2a01:04f8:1c1c:8fc2::/64', # IPv6 check.searx.space
|
||||
]
|
||||
"""Passlist of IPs from the SearXNG organization, e.g. `check.searx.space`."""
|
||||
|
||||
|
||||
def filter_request(request: flask.Request) -> werkzeug.Response | None:
|
||||
# pylint: disable=too-many-return-statements
|
||||
|
||||
cfg = get_cfg()
|
||||
cfg = botdetection.ctx.cfg
|
||||
real_ip = ip_address(get_real_ip(request))
|
||||
network = get_network(real_ip, cfg)
|
||||
|
||||
|
@ -210,34 +204,42 @@ def pre_request():
|
|||
return filter_request(flask.request)
|
||||
|
||||
|
||||
def is_installed():
|
||||
def is_fully_installed():
|
||||
"""Returns ``True`` if limiter is active and a redis DB is available."""
|
||||
return _INSTALLED
|
||||
return _FULLY_INSTALLED
|
||||
|
||||
|
||||
def initialize(app: flask.Flask, settings):
|
||||
"""Install the limiter"""
|
||||
global _INSTALLED # pylint: disable=global-statement
|
||||
global _FULLY_INSTALLED # pylint: disable=global-statement
|
||||
|
||||
if not (settings['server']['limiter'] or settings['server']['public_instance']):
|
||||
return
|
||||
# even if the limiter is not activated, the botdetection must be activated
|
||||
# (e.g. the self_info plugin uses the botdetection to get client IP)
|
||||
|
||||
redis_client = redisdb.client()
|
||||
if not redis_client:
|
||||
botdetection.ctx.init(DEFAULT_CFG, redis_client)
|
||||
cfg = botdetection.ctx.cfg
|
||||
|
||||
if settings['server']['public_instance']:
|
||||
# overwrite SearXNG and limiter.toml settings
|
||||
settings['server']['limiter'] = True
|
||||
settings['server']['pass_searxng_org'] = True
|
||||
cfg.set('botdetection.ip_limit.link_token', True)
|
||||
|
||||
if settings['server']['pass_searxng_org']:
|
||||
cfg.get('botdetection.ip_lists.pass_ip').extend(SEARXNG_ORG)
|
||||
|
||||
if settings['server']['limiter']:
|
||||
app.before_request(pre_request)
|
||||
|
||||
if redis_client:
|
||||
_FULLY_INSTALLED = True
|
||||
|
||||
else:
|
||||
logger.error(
|
||||
"The limiter requires Redis, please consult the documentation: "
|
||||
"https://docs.searxng.org/admin/searx.limiter.html"
|
||||
)
|
||||
if settings['server']['public_instance']:
|
||||
sys.exit(1)
|
||||
return
|
||||
|
||||
_INSTALLED = True
|
||||
|
||||
cfg = get_cfg()
|
||||
if settings['server']['public_instance']:
|
||||
# overwrite limiter.toml setting
|
||||
cfg.set('botdetection.ip_limit.link_token', True)
|
||||
|
||||
botdetection.init(cfg, redis_client)
|
||||
app.before_request(pre_request)
|
||||
logger.error('server:public_instance activated but redis DB is missed')
|
||||
sys.exit()
|
||||
|
|
|
@ -10,31 +10,25 @@ x_for = 1
|
|||
ipv4_prefix = 32
|
||||
ipv6_prefix = 48
|
||||
|
||||
[botdetection.ip_limit]
|
||||
[botdetection.redis]
|
||||
|
||||
# To get unlimited access in a local network, by default link-local addresses
|
||||
# (networks) are not monitored by the ip_limit
|
||||
filter_link_local = false
|
||||
# FQDN of a function definition. A function with which the DB keys of the Redis
|
||||
# DB are to be anonymized.
|
||||
secret_hash = 'searx.redislib.secret_hash'
|
||||
|
||||
# activate link_token method in the ip_limit method
|
||||
link_token = false
|
||||
# A prefix to all keys stored by the botdetection in the redis DB
|
||||
REDIS_KEY_PREFIX = 'SearXNG_'
|
||||
|
||||
[botdetection.ip_lists]
|
||||
[botdetection.link_token]
|
||||
|
||||
# In the limiter, the ip_lists method has priority over all other methods -> if
|
||||
# an IP is in the pass_ip list, it has unrestricted access and it is also not
|
||||
# checked if e.g. the "user agent" suggests a bot (e.g. curl).
|
||||
# Lifetime (sec) of limiter's CSS token.
|
||||
TOKEN_LIVE_TIME = 600
|
||||
|
||||
block_ip = [
|
||||
# '93.184.216.34', # IPv4 of example.org
|
||||
# '257.1.1.1', # invalid IP --> will be ignored, logged in ERROR class
|
||||
]
|
||||
# Lifetime (sec) of the ping-key from a client (request)
|
||||
PING_LIVE_TIME = 3600
|
||||
|
||||
pass_ip = [
|
||||
# '192.168.0.0/16', # IPv4 private network
|
||||
# 'fe80::/10' # IPv6 linklocal / wins over botdetection.ip_limit.filter_link_local
|
||||
]
|
||||
# Prefix of all ping-keys generated by link_token.get_ping_key
|
||||
PING_KEY = 'SearXNG_limiter.ping'
|
||||
|
||||
# Activate passlist of (hardcoded) IPs from the SearXNG organization,
|
||||
# e.g. `check.searx.space`.
|
||||
pass_searxng_org = true
|
||||
# Key for which the current token is stored in the DB
|
||||
TOKEN_KEY = 'SearXNG_limiter.token'
|
||||
|
|
|
@ -1,77 +1,12 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# lint: pylint
|
||||
"""A collection of convenient functions and redis/lua scripts.
|
||||
|
||||
This code was partially inspired by the `Bullet-Proofing Lua Scripts in RedisPy`_
|
||||
article.
|
||||
|
||||
.. _Bullet-Proofing Lua Scripts in RedisPy:
|
||||
https://redis.com/blog/bullet-proofing-lua-scripts-in-redispy/
|
||||
|
||||
"""
|
||||
|
||||
import hmac
|
||||
|
||||
from searx import get_setting
|
||||
|
||||
LUA_SCRIPT_STORAGE = {}
|
||||
"""A global dictionary to cache client's ``Script`` objects, used by
|
||||
:py:obj:`lua_script_storage`"""
|
||||
|
||||
|
||||
def lua_script_storage(client, script):
|
||||
"""Returns a redis :py:obj:`Script
|
||||
<redis.commands.core.CoreCommands.register_script>` instance.
|
||||
|
||||
Due to performance reason the ``Script`` object is instantiated only once
|
||||
for a client (``client.register_script(..)``) and is cached in
|
||||
:py:obj:`LUA_SCRIPT_STORAGE`.
|
||||
|
||||
"""
|
||||
|
||||
# redis connection can be closed, lets use the id() of the redis connector
|
||||
# as key in the script-storage:
|
||||
client_id = id(client)
|
||||
|
||||
if LUA_SCRIPT_STORAGE.get(client_id) is None:
|
||||
LUA_SCRIPT_STORAGE[client_id] = {}
|
||||
|
||||
if LUA_SCRIPT_STORAGE[client_id].get(script) is None:
|
||||
LUA_SCRIPT_STORAGE[client_id][script] = client.register_script(script)
|
||||
|
||||
return LUA_SCRIPT_STORAGE[client_id][script]
|
||||
|
||||
|
||||
PURGE_BY_PREFIX = """
|
||||
local prefix = tostring(ARGV[1])
|
||||
for i, name in ipairs(redis.call('KEYS', prefix .. '*')) do
|
||||
redis.call('EXPIRE', name, 0)
|
||||
end
|
||||
"""
|
||||
|
||||
|
||||
def purge_by_prefix(client, prefix: str = "SearXNG_"):
|
||||
"""Purge all keys with ``prefix`` from database.
|
||||
|
||||
Queries all keys in the database by the given prefix and set expire time to
|
||||
zero. The default prefix will drop all keys which have been set by SearXNG
|
||||
(drops SearXNG schema entirely from database).
|
||||
|
||||
The implementation is the lua script from string :py:obj:`PURGE_BY_PREFIX`.
|
||||
The lua script uses EXPIRE_ instead of DEL_: if there are a lot of keys to
|
||||
delete and/or their values are big, `DEL` could take more time and blocks
|
||||
the command loop while `EXPIRE` returns immediately.
|
||||
|
||||
:param prefix: prefix of the key to delete (default: ``SearXNG_``)
|
||||
:type prefix: str
|
||||
|
||||
.. _EXPIRE: https://redis.io/commands/expire/
|
||||
.. _DEL: https://redis.io/commands/del/
|
||||
|
||||
"""
|
||||
script = lua_script_storage(client, PURGE_BY_PREFIX)
|
||||
script(args=[prefix])
|
||||
|
||||
|
||||
def secret_hash(name: str):
|
||||
"""Creates a hash of the ``name``.
|
||||
|
@ -86,156 +21,3 @@ def secret_hash(name: str):
|
|||
m = hmac.new(bytes(name, encoding='utf-8'), digestmod='sha256')
|
||||
m.update(bytes(get_setting('server.secret_key'), encoding='utf-8'))
|
||||
return m.hexdigest()
|
||||
|
||||
|
||||
INCR_COUNTER = """
|
||||
local limit = tonumber(ARGV[1])
|
||||
local expire = tonumber(ARGV[2])
|
||||
local c_name = KEYS[1]
|
||||
|
||||
local c = redis.call('GET', c_name)
|
||||
|
||||
if not c then
|
||||
c = redis.call('INCR', c_name)
|
||||
if expire > 0 then
|
||||
redis.call('EXPIRE', c_name, expire)
|
||||
end
|
||||
else
|
||||
c = tonumber(c)
|
||||
if limit == 0 or c < limit then
|
||||
c = redis.call('INCR', c_name)
|
||||
end
|
||||
end
|
||||
return c
|
||||
"""
|
||||
|
||||
|
||||
def incr_counter(client, name: str, limit: int = 0, expire: int = 0):
|
||||
"""Increment a counter and return the new value.
|
||||
|
||||
If counter with redis key ``SearXNG_counter_<name>`` does not exist it is
|
||||
created with initial value 1 returned. The replacement ``<name>`` is a
|
||||
*secret hash* of the value from argument ``name`` (see
|
||||
:py:func:`secret_hash`).
|
||||
|
||||
The implementation of the redis counter is the lua script from string
|
||||
:py:obj:`INCR_COUNTER`.
|
||||
|
||||
:param name: name of the counter
|
||||
:type name: str
|
||||
|
||||
:param expire: live-time of the counter in seconds (default ``0`` means
|
||||
infinite).
|
||||
:type expire: int / see EXPIRE_
|
||||
|
||||
:param limit: limit at which the counter stops incrementing (default ``0``: no limit)
|
||||
:type limit: int / limit is 2^64 see INCR_
|
||||
|
||||
:return: value of the incremented counter
|
||||
:type return: int
|
||||
|
||||
.. _EXPIRE: https://redis.io/commands/expire/
|
||||
.. _INCR: https://redis.io/commands/incr/
|
||||
|
||||
A simple demo of a counter with expire time and limit::
|
||||
|
||||
>>> for i in range(6):
|
||||
... i, incr_counter(client, "foo", 3, 5) # max 3, duration 5 sec
|
||||
... time.sleep(1) # from the third call on max has been reached
|
||||
...
|
||||
(0, 1)
|
||||
(1, 2)
|
||||
(2, 3)
|
||||
(3, 3)
|
||||
(4, 3)
|
||||
(5, 1)
|
||||
|
||||
"""
|
||||
script = lua_script_storage(client, INCR_COUNTER)
|
||||
name = "SearXNG_counter_" + secret_hash(name)
|
||||
c = script(args=[limit, expire], keys=[name])
|
||||
return c
|
||||
|
||||
|
||||
def drop_counter(client, name):
|
||||
"""Drop counter with redis key ``SearXNG_counter_<name>``
|
||||
|
||||
The replacement ``<name>`` is a *secret hash* of the value from argument
|
||||
``name`` (see :py:func:`incr_counter` and :py:func:`incr_sliding_window`).
|
||||
"""
|
||||
name = "SearXNG_counter_" + secret_hash(name)
|
||||
client.delete(name)
|
||||
|
||||
|
||||
INCR_SLIDING_WINDOW = """
|
||||
local expire = tonumber(ARGV[1])
|
||||
local name = KEYS[1]
|
||||
local current_time = redis.call('TIME')
|
||||
|
||||
redis.call('ZREMRANGEBYSCORE', name, 0, current_time[1] - expire)
|
||||
redis.call('ZADD', name, current_time[1], current_time[1] .. current_time[2])
|
||||
local result = redis.call('ZCOUNT', name, 0, current_time[1] + 1)
|
||||
redis.call('EXPIRE', name, expire)
|
||||
return result
|
||||
"""
|
||||
|
||||
|
||||
def incr_sliding_window(client, name: str, duration: int):
|
||||
"""Increment a sliding-window counter and return the new value.
|
||||
|
||||
If counter with redis key ``SearXNG_counter_<name>`` does not exist it is
|
||||
created with initial value 1 returned. The replacement ``<name>`` is a
|
||||
*secret hash* of the value from argument ``name`` (see
|
||||
:py:func:`secret_hash`).
|
||||
|
||||
:param name: name of the counter
|
||||
:type name: str
|
||||
|
||||
:param duration: live-time of the sliding window in seconds
|
||||
:type duration: int
|
||||
|
||||
:return: value of the incremented counter
|
||||
:type return: int
|
||||
|
||||
The implementation of the redis counter is the lua script from string
|
||||
:py:obj:`INCR_SLIDING_WINDOW`. The lua script uses `sorted sets in Redis`_
|
||||
to implement a sliding window for the redis key ``SearXNG_counter_<name>``
|
||||
(ZADD_). The current TIME_ is used to score the items in the sorted set and
|
||||
the time window is moved by removing items with a score lower current time
|
||||
minus *duration* time (ZREMRANGEBYSCORE_).
|
||||
|
||||
The EXPIRE_ time (the duration of the sliding window) is refreshed on each
|
||||
call (increment) and if there is no call in this duration, the sorted
|
||||
set expires from the redis DB.
|
||||
|
||||
The return value is the amount of items in the sorted set (ZCOUNT_), what
|
||||
means the number of calls in the sliding window.
|
||||
|
||||
.. _Sorted sets in Redis:
|
||||
https://redis.com/ebook/part-1-getting-started/chapter-1-getting-to-know-redis/1-2-what-redis-data-structures-look-like/1-2-5-sorted-sets-in-redis/
|
||||
.. _TIME: https://redis.io/commands/time/
|
||||
.. _ZADD: https://redis.io/commands/zadd/
|
||||
.. _EXPIRE: https://redis.io/commands/expire/
|
||||
.. _ZREMRANGEBYSCORE: https://redis.io/commands/zremrangebyscore/
|
||||
.. _ZCOUNT: https://redis.io/commands/zcount/
|
||||
|
||||
A simple demo of the sliding window::
|
||||
|
||||
>>> for i in range(5):
|
||||
... incr_sliding_window(client, "foo", 3) # duration 3 sec
|
||||
... time.sleep(1) # from the third call (second) on the window is moved
|
||||
...
|
||||
1
|
||||
2
|
||||
3
|
||||
3
|
||||
3
|
||||
>>> time.sleep(3) # wait until expire
|
||||
>>> incr_sliding_window(client, "foo", 3)
|
||||
1
|
||||
|
||||
"""
|
||||
script = lua_script_storage(client, INCR_SLIDING_WINDOW)
|
||||
name = "SearXNG_counter_" + secret_hash(name)
|
||||
c = script(args=[duration], keys=[name])
|
||||
return c
|
||||
|
|
|
@ -18,7 +18,7 @@ import importlib
|
|||
from typing import Callable
|
||||
|
||||
from searx.redisdb import client as get_redis_client
|
||||
from searx.redislib import lua_script_storage
|
||||
from searx.botdetection.redislib import lua_script_storage
|
||||
|
||||
|
||||
logger = logging.getLogger('searx.search.checker')
|
||||
|
|
|
@ -78,8 +78,9 @@ server:
|
|||
# public URL of the instance, to ensure correct inbound links. Is overwritten
|
||||
# by ${SEARXNG_URL}.
|
||||
base_url: false # "http://example.com/location"
|
||||
public_instance: false # enable best defaults designed for public instances
|
||||
limiter: false # rate limit the number of request on the instance, block some bots
|
||||
public_instance: false # enable features designed only for public instances
|
||||
pass_searxng_org: false # pass IPs from the SearXNG org (check.searx.space)
|
||||
|
||||
# If your instance owns a /etc/searxng/settings.yml file, then set the following
|
||||
# values there.
|
||||
|
|
|
@ -176,6 +176,7 @@ SCHEMA = {
|
|||
'port': SettingsValue((int, str), 8888, 'SEARXNG_PORT'),
|
||||
'bind_address': SettingsValue(str, '127.0.0.1', 'SEARXNG_BIND_ADDRESS'),
|
||||
'limiter': SettingsValue(bool, False),
|
||||
'pass_searxng_org': SettingsValue(bool, False),
|
||||
'public_instance': SettingsValue(bool, False),
|
||||
'secret_key': SettingsValue(str, environ_name='SEARXNG_SECRET'),
|
||||
'base_url': SettingsValue((False, str), False, 'SEARXNG_BASE_URL'),
|
||||
|
@ -247,4 +248,16 @@ SCHEMA = {
|
|||
|
||||
def settings_set_defaults(settings):
|
||||
apply_schema(settings, SCHEMA, [])
|
||||
public_instance(settings)
|
||||
return settings
|
||||
|
||||
|
||||
def public_instance(settings):
|
||||
if settings['server']['public_instance']:
|
||||
logger.warning(
|
||||
"Be aware you have activated features intended only for public instances. "
|
||||
"This force the usage of the limiter and link_token / "
|
||||
"see https://docs.searxng.org/admin/searx.limiter.html"
|
||||
)
|
||||
# public_instance activates by default the limiter
|
||||
settings['server']['limiter'] = True
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
{% else %}
|
||||
<link rel="stylesheet" href="{{ url_for('static', filename='css/searxng.min.css') }}" type="text/css" media="screen" />
|
||||
{% endif %}
|
||||
{% if get_setting('server.limiter') or get_setting('server.public_instance') %}
|
||||
{% if get_setting('server.limiter') %}
|
||||
<link rel="stylesheet" href="{{ url_for('client_token', token=link_token) }}" type="text/css" />
|
||||
{% endif %}
|
||||
{% block styles %}{% endblock %}
|
||||
|
|
|
@ -58,7 +58,7 @@ from searx import (
|
|||
|
||||
from searx import infopage
|
||||
from searx import limiter
|
||||
from searx.botdetection import link_token
|
||||
from searx import botdetection
|
||||
|
||||
from searx.data import ENGINE_DESCRIPTIONS
|
||||
from searx.results import Timing
|
||||
|
@ -385,7 +385,7 @@ def render(template_name: str, **kwargs):
|
|||
kwargs['endpoint'] = 'results' if 'q' in kwargs else request.endpoint
|
||||
kwargs['cookies'] = request.cookies
|
||||
kwargs['errors'] = request.errors
|
||||
kwargs['link_token'] = link_token.get_token()
|
||||
kwargs['link_token'] = botdetection.link_token.get_token()
|
||||
|
||||
# values from the preferences
|
||||
kwargs['preferences'] = request.preferences
|
||||
|
@ -617,7 +617,7 @@ def health():
|
|||
|
||||
@app.route('/client<token>.css', methods=['GET', 'POST'])
|
||||
def client_token(token=None):
|
||||
link_token.ping(request, token)
|
||||
botdetection.link_token.ping(request, token)
|
||||
return Response('', mimetype='text/css')
|
||||
|
||||
|
||||
|
@ -1267,8 +1267,6 @@ def config():
|
|||
for _ in plugins:
|
||||
_plugins.append({'name': _.name, 'enabled': _.default_on})
|
||||
|
||||
_limiter_cfg = limiter.get_cfg()
|
||||
|
||||
return jsonify(
|
||||
{
|
||||
'categories': list(categories.keys()),
|
||||
|
@ -1289,9 +1287,11 @@ def config():
|
|||
'DOCS_URL': get_setting('brand.docs_url'),
|
||||
},
|
||||
'limiter': {
|
||||
'enabled': limiter.is_installed(),
|
||||
'botdetection.ip_limit.link_token': _limiter_cfg.get('botdetection.ip_limit.link_token'),
|
||||
'botdetection.ip_lists.pass_searxng_org': _limiter_cfg.get('botdetection.ip_lists.pass_searxng_org'),
|
||||
'enabled': limiter.is_fully_installed(),
|
||||
'pass_searxng_org': settings['server']['pass_searxng_org'],
|
||||
'botdetection.ip_limit.link_token': botdetection.ctx.cfg.get('botdetection.ip_limit.link_token'),
|
||||
# deprecated .. replaced by 'pass_searxng_org' from above
|
||||
'botdetection.ip_lists.pass_searxng_org': settings['server']['pass_searxng_org'],
|
||||
},
|
||||
'doi_resolvers': list(settings['doi_resolvers'].keys()),
|
||||
'default_doi_resolver': settings['default_doi_resolver'],
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
from searx import (
|
||||
plugins,
|
||||
limiter,
|
||||
botdetection,
|
||||
)
|
||||
|
||||
from mock import Mock
|
||||
|
@ -51,8 +50,10 @@ class SelfIPTest(SearxTestCase):
|
|||
plugin = plugins.load_and_initialize_plugin('searx.plugins.self_info', False, (None, {}))
|
||||
store = plugins.PluginStore()
|
||||
store.register(plugin)
|
||||
cfg = limiter.get_cfg()
|
||||
botdetection.init(cfg, None)
|
||||
|
||||
from searx import webapp # pylint disable=import-outside-toplevel
|
||||
|
||||
limiter.initialize(webapp.app, webapp.settings)
|
||||
|
||||
self.assertTrue(len(store.plugins) == 1)
|
||||
|
||||
|
|
|
@ -6,6 +6,7 @@ from mock import Mock
|
|||
from searx.results import Timing
|
||||
|
||||
import searx.search.processors
|
||||
from searx import limiter
|
||||
from searx.search import Search
|
||||
from searx.preferences import Preferences
|
||||
from tests import SearxTestCase
|
||||
|
@ -21,6 +22,8 @@ class ViewsTestCase(SearxTestCase):
|
|||
|
||||
from searx import webapp # pylint disable=import-outside-toplevel
|
||||
|
||||
limiter.initialize(webapp.app, webapp.settings)
|
||||
|
||||
webapp.app.config['TESTING'] = True # to get better error messages
|
||||
self.app = webapp.app.test_client()
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue