forked from zaclys/searxng
The checker requires Redis
Remove the abstraction in searx.shared.SharedDict. Implement a basic and dedicated scheduler for the checker using a Redis script.
This commit is contained in:
parent
d764d94a70
commit
fe419e355b
@ -93,9 +93,8 @@ def init(app, settings):
|
||||
if not settings['server']['limiter']:
|
||||
return False
|
||||
|
||||
logger.debug("init limiter DB") # pylint: disable=undefined-variable
|
||||
if not redisdb.init():
|
||||
logger.error("init limiter DB failed!!!") # pylint: disable=undefined-variable
|
||||
if not redisdb.client():
|
||||
logger.error("The limiter requires Redis") # pylint: disable=undefined-variable
|
||||
return False
|
||||
|
||||
app.before_request(pre_request)
|
||||
|
@ -2,3 +2,5 @@
|
||||
|
||||
from .impl import Checker
|
||||
from .background import initialize, get_result
|
||||
|
||||
__all__ = ('Checker', 'initialize', 'get_result')
|
||||
|
@ -1,26 +1,28 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# lint: pylint
|
||||
# pylint: disable=missing-module-docstring
|
||||
# pyright: strict
|
||||
# pyright: basic
|
||||
|
||||
import json
|
||||
import random
|
||||
import time
|
||||
import threading
|
||||
import os
|
||||
import signal
|
||||
from typing import Dict, Union, List, Any, Tuple
|
||||
from typing import Dict, Union, List, Any, Tuple, Optional
|
||||
from typing_extensions import TypedDict, Literal
|
||||
|
||||
import redis.exceptions
|
||||
|
||||
from searx import logger, settings, searx_debug
|
||||
from searx.shared.redisdb import client as get_redis_client
|
||||
from searx.exceptions import SearxSettingsException
|
||||
from searx.search.processors import PROCESSORS
|
||||
from searx.search.checker import Checker
|
||||
from searx.shared import schedule, storage # pyright: ignore
|
||||
from searx.search.checker.scheduler import scheduler_function
|
||||
|
||||
|
||||
CHECKER_RESULT = 'CHECKER_RESULT'
|
||||
running = threading.Lock()
|
||||
REDIS_RESULT_KEY = 'SearXNG_checker_result'
|
||||
REDIS_LOCK_KEY = 'SearXNG_checker_lock'
|
||||
|
||||
|
||||
CheckerResult = Union['CheckerOk', 'CheckerErr', 'CheckerOther']
|
||||
@ -77,20 +79,24 @@ def _get_interval(every: Any, error_msg: str) -> Tuple[int, int]:
|
||||
return (every[0], every[1])
|
||||
|
||||
|
||||
def _get_every():
|
||||
every = settings.get('checker', {}).get('scheduling', {}).get('every', (300, 1800))
|
||||
return _get_interval(every, 'checker.scheduling.every is not a int or list')
|
||||
|
||||
|
||||
def get_result() -> CheckerResult:
|
||||
serialized_result = storage.get_str(CHECKER_RESULT)
|
||||
if serialized_result is not None:
|
||||
return json.loads(serialized_result)
|
||||
return {'status': 'unknown'}
|
||||
client = get_redis_client()
|
||||
if client is None:
|
||||
# without Redis, the checker is disabled
|
||||
return {'status': 'disabled'}
|
||||
serialized_result: Optional[bytes] = client.get(REDIS_RESULT_KEY)
|
||||
if serialized_result is None:
|
||||
# the Redis key does not exist
|
||||
return {'status': 'unknown'}
|
||||
return json.loads(serialized_result)
|
||||
|
||||
|
||||
def _set_result(result: CheckerResult):
|
||||
storage.set_str(CHECKER_RESULT, json.dumps(result))
|
||||
client = get_redis_client()
|
||||
if client is None:
|
||||
# without Redis, the function does nothing
|
||||
return
|
||||
client.set(REDIS_RESULT_KEY, json.dumps(result))
|
||||
|
||||
|
||||
def _timestamp():
|
||||
@ -98,41 +104,29 @@ def _timestamp():
|
||||
|
||||
|
||||
def run():
|
||||
if not running.acquire(blocking=False): # pylint: disable=consider-using-with
|
||||
return
|
||||
try:
|
||||
logger.info('Starting checker')
|
||||
result: CheckerOk = {'status': 'ok', 'engines': {}, 'timestamp': _timestamp()}
|
||||
for name, processor in PROCESSORS.items():
|
||||
logger.debug('Checking %s engine', name)
|
||||
checker = Checker(processor)
|
||||
checker.run()
|
||||
if checker.test_results.successful:
|
||||
result['engines'][name] = {'success': True}
|
||||
else:
|
||||
result['engines'][name] = {'success': False, 'errors': checker.test_results.errors}
|
||||
# use a Redis lock to make sure there is no checker running at the same time
|
||||
# (this should not happen, this is a safety measure)
|
||||
with get_redis_client().lock(REDIS_LOCK_KEY, blocking_timeout=60, timeout=3600):
|
||||
logger.info('Starting checker')
|
||||
result: CheckerOk = {'status': 'ok', 'engines': {}, 'timestamp': _timestamp()}
|
||||
for name, processor in PROCESSORS.items():
|
||||
logger.debug('Checking %s engine', name)
|
||||
checker = Checker(processor)
|
||||
checker.run()
|
||||
if checker.test_results.successful:
|
||||
result['engines'][name] = {'success': True}
|
||||
else:
|
||||
result['engines'][name] = {'success': False, 'errors': checker.test_results.errors}
|
||||
|
||||
_set_result(result)
|
||||
logger.info('Check done')
|
||||
_set_result(result)
|
||||
logger.info('Check done')
|
||||
except redis.exceptions.LockError:
|
||||
_set_result({'status': 'error', 'timestamp': _timestamp()})
|
||||
logger.exception('Error while running the checker')
|
||||
except Exception: # pylint: disable=broad-except
|
||||
_set_result({'status': 'error', 'timestamp': _timestamp()})
|
||||
logger.exception('Error while running the checker')
|
||||
finally:
|
||||
running.release()
|
||||
|
||||
|
||||
def _run_with_delay():
|
||||
every = _get_every()
|
||||
delay = random.randint(0, every[1] - every[0])
|
||||
logger.debug('Start checker in %i seconds', delay)
|
||||
time.sleep(delay)
|
||||
run()
|
||||
|
||||
|
||||
def _start_scheduling():
|
||||
every = _get_every()
|
||||
if schedule(every[0], _run_with_delay):
|
||||
run()
|
||||
|
||||
|
||||
def _signal_handler(_signum: int, _frame: Any):
|
||||
@ -147,27 +141,31 @@ def initialize():
|
||||
logger.info('Send SIGUSR1 signal to pid %i to start the checker', os.getpid())
|
||||
signal.signal(signal.SIGUSR1, _signal_handler)
|
||||
|
||||
# disabled by default
|
||||
_set_result({'status': 'disabled'})
|
||||
|
||||
# special case when debug is activate
|
||||
if searx_debug and settings.get('checker', {}).get('off_when_debug', True):
|
||||
if searx_debug and settings['checker']['off_when_debug']:
|
||||
logger.info('debug mode: checker is disabled')
|
||||
return
|
||||
|
||||
# check value of checker.scheduling.every now
|
||||
scheduling = settings.get('checker', {}).get('scheduling', None)
|
||||
scheduling = settings['checker']['scheduling']
|
||||
if scheduling is None or not scheduling:
|
||||
logger.info('Checker scheduler is disabled')
|
||||
return
|
||||
|
||||
#
|
||||
_set_result({'status': 'unknown'})
|
||||
# make sure there is a Redis connection
|
||||
if get_redis_client() is None:
|
||||
logger.error('The checker requires Redis')
|
||||
return
|
||||
|
||||
start_after = scheduling.get('start_after', (300, 1800))
|
||||
start_after = _get_interval(start_after, 'checker.scheduling.start_after is not a int or list')
|
||||
delay = random.randint(start_after[0], start_after[1])
|
||||
logger.info('Start checker in %i seconds', delay)
|
||||
t = threading.Timer(delay, _start_scheduling)
|
||||
# start the background scheduler
|
||||
every_range = _get_interval(scheduling.get('every', (300, 1800)), 'checker.scheduling.every is not a int or list')
|
||||
start_after_range = _get_interval(
|
||||
scheduling.get('start_after', (300, 1800)), 'checker.scheduling.start_after is not a int or list'
|
||||
)
|
||||
t = threading.Thread(
|
||||
target=scheduler_function,
|
||||
args=(start_after_range[0], start_after_range[1], every_range[0], every_range[1], run),
|
||||
name='checker_scheduler',
|
||||
)
|
||||
t.daemon = True
|
||||
t.start()
|
||||
|
36
searx/search/checker/scheduler.lua
Normal file
36
searx/search/checker/scheduler.lua
Normal file
@ -0,0 +1,36 @@
|
||||
-- SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
--
|
||||
-- This script is not a string in scheduler.py, so editors can provide syntax highlighting.
|
||||
|
||||
-- The Redis KEY is defined here and not in Python on purpose:
|
||||
-- only this LUA script can read and update this key to avoid lock and concurrency issues.
|
||||
local redis_key = 'SearXNG_checker_next_call_ts'
|
||||
|
||||
local now = redis.call('TIME')[1]
|
||||
local start_after_from = ARGV[1]
|
||||
local start_after_to = ARGV[2]
|
||||
local every_from = ARGV[3]
|
||||
local every_to = ARGV[4]
|
||||
|
||||
local next_call_ts = redis.call('GET', redis_key)
|
||||
|
||||
if (next_call_ts == false or next_call_ts == nil) then
|
||||
-- the scheduler has never run on this Redis instance, so:
|
||||
-- 1/ the scheduler does not run now
|
||||
-- 2/ the next call is a random time between start_after_from and start_after_to
|
||||
local delay = start_after_from + math.random(start_after_to - start_after_from)
|
||||
redis.call('SET', redis_key, now + delay)
|
||||
return { false, delay }
|
||||
end
|
||||
|
||||
-- next_call_ts is defined
|
||||
-- --> if now is lower than next_call_ts then we don't run the embedded checker
|
||||
-- --> if now is higher then we update next_call_ts and ask to run the embedded checker now.
|
||||
local call_now = next_call_ts <= now
|
||||
if call_now then
|
||||
-- the checker runs now, define the timestamp of the next call:
|
||||
-- this is a random delay between every_from and every_to
|
||||
local periodic_delay = every_from + math.random(every_to - every_from)
|
||||
next_call_ts = redis.call('INCRBY', redis_key, periodic_delay)
|
||||
end
|
||||
return { call_now, next_call_ts - now }
|
57
searx/search/checker/scheduler.py
Normal file
57
searx/search/checker/scheduler.py
Normal file
@ -0,0 +1,57 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# lint: pylint
|
||||
# pylint: disable=missing-module-docstring
|
||||
"""Lame scheduler which use Redis as a source of truth:
|
||||
* the Redis key SearXNG_checker_next_call_ts contains the next time the embedded checker should run.
|
||||
* to avoid lock, a unique Redis script reads and updates the Redis key SearXNG_checker_next_call_ts.
|
||||
* this Redis script returns a list of two elements:
|
||||
* the first one is a boolean. If True, the embedded checker must run now in this worker.
|
||||
* the second element is the delay in second to wait before the next call to the Redis script.
|
||||
|
||||
This scheduler is not generic on purpose: if more feature are required, a dedicate scheduler must be used
|
||||
(= a better scheduler should not use the web workers)
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
import importlib
|
||||
from typing import Callable
|
||||
|
||||
from searx.shared.redisdb import client as get_redis_client
|
||||
from searx.redislib import lua_script_storage
|
||||
|
||||
|
||||
logger = logging.getLogger('searx.search.checker')
|
||||
|
||||
|
||||
def scheduler_function(start_after_from: int, start_after_to: int, every_from: int, every_to: int, callback: Callable):
|
||||
"""Run the checker periodically. The function never returns.
|
||||
|
||||
Parameters:
|
||||
* start_after_from and start_after_to: when to call "callback" for the first on the Redis instance
|
||||
* every_from and every_to: after the first call, how often to call "callback"
|
||||
|
||||
There is no issue:
|
||||
* to call this function is multiple workers
|
||||
* to kill workers at any time as long there is one at least one worker
|
||||
"""
|
||||
scheduler_now_script = importlib.resources.read_text(__package__, "scheduler.lua")
|
||||
while True:
|
||||
# ask the Redis script what to do
|
||||
# the script says
|
||||
# * if the checker must run now.
|
||||
# * how to long to way before calling the script again (it can be call earlier, but not later).
|
||||
script = lua_script_storage(get_redis_client(), scheduler_now_script)
|
||||
call_now, wait_time = script(args=[start_after_from, start_after_to, every_from, every_to])
|
||||
|
||||
# does the worker run the checker now?
|
||||
if call_now:
|
||||
# run the checker
|
||||
try:
|
||||
callback()
|
||||
except Exception: # pylint: disable=broad-except
|
||||
logger.exception("Error calling the embedded checker")
|
||||
# only worker display the wait_time
|
||||
logger.info("Next call to the checker in %s seconds", wait_time)
|
||||
# wait until the next call
|
||||
time.sleep(wait_time)
|
@ -225,7 +225,8 @@ SCHEMA = {
|
||||
'plugins': SettingsValue(list, []),
|
||||
'enabled_plugins': SettingsValue((None, list), None),
|
||||
'checker': {
|
||||
'off_when_debug': SettingsValue(bool, True),
|
||||
'off_when_debug': SettingsValue(bool, True, None),
|
||||
'scheduling': SettingsValue((None, dict), None, None),
|
||||
},
|
||||
'categories_as_tabs': SettingsValue(dict, CATEGORIES_AS_TABS),
|
||||
'engines': SettingsValue(list, []),
|
||||
|
@ -1,39 +1,6 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# lint: pylint
|
||||
"""Initialization of a *shared* storage.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import importlib
|
||||
|
||||
logger = logging.getLogger('searx.shared')
|
||||
|
||||
__all__ = ['SharedDict', 'schedule']
|
||||
|
||||
try:
|
||||
uwsgi = importlib.import_module('uwsgi')
|
||||
except:
|
||||
# no uwsgi
|
||||
from .shared_simple import SimpleSharedDict as SharedDict, schedule
|
||||
|
||||
logger.info('Use shared_simple implementation')
|
||||
else:
|
||||
try:
|
||||
uwsgi.cache_update('dummy', b'dummy')
|
||||
if uwsgi.cache_get('dummy') != b'dummy':
|
||||
raise Exception()
|
||||
except:
|
||||
# uwsgi.ini configuration problem: disable all scheduling
|
||||
logger.error(
|
||||
'uwsgi.ini configuration error, add this line to your uwsgi.ini\n'
|
||||
'cache2 = name=searxngcache,items=2000,blocks=2000,blocksize=4096,bitmap=1'
|
||||
)
|
||||
from .shared_simple import SimpleSharedDict as SharedDict
|
||||
|
||||
def schedule(delay, func, *args):
|
||||
return False
|
||||
|
||||
else:
|
||||
# uwsgi
|
||||
from .shared_uwsgi import UwsgiCacheSharedDict as SharedDict, schedule
|
||||
|
||||
logger.info('Use shared_uwsgi implementation')
|
||||
|
||||
storage = SharedDict()
|
||||
from . import redisdb
|
||||
|
@ -26,26 +26,20 @@ import redis
|
||||
from searx import get_setting
|
||||
|
||||
|
||||
logger = logging.getLogger('searx.shared.redis')
|
||||
logger = logging.getLogger('searx.shared.redisdb')
|
||||
_client = None
|
||||
|
||||
|
||||
def client():
|
||||
global _client # pylint: disable=global-statement
|
||||
if _client is None:
|
||||
# not thread safe: in the worst case scenario, two or more clients are
|
||||
# initialized only one is kept, the others are garbage collected.
|
||||
_client = redis.Redis.from_url(get_setting('redis.url'))
|
||||
def client() -> redis.Redis:
|
||||
return _client
|
||||
|
||||
|
||||
def init():
|
||||
def initialize():
|
||||
global _client # pylint: disable=global-statement
|
||||
try:
|
||||
c = client()
|
||||
logger.info("connected redis DB --> %s", c.acl_whoami())
|
||||
return True
|
||||
_client = redis.Redis.from_url(get_setting('redis.url'))
|
||||
logger.info("connected redis: %s", get_setting('redis.url'))
|
||||
except redis.exceptions.ConnectionError as exc:
|
||||
_pw = pwd.getpwuid(os.getuid())
|
||||
logger.error("[%s (%s)] can't connect redis DB ...", _pw.pw_name, _pw.pw_uid)
|
||||
logger.error(" %s", exc)
|
||||
return False
|
||||
|
@ -1,22 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# pyright: strict
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class SharedDict(ABC):
|
||||
@abstractmethod
|
||||
def get_int(self, key: str) -> Optional[int]:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def set_int(self, key: str, value: int):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def get_str(self, key: str) -> Optional[str]:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def set_str(self, key: str, value: str):
|
||||
pass
|
@ -1,40 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
|
||||
import threading
|
||||
from typing import Optional
|
||||
|
||||
from . import shared_abstract
|
||||
|
||||
|
||||
class SimpleSharedDict(shared_abstract.SharedDict):
|
||||
|
||||
__slots__ = ('d',)
|
||||
|
||||
def __init__(self):
|
||||
self.d = {}
|
||||
|
||||
def get_int(self, key: str) -> Optional[int]:
|
||||
return self.d.get(key, None)
|
||||
|
||||
def set_int(self, key: str, value: int):
|
||||
self.d[key] = value
|
||||
|
||||
def get_str(self, key: str) -> Optional[str]:
|
||||
return self.d.get(key, None)
|
||||
|
||||
def set_str(self, key: str, value: str):
|
||||
self.d[key] = value
|
||||
|
||||
|
||||
def schedule(delay, func, *args):
|
||||
def call_later():
|
||||
t = threading.Timer(delay, wrapper)
|
||||
t.daemon = True
|
||||
t.start()
|
||||
|
||||
def wrapper():
|
||||
call_later()
|
||||
func(*args)
|
||||
|
||||
call_later()
|
||||
return True
|
@ -1,64 +0,0 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
|
||||
import time
|
||||
from typing import Optional
|
||||
import uwsgi # pyright: ignore # pylint: disable=E0401
|
||||
from . import shared_abstract
|
||||
|
||||
|
||||
_last_signal = 10
|
||||
|
||||
|
||||
class UwsgiCacheSharedDict(shared_abstract.SharedDict):
|
||||
def get_int(self, key: str) -> Optional[int]:
|
||||
value = uwsgi.cache_get(key)
|
||||
if value is None:
|
||||
return value
|
||||
else:
|
||||
return int.from_bytes(value, 'big')
|
||||
|
||||
def set_int(self, key: str, value: int):
|
||||
b = value.to_bytes(4, 'big')
|
||||
uwsgi.cache_update(key, b)
|
||||
|
||||
def get_str(self, key: str) -> Optional[str]:
|
||||
value = uwsgi.cache_get(key)
|
||||
if value is None:
|
||||
return value
|
||||
else:
|
||||
return value.decode('utf-8')
|
||||
|
||||
def set_str(self, key: str, value: str):
|
||||
b = value.encode('utf-8')
|
||||
uwsgi.cache_update(key, b)
|
||||
|
||||
|
||||
def schedule(delay, func, *args):
|
||||
"""
|
||||
Can be implemented using a spooler.
|
||||
https://uwsgi-docs.readthedocs.io/en/latest/PythonDecorators.html
|
||||
|
||||
To make the uwsgi configuration simple, use the alternative implementation.
|
||||
"""
|
||||
global _last_signal
|
||||
|
||||
def sighandler(signum):
|
||||
now = int(time.time())
|
||||
key = 'scheduler_call_time_signal_' + str(signum)
|
||||
uwsgi.lock()
|
||||
try:
|
||||
updating = uwsgi.cache_get(key)
|
||||
if updating is not None:
|
||||
updating = int.from_bytes(updating, 'big')
|
||||
if now - updating < delay:
|
||||
return
|
||||
uwsgi.cache_update(key, now.to_bytes(4, 'big'))
|
||||
finally:
|
||||
uwsgi.unlock()
|
||||
func(*args)
|
||||
|
||||
signal_num = _last_signal
|
||||
_last_signal += 1
|
||||
uwsgi.register_signal(signal_num, 'worker', sighandler)
|
||||
uwsgi.add_timer(signal_num, delay)
|
||||
return True
|
@ -120,6 +120,7 @@ from searx.locales import (
|
||||
# renaming names from searx imports ...
|
||||
from searx.autocomplete import search_autocomplete, backends as autocomplete_backends
|
||||
from searx.languages import language_codes as languages
|
||||
from searx.shared.redisdb import initialize as redis_initialize
|
||||
from searx.search import SearchWithPlugins, initialize as search_initialize
|
||||
from searx.network import stream as http_stream, set_context_network_name
|
||||
from searx.search.checker import get_result as checker_get_result
|
||||
@ -1384,6 +1385,7 @@ werkzeug_reloader = flask_run_development or (searx_debug and __name__ == "__mai
|
||||
if not werkzeug_reloader or (werkzeug_reloader and os.environ.get("WERKZEUG_RUN_MAIN") == "true"):
|
||||
locales_initialize()
|
||||
_INFO_PAGES = infopage.InfoPageSet()
|
||||
redis_initialize()
|
||||
plugin_initialize(app)
|
||||
search_initialize(enable_checker=True, check_network=True, enable_metrics=settings['general']['enable_metrics'])
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user