[DRAFT] simplify searx.metrics implementation

- Moved code of the wrapper functions to methods of class HistogramStorage
  and class CounterStorage.

- Renamed global histogram and counter objects to HISTOGRAM_STORAGE and
  COUNTER_STORAGE.

- The imports of names from the metrics module in the application code have
  been reduced to:

      from searx import metrics

   By this convention the wrapper functions can be replaced by the globals from:

      metrics.HISTOGRAM_STORAGE
      metrics.COUNTER_STORAGE

- Commented out the context manager searx.metrics.histogram_observe_time; we
  do not have any usage of this context manager.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2022-09-21 15:17:02 +02:00
parent 0047f4226e
commit 2b69780f0d
8 changed files with 82 additions and 104 deletions

View file

@ -17,68 +17,42 @@ __all__ = [
"initialize",
"get_engines_metrics",
"get_engine_errors",
"histogram",
"histogram_observe",
"histogram_observe_time",
"counter",
"counter_inc",
"counter_add",
"count_error",
"count_exception",
]
histogram_storage: Optional[HistogramStorage] = None
counter_storage: Optional[CounterStorage] = None
HISTOGRAM_STORAGE: Optional[HistogramStorage] = None
COUNTER_STORAGE: Optional[CounterStorage] = None
@contextlib.contextmanager
def histogram_observe_time(*args):
h = histogram_storage.get(*args)
before = default_timer()
yield before
duration = default_timer() - before
if h:
h.observe(duration)
else:
raise ValueError("histogram " + repr((*args,)) + " doesn't not exist")
def histogram_observe(duration, *args):
histogram_storage.get(*args).observe(duration)
def histogram(*args, raise_on_not_found=True) -> Histogram:
h = histogram_storage.get(*args)
if raise_on_not_found and h is None:
raise ValueError("histogram " + repr((*args,)) + " doesn't not exist")
return h
def counter_inc(*args):
counter_storage.add(1, *args)
def counter_add(value, *args):
counter_storage.add(value, *args)
def counter(*args) -> int:
return counter_storage.get(*args)
# We do not have a usage of this context manager
#
# @contextlib.contextmanager
# def histogram_observe_time(*args):
# h = histogram_storage.get(*args)
# before = default_timer()
# yield before
# duration = default_timer() - before
# if h:
# h.observe(duration)
# else:
# raise ValueError("histogram " + repr((*args,)) + " doesn't not exist")
def initialize(engine_names=None, enabled=True):
"""
Initialize metrics
"""
global counter_storage, histogram_storage # pylint: disable=global-statement
global COUNTER_STORAGE, HISTOGRAM_STORAGE # pylint: disable=global-statement
if enabled:
counter_storage = CounterStorage()
histogram_storage = HistogramStorage()
COUNTER_STORAGE = CounterStorage()
HISTOGRAM_STORAGE = HistogramStorage()
else:
counter_storage = VoidCounterStorage()
histogram_storage = HistogramStorage(histogram_class=VoidHistogram)
COUNTER_STORAGE = VoidCounterStorage()
HISTOGRAM_STORAGE = HistogramStorage(histogram_class=VoidHistogram)
# max_timeout = max of all the engine.timeout
max_timeout = 2
@ -93,19 +67,19 @@ def initialize(engine_names=None, enabled=True):
# engines
for engine_name in engine_names or engines:
# search count
counter_storage.configure('engine', engine_name, 'search', 'count', 'sent')
counter_storage.configure('engine', engine_name, 'search', 'count', 'successful')
COUNTER_STORAGE.configure('engine', engine_name, 'search', 'count', 'sent')
COUNTER_STORAGE.configure('engine', engine_name, 'search', 'count', 'successful')
# global counter of errors
counter_storage.configure('engine', engine_name, 'search', 'count', 'error')
COUNTER_STORAGE.configure('engine', engine_name, 'search', 'count', 'error')
# score of the engine
counter_storage.configure('engine', engine_name, 'score')
COUNTER_STORAGE.configure('engine', engine_name, 'score')
# result count per requests
histogram_storage.configure(1, 100, 'engine', engine_name, 'result', 'count')
HISTOGRAM_STORAGE.configure(1, 100, 'engine', engine_name, 'result', 'count')
# time doing HTTP requests
histogram_storage.configure(histogram_width, histogram_size, 'engine', engine_name, 'time', 'http')
HISTOGRAM_STORAGE.configure(histogram_width, histogram_size, 'engine', engine_name, 'time', 'http')
# total time
# .time.request and ...response times may overlap .time.http time.
histogram_storage.configure(histogram_width, histogram_size, 'engine', engine_name, 'time', 'total')
HISTOGRAM_STORAGE.configure(histogram_width, histogram_size, 'engine', engine_name, 'time', 'total')
class EngineError(TypedDict):
@ -131,7 +105,7 @@ def get_engine_errors(engline_name_list) -> Dict[str, List[EngineError]]:
continue
error_stats = errors_per_engines[engine_name]
sent_search_count = max(counter('engine', engine_name, 'search', 'count', 'sent'), 1)
sent_search_count = max(COUNTER_STORAGE.get('engine', engine_name, 'search', 'count', 'sent'), 1)
sorted_context_count_list = sorted(error_stats.items(), key=lambda context_count: context_count[1])
r = []
for context, count in sorted_context_count_list:
@ -170,7 +144,7 @@ def get_reliabilities(engline_name_list, checker_results) -> Dict[str, EngineRel
checker_result = checker_results.get(engine_name, {})
checker_success = checker_result.get('success', True)
errors = engine_errors.get(engine_name) or []
if counter('engine', engine_name, 'search', 'count', 'sent') == 0:
if COUNTER_STORAGE.get('engine', engine_name, 'search', 'count', 'sent') == 0:
# no request
reliablity = None
elif checker_success and not errors:
@ -223,23 +197,23 @@ class EngineStatResult(TypedDict):
def get_engines_metrics(engine_name_list) -> EngineStatResult:
assert counter_storage is not None
assert histogram_storage is not None
assert COUNTER_STORAGE is not None
assert HISTOGRAM_STORAGE is not None
list_time = []
max_time_total = max_result_count = None
for engine_name in engine_name_list:
sent_count = counter('engine', engine_name, 'search', 'count', 'sent')
sent_count = COUNTER_STORAGE.get('engine', engine_name, 'search', 'count', 'sent')
if sent_count == 0:
continue
result_count = histogram('engine', engine_name, 'result', 'count').percentile(50)
result_count_sum = histogram('engine', engine_name, 'result', 'count').sum
successful_count = counter('engine', engine_name, 'search', 'count', 'successful')
result_count = HISTOGRAM_STORAGE.get('engine', engine_name, 'result', 'count').percentile(50)
result_count_sum = HISTOGRAM_STORAGE.get('engine', engine_name, 'result', 'count').sum
successful_count = COUNTER_STORAGE.get('engine', engine_name, 'search', 'count', 'successful')
time_total = histogram('engine', engine_name, 'time', 'total').percentile(50)
time_total = HISTOGRAM_STORAGE.get('engine', engine_name, 'time', 'total').percentile(50)
max_time_total = max(time_total or 0, max_time_total or 0)
max_result_count = max(result_count or 0, max_result_count or 0)
@ -260,18 +234,18 @@ def get_engines_metrics(engine_name_list) -> EngineStatResult:
}
if successful_count and result_count_sum:
score = counter('engine', engine_name, 'score')
score = COUNTER_STORAGE.get('engine', engine_name, 'score')
stats['score'] = score
stats['score_per_result'] = score / float(result_count_sum)
time_http = histogram('engine', engine_name, 'time', 'http').percentile(50)
time_http = HISTOGRAM_STORAGE.get('engine', engine_name, 'time', 'http').percentile(50)
time_http_p80 = time_http_p95 = 0
if time_http is not None:
time_http_p80 = histogram('engine', engine_name, 'time', 'http').percentile(80)
time_http_p95 = histogram('engine', engine_name, 'time', 'http').percentile(95)
time_http_p80 = HISTOGRAM_STORAGE.get('engine', engine_name, 'time', 'http').percentile(80)
time_http_p95 = HISTOGRAM_STORAGE.get('engine', engine_name, 'time', 'http').percentile(95)
stats['http'] = round(time_http, 1)
stats['http_p80'] = round(time_http_p80, 1)
@ -279,8 +253,8 @@ def get_engines_metrics(engine_name_list) -> EngineStatResult:
if time_total is not None:
time_total_p80 = histogram('engine', engine_name, 'time', 'total').percentile(80)
time_total_p95 = histogram('engine', engine_name, 'time', 'total').percentile(95)
time_total_p80 = HISTOGRAM_STORAGE.get('engine', engine_name, 'time', 'total').percentile(80)
time_total_p95 = HISTOGRAM_STORAGE.get('engine', engine_name, 'time', 'total').percentile(95)
stats['total'] = round(time_total, 1)
stats['total_p80'] = round(time_total_p80, 1)

View file

@ -111,8 +111,14 @@ class HistogramStorage:
self.measures[args] = measure
return measure
def get(self, *args) -> Optional[Histogram]:
return self.measures.get(args, None)
def get(self, *args, raise_on_not_found=True) -> Optional[Histogram]:
h = self.measures.get(args, None)
if raise_on_not_found and h is None:
raise ValueError("histogram " + repr((*args,)) + " doesn't not exist")
return h
def observe(self, duration, *args):
self.get(*args).observe(duration)
def dump(self):
logger.debug("Histograms:")
@ -140,6 +146,9 @@ class CounterStorage:
def get(self, *args) -> int:
return self.counters[args]
def inc(self, *args):
self.add(1, *args)
def add(self, value, *args):
with self.lock:
self.counters[args] += value

View file

@ -7,8 +7,7 @@ from urllib.parse import urlparse, unquote
from searx import logger
from searx.engines import engines
from searx.metrics import histogram_observe, counter_add, count_error
from searx import metrics
CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
@ -226,10 +225,10 @@ class ResultContainer:
if len(error_msgs) > 0:
for msg in error_msgs:
count_error(engine_name, 'some results are invalids: ' + msg, secondary=True)
metrics.count_error(engine_name, 'some results are invalids: ' + msg, secondary=True)
if engine_name in engines:
histogram_observe(standard_result_count, 'engine', engine_name, 'result', 'count')
metrics.HISTOGRAM_STORAGE.observe(standard_result_count, 'engine', engine_name, 'result', 'count')
if not self.paging and standard_result_count > 0 and engine_name in engines and engines[engine_name].paging:
self.paging = True
@ -354,7 +353,7 @@ class ResultContainer:
score = result_score(result)
result['score'] = score
for result_engine in result['engines']:
counter_add(score, 'engine', result_engine, 'score')
metrics.COUNTER_STORAGE.add(score, 'engine', result_engine, 'score')
results = sorted(self._merged_results, key=itemgetter('score'), reverse=True)

View file

@ -17,7 +17,7 @@ from searx.plugins import plugins
from searx.search.models import EngineRef, SearchQuery
from searx.engines import load_engines
from searx.network import initialize as initialize_network, check_network_configuration
from searx.metrics import initialize as initialize_metrics, counter_inc, histogram_observe_time
from searx import metrics
from searx.search.processors import PROCESSORS, initialize as initialize_processors
from searx.search.checker import initialize as initialize_checker
@ -31,7 +31,7 @@ def initialize(settings_engines=None, enable_checker=False, check_network=False,
initialize_network(settings_engines, settings['outgoing'])
if check_network:
check_network_configuration()
initialize_metrics([engine['name'] for engine in settings_engines], enable_metrics)
metrics.initialize([engine['name'] for engine in settings_engines], enable_metrics)
initialize_processors(settings_engines)
if enable_checker:
initialize_checker()
@ -98,7 +98,7 @@ class Search:
if request_params is None:
continue
counter_inc('engine', engineref.name, 'search', 'count', 'sent')
metrics.COUNTER_STORAGE.inc('engine', engineref.name, 'search', 'count', 'sent')
# append request to list
requests.append((engineref.name, self.search_query.query, request_params))

View file

@ -19,7 +19,7 @@ from searx.utils import gen_useragent
from searx.results import ResultContainer
from searx.search.models import SearchQuery, EngineRef
from searx.search.processors import EngineProcessor
from searx.metrics import counter_inc
from searx import metrics
logger = logger.getChild('searx.search.checker')
@ -414,7 +414,7 @@ class Checker:
engineref_category = search_query.engineref_list[0].category
params = self.processor.get_params(search_query, engineref_category)
if params is not None:
counter_inc('engine', search_query.engineref_list[0].name, 'search', 'count', 'sent')
metrics.COUNTER_STORAGE.inc('engine', search_query.engineref_list[0].name, 'search', 'count', 'sent')
self.processor.search(search_query.query, params, result_container, default_timer(), 5)
return result_container

View file

@ -13,7 +13,7 @@ from typing import Dict, Union
from searx import settings, logger
from searx.engines import engines
from searx.network import get_time_for_thread, get_network
from searx.metrics import histogram_observe, counter_inc, count_exception, count_error
from searx import metrics
from searx.exceptions import SearxEngineAccessDeniedException, SearxEngineResponseException
from searx.utils import get_engine_from_settings
@ -95,11 +95,11 @@ class EngineProcessor(ABC):
error_message = exception_or_message
result_container.add_unresponsive_engine(self.engine_name, error_message)
# metrics
counter_inc('engine', self.engine_name, 'search', 'count', 'error')
metrics.COUNTER_STORAGE.inc('engine', self.engine_name, 'search', 'count', 'error')
if isinstance(exception_or_message, BaseException):
count_exception(self.engine_name, exception_or_message)
metrics.count_exception(self.engine_name, exception_or_message)
else:
count_error(self.engine_name, exception_or_message)
metrics.count_error(self.engine_name, exception_or_message)
# suspend the engine ?
if suspend:
suspended_time = None
@ -114,10 +114,10 @@ class EngineProcessor(ABC):
page_load_time = get_time_for_thread()
result_container.add_timing(self.engine_name, engine_time, page_load_time)
# metrics
counter_inc('engine', self.engine_name, 'search', 'count', 'successful')
histogram_observe(engine_time, 'engine', self.engine_name, 'time', 'total')
metrics.COUNTER_STORAGE.inc('engine', self.engine_name, 'search', 'count', 'successful')
metrics.HISTOGRAM_STORAGE.observe(engine_time, 'engine', self.engine_name, 'time', 'total')
if page_load_time is not None:
histogram_observe(page_load_time, 'engine', self.engine_name, 'time', 'http')
metrics.HISTOGRAM_STORAGE.observe(page_load_time, 'engine', self.engine_name, 'time', 'http')
def extend_container(self, result_container, start_time, search_results):
if getattr(threading.current_thread(), '_timeout', False):

View file

@ -16,7 +16,8 @@ from searx.exceptions import (
SearxEngineCaptchaException,
SearxEngineTooManyRequestsException,
)
from searx.metrics.error_recorder import count_error
from searx import metrics
from .abstract import EngineProcessor
@ -113,7 +114,7 @@ class OnlineProcessor(EngineProcessor):
status_code = str(response.status_code or '')
reason = response.reason_phrase or ''
hostname = response.url.host
count_error(
metrics.count_error(
self.engine_name,
'{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects),
(status_code, reason, hostname),

View file

@ -102,13 +102,8 @@ from searx.answerers import (
answerers,
ask,
)
from searx.metrics import (
get_engines_metrics,
get_engine_errors,
get_reliabilities,
histogram,
counter,
)
from searx import metrics
from searx.flaskfix import patch_application
from searx.locales import (
@ -983,15 +978,15 @@ def preferences():
stats = {} # pylint: disable=redefined-outer-name
max_rate95 = 0
for _, e in filtered_engines.items():
h = histogram('engine', e.name, 'time', 'total')
h = metrics.HISTOGRAM_STORAGE.get('engine', e.name, 'time', 'total')
median = round(h.percentile(50), 1) if h.count > 0 else None # type: ignore
rate80 = round(h.percentile(80), 1) if h.count > 0 else None # type: ignore
rate95 = round(h.percentile(95), 1) if h.count > 0 else None # type: ignore
max_rate95 = max(max_rate95, rate95 or 0)
result_count_sum = histogram('engine', e.name, 'result', 'count').sum
successful_count = counter('engine', e.name, 'search', 'count', 'successful')
result_count_sum = metrics.HISTOGRAM_STORAGE.get('engine', e.name, 'result', 'count').sum
successful_count = metrics.COUNTER_STORAGE.get('engine', e.name, 'search', 'count', 'successful')
result_count = int(result_count_sum / float(successful_count)) if successful_count else 0
stats[e.name] = {
@ -1006,7 +1001,7 @@ def preferences():
# reliabilities
reliabilities = {}
engine_errors = get_engine_errors(filtered_engines)
engine_errors = metrics.get_engine_errors(filtered_engines)
checker_results = checker_get_result()
checker_results = (
checker_results['engines'] if checker_results['status'] == 'ok' and 'engines' in checker_results else {}
@ -1015,7 +1010,7 @@ def preferences():
checker_result = checker_results.get(e.name, {})
checker_success = checker_result.get('success', True)
errors = engine_errors.get(e.name) or []
if counter('engine', e.name, 'search', 'count', 'sent') == 0:
if metrics.COUNTER_STORAGE.get('engine', e.name, 'search', 'count', 'sent') == 0:
# no request
reliablity = None
elif checker_success and not errors:
@ -1222,8 +1217,8 @@ def stats():
checker_results['engines'] if checker_results['status'] == 'ok' and 'engines' in checker_results else {}
)
engine_metrics = get_engines_metrics(filtered_engines)
engine_reliabilities = get_reliabilities(filtered_engines, checker_results)
engine_metrics = metrics.get_engines_metrics(filtered_engines)
engine_reliabilities = metrics.get_reliabilities(filtered_engines, checker_results)
if sort_order not in STATS_SORT_PARAMETERS:
sort_order = 'name'
@ -1258,7 +1253,7 @@ def stats():
@app.route('/stats/errors', methods=['GET'])
def stats_errors():
filtered_engines = dict(filter(lambda kv: request.preferences.validate_token(kv[1]), engines.items()))
result = get_engine_errors(filtered_engines)
result = metrics.get_engine_errors(filtered_engines)
return jsonify(result)