diff --git a/searx/metrics/__init__.py b/searx/metrics/__init__.py index 18d2170df..2932ae92c 100644 --- a/searx/metrics/__init__.py +++ b/searx/metrics/__init__.py @@ -2,14 +2,13 @@ # lint: pylint # pylint: disable=missing-module-docstring -import typing +from typing import Dict, TypedDict, List, Optional, Any import math import contextlib from timeit import default_timer -from operator import itemgetter from searx.engines import engines -from .models import HistogramStorage, CounterStorage, VoidHistogram, VoidCounterStorage +from .models import Histogram, HistogramStorage, CounterStorage, VoidHistogram, VoidCounterStorage from .error_recorder import count_error, count_exception, errors_per_engines __all__ = [ @@ -27,11 +26,8 @@ __all__ = [ ] -ENDPOINTS = {'search'} - - -histogram_storage: typing.Optional[HistogramStorage] = None -counter_storage: typing.Optional[CounterStorage] = None +histogram_storage: Optional[HistogramStorage] = None +counter_storage: Optional[CounterStorage] = None @contextlib.contextmanager @@ -50,7 +46,7 @@ def histogram_observe(duration, *args): histogram_storage.get(*args).observe(duration) -def histogram(*args, raise_on_not_found=True): +def histogram(*args, raise_on_not_found=True) -> Histogram: h = histogram_storage.get(*args) if raise_on_not_found and h is None: raise ValueError("histogram " + repr((*args,)) + " doesn't not exist") @@ -65,7 +61,7 @@ def counter_add(value, *args): counter_storage.add(value, *args) -def counter(*args): +def counter(*args) -> int: return counter_storage.get(*args) @@ -110,7 +106,21 @@ def initialize(engine_names=None, enabled=True): histogram_storage.configure(histogram_width, histogram_size, 'engine', engine_name, 'time', 'total') -def get_engine_errors(engline_name_list): +class EngineError(TypedDict): + """Describe an engine error. To do : check the types""" + + filename: str + function: str + line_no: int + code: str + exception_classname: str + log_message: str + log_parameters: List[str] + secondary: bool + percentage: int + + +def get_engine_errors(engline_name_list) -> Dict[str, List[EngineError]]: result = {} engine_names = list(errors_per_engines.keys()) engine_names.sort() @@ -141,7 +151,15 @@ def get_engine_errors(engline_name_list): return result -def get_reliabilities(engline_name_list, checker_results): +class EngineReliability(TypedDict): + """Describe the engine reliability. To do: update the checker field type""" + + reliability: int + errors: List[EngineError] + checker: Optional[Any] + + +def get_reliabilities(engline_name_list, checker_results) -> Dict[str, EngineReliability]: reliabilities = {} engine_errors = get_engine_errors(engline_name_list) @@ -184,11 +202,11 @@ def get_engines_stats(engine_name_list): if sent_count == 0: continue - result_count = histogram('engine', engine_name, 'result', 'count').percentage(50) + result_count = histogram('engine', engine_name, 'result', 'count').percentile(50) result_count_sum = histogram('engine', engine_name, 'result', 'count').sum successful_count = counter('engine', engine_name, 'search', 'count', 'successful') - time_total = histogram('engine', engine_name, 'time', 'total').percentage(50) + time_total = histogram('engine', engine_name, 'time', 'total').percentile(50) max_time_total = max(time_total or 0, max_time_total or 0) max_result_count = max(result_count or 0, max_result_count or 0) @@ -214,13 +232,13 @@ def get_engines_stats(engine_name_list): stats['score'] = score stats['score_per_result'] = score / float(result_count_sum) - time_http = histogram('engine', engine_name, 'time', 'http').percentage(50) + time_http = histogram('engine', engine_name, 'time', 'http').percentile(50) time_http_p80 = time_http_p95 = 0 if time_http is not None: - time_http_p80 = histogram('engine', engine_name, 'time', 'http').percentage(80) - time_http_p95 = histogram('engine', engine_name, 'time', 'http').percentage(95) + time_http_p80 = histogram('engine', engine_name, 'time', 'http').percentile(80) + time_http_p95 = histogram('engine', engine_name, 'time', 'http').percentile(95) stats['http'] = round(time_http, 1) stats['http_p80'] = round(time_http_p80, 1) @@ -228,8 +246,8 @@ def get_engines_stats(engine_name_list): if time_total is not None: - time_total_p80 = histogram('engine', engine_name, 'time', 'total').percentage(80) - time_total_p95 = histogram('engine', engine_name, 'time', 'total').percentage(95) + time_total_p80 = histogram('engine', engine_name, 'time', 'total').percentile(80) + time_total_p95 = histogram('engine', engine_name, 'time', 'total').percentile(95) stats['total'] = round(time_total, 1) stats['total_p80'] = round(time_total_p80, 1) diff --git a/searx/metrics/models.py b/searx/metrics/models.py index 900a7fa93..532ae9863 100644 --- a/searx/metrics/models.py +++ b/searx/metrics/models.py @@ -1,7 +1,9 @@ # SPDX-License-Identifier: AGPL-3.0-or-later import decimal +from numbers import Number import threading +from typing import Dict, List, Optional, Tuple from searx import logger @@ -15,15 +17,19 @@ class Histogram: _slots__ = '_lock', '_size', '_sum', '_quartiles', '_count', '_width' - def __init__(self, width=10, size=200): + def __init__(self, width: int = 10, size: int = 200): + """ + * width: quantile width + * size: number of quantiles + """ self._lock = threading.Lock() self._width = width self._size = size self._quartiles = [0] * size - self._count = 0 - self._sum = 0 + self._count: int = 0 + self._sum: int = 0 - def observe(self, value): + def observe(self, value: Number): q = int(value / self._width) if q < 0: """Value below zero is ignored""" @@ -37,19 +43,19 @@ class Histogram: self._sum += value @property - def quartiles(self): + def quartiles(self) -> List[int]: return list(self._quartiles) @property - def count(self): + def count(self) -> int: return self._count @property - def sum(self): + def sum(self) -> int: return self._sum @property - def average(self): + def average(self) -> float: with self._lock: if self._count != 0: return self._sum / self._count @@ -57,31 +63,20 @@ class Histogram: return 0 @property - def quartile_percentage(self): - '''Quartile in percentage''' + def quartile_percentages(self) -> List[int]: + """Quartile in percentage""" with self._lock: if self._count > 0: return [int(q * 100 / self._count) for q in self._quartiles] else: return self._quartiles - @property - def quartile_percentage_map(self): - result = {} - # use Decimal to avoid rounding errors - x = decimal.Decimal(0) - width = decimal.Decimal(self._width) - width_exponent = -width.as_tuple().exponent - with self._lock: - if self._count > 0: - for y in self._quartiles: - yp = int(y * 100 / self._count) - if yp != 0: - result[round(float(x), width_exponent)] = yp - x += width - return result + def percentile(self, percentage: Number) -> Optional[decimal.Decimal]: + """ + Return the percentile. - def percentage(self, percentage): + * percentage from 0 to 100 + """ # use Decimal to avoid rounding errors x = decimal.Decimal(0) width = decimal.Decimal(self._width) @@ -109,14 +104,14 @@ class HistogramStorage: self.histogram_class = histogram_class def clear(self): - self.measures = {} + self.measures: Dict[Tuple[str], Histogram] = {} def configure(self, width, size, *args): measure = self.histogram_class(width, size) self.measures[args] = measure return measure - def get(self, *args): + def get(self, *args) -> Optional[Histogram]: return self.measures.get(args, None) def dump(self): @@ -136,13 +131,13 @@ class CounterStorage: def clear(self): with self.lock: - self.counters = {} + self.counters: Dict[Tuple[str], int] = {} def configure(self, *args): with self.lock: self.counters[args] = 0 - def get(self, *args): + def get(self, *args) -> int: return self.counters[args] def add(self, value, *args): diff --git a/searx/webapp.py b/searx/webapp.py index bd76cc534..bf7e5dcd8 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -984,9 +984,9 @@ def preferences(): max_rate95 = 0 for _, e in filtered_engines.items(): h = histogram('engine', e.name, 'time', 'total') - median = round(h.percentage(50), 1) if h.count > 0 else None - rate80 = round(h.percentage(80), 1) if h.count > 0 else None - rate95 = round(h.percentage(95), 1) if h.count > 0 else None + median = round(h.percentile(50), 1) if h.count > 0 else None # type: ignore + rate80 = round(h.percentile(80), 1) if h.count > 0 else None # type: ignore + rate95 = round(h.percentile(95), 1) if h.count > 0 else None # type: ignore max_rate95 = max(max_rate95, rate95 or 0)