Merge 2b69780f0d into c0b97c6543

2024-01-01 19:24:07 +01:00 · 2024-02-28 21:57:33 +01:00 · 2024-02-28 21:57:33 +01:00 · 8cb59c6932
commit 8cb59c6932
parent c0b97c6543 2b69780f0d
8 changed files with 172 additions and 148 deletions
--- a/searx/metrics/init.py
+++ b/searx/metrics/init.py
@ -2,85 +2,57 @@
 # lint: pylint
 # pylint: disable=missing-module-docstring

-import typing
 import math
 import contextlib
 from timeit import default_timer
-from operator import itemgetter
+
+from typing import Dict, List, Optional, Any
+from typing_extensions import TypedDict

 from searx.engines import engines
-from .models import HistogramStorage, CounterStorage, VoidHistogram, VoidCounterStorage
+from .models import Histogram, HistogramStorage, CounterStorage, VoidHistogram, VoidCounterStorage
 from .error_recorder import count_error, count_exception, errors_per_engines

 __all__ = [
    "initialize",
-    "get_engines_stats",
+    "get_engines_metrics",
    "get_engine_errors",
-    "histogram",
-    "histogram_observe",
-    "histogram_observe_time",
-    "counter",
-    "counter_inc",
-    "counter_add",
    "count_error",
    "count_exception",
 ]


-ENDPOINTS = {'search'}
+HISTOGRAM_STORAGE: Optional[HistogramStorage] = None
+COUNTER_STORAGE: Optional[CounterStorage] = None


-histogram_storage: typing.Optional[HistogramStorage] = None
-counter_storage: typing.Optional[CounterStorage] = None
-
-
-@contextlib.contextmanager
-def histogram_observe_time(*args):
-    h = histogram_storage.get(*args)
-    before = default_timer()
-    yield before
-    duration = default_timer() - before
-    if h:
-        h.observe(duration)
-    else:
-        raise ValueError("histogram " + repr((*args,)) + " doesn't not exist")
-
-
-def histogram_observe(duration, *args):
-    histogram_storage.get(*args).observe(duration)
-
-
-def histogram(*args, raise_on_not_found=True):
-    h = histogram_storage.get(*args)
-    if raise_on_not_found and h is None:
-        raise ValueError("histogram " + repr((*args,)) + " doesn't not exist")
-    return h
-
-
-def counter_inc(*args):
-    counter_storage.add(1, *args)
-
-
-def counter_add(value, *args):
-    counter_storage.add(value, *args)
-
-
-def counter(*args):
-    return counter_storage.get(*args)
+# This context manager is currently unused, so it is commented out.
+#
+# @contextlib.contextmanager
+# def histogram_observe_time(*args):
+#     h = histogram_storage.get(*args)
+#     before = default_timer()
+#     yield before
+#     duration = default_timer() - before
+#     if h:
+#         h.observe(duration)
+#     else:
+#         raise ValueError("histogram " + repr((*args,)) + " doesn't not exist")


 def initialize(engine_names=None, enabled=True):
    """
    Initialize metrics
    """
-    global counter_storage, histogram_storage  # pylint: disable=global-statement
+
+    global COUNTER_STORAGE, HISTOGRAM_STORAGE  # pylint: disable=global-statement

    if enabled:
-        counter_storage = CounterStorage()
-        histogram_storage = HistogramStorage()
+        COUNTER_STORAGE = CounterStorage()
+        HISTOGRAM_STORAGE = HistogramStorage()
    else:
-        counter_storage = VoidCounterStorage()
-        histogram_storage = HistogramStorage(histogram_class=VoidHistogram)
+        COUNTER_STORAGE = VoidCounterStorage()
+        HISTOGRAM_STORAGE = HistogramStorage(histogram_class=VoidHistogram)

    # max_timeout = max of all the engine.timeout
    max_timeout = 2
@ -95,22 +67,36 @@ def initialize(engine_names=None, enabled=True):
    # engines
    for engine_name in engine_names or engines:
        # search count
-        counter_storage.configure('engine', engine_name, 'search', 'count', 'sent')
-        counter_storage.configure('engine', engine_name, 'search', 'count', 'successful')
+        COUNTER_STORAGE.configure('engine', engine_name, 'search', 'count', 'sent')
+        COUNTER_STORAGE.configure('engine', engine_name, 'search', 'count', 'successful')
        # global counter of errors
-        counter_storage.configure('engine', engine_name, 'search', 'count', 'error')
+        COUNTER_STORAGE.configure('engine', engine_name, 'search', 'count', 'error')
        # score of the engine
-        counter_storage.configure('engine', engine_name, 'score')
+        COUNTER_STORAGE.configure('engine', engine_name, 'score')
        # result count per requests
-        histogram_storage.configure(1, 100, 'engine', engine_name, 'result', 'count')
+        HISTOGRAM_STORAGE.configure(1, 100, 'engine', engine_name, 'result', 'count')
        # time doing HTTP requests
-        histogram_storage.configure(histogram_width, histogram_size, 'engine', engine_name, 'time', 'http')
+        HISTOGRAM_STORAGE.configure(histogram_width, histogram_size, 'engine', engine_name, 'time', 'http')
        # total time
        # .time.request and ...response times may overlap .time.http time.
-        histogram_storage.configure(histogram_width, histogram_size, 'engine', engine_name, 'time', 'total')
+        HISTOGRAM_STORAGE.configure(histogram_width, histogram_size, 'engine', engine_name, 'time', 'total')


-def get_engine_errors(engline_name_list):
+class EngineError(TypedDict):
+    """Describe one engine error. TODO: verify the field types."""
+
+    filename: str
+    function: str
+    line_no: int
+    code: str
+    exception_classname: str
+    log_message: str
+    log_parameters: List[str]
+    secondary: bool
+    percentage: int
+
+
+def get_engine_errors(engline_name_list) -> Dict[str, List[EngineError]]:
    result = {}
    engine_names = list(errors_per_engines.keys())
    engine_names.sort()
@ -119,7 +105,7 @@ def get_engine_errors(engline_name_list):
            continue

        error_stats = errors_per_engines[engine_name]
-        sent_search_count = max(counter('engine', engine_name, 'search', 'count', 'sent'), 1)
+        sent_search_count = max(COUNTER_STORAGE.get('engine', engine_name, 'search', 'count', 'sent'), 1)
        sorted_context_count_list = sorted(error_stats.items(), key=lambda context_count: context_count[1])
        r = []
        for context, count in sorted_context_count_list:
@ -141,7 +127,15 @@ def get_engine_errors(engline_name_list):
    return result


-def get_reliabilities(engline_name_list, checker_results):
+class EngineReliability(TypedDict):
+    """Reliability of one engine; ``reliability`` is None if no request was sent. TODO: type ``checker``."""
+
+    reliability: Optional[int]
+    errors: List[EngineError]
+    checker: Optional[Any]
+
+
+def get_reliabilities(engline_name_list, checker_results) -> Dict[str, EngineReliability]:
    reliabilities = {}

    engine_errors = get_engine_errors(engline_name_list)
@ -150,7 +144,7 @@ def get_reliabilities(engline_name_list, checker_results):
        checker_result = checker_results.get(engine_name, {})
        checker_success = checker_result.get('success', True)
        errors = engine_errors.get(engine_name) or []
-        if counter('engine', engine_name, 'search', 'count', 'sent') == 0:
+        if COUNTER_STORAGE.get('engine', engine_name, 'search', 'count', 'sent') == 0:
            # no request
            reliability = None
        elif checker_success and not errors:
@ -171,24 +165,55 @@ def get_reliabilities(engline_name_list, checker_results):
    return reliabilities


-def get_engines_stats(engine_name_list):
-    assert counter_storage is not None
-    assert histogram_storage is not None
+class EngineStat(TypedDict):
+    """Metrics for one engine. TODO: verify the field types."""
+
+    name: str
+    total: Optional[float]
+    total_p80: Optional[float]
+    total_p95: Optional[float]
+    http: Optional[float]
+    http_p80: Optional[float]
+    http_p95: Optional[float]
+    processing: Optional[float]
+    processing_p80: Optional[float]
+    processing_p95: Optional[float]
+    score: float
+    score_per_result: float
+    result_count: int
+
+
+class EngineStatResult(TypedDict):
+    """Result of the get_engines_metrics function."""
+
+    time: List[EngineStat]
+    """List of engine stat"""
+
+    max_time: float
+    """Maximum response time for all the engines"""
+
+    max_result_count: int
+    """Maximum number of result for all the engines"""
+
+
+def get_engines_metrics(engine_name_list) -> EngineStatResult:
+    assert COUNTER_STORAGE is not None
+    assert HISTOGRAM_STORAGE is not None

    list_time = []
    max_time_total = max_result_count = None

    for engine_name in engine_name_list:

-        sent_count = counter('engine', engine_name, 'search', 'count', 'sent')
+        sent_count = COUNTER_STORAGE.get('engine', engine_name, 'search', 'count', 'sent')
        if sent_count == 0:
            continue

-        result_count = histogram('engine', engine_name, 'result', 'count').percentage(50)
-        result_count_sum = histogram('engine', engine_name, 'result', 'count').sum
-        successful_count = counter('engine', engine_name, 'search', 'count', 'successful')
+        result_count = HISTOGRAM_STORAGE.get('engine', engine_name, 'result', 'count').percentile(50)
+        result_count_sum = HISTOGRAM_STORAGE.get('engine', engine_name, 'result', 'count').sum
+        successful_count = COUNTER_STORAGE.get('engine', engine_name, 'search', 'count', 'successful')

-        time_total = histogram('engine', engine_name, 'time', 'total').percentage(50)
+        time_total = HISTOGRAM_STORAGE.get('engine', engine_name, 'time', 'total').percentile(50)
        max_time_total = max(time_total or 0, max_time_total or 0)
        max_result_count = max(result_count or 0, max_result_count or 0)

@ -209,18 +234,18 @@ def get_engines_stats(engine_name_list):
        }

        if successful_count and result_count_sum:
-            score = counter('engine', engine_name, 'score')
+            score = COUNTER_STORAGE.get('engine', engine_name, 'score')

            stats['score'] = score
            stats['score_per_result'] = score / float(result_count_sum)

-        time_http = histogram('engine', engine_name, 'time', 'http').percentage(50)
+        time_http = HISTOGRAM_STORAGE.get('engine', engine_name, 'time', 'http').percentile(50)
        time_http_p80 = time_http_p95 = 0

        if time_http is not None:

-            time_http_p80 = histogram('engine', engine_name, 'time', 'http').percentage(80)
-            time_http_p95 = histogram('engine', engine_name, 'time', 'http').percentage(95)
+            time_http_p80 = HISTOGRAM_STORAGE.get('engine', engine_name, 'time', 'http').percentile(80)
+            time_http_p95 = HISTOGRAM_STORAGE.get('engine', engine_name, 'time', 'http').percentile(95)

            stats['http'] = round(time_http, 1)
            stats['http_p80'] = round(time_http_p80, 1)
@ -228,8 +253,8 @@ def get_engines_stats(engine_name_list):

        if time_total is not None:

-            time_total_p80 = histogram('engine', engine_name, 'time', 'total').percentage(80)
-            time_total_p95 = histogram('engine', engine_name, 'time', 'total').percentage(95)
+            time_total_p80 = HISTOGRAM_STORAGE.get('engine', engine_name, 'time', 'total').percentile(80)
+            time_total_p95 = HISTOGRAM_STORAGE.get('engine', engine_name, 'time', 'total').percentile(95)

            stats['total'] = round(time_total, 1)
            stats['total_p80'] = round(time_total_p80, 1)
--- a/searx/metrics/models.py
+++ b/searx/metrics/models.py
@ -1,7 +1,9 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later

 import decimal
+from numbers import Number
 import threading
+from typing import Dict, List, Optional, Tuple

 from searx import logger

@ -15,15 +17,19 @@ class Histogram:

    _slots__ = '_lock', '_size', '_sum', '_quartiles', '_count', '_width'

-    def __init__(self, width=10, size=200):
+    def __init__(self, width: int = 10, size: int = 200):
+        """
+        * width: quantile width
+        * size: number of quantiles
+        """
        self._lock = threading.Lock()
        self._width = width
        self._size = size
        self._quartiles = [0] * size
-        self._count = 0
-        self._sum = 0
+        self._count: int = 0
+        self._sum: int = 0

-    def observe(self, value):
+    def observe(self, value: Number):
        q = int(value / self._width)
        if q < 0:
            """Value below zero is ignored"""
@ -37,19 +43,19 @@ class Histogram:
            self._sum += value

    @property
-    def quartiles(self):
+    def quartiles(self) -> List[int]:
        return list(self._quartiles)

    @property
-    def count(self):
+    def count(self) -> int:
        return self._count

    @property
-    def sum(self):
+    def sum(self) -> int:
        return self._sum

    @property
-    def average(self):
+    def average(self) -> float:
        with self._lock:
            if self._count != 0:
                return self._sum / self._count
@ -57,31 +63,20 @@ class Histogram:
                return 0

    @property
-    def quartile_percentage(self):
-        '''Quartile in percentage'''
+    def quartile_percentages(self) -> List[int]:
+        """Quartile in percentage"""
        with self._lock:
            if self._count > 0:
                return [int(q * 100 / self._count) for q in self._quartiles]
            else:
                return self._quartiles

-    @property
-    def quartile_percentage_map(self):
-        result = {}
-        # use Decimal to avoid rounding errors
-        x = decimal.Decimal(0)
-        width = decimal.Decimal(self._width)
-        width_exponent = -width.as_tuple().exponent
-        with self._lock:
-            if self._count > 0:
-                for y in self._quartiles:
-                    yp = int(y * 100 / self._count)
-                    if yp != 0:
-                        result[round(float(x), width_exponent)] = yp
-                    x += width
-        return result
+    def percentile(self, percentage: Number) -> Optional[decimal.Decimal]:
+        """
+        Return the percentile.

-    def percentage(self, percentage):
+        * percentage from 0 to 100
+        """
        # use Decimal to avoid rounding errors
        x = decimal.Decimal(0)
        width = decimal.Decimal(self._width)
@ -109,15 +104,21 @@ class HistogramStorage:
        self.histogram_class = histogram_class

    def clear(self):
-        self.measures = {}
+        self.measures: Dict[Tuple[str], Histogram] = {}

    def configure(self, width, size, *args):
        measure = self.histogram_class(width, size)
        self.measures[args] = measure
        return measure

-    def get(self, *args):
-        return self.measures.get(args, None)
+    def get(self, *args, raise_on_not_found=True) -> Optional[Histogram]:
+        """Return the histogram stored under the key ``args``.
+
+        When no histogram is stored under that key, raise ValueError if
+        ``raise_on_not_found`` is true, otherwise return None.
+        """
+        h = self.measures.get(args)
+        if raise_on_not_found and h is None:
+            # args is already a tuple, so repr(args) shows the full key
+            raise ValueError("histogram " + repr(args) + " does not exist")
+        return h
+
+    def observe(self, duration, *args):
+        """Record ``duration`` in the histogram stored under ``args`` (ValueError if it is not configured)."""
+        self.get(*args).observe(duration)

    def dump(self):
        logger.debug("Histograms:")
@ -136,15 +137,18 @@ class CounterStorage:

    def clear(self):
        with self.lock:
-            self.counters = {}
+            self.counters: Dict[Tuple[str], int] = {}

    def configure(self, *args):
        with self.lock:
            self.counters[args] = 0

-    def get(self, *args):
+    def get(self, *args) -> int:
        return self.counters[args]

+    def inc(self, *args):
+        """Add 1 to the counter stored under ``args`` (delegates to ``add``)."""
+        self.add(1, *args)
+
    def add(self, value, *args):
        with self.lock:
            self.counters[args] += value