From efc728b4ed993f43b26b0e195423bdac42849333 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Sun, 18 Sep 2022 11:13:08 +0200 Subject: [PATCH 1/5] WIP --- searx/metrics/__init__.py | 56 ++++++++++++++++++++++++++------------- searx/metrics/models.py | 55 +++++++++++++++++--------------------- searx/webapp.py | 6 ++--- 3 files changed, 65 insertions(+), 52 deletions(-) diff --git a/searx/metrics/__init__.py b/searx/metrics/__init__.py index 18d2170df..2932ae92c 100644 --- a/searx/metrics/__init__.py +++ b/searx/metrics/__init__.py @@ -2,14 +2,13 @@ # lint: pylint # pylint: disable=missing-module-docstring -import typing +from typing import Dict, TypedDict, List, Optional, Any import math import contextlib from timeit import default_timer -from operator import itemgetter from searx.engines import engines -from .models import HistogramStorage, CounterStorage, VoidHistogram, VoidCounterStorage +from .models import Histogram, HistogramStorage, CounterStorage, VoidHistogram, VoidCounterStorage from .error_recorder import count_error, count_exception, errors_per_engines __all__ = [ @@ -27,11 +26,8 @@ __all__ = [ ] -ENDPOINTS = {'search'} - - -histogram_storage: typing.Optional[HistogramStorage] = None -counter_storage: typing.Optional[CounterStorage] = None +histogram_storage: Optional[HistogramStorage] = None +counter_storage: Optional[CounterStorage] = None @contextlib.contextmanager @@ -50,7 +46,7 @@ def histogram_observe(duration, *args): histogram_storage.get(*args).observe(duration) -def histogram(*args, raise_on_not_found=True): +def histogram(*args, raise_on_not_found=True) -> Histogram: h = histogram_storage.get(*args) if raise_on_not_found and h is None: raise ValueError("histogram " + repr((*args,)) + " doesn't not exist") @@ -65,7 +61,7 @@ def counter_add(value, *args): counter_storage.add(value, *args) -def counter(*args): +def counter(*args) -> int: return counter_storage.get(*args) @@ -110,7 +106,21 @@ def initialize(engine_names=None, enabled=True): histogram_storage.configure(histogram_width, histogram_size, 'engine', engine_name, 'time', 'total') -def get_engine_errors(engline_name_list): +class EngineError(TypedDict): + """Describe an engine error. To do : check the types""" + + filename: str + function: str + line_no: int + code: str + exception_classname: str + log_message: str + log_parameters: List[str] + secondary: bool + percentage: int + + +def get_engine_errors(engline_name_list) -> Dict[str, List[EngineError]]: result = {} engine_names = list(errors_per_engines.keys()) engine_names.sort() @@ -141,7 +151,15 @@ def get_engine_errors(engline_name_list): return result -def get_reliabilities(engline_name_list, checker_results): +class EngineReliability(TypedDict): + """Describe the engine reliability. To do: update the checker field type""" + + reliability: int + errors: List[EngineError] + checker: Optional[Any] + + +def get_reliabilities(engline_name_list, checker_results) -> Dict[str, EngineReliability]: reliabilities = {} engine_errors = get_engine_errors(engline_name_list) @@ -184,11 +202,11 @@ def get_engines_stats(engine_name_list): if sent_count == 0: continue - result_count = histogram('engine', engine_name, 'result', 'count').percentage(50) + result_count = histogram('engine', engine_name, 'result', 'count').percentile(50) result_count_sum = histogram('engine', engine_name, 'result', 'count').sum successful_count = counter('engine', engine_name, 'search', 'count', 'successful') - time_total = histogram('engine', engine_name, 'time', 'total').percentage(50) + time_total = histogram('engine', engine_name, 'time', 'total').percentile(50) max_time_total = max(time_total or 0, max_time_total or 0) max_result_count = max(result_count or 0, max_result_count or 0) @@ -214,13 +232,13 @@ def get_engines_stats(engine_name_list): stats['score'] = score stats['score_per_result'] = score / float(result_count_sum) - time_http = histogram('engine', engine_name, 'time', 'http').percentage(50) + time_http = histogram('engine', engine_name, 'time', 'http').percentile(50) time_http_p80 = time_http_p95 = 0 if time_http is not None: - time_http_p80 = histogram('engine', engine_name, 'time', 'http').percentage(80) - time_http_p95 = histogram('engine', engine_name, 'time', 'http').percentage(95) + time_http_p80 = histogram('engine', engine_name, 'time', 'http').percentile(80) + time_http_p95 = histogram('engine', engine_name, 'time', 'http').percentile(95) stats['http'] = round(time_http, 1) stats['http_p80'] = round(time_http_p80, 1) @@ -228,8 +246,8 @@ def get_engines_stats(engine_name_list): if time_total is not None: - time_total_p80 = histogram('engine', engine_name, 'time', 'total').percentage(80) - time_total_p95 = histogram('engine', engine_name, 'time', 'total').percentage(95) + time_total_p80 = histogram('engine', engine_name, 'time', 'total').percentile(80) + time_total_p95 = histogram('engine', engine_name, 'time', 'total').percentile(95) stats['total'] = round(time_total, 1) stats['total_p80'] = round(time_total_p80, 1) diff --git a/searx/metrics/models.py b/searx/metrics/models.py index 900a7fa93..532ae9863 100644 --- a/searx/metrics/models.py +++ b/searx/metrics/models.py @@ -1,7 +1,9 @@ # SPDX-License-Identifier: AGPL-3.0-or-later import decimal +from numbers import Number import threading +from typing import Dict, List, Optional, Tuple from searx import logger @@ -15,15 +17,19 @@ class Histogram: _slots__ = '_lock', '_size', '_sum', '_quartiles', '_count', '_width' - def __init__(self, width=10, size=200): + def __init__(self, width: int = 10, size: int = 200): + """ + * width: quantile width + * size: number of quantiles + """ self._lock = threading.Lock() self._width = width self._size = size self._quartiles = [0] * size - self._count = 0 - self._sum = 0 + self._count: int = 0 + self._sum: int = 0 - def observe(self, value): + def observe(self, value: Number): q = int(value / self._width) if q < 0: """Value below zero is ignored""" @@ -37,19 +43,19 @@ class Histogram: self._sum += value @property - def quartiles(self): + def quartiles(self) -> List[int]: return list(self._quartiles) @property - def count(self): + def count(self) -> int: return self._count @property - def sum(self): + def sum(self) -> int: return self._sum @property - def average(self): + def average(self) -> float: with self._lock: if self._count != 0: return self._sum / self._count @@ -57,31 +63,20 @@ class Histogram: return 0 @property - def quartile_percentage(self): - '''Quartile in percentage''' + def quartile_percentages(self) -> List[int]: + """Quartile in percentage""" with self._lock: if self._count > 0: return [int(q * 100 / self._count) for q in self._quartiles] else: return self._quartiles - @property - def quartile_percentage_map(self): - result = {} - # use Decimal to avoid rounding errors - x = decimal.Decimal(0) - width = decimal.Decimal(self._width) - width_exponent = -width.as_tuple().exponent - with self._lock: - if self._count > 0: - for y in self._quartiles: - yp = int(y * 100 / self._count) - if yp != 0: - result[round(float(x), width_exponent)] = yp - x += width - return result + def percentile(self, percentage: Number) -> Optional[decimal.Decimal]: + """ + Return the percentile. - def percentage(self, percentage): + * percentage from 0 to 100 + """ # use Decimal to avoid rounding errors x = decimal.Decimal(0) width = decimal.Decimal(self._width) @@ -109,14 +104,14 @@ class HistogramStorage: self.histogram_class = histogram_class def clear(self): - self.measures = {} + self.measures: Dict[Tuple[str], Histogram] = {} def configure(self, width, size, *args): measure = self.histogram_class(width, size) self.measures[args] = measure return measure - def get(self, *args): + def get(self, *args) -> Optional[Histogram]: return self.measures.get(args, None) def dump(self): @@ -136,13 +131,13 @@ class CounterStorage: def clear(self): with self.lock: - self.counters = {} + self.counters: Dict[Tuple[str], int] = {} def configure(self, *args): with self.lock: self.counters[args] = 0 - def get(self, *args): + def get(self, *args) -> int: return self.counters[args] def add(self, value, *args): diff --git a/searx/webapp.py b/searx/webapp.py index bd76cc534..bf7e5dcd8 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -984,9 +984,9 @@ def preferences(): max_rate95 = 0 for _, e in filtered_engines.items(): h = histogram('engine', e.name, 'time', 'total') - median = round(h.percentage(50), 1) if h.count > 0 else None - rate80 = round(h.percentage(80), 1) if h.count > 0 else None - rate95 = round(h.percentage(95), 1) if h.count > 0 else None + median = round(h.percentile(50), 1) if h.count > 0 else None # type: ignore + rate80 = round(h.percentile(80), 1) if h.count > 0 else None # type: ignore + rate95 = round(h.percentile(95), 1) if h.count > 0 else None # type: ignore max_rate95 = max(max_rate95, rate95 or 0) From 63cd7940bfdb16588b61ba2f0f872437b375f715 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Sun, 18 Sep 2022 11:35:15 +0200 Subject: [PATCH 2/5] WIP --- searx/metrics/__init__.py | 33 +++++++++++++++++++++++++++++++-- searx/webapp.py | 8 ++++---- 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/searx/metrics/__init__.py b/searx/metrics/__init__.py index 2932ae92c..245490564 100644 --- a/searx/metrics/__init__.py +++ b/searx/metrics/__init__.py @@ -13,7 +13,7 @@ from .error_recorder import count_error, count_exception, errors_per_engines __all__ = [ "initialize", - "get_engines_stats", + "get_engines_metrics", "get_engine_errors", "histogram", "histogram_observe", @@ -189,7 +189,36 @@ def get_reliabilities(engline_name_list, checker_results) -> Dict[str, EngineRel return reliabilities -def get_engines_stats(engine_name_list): +class EngineStat(TypedDict): + """Metrics for one engine. To do: check the types""" + + name: str + total: Optional[float] + total_p80: Optional[float] + totla_p95: Optional[float] + http: Optional[float] + http_p80: Optional[float] + http_p95: Optional[float] + processing: Optional[float] + processing_p80: Optional[float] + processing_p95: Optional[float] + score: float + score_per_result: float + result_count: int + + +class EngineStatResult(TypedDict): + time: List[EngineStat] + """List of engine stat""" + + max_time: float + """Maximum response time for all the engines""" + + max_result_count: int + """Maximum number of result for all the engines""" + + +def get_engines_metrics(engine_name_list) -> EngineStatResult: assert counter_storage is not None assert histogram_storage is not None diff --git a/searx/webapp.py b/searx/webapp.py index bf7e5dcd8..929910ca5 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -103,7 +103,7 @@ from searx.answerers import ( ask, ) from searx.metrics import ( - get_engines_stats, + get_engines_metrics, get_engine_errors, get_reliabilities, histogram, @@ -1222,7 +1222,7 @@ def stats(): checker_results['engines'] if checker_results['status'] == 'ok' and 'engines' in checker_results else {} ) - engine_stats = get_engines_stats(filtered_engines) + engine_metrics = get_engines_metrics(filtered_engines) engine_reliabilities = get_reliabilities(filtered_engines, checker_results) if sort_order not in STATS_SORT_PARAMETERS: @@ -1242,12 +1242,12 @@ def stats(): reliability_order = 1 - reliability_order return (reliability_order, key, engine_stat['name']) - engine_stats['time'] = sorted(engine_stats['time'], reverse=reverse, key=get_key) + engine_metrics['time'] = sorted(engine_metrics['time'], reverse=reverse, key=get_key) return render( # fmt: off 'stats.html', sort_order = sort_order, - engine_stats = engine_stats, + engine_stats = engine_metrics, engine_reliabilities = engine_reliabilities, selected_engine_name = selected_engine_name, searx_git_branch = GIT_BRANCH, From 048b1e220a9011c5d76e45803bcab01e8f3225d1 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Sun, 18 Sep 2022 11:37:49 +0200 Subject: [PATCH 3/5] WIP --- searx/metrics/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/searx/metrics/__init__.py b/searx/metrics/__init__.py index 245490564..685db36ef 100644 --- a/searx/metrics/__init__.py +++ b/searx/metrics/__init__.py @@ -208,6 +208,8 @@ class EngineStat(TypedDict): class EngineStatResult(TypedDict): + """result of the get_engines_metrics function""" + time: List[EngineStat] """List of engine stat""" From 0047f4226eada3e4dc08dddb36865042a90eaa04 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Wed, 21 Sep 2022 15:48:56 +0200 Subject: [PATCH 4/5] [fix] python 3.7: No name 'TypedDict' in module 'typing' Signed-off-by: Markus Heiser --- searx/metrics/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/searx/metrics/__init__.py b/searx/metrics/__init__.py index 685db36ef..d078a7fcc 100644 --- a/searx/metrics/__init__.py +++ b/searx/metrics/__init__.py @@ -2,11 +2,13 @@ # lint: pylint # pylint: disable=missing-module-docstring -from typing import Dict, TypedDict, List, Optional, Any import math import contextlib from timeit import default_timer +from typing import Dict, List, Optional, Any +from typing_extensions import TypedDict + from searx.engines import engines from .models import Histogram, HistogramStorage, CounterStorage, VoidHistogram, VoidCounterStorage from .error_recorder import count_error, count_exception, errors_per_engines From 2b69780f0d73c6ae1642230a446f7365e9ce18aa Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Wed, 21 Sep 2022 15:17:02 +0200 Subject: [PATCH 5/5] [DRAFT] simplify searx.metrics implementation - Moved code of the wrapper functions to to methods of class HistogramStorage and class CounterStorage. - Renamed global histogram and counter objects to HISTOGRAM_STORAGE and COUNTER_STORAGE. - The imports of names from the metrics module in the application code has been reduced to: from searx import metrics By this convention the wrapper functions can be replaced by the globals from: metrics.HISTOGRAM_STORAGE metrics.COUNTER_STORAGE - comment out the context manager from searx.metrics.histogram_observe_time / we do not have a usage of this context manager. Signed-off-by: Markus Heiser --- searx/metrics/__init__.py | 110 +++++++++++----------------- searx/metrics/models.py | 13 +++- searx/results.py | 9 +-- searx/search/__init__.py | 6 +- searx/search/checker/impl.py | 4 +- searx/search/processors/abstract.py | 14 ++-- searx/search/processors/online.py | 5 +- searx/webapp.py | 25 +++---- 8 files changed, 82 insertions(+), 104 deletions(-) diff --git a/searx/metrics/__init__.py b/searx/metrics/__init__.py index d078a7fcc..152ec3d5c 100644 --- a/searx/metrics/__init__.py +++ b/searx/metrics/__init__.py @@ -17,68 +17,42 @@ __all__ = [ "initialize", "get_engines_metrics", "get_engine_errors", - "histogram", - "histogram_observe", - "histogram_observe_time", - "counter", - "counter_inc", - "counter_add", "count_error", "count_exception", ] -histogram_storage: Optional[HistogramStorage] = None -counter_storage: Optional[CounterStorage] = None +HISTOGRAM_STORAGE: Optional[HistogramStorage] = None +COUNTER_STORAGE: Optional[CounterStorage] = None -@contextlib.contextmanager -def histogram_observe_time(*args): - h = histogram_storage.get(*args) - before = default_timer() - yield before - duration = default_timer() - before - if h: - h.observe(duration) - else: - raise ValueError("histogram " + repr((*args,)) + " doesn't not exist") - - -def histogram_observe(duration, *args): - histogram_storage.get(*args).observe(duration) - - -def histogram(*args, raise_on_not_found=True) -> Histogram: - h = histogram_storage.get(*args) - if raise_on_not_found and h is None: - raise ValueError("histogram " + repr((*args,)) + " doesn't not exist") - return h - - -def counter_inc(*args): - counter_storage.add(1, *args) - - -def counter_add(value, *args): - counter_storage.add(value, *args) - - -def counter(*args) -> int: - return counter_storage.get(*args) +# We do not have a usage of this context manager +# +# @contextlib.contextmanager +# def histogram_observe_time(*args): +# h = histogram_storage.get(*args) +# before = default_timer() +# yield before +# duration = default_timer() - before +# if h: +# h.observe(duration) +# else: +# raise ValueError("histogram " + repr((*args,)) + " doesn't not exist") def initialize(engine_names=None, enabled=True): """ Initialize metrics """ - global counter_storage, histogram_storage # pylint: disable=global-statement + + global COUNTER_STORAGE, HISTOGRAM_STORAGE # pylint: disable=global-statement if enabled: - counter_storage = CounterStorage() - histogram_storage = HistogramStorage() + COUNTER_STORAGE = CounterStorage() + HISTOGRAM_STORAGE = HistogramStorage() else: - counter_storage = VoidCounterStorage() - histogram_storage = HistogramStorage(histogram_class=VoidHistogram) + COUNTER_STORAGE = VoidCounterStorage() + HISTOGRAM_STORAGE = HistogramStorage(histogram_class=VoidHistogram) # max_timeout = max of all the engine.timeout max_timeout = 2 @@ -93,19 +67,19 @@ def initialize(engine_names=None, enabled=True): # engines for engine_name in engine_names or engines: # search count - counter_storage.configure('engine', engine_name, 'search', 'count', 'sent') - counter_storage.configure('engine', engine_name, 'search', 'count', 'successful') + COUNTER_STORAGE.configure('engine', engine_name, 'search', 'count', 'sent') + COUNTER_STORAGE.configure('engine', engine_name, 'search', 'count', 'successful') # global counter of errors - counter_storage.configure('engine', engine_name, 'search', 'count', 'error') + COUNTER_STORAGE.configure('engine', engine_name, 'search', 'count', 'error') # score of the engine - counter_storage.configure('engine', engine_name, 'score') + COUNTER_STORAGE.configure('engine', engine_name, 'score') # result count per requests - histogram_storage.configure(1, 100, 'engine', engine_name, 'result', 'count') + HISTOGRAM_STORAGE.configure(1, 100, 'engine', engine_name, 'result', 'count') # time doing HTTP requests - histogram_storage.configure(histogram_width, histogram_size, 'engine', engine_name, 'time', 'http') + HISTOGRAM_STORAGE.configure(histogram_width, histogram_size, 'engine', engine_name, 'time', 'http') # total time # .time.request and ...response times may overlap .time.http time. - histogram_storage.configure(histogram_width, histogram_size, 'engine', engine_name, 'time', 'total') + HISTOGRAM_STORAGE.configure(histogram_width, histogram_size, 'engine', engine_name, 'time', 'total') class EngineError(TypedDict): @@ -131,7 +105,7 @@ def get_engine_errors(engline_name_list) -> Dict[str, List[EngineError]]: continue error_stats = errors_per_engines[engine_name] - sent_search_count = max(counter('engine', engine_name, 'search', 'count', 'sent'), 1) + sent_search_count = max(COUNTER_STORAGE.get('engine', engine_name, 'search', 'count', 'sent'), 1) sorted_context_count_list = sorted(error_stats.items(), key=lambda context_count: context_count[1]) r = [] for context, count in sorted_context_count_list: @@ -170,7 +144,7 @@ def get_reliabilities(engline_name_list, checker_results) -> Dict[str, EngineRel checker_result = checker_results.get(engine_name, {}) checker_success = checker_result.get('success', True) errors = engine_errors.get(engine_name) or [] - if counter('engine', engine_name, 'search', 'count', 'sent') == 0: + if COUNTER_STORAGE.get('engine', engine_name, 'search', 'count', 'sent') == 0: # no request reliablity = None elif checker_success and not errors: @@ -223,23 +197,23 @@ class EngineStatResult(TypedDict): def get_engines_metrics(engine_name_list) -> EngineStatResult: - assert counter_storage is not None - assert histogram_storage is not None + assert COUNTER_STORAGE is not None + assert HISTOGRAM_STORAGE is not None list_time = [] max_time_total = max_result_count = None for engine_name in engine_name_list: - sent_count = counter('engine', engine_name, 'search', 'count', 'sent') + sent_count = COUNTER_STORAGE.get('engine', engine_name, 'search', 'count', 'sent') if sent_count == 0: continue - result_count = histogram('engine', engine_name, 'result', 'count').percentile(50) - result_count_sum = histogram('engine', engine_name, 'result', 'count').sum - successful_count = counter('engine', engine_name, 'search', 'count', 'successful') + result_count = HISTOGRAM_STORAGE.get('engine', engine_name, 'result', 'count').percentile(50) + result_count_sum = HISTOGRAM_STORAGE.get('engine', engine_name, 'result', 'count').sum + successful_count = COUNTER_STORAGE.get('engine', engine_name, 'search', 'count', 'successful') - time_total = histogram('engine', engine_name, 'time', 'total').percentile(50) + time_total = HISTOGRAM_STORAGE.get('engine', engine_name, 'time', 'total').percentile(50) max_time_total = max(time_total or 0, max_time_total or 0) max_result_count = max(result_count or 0, max_result_count or 0) @@ -260,18 +234,18 @@ def get_engines_metrics(engine_name_list) -> EngineStatResult: } if successful_count and result_count_sum: - score = counter('engine', engine_name, 'score') + score = COUNTER_STORAGE.get('engine', engine_name, 'score') stats['score'] = score stats['score_per_result'] = score / float(result_count_sum) - time_http = histogram('engine', engine_name, 'time', 'http').percentile(50) + time_http = HISTOGRAM_STORAGE.get('engine', engine_name, 'time', 'http').percentile(50) time_http_p80 = time_http_p95 = 0 if time_http is not None: - time_http_p80 = histogram('engine', engine_name, 'time', 'http').percentile(80) - time_http_p95 = histogram('engine', engine_name, 'time', 'http').percentile(95) + time_http_p80 = HISTOGRAM_STORAGE.get('engine', engine_name, 'time', 'http').percentile(80) + time_http_p95 = HISTOGRAM_STORAGE.get('engine', engine_name, 'time', 'http').percentile(95) stats['http'] = round(time_http, 1) stats['http_p80'] = round(time_http_p80, 1) @@ -279,8 +253,8 @@ def get_engines_metrics(engine_name_list) -> EngineStatResult: if time_total is not None: - time_total_p80 = histogram('engine', engine_name, 'time', 'total').percentile(80) - time_total_p95 = histogram('engine', engine_name, 'time', 'total').percentile(95) + time_total_p80 = HISTOGRAM_STORAGE.get('engine', engine_name, 'time', 'total').percentile(80) + time_total_p95 = HISTOGRAM_STORAGE.get('engine', engine_name, 'time', 'total').percentile(95) stats['total'] = round(time_total, 1) stats['total_p80'] = round(time_total_p80, 1) diff --git a/searx/metrics/models.py b/searx/metrics/models.py index 532ae9863..499f66632 100644 --- a/searx/metrics/models.py +++ b/searx/metrics/models.py @@ -111,8 +111,14 @@ class HistogramStorage: self.measures[args] = measure return measure - def get(self, *args) -> Optional[Histogram]: - return self.measures.get(args, None) + def get(self, *args, raise_on_not_found=True) -> Optional[Histogram]: + h = self.measures.get(args, None) + if raise_on_not_found and h is None: + raise ValueError("histogram " + repr((*args,)) + " doesn't not exist") + return h + + def observe(self, duration, *args): + self.get(*args).observe(duration) def dump(self): logger.debug("Histograms:") @@ -140,6 +146,9 @@ class CounterStorage: def get(self, *args) -> int: return self.counters[args] + def inc(self, *args): + self.add(1, *args) + def add(self, value, *args): with self.lock: self.counters[args] += value diff --git a/searx/results.py b/searx/results.py index ab242b838..da8228065 100644 --- a/searx/results.py +++ b/searx/results.py @@ -7,8 +7,7 @@ from urllib.parse import urlparse, unquote from searx import logger from searx.engines import engines -from searx.metrics import histogram_observe, counter_add, count_error - +from searx import metrics CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U) WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U) @@ -226,10 +225,10 @@ class ResultContainer: if len(error_msgs) > 0: for msg in error_msgs: - count_error(engine_name, 'some results are invalids: ' + msg, secondary=True) + metrics.count_error(engine_name, 'some results are invalids: ' + msg, secondary=True) if engine_name in engines: - histogram_observe(standard_result_count, 'engine', engine_name, 'result', 'count') + metrics.HISTOGRAM_STORAGE.observe(standard_result_count, 'engine', engine_name, 'result', 'count') if not self.paging and standard_result_count > 0 and engine_name in engines and engines[engine_name].paging: self.paging = True @@ -354,7 +353,7 @@ class ResultContainer: score = result_score(result) result['score'] = score for result_engine in result['engines']: - counter_add(score, 'engine', result_engine, 'score') + metrics.COUNTER_STORAGE.add(score, 'engine', result_engine, 'score') results = sorted(self._merged_results, key=itemgetter('score'), reverse=True) diff --git a/searx/search/__init__.py b/searx/search/__init__.py index 9d337916c..846ad4341 100644 --- a/searx/search/__init__.py +++ b/searx/search/__init__.py @@ -17,7 +17,7 @@ from searx.plugins import plugins from searx.search.models import EngineRef, SearchQuery from searx.engines import load_engines from searx.network import initialize as initialize_network, check_network_configuration -from searx.metrics import initialize as initialize_metrics, counter_inc, histogram_observe_time +from searx import metrics from searx.search.processors import PROCESSORS, initialize as initialize_processors from searx.search.checker import initialize as initialize_checker @@ -31,7 +31,7 @@ def initialize(settings_engines=None, enable_checker=False, check_network=False, initialize_network(settings_engines, settings['outgoing']) if check_network: check_network_configuration() - initialize_metrics([engine['name'] for engine in settings_engines], enable_metrics) + metrics.initialize([engine['name'] for engine in settings_engines], enable_metrics) initialize_processors(settings_engines) if enable_checker: initialize_checker() @@ -98,7 +98,7 @@ class Search: if request_params is None: continue - counter_inc('engine', engineref.name, 'search', 'count', 'sent') + metrics.COUNTER_STORAGE.inc('engine', engineref.name, 'search', 'count', 'sent') # append request to list requests.append((engineref.name, self.search_query.query, request_params)) diff --git a/searx/search/checker/impl.py b/searx/search/checker/impl.py index bc5cdf968..f11465f89 100644 --- a/searx/search/checker/impl.py +++ b/searx/search/checker/impl.py @@ -19,7 +19,7 @@ from searx.utils import gen_useragent from searx.results import ResultContainer from searx.search.models import SearchQuery, EngineRef from searx.search.processors import EngineProcessor -from searx.metrics import counter_inc +from searx import metrics logger = logger.getChild('searx.search.checker') @@ -414,7 +414,7 @@ class Checker: engineref_category = search_query.engineref_list[0].category params = self.processor.get_params(search_query, engineref_category) if params is not None: - counter_inc('engine', search_query.engineref_list[0].name, 'search', 'count', 'sent') + metrics.COUNTER_STORAGE.inc('engine', search_query.engineref_list[0].name, 'search', 'count', 'sent') self.processor.search(search_query.query, params, result_container, default_timer(), 5) return result_container diff --git a/searx/search/processors/abstract.py b/searx/search/processors/abstract.py index d74616db0..b4905b218 100644 --- a/searx/search/processors/abstract.py +++ b/searx/search/processors/abstract.py @@ -13,7 +13,7 @@ from typing import Dict, Union from searx import settings, logger from searx.engines import engines from searx.network import get_time_for_thread, get_network -from searx.metrics import histogram_observe, counter_inc, count_exception, count_error +from searx import metrics from searx.exceptions import SearxEngineAccessDeniedException, SearxEngineResponseException from searx.utils import get_engine_from_settings @@ -95,11 +95,11 @@ class EngineProcessor(ABC): error_message = exception_or_message result_container.add_unresponsive_engine(self.engine_name, error_message) # metrics - counter_inc('engine', self.engine_name, 'search', 'count', 'error') + metrics.COUNTER_STORAGE.inc('engine', self.engine_name, 'search', 'count', 'error') if isinstance(exception_or_message, BaseException): - count_exception(self.engine_name, exception_or_message) + metrics.count_exception(self.engine_name, exception_or_message) else: - count_error(self.engine_name, exception_or_message) + metrics.count_error(self.engine_name, exception_or_message) # suspend the engine ? if suspend: suspended_time = None @@ -114,10 +114,10 @@ class EngineProcessor(ABC): page_load_time = get_time_for_thread() result_container.add_timing(self.engine_name, engine_time, page_load_time) # metrics - counter_inc('engine', self.engine_name, 'search', 'count', 'successful') - histogram_observe(engine_time, 'engine', self.engine_name, 'time', 'total') + metrics.COUNTER_STORAGE.inc('engine', self.engine_name, 'search', 'count', 'successful') + metrics.HISTOGRAM_STORAGE.observe(engine_time, 'engine', self.engine_name, 'time', 'total') if page_load_time is not None: - histogram_observe(page_load_time, 'engine', self.engine_name, 'time', 'http') + metrics.HISTOGRAM_STORAGE.observe(page_load_time, 'engine', self.engine_name, 'time', 'http') def extend_container(self, result_container, start_time, search_results): if getattr(threading.current_thread(), '_timeout', False): diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py index 17e9b6a96..927127a76 100644 --- a/searx/search/processors/online.py +++ b/searx/search/processors/online.py @@ -16,7 +16,8 @@ from searx.exceptions import ( SearxEngineCaptchaException, SearxEngineTooManyRequestsException, ) -from searx.metrics.error_recorder import count_error + +from searx import metrics from .abstract import EngineProcessor @@ -113,7 +114,7 @@ class OnlineProcessor(EngineProcessor): status_code = str(response.status_code or '') reason = response.reason_phrase or '' hostname = response.url.host - count_error( + metrics.count_error( self.engine_name, '{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects), (status_code, reason, hostname), diff --git a/searx/webapp.py b/searx/webapp.py index 929910ca5..16a4c1fe3 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -102,13 +102,8 @@ from searx.answerers import ( answerers, ask, ) -from searx.metrics import ( - get_engines_metrics, - get_engine_errors, - get_reliabilities, - histogram, - counter, -) +from searx import metrics + from searx.flaskfix import patch_application from searx.locales import ( @@ -983,15 +978,15 @@ def preferences(): stats = {} # pylint: disable=redefined-outer-name max_rate95 = 0 for _, e in filtered_engines.items(): - h = histogram('engine', e.name, 'time', 'total') + h = metrics.HISTOGRAM_STORAGE.get('engine', e.name, 'time', 'total') median = round(h.percentile(50), 1) if h.count > 0 else None # type: ignore rate80 = round(h.percentile(80), 1) if h.count > 0 else None # type: ignore rate95 = round(h.percentile(95), 1) if h.count > 0 else None # type: ignore max_rate95 = max(max_rate95, rate95 or 0) - result_count_sum = histogram('engine', e.name, 'result', 'count').sum - successful_count = counter('engine', e.name, 'search', 'count', 'successful') + result_count_sum = metrics.HISTOGRAM_STORAGE.get('engine', e.name, 'result', 'count').sum + successful_count = metrics.COUNTER_STORAGE.get('engine', e.name, 'search', 'count', 'successful') result_count = int(result_count_sum / float(successful_count)) if successful_count else 0 stats[e.name] = { @@ -1006,7 +1001,7 @@ def preferences(): # reliabilities reliabilities = {} - engine_errors = get_engine_errors(filtered_engines) + engine_errors = metrics.get_engine_errors(filtered_engines) checker_results = checker_get_result() checker_results = ( checker_results['engines'] if checker_results['status'] == 'ok' and 'engines' in checker_results else {} @@ -1015,7 +1010,7 @@ def preferences(): checker_result = checker_results.get(e.name, {}) checker_success = checker_result.get('success', True) errors = engine_errors.get(e.name) or [] - if counter('engine', e.name, 'search', 'count', 'sent') == 0: + if metrics.COUNTER_STORAGE.get('engine', e.name, 'search', 'count', 'sent') == 0: # no request reliablity = None elif checker_success and not errors: @@ -1222,8 +1217,8 @@ def stats(): checker_results['engines'] if checker_results['status'] == 'ok' and 'engines' in checker_results else {} ) - engine_metrics = get_engines_metrics(filtered_engines) - engine_reliabilities = get_reliabilities(filtered_engines, checker_results) + engine_metrics = metrics.get_engines_metrics(filtered_engines) + engine_reliabilities = metrics.get_reliabilities(filtered_engines, checker_results) if sort_order not in STATS_SORT_PARAMETERS: sort_order = 'name' @@ -1258,7 +1253,7 @@ def stats(): @app.route('/stats/errors', methods=['GET']) def stats_errors(): filtered_engines = dict(filter(lambda kv: request.preferences.validate_token(kv[1]), engines.items())) - result = get_engine_errors(filtered_engines) + result = metrics.get_engine_errors(filtered_engines) return jsonify(result)