[DRAFT] simplify searx.metrics implementation

- Moved the code of the wrapper functions to methods of the classes
  HistogramStorage and CounterStorage.

- Renamed global histogram and counter objects to HISTOGRAM_STORAGE and
  COUNTER_STORAGE.

- The imports of names from the metrics module in the application code have been
  reduced to:

      from searx import metrics

   By this convention the wrapper functions can be replaced by the globals from:

      metrics.HISTOGRAM_STORAGE
      metrics.COUNTER_STORAGE

- Commented out the context manager searx.metrics.histogram_observe_time; we do
  not have any usage of this context manager.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2022-09-21 15:17:02 +02:00
parent 0047f4226e
commit 2b69780f0d
8 changed files with 82 additions and 104 deletions

View file

@ -17,68 +17,42 @@ __all__ = [
"initialize", "initialize",
"get_engines_metrics", "get_engines_metrics",
"get_engine_errors", "get_engine_errors",
"histogram",
"histogram_observe",
"histogram_observe_time",
"counter",
"counter_inc",
"counter_add",
"count_error", "count_error",
"count_exception", "count_exception",
] ]
histogram_storage: Optional[HistogramStorage] = None HISTOGRAM_STORAGE: Optional[HistogramStorage] = None
counter_storage: Optional[CounterStorage] = None COUNTER_STORAGE: Optional[CounterStorage] = None
@contextlib.contextmanager # We do not have a usage of this context manager
def histogram_observe_time(*args): #
h = histogram_storage.get(*args) # @contextlib.contextmanager
before = default_timer() # def histogram_observe_time(*args):
yield before # h = histogram_storage.get(*args)
duration = default_timer() - before # before = default_timer()
if h: # yield before
h.observe(duration) # duration = default_timer() - before
else: # if h:
raise ValueError("histogram " + repr((*args,)) + " doesn't not exist") # h.observe(duration)
# else:
# raise ValueError("histogram " + repr((*args,)) + " doesn't not exist")
def histogram_observe(duration, *args):
histogram_storage.get(*args).observe(duration)
def histogram(*args, raise_on_not_found=True) -> Histogram:
h = histogram_storage.get(*args)
if raise_on_not_found and h is None:
raise ValueError("histogram " + repr((*args,)) + " doesn't not exist")
return h
def counter_inc(*args):
counter_storage.add(1, *args)
def counter_add(value, *args):
counter_storage.add(value, *args)
def counter(*args) -> int:
return counter_storage.get(*args)
def initialize(engine_names=None, enabled=True): def initialize(engine_names=None, enabled=True):
""" """
Initialize metrics Initialize metrics
""" """
global counter_storage, histogram_storage # pylint: disable=global-statement
global COUNTER_STORAGE, HISTOGRAM_STORAGE # pylint: disable=global-statement
if enabled: if enabled:
counter_storage = CounterStorage() COUNTER_STORAGE = CounterStorage()
histogram_storage = HistogramStorage() HISTOGRAM_STORAGE = HistogramStorage()
else: else:
counter_storage = VoidCounterStorage() COUNTER_STORAGE = VoidCounterStorage()
histogram_storage = HistogramStorage(histogram_class=VoidHistogram) HISTOGRAM_STORAGE = HistogramStorage(histogram_class=VoidHistogram)
# max_timeout = max of all the engine.timeout # max_timeout = max of all the engine.timeout
max_timeout = 2 max_timeout = 2
@ -93,19 +67,19 @@ def initialize(engine_names=None, enabled=True):
# engines # engines
for engine_name in engine_names or engines: for engine_name in engine_names or engines:
# search count # search count
counter_storage.configure('engine', engine_name, 'search', 'count', 'sent') COUNTER_STORAGE.configure('engine', engine_name, 'search', 'count', 'sent')
counter_storage.configure('engine', engine_name, 'search', 'count', 'successful') COUNTER_STORAGE.configure('engine', engine_name, 'search', 'count', 'successful')
# global counter of errors # global counter of errors
counter_storage.configure('engine', engine_name, 'search', 'count', 'error') COUNTER_STORAGE.configure('engine', engine_name, 'search', 'count', 'error')
# score of the engine # score of the engine
counter_storage.configure('engine', engine_name, 'score') COUNTER_STORAGE.configure('engine', engine_name, 'score')
# result count per requests # result count per requests
histogram_storage.configure(1, 100, 'engine', engine_name, 'result', 'count') HISTOGRAM_STORAGE.configure(1, 100, 'engine', engine_name, 'result', 'count')
# time doing HTTP requests # time doing HTTP requests
histogram_storage.configure(histogram_width, histogram_size, 'engine', engine_name, 'time', 'http') HISTOGRAM_STORAGE.configure(histogram_width, histogram_size, 'engine', engine_name, 'time', 'http')
# total time # total time
# .time.request and ...response times may overlap .time.http time. # .time.request and ...response times may overlap .time.http time.
histogram_storage.configure(histogram_width, histogram_size, 'engine', engine_name, 'time', 'total') HISTOGRAM_STORAGE.configure(histogram_width, histogram_size, 'engine', engine_name, 'time', 'total')
class EngineError(TypedDict): class EngineError(TypedDict):
@ -131,7 +105,7 @@ def get_engine_errors(engline_name_list) -> Dict[str, List[EngineError]]:
continue continue
error_stats = errors_per_engines[engine_name] error_stats = errors_per_engines[engine_name]
sent_search_count = max(counter('engine', engine_name, 'search', 'count', 'sent'), 1) sent_search_count = max(COUNTER_STORAGE.get('engine', engine_name, 'search', 'count', 'sent'), 1)
sorted_context_count_list = sorted(error_stats.items(), key=lambda context_count: context_count[1]) sorted_context_count_list = sorted(error_stats.items(), key=lambda context_count: context_count[1])
r = [] r = []
for context, count in sorted_context_count_list: for context, count in sorted_context_count_list:
@ -170,7 +144,7 @@ def get_reliabilities(engline_name_list, checker_results) -> Dict[str, EngineRel
checker_result = checker_results.get(engine_name, {}) checker_result = checker_results.get(engine_name, {})
checker_success = checker_result.get('success', True) checker_success = checker_result.get('success', True)
errors = engine_errors.get(engine_name) or [] errors = engine_errors.get(engine_name) or []
if counter('engine', engine_name, 'search', 'count', 'sent') == 0: if COUNTER_STORAGE.get('engine', engine_name, 'search', 'count', 'sent') == 0:
# no request # no request
reliablity = None reliablity = None
elif checker_success and not errors: elif checker_success and not errors:
@ -223,23 +197,23 @@ class EngineStatResult(TypedDict):
def get_engines_metrics(engine_name_list) -> EngineStatResult: def get_engines_metrics(engine_name_list) -> EngineStatResult:
assert counter_storage is not None assert COUNTER_STORAGE is not None
assert histogram_storage is not None assert HISTOGRAM_STORAGE is not None
list_time = [] list_time = []
max_time_total = max_result_count = None max_time_total = max_result_count = None
for engine_name in engine_name_list: for engine_name in engine_name_list:
sent_count = counter('engine', engine_name, 'search', 'count', 'sent') sent_count = COUNTER_STORAGE.get('engine', engine_name, 'search', 'count', 'sent')
if sent_count == 0: if sent_count == 0:
continue continue
result_count = histogram('engine', engine_name, 'result', 'count').percentile(50) result_count = HISTOGRAM_STORAGE.get('engine', engine_name, 'result', 'count').percentile(50)
result_count_sum = histogram('engine', engine_name, 'result', 'count').sum result_count_sum = HISTOGRAM_STORAGE.get('engine', engine_name, 'result', 'count').sum
successful_count = counter('engine', engine_name, 'search', 'count', 'successful') successful_count = COUNTER_STORAGE.get('engine', engine_name, 'search', 'count', 'successful')
time_total = histogram('engine', engine_name, 'time', 'total').percentile(50) time_total = HISTOGRAM_STORAGE.get('engine', engine_name, 'time', 'total').percentile(50)
max_time_total = max(time_total or 0, max_time_total or 0) max_time_total = max(time_total or 0, max_time_total or 0)
max_result_count = max(result_count or 0, max_result_count or 0) max_result_count = max(result_count or 0, max_result_count or 0)
@ -260,18 +234,18 @@ def get_engines_metrics(engine_name_list) -> EngineStatResult:
} }
if successful_count and result_count_sum: if successful_count and result_count_sum:
score = counter('engine', engine_name, 'score') score = COUNTER_STORAGE.get('engine', engine_name, 'score')
stats['score'] = score stats['score'] = score
stats['score_per_result'] = score / float(result_count_sum) stats['score_per_result'] = score / float(result_count_sum)
time_http = histogram('engine', engine_name, 'time', 'http').percentile(50) time_http = HISTOGRAM_STORAGE.get('engine', engine_name, 'time', 'http').percentile(50)
time_http_p80 = time_http_p95 = 0 time_http_p80 = time_http_p95 = 0
if time_http is not None: if time_http is not None:
time_http_p80 = histogram('engine', engine_name, 'time', 'http').percentile(80) time_http_p80 = HISTOGRAM_STORAGE.get('engine', engine_name, 'time', 'http').percentile(80)
time_http_p95 = histogram('engine', engine_name, 'time', 'http').percentile(95) time_http_p95 = HISTOGRAM_STORAGE.get('engine', engine_name, 'time', 'http').percentile(95)
stats['http'] = round(time_http, 1) stats['http'] = round(time_http, 1)
stats['http_p80'] = round(time_http_p80, 1) stats['http_p80'] = round(time_http_p80, 1)
@ -279,8 +253,8 @@ def get_engines_metrics(engine_name_list) -> EngineStatResult:
if time_total is not None: if time_total is not None:
time_total_p80 = histogram('engine', engine_name, 'time', 'total').percentile(80) time_total_p80 = HISTOGRAM_STORAGE.get('engine', engine_name, 'time', 'total').percentile(80)
time_total_p95 = histogram('engine', engine_name, 'time', 'total').percentile(95) time_total_p95 = HISTOGRAM_STORAGE.get('engine', engine_name, 'time', 'total').percentile(95)
stats['total'] = round(time_total, 1) stats['total'] = round(time_total, 1)
stats['total_p80'] = round(time_total_p80, 1) stats['total_p80'] = round(time_total_p80, 1)

View file

@ -111,8 +111,14 @@ class HistogramStorage:
self.measures[args] = measure self.measures[args] = measure
return measure return measure
def get(self, *args) -> Optional[Histogram]: def get(self, *args, raise_on_not_found=True) -> Optional[Histogram]:
return self.measures.get(args, None) h = self.measures.get(args, None)
if raise_on_not_found and h is None:
raise ValueError("histogram " + repr((*args,)) + " doesn't not exist")
return h
def observe(self, duration, *args):
self.get(*args).observe(duration)
def dump(self): def dump(self):
logger.debug("Histograms:") logger.debug("Histograms:")
@ -140,6 +146,9 @@ class CounterStorage:
def get(self, *args) -> int: def get(self, *args) -> int:
return self.counters[args] return self.counters[args]
def inc(self, *args):
self.add(1, *args)
def add(self, value, *args): def add(self, value, *args):
with self.lock: with self.lock:
self.counters[args] += value self.counters[args] += value

View file

@ -7,8 +7,7 @@ from urllib.parse import urlparse, unquote
from searx import logger from searx import logger
from searx.engines import engines from searx.engines import engines
from searx.metrics import histogram_observe, counter_add, count_error from searx import metrics
CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U) CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U) WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
@ -226,10 +225,10 @@ class ResultContainer:
if len(error_msgs) > 0: if len(error_msgs) > 0:
for msg in error_msgs: for msg in error_msgs:
count_error(engine_name, 'some results are invalids: ' + msg, secondary=True) metrics.count_error(engine_name, 'some results are invalids: ' + msg, secondary=True)
if engine_name in engines: if engine_name in engines:
histogram_observe(standard_result_count, 'engine', engine_name, 'result', 'count') metrics.HISTOGRAM_STORAGE.observe(standard_result_count, 'engine', engine_name, 'result', 'count')
if not self.paging and standard_result_count > 0 and engine_name in engines and engines[engine_name].paging: if not self.paging and standard_result_count > 0 and engine_name in engines and engines[engine_name].paging:
self.paging = True self.paging = True
@ -354,7 +353,7 @@ class ResultContainer:
score = result_score(result) score = result_score(result)
result['score'] = score result['score'] = score
for result_engine in result['engines']: for result_engine in result['engines']:
counter_add(score, 'engine', result_engine, 'score') metrics.COUNTER_STORAGE.add(score, 'engine', result_engine, 'score')
results = sorted(self._merged_results, key=itemgetter('score'), reverse=True) results = sorted(self._merged_results, key=itemgetter('score'), reverse=True)

View file

@ -17,7 +17,7 @@ from searx.plugins import plugins
from searx.search.models import EngineRef, SearchQuery from searx.search.models import EngineRef, SearchQuery
from searx.engines import load_engines from searx.engines import load_engines
from searx.network import initialize as initialize_network, check_network_configuration from searx.network import initialize as initialize_network, check_network_configuration
from searx.metrics import initialize as initialize_metrics, counter_inc, histogram_observe_time from searx import metrics
from searx.search.processors import PROCESSORS, initialize as initialize_processors from searx.search.processors import PROCESSORS, initialize as initialize_processors
from searx.search.checker import initialize as initialize_checker from searx.search.checker import initialize as initialize_checker
@ -31,7 +31,7 @@ def initialize(settings_engines=None, enable_checker=False, check_network=False,
initialize_network(settings_engines, settings['outgoing']) initialize_network(settings_engines, settings['outgoing'])
if check_network: if check_network:
check_network_configuration() check_network_configuration()
initialize_metrics([engine['name'] for engine in settings_engines], enable_metrics) metrics.initialize([engine['name'] for engine in settings_engines], enable_metrics)
initialize_processors(settings_engines) initialize_processors(settings_engines)
if enable_checker: if enable_checker:
initialize_checker() initialize_checker()
@ -98,7 +98,7 @@ class Search:
if request_params is None: if request_params is None:
continue continue
counter_inc('engine', engineref.name, 'search', 'count', 'sent') metrics.COUNTER_STORAGE.inc('engine', engineref.name, 'search', 'count', 'sent')
# append request to list # append request to list
requests.append((engineref.name, self.search_query.query, request_params)) requests.append((engineref.name, self.search_query.query, request_params))

View file

@ -19,7 +19,7 @@ from searx.utils import gen_useragent
from searx.results import ResultContainer from searx.results import ResultContainer
from searx.search.models import SearchQuery, EngineRef from searx.search.models import SearchQuery, EngineRef
from searx.search.processors import EngineProcessor from searx.search.processors import EngineProcessor
from searx.metrics import counter_inc from searx import metrics
logger = logger.getChild('searx.search.checker') logger = logger.getChild('searx.search.checker')
@ -414,7 +414,7 @@ class Checker:
engineref_category = search_query.engineref_list[0].category engineref_category = search_query.engineref_list[0].category
params = self.processor.get_params(search_query, engineref_category) params = self.processor.get_params(search_query, engineref_category)
if params is not None: if params is not None:
counter_inc('engine', search_query.engineref_list[0].name, 'search', 'count', 'sent') metrics.COUNTER_STORAGE.inc('engine', search_query.engineref_list[0].name, 'search', 'count', 'sent')
self.processor.search(search_query.query, params, result_container, default_timer(), 5) self.processor.search(search_query.query, params, result_container, default_timer(), 5)
return result_container return result_container

View file

@ -13,7 +13,7 @@ from typing import Dict, Union
from searx import settings, logger from searx import settings, logger
from searx.engines import engines from searx.engines import engines
from searx.network import get_time_for_thread, get_network from searx.network import get_time_for_thread, get_network
from searx.metrics import histogram_observe, counter_inc, count_exception, count_error from searx import metrics
from searx.exceptions import SearxEngineAccessDeniedException, SearxEngineResponseException from searx.exceptions import SearxEngineAccessDeniedException, SearxEngineResponseException
from searx.utils import get_engine_from_settings from searx.utils import get_engine_from_settings
@ -95,11 +95,11 @@ class EngineProcessor(ABC):
error_message = exception_or_message error_message = exception_or_message
result_container.add_unresponsive_engine(self.engine_name, error_message) result_container.add_unresponsive_engine(self.engine_name, error_message)
# metrics # metrics
counter_inc('engine', self.engine_name, 'search', 'count', 'error') metrics.COUNTER_STORAGE.inc('engine', self.engine_name, 'search', 'count', 'error')
if isinstance(exception_or_message, BaseException): if isinstance(exception_or_message, BaseException):
count_exception(self.engine_name, exception_or_message) metrics.count_exception(self.engine_name, exception_or_message)
else: else:
count_error(self.engine_name, exception_or_message) metrics.count_error(self.engine_name, exception_or_message)
# suspend the engine ? # suspend the engine ?
if suspend: if suspend:
suspended_time = None suspended_time = None
@ -114,10 +114,10 @@ class EngineProcessor(ABC):
page_load_time = get_time_for_thread() page_load_time = get_time_for_thread()
result_container.add_timing(self.engine_name, engine_time, page_load_time) result_container.add_timing(self.engine_name, engine_time, page_load_time)
# metrics # metrics
counter_inc('engine', self.engine_name, 'search', 'count', 'successful') metrics.COUNTER_STORAGE.inc('engine', self.engine_name, 'search', 'count', 'successful')
histogram_observe(engine_time, 'engine', self.engine_name, 'time', 'total') metrics.HISTOGRAM_STORAGE.observe(engine_time, 'engine', self.engine_name, 'time', 'total')
if page_load_time is not None: if page_load_time is not None:
histogram_observe(page_load_time, 'engine', self.engine_name, 'time', 'http') metrics.HISTOGRAM_STORAGE.observe(page_load_time, 'engine', self.engine_name, 'time', 'http')
def extend_container(self, result_container, start_time, search_results): def extend_container(self, result_container, start_time, search_results):
if getattr(threading.current_thread(), '_timeout', False): if getattr(threading.current_thread(), '_timeout', False):

View file

@ -16,7 +16,8 @@ from searx.exceptions import (
SearxEngineCaptchaException, SearxEngineCaptchaException,
SearxEngineTooManyRequestsException, SearxEngineTooManyRequestsException,
) )
from searx.metrics.error_recorder import count_error
from searx import metrics
from .abstract import EngineProcessor from .abstract import EngineProcessor
@ -113,7 +114,7 @@ class OnlineProcessor(EngineProcessor):
status_code = str(response.status_code or '') status_code = str(response.status_code or '')
reason = response.reason_phrase or '' reason = response.reason_phrase or ''
hostname = response.url.host hostname = response.url.host
count_error( metrics.count_error(
self.engine_name, self.engine_name,
'{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects), '{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects),
(status_code, reason, hostname), (status_code, reason, hostname),

View file

@ -102,13 +102,8 @@ from searx.answerers import (
answerers, answerers,
ask, ask,
) )
from searx.metrics import ( from searx import metrics
get_engines_metrics,
get_engine_errors,
get_reliabilities,
histogram,
counter,
)
from searx.flaskfix import patch_application from searx.flaskfix import patch_application
from searx.locales import ( from searx.locales import (
@ -983,15 +978,15 @@ def preferences():
stats = {} # pylint: disable=redefined-outer-name stats = {} # pylint: disable=redefined-outer-name
max_rate95 = 0 max_rate95 = 0
for _, e in filtered_engines.items(): for _, e in filtered_engines.items():
h = histogram('engine', e.name, 'time', 'total') h = metrics.HISTOGRAM_STORAGE.get('engine', e.name, 'time', 'total')
median = round(h.percentile(50), 1) if h.count > 0 else None # type: ignore median = round(h.percentile(50), 1) if h.count > 0 else None # type: ignore
rate80 = round(h.percentile(80), 1) if h.count > 0 else None # type: ignore rate80 = round(h.percentile(80), 1) if h.count > 0 else None # type: ignore
rate95 = round(h.percentile(95), 1) if h.count > 0 else None # type: ignore rate95 = round(h.percentile(95), 1) if h.count > 0 else None # type: ignore
max_rate95 = max(max_rate95, rate95 or 0) max_rate95 = max(max_rate95, rate95 or 0)
result_count_sum = histogram('engine', e.name, 'result', 'count').sum result_count_sum = metrics.HISTOGRAM_STORAGE.get('engine', e.name, 'result', 'count').sum
successful_count = counter('engine', e.name, 'search', 'count', 'successful') successful_count = metrics.COUNTER_STORAGE.get('engine', e.name, 'search', 'count', 'successful')
result_count = int(result_count_sum / float(successful_count)) if successful_count else 0 result_count = int(result_count_sum / float(successful_count)) if successful_count else 0
stats[e.name] = { stats[e.name] = {
@ -1006,7 +1001,7 @@ def preferences():
# reliabilities # reliabilities
reliabilities = {} reliabilities = {}
engine_errors = get_engine_errors(filtered_engines) engine_errors = metrics.get_engine_errors(filtered_engines)
checker_results = checker_get_result() checker_results = checker_get_result()
checker_results = ( checker_results = (
checker_results['engines'] if checker_results['status'] == 'ok' and 'engines' in checker_results else {} checker_results['engines'] if checker_results['status'] == 'ok' and 'engines' in checker_results else {}
@ -1015,7 +1010,7 @@ def preferences():
checker_result = checker_results.get(e.name, {}) checker_result = checker_results.get(e.name, {})
checker_success = checker_result.get('success', True) checker_success = checker_result.get('success', True)
errors = engine_errors.get(e.name) or [] errors = engine_errors.get(e.name) or []
if counter('engine', e.name, 'search', 'count', 'sent') == 0: if metrics.COUNTER_STORAGE.get('engine', e.name, 'search', 'count', 'sent') == 0:
# no request # no request
reliablity = None reliablity = None
elif checker_success and not errors: elif checker_success and not errors:
@ -1222,8 +1217,8 @@ def stats():
checker_results['engines'] if checker_results['status'] == 'ok' and 'engines' in checker_results else {} checker_results['engines'] if checker_results['status'] == 'ok' and 'engines' in checker_results else {}
) )
engine_metrics = get_engines_metrics(filtered_engines) engine_metrics = metrics.get_engines_metrics(filtered_engines)
engine_reliabilities = get_reliabilities(filtered_engines, checker_results) engine_reliabilities = metrics.get_reliabilities(filtered_engines, checker_results)
if sort_order not in STATS_SORT_PARAMETERS: if sort_order not in STATS_SORT_PARAMETERS:
sort_order = 'name' sort_order = 'name'
@ -1258,7 +1253,7 @@ def stats():
@app.route('/stats/errors', methods=['GET']) @app.route('/stats/errors', methods=['GET'])
def stats_errors(): def stats_errors():
filtered_engines = dict(filter(lambda kv: request.preferences.validate_token(kv[1]), engines.items())) filtered_engines = dict(filter(lambda kv: request.preferences.validate_token(kv[1]), engines.items()))
result = get_engine_errors(filtered_engines) result = metrics.get_engine_errors(filtered_engines)
return jsonify(result) return jsonify(result)