Merge pull request #297 from dalf/engine-logger-enh

debug mode: more readable logging
This commit is contained in:
Alexandre Flament 2021-09-14 07:06:28 +02:00 committed by GitHub
commit 602cbc2c99
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 153 additions and 63 deletions

View File

@ -29,7 +29,7 @@ run: install
sleep 2 ; \
xdg-open http://127.0.0.1:8888/ ; \
) &
SEARX_DEBUG=1 ./manage pyenv.cmd python ./searx/webapp.py
SEARX_DEBUG=1 ./manage pyenv.cmd python -m searx.webapp
PHONY += install uninstall
install uninstall:

View File

@ -17,3 +17,4 @@ linuxdoc==20210324
aiounittest==1.4.0
yamllint==1.26.3
wlc==1.12
coloredlogs==15.0.1

View File

@ -2,13 +2,24 @@
# lint: pylint
# pylint: disable=missing-module-docstring
import sys
import os
from os.path import dirname, abspath
import logging
import searx.unixthreadname
import searx.settings_loader
from searx.settings_defaults import settings_set_defaults
# Debug
LOG_FORMAT_DEBUG = '%(levelname)-7s %(name)-30.30s: %(message)s'
# Production
LOG_FORMAT_PROD = '%(asctime)-15s %(levelname)s:%(name)s: %(message)s'
LOG_LEVEL_PROD = logging.WARNING
searx_dir = abspath(dirname(__file__))
searx_parent_dir = abspath(dirname(dirname(__file__)))
settings, settings_load_message = searx.settings_loader.load_settings()
@ -16,22 +27,6 @@ settings, settings_load_message = searx.settings_loader.load_settings()
if settings is not None:
settings = settings_set_defaults(settings)
searx_debug = settings['general']['debug']
if searx_debug:
logging.basicConfig(level=logging.DEBUG)
else:
logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger('searx')
logger.info(settings_load_message)
# log max_request_timeout
max_request_timeout = settings['outgoing']['max_request_timeout']
if max_request_timeout is None:
logger.info('max_request_timeout=%s', repr(max_request_timeout))
else:
logger.info('max_request_timeout=%i second(s)', max_request_timeout)
_unset = object()
def get_setting(name, default=_unset):
@ -53,3 +48,63 @@ def get_setting(name, default=_unset):
break
return value
def is_color_terminal():
if os.getenv('TERM') in ('dumb', 'unknown'):
return False
return sys.stdout.isatty()
def logging_config_debug():
try:
import coloredlogs # pylint: disable=import-outside-toplevel
except ImportError:
coloredlogs = None
log_level = os.environ.get('SEARX_DEBUG_LOG_LEVEL', 'DEBUG')
if coloredlogs and is_color_terminal():
level_styles = {
'spam': {'color': 'green', 'faint': True},
'debug': {},
'notice': {'color': 'magenta'},
'success': {'bold': True, 'color': 'green'},
'info': {'bold': True, 'color': 'cyan'},
'warning': {'color': 'yellow'},
'error': {'color': 'red'},
'critical': {'bold': True, 'color': 'red'},
}
field_styles = {
'asctime': {'color': 'green'},
'hostname': {'color': 'magenta'},
'levelname': {'color': 8},
'name': {'color': 8},
'programname': {'color': 'cyan'},
'username': {'color': 'yellow'}
}
coloredlogs.install(
level=log_level,
level_styles=level_styles,
field_styles=field_styles,
fmt=LOG_FORMAT_DEBUG
)
else:
logging.basicConfig(level=logging.getLevelName(log_level), format=LOG_FORMAT_DEBUG)
searx_debug = settings['general']['debug']
if searx_debug:
logging_config_debug()
else:
logging.basicConfig(level=LOG_LEVEL_PROD, format=LOG_FORMAT_PROD)
logging.root.setLevel(level=LOG_LEVEL_PROD)
logging.getLogger('werkzeug').setLevel(level=LOG_LEVEL_PROD)
logger = logging.getLogger('searx')
logger.info(settings_load_message)
# log max_request_timeout
max_request_timeout = settings['outgoing']['max_request_timeout']
if max_request_timeout is None:
logger.info('max_request_timeout=%s', repr(max_request_timeout))
else:
logger.info('max_request_timeout=%i second(s)', max_request_timeout)

View File

@ -173,7 +173,6 @@ def request(query, params):
params['url'] = search_url.format(**fargs)
params['soft_max_redirects'] = soft_max_redirects
logger.debug("query_url --> %s", params['url'])
return params

View File

@ -5,7 +5,8 @@ from urllib.parse import urlparse
from httpx import HTTPError, HTTPStatusError
from searx.exceptions import (SearxXPathSyntaxException, SearxEngineXPathException, SearxEngineAPIException,
SearxEngineAccessDeniedException)
from searx import logger, searx_parent_dir
from searx import searx_parent_dir
from searx.engines import engines
errors_per_engines = {}
@ -47,7 +48,7 @@ class ErrorContext:
def add_error_context(engine_name: str, error_context: ErrorContext) -> None:
errors_for_engine = errors_per_engines.setdefault(engine_name, {})
errors_for_engine[error_context] = errors_for_engine.get(error_context, 0) + 1
logger.debug('%s: %s', engine_name, str(error_context))
engines[engine_name].logger.warning('%s', str(error_context))
def get_trace(traces):

View File

@ -194,7 +194,7 @@ def new_client(
# pylint: disable=too-many-arguments
enable_http, verify, enable_http2,
max_connections, max_keepalive_connections, keepalive_expiry,
proxies, local_address, retries, max_redirects ):
proxies, local_address, retries, max_redirects, hook_log_response ):
limit = httpx.Limits(
max_connections=max_connections,
max_keepalive_connections=max_keepalive_connections,
@ -221,7 +221,17 @@ def new_client(
mounts['http://'] = AsyncHTTPTransportNoHttp()
transport = get_transport(verify, enable_http2, local_address, None, limit, retries)
return httpx.AsyncClient(transport=transport, mounts=mounts, max_redirects=max_redirects)
event_hooks = None
if hook_log_response:
event_hooks = {'response': [ hook_log_response ]}
return httpx.AsyncClient(
transport=transport,
mounts=mounts,
max_redirects=max_redirects,
event_hooks=event_hooks,
)
def get_loop():
@ -231,7 +241,7 @@ def get_loop():
def init():
# log
for logger_name in ('hpack.hpack', 'hpack.table'):
for logger_name in ('hpack.hpack', 'hpack.table', 'httpx._client'):
logging.getLogger(logger_name).setLevel(logging.WARNING)
# loop

View File

@ -10,9 +10,11 @@ from itertools import cycle
import httpx
from searx import logger, searx_debug
from .client import new_client, get_loop
logger = logger.getChild('network')
DEFAULT_NAME = '__DEFAULT__'
NETWORKS = {}
# requests compatibility when reading proxy settings from settings.yml
@ -41,7 +43,7 @@ class Network:
'enable_http', 'verify', 'enable_http2',
'max_connections', 'max_keepalive_connections', 'keepalive_expiry',
'local_addresses', 'proxies', 'max_redirects', 'retries', 'retry_on_http_error',
'_local_addresses_cycle', '_proxies_cycle', '_clients'
'_local_addresses_cycle', '_proxies_cycle', '_clients', '_logger'
)
def __init__(
@ -57,7 +59,8 @@ class Network:
local_addresses=None,
retries=0,
retry_on_http_error=None,
max_redirects=30 ):
max_redirects=30,
logger_name=None):
self.enable_http = enable_http
self.verify = verify
@ -73,6 +76,7 @@ class Network:
self._local_addresses_cycle = self.get_ipaddress_cycle()
self._proxies_cycle = self.get_proxy_cycles()
self._clients = {}
self._logger = logger.getChild(logger_name) if logger_name else logger
self.check_parameters()
def check_parameters(self):
@ -130,12 +134,25 @@ class Network:
# pylint: disable=stop-iteration-return
yield tuple((pattern, next(proxy_url_cycle)) for pattern, proxy_url_cycle in proxy_settings.items())
async def log_response(self, response: httpx.Response):
request = response.request
status = f"{response.status_code} {response.reason_phrase}"
response_line = f"{response.http_version} {status}"
if hasattr(response, "_elapsed"):
elapsed_time = f"{response.elapsed.total_seconds()} sec"
else:
elapsed_time = "stream"
self._logger.debug(
f'HTTP Request: {request.method} {request.url} "{response_line}" ({elapsed_time})'
)
def get_client(self, verify=None, max_redirects=None):
verify = self.verify if verify is None else verify
max_redirects = self.max_redirects if max_redirects is None else max_redirects
local_address = next(self._local_addresses_cycle)
proxies = next(self._proxies_cycle) # is a tuple so it can be part of the key
key = (verify, max_redirects, local_address, proxies)
hook_log_response = self.log_response if searx_debug else None
if key not in self._clients or self._clients[key].is_closed:
self._clients[key] = new_client(
self.enable_http,
@ -147,7 +164,8 @@ class Network:
dict(proxies),
local_address,
0,
max_redirects
max_redirects,
hook_log_response
)
return self._clients[key]
@ -243,11 +261,13 @@ def initialize(settings_engines=None, settings_outgoing=None):
'retry_on_http_error': None,
}
def new_network(params):
def new_network(params, logger_name=None):
nonlocal default_params
result = {}
result.update(default_params)
result.update(params)
if logger_name:
result['logger_name'] = logger_name
return Network(**result)
def iter_networks():
@ -263,13 +283,13 @@ def initialize(settings_engines=None, settings_outgoing=None):
if NETWORKS:
done()
NETWORKS.clear()
NETWORKS[DEFAULT_NAME] = new_network({})
NETWORKS['ipv4'] = new_network({'local_addresses': '0.0.0.0'})
NETWORKS['ipv6'] = new_network({'local_addresses': '::'})
NETWORKS[DEFAULT_NAME] = new_network({}, logger_name='default')
NETWORKS['ipv4'] = new_network({'local_addresses': '0.0.0.0'}, logger_name='ipv4')
NETWORKS['ipv6'] = new_network({'local_addresses': '::'}, logger_name='ipv6')
# define networks from outgoing.networks
for network_name, network in settings_outgoing['networks'].items():
NETWORKS[network_name] = new_network(network)
NETWORKS[network_name] = new_network(network, logger_name=network_name)
# define networks from engines.[i].network (except references)
for engine_name, engine, network in iter_networks():
@ -280,9 +300,9 @@ def initialize(settings_engines=None, settings_outgoing=None):
network[attribute_name] = getattr(engine, attribute_name)
else:
network[attribute_name] = attribute_value
NETWORKS[engine_name] = new_network(network)
NETWORKS[engine_name] = new_network(network, logger_name=engine_name)
elif isinstance(network, dict):
NETWORKS[engine_name] = new_network(network)
NETWORKS[engine_name] = new_network(network, logger_name=engine_name)
# define networks from engines.[i].network (references)
for engine_name, engine, network in iter_networks():
@ -295,7 +315,7 @@ def initialize(settings_engines=None, settings_outgoing=None):
if 'image_proxy' not in NETWORKS:
image_proxy_params = default_params.copy()
image_proxy_params['enable_http2'] = False
NETWORKS['image_proxy'] = new_network(image_proxy_params)
NETWORKS['image_proxy'] = new_network(image_proxy_params, logger_name='image_proxy')
@atexit.register

View File

@ -147,7 +147,7 @@ class Search:
if th.is_alive():
th._timeout = True
self.result_container.add_unresponsive_engine(th._engine_name, 'timeout')
logger.warning('engine timeout: {0}'.format(th._engine_name))
PROCESSORS[th._engine_name].logger.error('engine timeout')
def search_standard(self):
"""

View File

@ -65,6 +65,6 @@ def initialize(engine_list):
processor = get_processor(engine, engine_name)
initialize_processor(processor)
if processor is None:
logger.error('Error get processor for engine %s', engine_name)
engine.logger.error('Error get processor for engine %s', engine_name)
else:
PROCESSORS[engine_name] = processor

View File

@ -9,8 +9,8 @@ import threading
from abc import abstractmethod, ABC
from timeit import default_timer
from searx import logger
from searx.engines import settings
from searx import settings, logger
from searx.engines import engines
from searx.network import get_time_for_thread, get_network
from searx.metrics import histogram_observe, counter_inc, count_exception, count_error
from searx.exceptions import SearxEngineAccessDeniedException, SearxEngineResponseException
@ -43,7 +43,7 @@ class SuspendedStatus:
self.continuous_errors * settings['search']['ban_time_on_fail'])
self.suspend_end_time = default_timer() + suspended_time
self.suspend_reason = suspend_reason
logger.debug('Suspend engine for %i seconds', suspended_time)
logger.debug('Suspend for %i seconds', suspended_time)
def resume(self):
with self.lock:
@ -56,11 +56,12 @@ class SuspendedStatus:
class EngineProcessor(ABC):
"""Base classes used for all types of reqest processores."""
__slots__ = 'engine', 'engine_name', 'lock', 'suspended_status'
__slots__ = 'engine', 'engine_name', 'lock', 'suspended_status', 'logger'
def __init__(self, engine, engine_name):
self.engine = engine
self.engine_name = engine_name
self.logger = engines[engine_name].logger
key = get_network(self.engine_name)
key = id(key) if key else self.engine_name
self.suspended_status = SUSPENDED_STATUS.setdefault(key, SuspendedStatus())
@ -69,11 +70,11 @@ class EngineProcessor(ABC):
try:
self.engine.init(get_engine_from_settings(self.engine_name))
except SearxEngineResponseException as exc:
logger.warn('%s engine: Fail to initialize // %s', self.engine_name, exc)
self.logger.warn('Fail to initialize // %s', exc)
except Exception: # pylint: disable=broad-except
logger.exception('%s engine: Fail to initialize', self.engine_name)
self.logger.exception('Fail to initialize')
else:
logger.debug('%s engine: Initialized', self.engine_name)
self.logger.debug('Initialized')
@property
def has_initialize_function(self):

View File

@ -5,10 +5,8 @@
"""
from searx import logger
from .abstract import EngineProcessor
logger = logger.getChild('searx.search.processor.offline')
class OfflineProcessor(EngineProcessor):
"""Processor class used by ``offline`` engines"""
@ -24,7 +22,7 @@ class OfflineProcessor(EngineProcessor):
self.extend_container(result_container, start_time, search_results)
except ValueError as e:
# do not record the error
logger.exception('engine {0} : invalid input : {1}'.format(self.engine_name, e))
self.logger.exception('engine {0} : invalid input : {1}'.format(self.engine_name, e))
except Exception as e: # pylint: disable=broad-except
self.handle_exception(result_container, e)
logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e))
self.logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e))

View File

@ -10,7 +10,6 @@ import asyncio
import httpx
import searx.network
from searx import logger
from searx.utils import gen_useragent
from searx.exceptions import (
SearxEngineAccessDeniedException,
@ -20,7 +19,6 @@ from searx.exceptions import (
from searx.metrics.error_recorder import count_error
from .abstract import EngineProcessor
logger = logger.getChild('searx.search.processor.online')
def default_request_params():
"""Default request parameters for ``online`` engines."""
@ -146,31 +144,37 @@ class OnlineProcessor(EngineProcessor):
except (httpx.TimeoutException, asyncio.TimeoutError) as e:
# requests timeout (connect or read)
self.handle_exception(result_container, e, suspend=True)
logger.error("engine {0} : HTTP requests timeout"
"(search duration : {1} s, timeout: {2} s) : {3}"
.format(self.engine_name, default_timer() - start_time,
timeout_limit,
e.__class__.__name__))
self.logger.error(
"HTTP requests timeout (search duration : {0} s, timeout: {1} s) : {2}"
.format(
default_timer() - start_time,
timeout_limit,
e.__class__.__name__
)
)
except (httpx.HTTPError, httpx.StreamError) as e:
# other requests exception
self.handle_exception(result_container, e, suspend=True)
logger.exception("engine {0} : requests exception"
"(search duration : {1} s, timeout: {2} s) : {3}"
.format(self.engine_name, default_timer() - start_time,
timeout_limit,
e))
self.logger.exception(
"requests exception (search duration : {0} s, timeout: {1} s) : {2}"
.format(
default_timer() - start_time,
timeout_limit,
e
)
)
except SearxEngineCaptchaException as e:
self.handle_exception(result_container, e, suspend=True)
logger.exception('engine {0} : CAPTCHA'.format(self.engine_name))
self.logger.exception('CAPTCHA')
except SearxEngineTooManyRequestsException as e:
self.handle_exception(result_container, e, suspend=True)
logger.exception('engine {0} : Too many requests'.format(self.engine_name))
self.logger.exception('Too many requests')
except SearxEngineAccessDeniedException as e:
self.handle_exception(result_container, e, suspend=True)
logger.exception('engine {0} : Searx is blocked'.format(self.engine_name))
self.logger.exception('Searx is blocked')
except Exception as e: # pylint: disable=broad-except
self.handle_exception(result_container, e)
logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e))
self.logger.exception('exception : {0}'.format(e))
def get_default_tests(self):
tests = {}

View File

@ -3,6 +3,7 @@ import os
import aiounittest
os.environ['SEARX_DEBUG'] = '1'
os.environ['SEARX_DEBUG_LOG_LEVEL'] = 'WARNING'
os.environ['SEARX_DISABLE_ETC_SETTINGS'] = '1'
os.environ.pop('SEARX_SETTINGS_PATH', None)