forked from zaclys/searxng
Merge pull request #297 from dalf/engine-logger-enh
debug mode: more readable logging
This commit is contained in:
commit
602cbc2c99
2
Makefile
2
Makefile
|
@ -29,7 +29,7 @@ run: install
|
|||
sleep 2 ; \
|
||||
xdg-open http://127.0.0.1:8888/ ; \
|
||||
) &
|
||||
SEARX_DEBUG=1 ./manage pyenv.cmd python ./searx/webapp.py
|
||||
SEARX_DEBUG=1 ./manage pyenv.cmd python -m searx.webapp
|
||||
|
||||
PHONY += install uninstall
|
||||
install uninstall:
|
||||
|
|
|
@ -17,3 +17,4 @@ linuxdoc==20210324
|
|||
aiounittest==1.4.0
|
||||
yamllint==1.26.3
|
||||
wlc==1.12
|
||||
coloredlogs==15.0.1
|
||||
|
|
|
@ -2,13 +2,24 @@
|
|||
# lint: pylint
|
||||
# pylint: disable=missing-module-docstring
|
||||
|
||||
import sys
|
||||
import os
|
||||
from os.path import dirname, abspath
|
||||
|
||||
import logging
|
||||
|
||||
import searx.unixthreadname
|
||||
import searx.settings_loader
|
||||
from searx.settings_defaults import settings_set_defaults
|
||||
|
||||
|
||||
# Debug
|
||||
LOG_FORMAT_DEBUG = '%(levelname)-7s %(name)-30.30s: %(message)s'
|
||||
|
||||
# Production
|
||||
LOG_FORMAT_PROD = '%(asctime)-15s %(levelname)s:%(name)s: %(message)s'
|
||||
LOG_LEVEL_PROD = logging.WARNING
|
||||
|
||||
searx_dir = abspath(dirname(__file__))
|
||||
searx_parent_dir = abspath(dirname(dirname(__file__)))
|
||||
settings, settings_load_message = searx.settings_loader.load_settings()
|
||||
|
@ -16,22 +27,6 @@ settings, settings_load_message = searx.settings_loader.load_settings()
|
|||
if settings is not None:
|
||||
settings = settings_set_defaults(settings)
|
||||
|
||||
searx_debug = settings['general']['debug']
|
||||
if searx_debug:
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
else:
|
||||
logging.basicConfig(level=logging.WARNING)
|
||||
|
||||
logger = logging.getLogger('searx')
|
||||
logger.info(settings_load_message)
|
||||
|
||||
# log max_request_timeout
|
||||
max_request_timeout = settings['outgoing']['max_request_timeout']
|
||||
if max_request_timeout is None:
|
||||
logger.info('max_request_timeout=%s', repr(max_request_timeout))
|
||||
else:
|
||||
logger.info('max_request_timeout=%i second(s)', max_request_timeout)
|
||||
|
||||
_unset = object()
|
||||
|
||||
def get_setting(name, default=_unset):
|
||||
|
@ -53,3 +48,63 @@ def get_setting(name, default=_unset):
|
|||
break
|
||||
|
||||
return value
|
||||
|
||||
|
||||
def is_color_terminal():
|
||||
if os.getenv('TERM') in ('dumb', 'unknown'):
|
||||
return False
|
||||
return sys.stdout.isatty()
|
||||
|
||||
|
||||
def logging_config_debug():
|
||||
try:
|
||||
import coloredlogs # pylint: disable=import-outside-toplevel
|
||||
except ImportError:
|
||||
coloredlogs = None
|
||||
|
||||
log_level = os.environ.get('SEARX_DEBUG_LOG_LEVEL', 'DEBUG')
|
||||
if coloredlogs and is_color_terminal():
|
||||
level_styles = {
|
||||
'spam': {'color': 'green', 'faint': True},
|
||||
'debug': {},
|
||||
'notice': {'color': 'magenta'},
|
||||
'success': {'bold': True, 'color': 'green'},
|
||||
'info': {'bold': True, 'color': 'cyan'},
|
||||
'warning': {'color': 'yellow'},
|
||||
'error': {'color': 'red'},
|
||||
'critical': {'bold': True, 'color': 'red'},
|
||||
}
|
||||
field_styles = {
|
||||
'asctime': {'color': 'green'},
|
||||
'hostname': {'color': 'magenta'},
|
||||
'levelname': {'color': 8},
|
||||
'name': {'color': 8},
|
||||
'programname': {'color': 'cyan'},
|
||||
'username': {'color': 'yellow'}
|
||||
}
|
||||
coloredlogs.install(
|
||||
level=log_level,
|
||||
level_styles=level_styles,
|
||||
field_styles=field_styles,
|
||||
fmt=LOG_FORMAT_DEBUG
|
||||
)
|
||||
else:
|
||||
logging.basicConfig(level=logging.getLevelName(log_level), format=LOG_FORMAT_DEBUG)
|
||||
|
||||
|
||||
searx_debug = settings['general']['debug']
|
||||
if searx_debug:
|
||||
logging_config_debug()
|
||||
else:
|
||||
logging.basicConfig(level=LOG_LEVEL_PROD, format=LOG_FORMAT_PROD)
|
||||
logging.root.setLevel(level=LOG_LEVEL_PROD)
|
||||
logging.getLogger('werkzeug').setLevel(level=LOG_LEVEL_PROD)
|
||||
logger = logging.getLogger('searx')
|
||||
logger.info(settings_load_message)
|
||||
|
||||
# log max_request_timeout
|
||||
max_request_timeout = settings['outgoing']['max_request_timeout']
|
||||
if max_request_timeout is None:
|
||||
logger.info('max_request_timeout=%s', repr(max_request_timeout))
|
||||
else:
|
||||
logger.info('max_request_timeout=%i second(s)', max_request_timeout)
|
||||
|
|
|
@ -173,7 +173,6 @@ def request(query, params):
|
|||
|
||||
params['url'] = search_url.format(**fargs)
|
||||
params['soft_max_redirects'] = soft_max_redirects
|
||||
logger.debug("query_url --> %s", params['url'])
|
||||
|
||||
return params
|
||||
|
||||
|
|
|
@ -5,7 +5,8 @@ from urllib.parse import urlparse
|
|||
from httpx import HTTPError, HTTPStatusError
|
||||
from searx.exceptions import (SearxXPathSyntaxException, SearxEngineXPathException, SearxEngineAPIException,
|
||||
SearxEngineAccessDeniedException)
|
||||
from searx import logger, searx_parent_dir
|
||||
from searx import searx_parent_dir
|
||||
from searx.engines import engines
|
||||
|
||||
|
||||
errors_per_engines = {}
|
||||
|
@ -47,7 +48,7 @@ class ErrorContext:
|
|||
def add_error_context(engine_name: str, error_context: ErrorContext) -> None:
|
||||
errors_for_engine = errors_per_engines.setdefault(engine_name, {})
|
||||
errors_for_engine[error_context] = errors_for_engine.get(error_context, 0) + 1
|
||||
logger.debug('%s: %s', engine_name, str(error_context))
|
||||
engines[engine_name].logger.warning('%s', str(error_context))
|
||||
|
||||
|
||||
def get_trace(traces):
|
||||
|
|
|
@ -194,7 +194,7 @@ def new_client(
|
|||
# pylint: disable=too-many-arguments
|
||||
enable_http, verify, enable_http2,
|
||||
max_connections, max_keepalive_connections, keepalive_expiry,
|
||||
proxies, local_address, retries, max_redirects ):
|
||||
proxies, local_address, retries, max_redirects, hook_log_response ):
|
||||
limit = httpx.Limits(
|
||||
max_connections=max_connections,
|
||||
max_keepalive_connections=max_keepalive_connections,
|
||||
|
@ -221,7 +221,17 @@ def new_client(
|
|||
mounts['http://'] = AsyncHTTPTransportNoHttp()
|
||||
|
||||
transport = get_transport(verify, enable_http2, local_address, None, limit, retries)
|
||||
return httpx.AsyncClient(transport=transport, mounts=mounts, max_redirects=max_redirects)
|
||||
|
||||
event_hooks = None
|
||||
if hook_log_response:
|
||||
event_hooks = {'response': [ hook_log_response ]}
|
||||
|
||||
return httpx.AsyncClient(
|
||||
transport=transport,
|
||||
mounts=mounts,
|
||||
max_redirects=max_redirects,
|
||||
event_hooks=event_hooks,
|
||||
)
|
||||
|
||||
|
||||
def get_loop():
|
||||
|
@ -231,7 +241,7 @@ def get_loop():
|
|||
|
||||
def init():
|
||||
# log
|
||||
for logger_name in ('hpack.hpack', 'hpack.table'):
|
||||
for logger_name in ('hpack.hpack', 'hpack.table', 'httpx._client'):
|
||||
logging.getLogger(logger_name).setLevel(logging.WARNING)
|
||||
|
||||
# loop
|
||||
|
|
|
@ -10,9 +10,11 @@ from itertools import cycle
|
|||
|
||||
import httpx
|
||||
|
||||
from searx import logger, searx_debug
|
||||
from .client import new_client, get_loop
|
||||
|
||||
|
||||
logger = logger.getChild('network')
|
||||
DEFAULT_NAME = '__DEFAULT__'
|
||||
NETWORKS = {}
|
||||
# requests compatibility when reading proxy settings from settings.yml
|
||||
|
@ -41,7 +43,7 @@ class Network:
|
|||
'enable_http', 'verify', 'enable_http2',
|
||||
'max_connections', 'max_keepalive_connections', 'keepalive_expiry',
|
||||
'local_addresses', 'proxies', 'max_redirects', 'retries', 'retry_on_http_error',
|
||||
'_local_addresses_cycle', '_proxies_cycle', '_clients'
|
||||
'_local_addresses_cycle', '_proxies_cycle', '_clients', '_logger'
|
||||
)
|
||||
|
||||
def __init__(
|
||||
|
@ -57,7 +59,8 @@ class Network:
|
|||
local_addresses=None,
|
||||
retries=0,
|
||||
retry_on_http_error=None,
|
||||
max_redirects=30 ):
|
||||
max_redirects=30,
|
||||
logger_name=None):
|
||||
|
||||
self.enable_http = enable_http
|
||||
self.verify = verify
|
||||
|
@ -73,6 +76,7 @@ class Network:
|
|||
self._local_addresses_cycle = self.get_ipaddress_cycle()
|
||||
self._proxies_cycle = self.get_proxy_cycles()
|
||||
self._clients = {}
|
||||
self._logger = logger.getChild(logger_name) if logger_name else logger
|
||||
self.check_parameters()
|
||||
|
||||
def check_parameters(self):
|
||||
|
@ -130,12 +134,25 @@ class Network:
|
|||
# pylint: disable=stop-iteration-return
|
||||
yield tuple((pattern, next(proxy_url_cycle)) for pattern, proxy_url_cycle in proxy_settings.items())
|
||||
|
||||
async def log_response(self, response: httpx.Response):
|
||||
request = response.request
|
||||
status = f"{response.status_code} {response.reason_phrase}"
|
||||
response_line = f"{response.http_version} {status}"
|
||||
if hasattr(response, "_elapsed"):
|
||||
elapsed_time = f"{response.elapsed.total_seconds()} sec"
|
||||
else:
|
||||
elapsed_time = "stream"
|
||||
self._logger.debug(
|
||||
f'HTTP Request: {request.method} {request.url} "{response_line}" ({elapsed_time})'
|
||||
)
|
||||
|
||||
def get_client(self, verify=None, max_redirects=None):
|
||||
verify = self.verify if verify is None else verify
|
||||
max_redirects = self.max_redirects if max_redirects is None else max_redirects
|
||||
local_address = next(self._local_addresses_cycle)
|
||||
proxies = next(self._proxies_cycle) # is a tuple so it can be part of the key
|
||||
key = (verify, max_redirects, local_address, proxies)
|
||||
hook_log_response = self.log_response if searx_debug else None
|
||||
if key not in self._clients or self._clients[key].is_closed:
|
||||
self._clients[key] = new_client(
|
||||
self.enable_http,
|
||||
|
@ -147,7 +164,8 @@ class Network:
|
|||
dict(proxies),
|
||||
local_address,
|
||||
0,
|
||||
max_redirects
|
||||
max_redirects,
|
||||
hook_log_response
|
||||
)
|
||||
return self._clients[key]
|
||||
|
||||
|
@ -243,11 +261,13 @@ def initialize(settings_engines=None, settings_outgoing=None):
|
|||
'retry_on_http_error': None,
|
||||
}
|
||||
|
||||
def new_network(params):
|
||||
def new_network(params, logger_name=None):
|
||||
nonlocal default_params
|
||||
result = {}
|
||||
result.update(default_params)
|
||||
result.update(params)
|
||||
if logger_name:
|
||||
result['logger_name'] = logger_name
|
||||
return Network(**result)
|
||||
|
||||
def iter_networks():
|
||||
|
@ -263,13 +283,13 @@ def initialize(settings_engines=None, settings_outgoing=None):
|
|||
if NETWORKS:
|
||||
done()
|
||||
NETWORKS.clear()
|
||||
NETWORKS[DEFAULT_NAME] = new_network({})
|
||||
NETWORKS['ipv4'] = new_network({'local_addresses': '0.0.0.0'})
|
||||
NETWORKS['ipv6'] = new_network({'local_addresses': '::'})
|
||||
NETWORKS[DEFAULT_NAME] = new_network({}, logger_name='default')
|
||||
NETWORKS['ipv4'] = new_network({'local_addresses': '0.0.0.0'}, logger_name='ipv4')
|
||||
NETWORKS['ipv6'] = new_network({'local_addresses': '::'}, logger_name='ipv6')
|
||||
|
||||
# define networks from outgoing.networks
|
||||
for network_name, network in settings_outgoing['networks'].items():
|
||||
NETWORKS[network_name] = new_network(network)
|
||||
NETWORKS[network_name] = new_network(network, logger_name=network_name)
|
||||
|
||||
# define networks from engines.[i].network (except references)
|
||||
for engine_name, engine, network in iter_networks():
|
||||
|
@ -280,9 +300,9 @@ def initialize(settings_engines=None, settings_outgoing=None):
|
|||
network[attribute_name] = getattr(engine, attribute_name)
|
||||
else:
|
||||
network[attribute_name] = attribute_value
|
||||
NETWORKS[engine_name] = new_network(network)
|
||||
NETWORKS[engine_name] = new_network(network, logger_name=engine_name)
|
||||
elif isinstance(network, dict):
|
||||
NETWORKS[engine_name] = new_network(network)
|
||||
NETWORKS[engine_name] = new_network(network, logger_name=engine_name)
|
||||
|
||||
# define networks from engines.[i].network (references)
|
||||
for engine_name, engine, network in iter_networks():
|
||||
|
@ -295,7 +315,7 @@ def initialize(settings_engines=None, settings_outgoing=None):
|
|||
if 'image_proxy' not in NETWORKS:
|
||||
image_proxy_params = default_params.copy()
|
||||
image_proxy_params['enable_http2'] = False
|
||||
NETWORKS['image_proxy'] = new_network(image_proxy_params)
|
||||
NETWORKS['image_proxy'] = new_network(image_proxy_params, logger_name='image_proxy')
|
||||
|
||||
|
||||
@atexit.register
|
||||
|
|
|
@ -147,7 +147,7 @@ class Search:
|
|||
if th.is_alive():
|
||||
th._timeout = True
|
||||
self.result_container.add_unresponsive_engine(th._engine_name, 'timeout')
|
||||
logger.warning('engine timeout: {0}'.format(th._engine_name))
|
||||
PROCESSORS[th._engine_name].logger.error('engine timeout')
|
||||
|
||||
def search_standard(self):
|
||||
"""
|
||||
|
|
|
@ -65,6 +65,6 @@ def initialize(engine_list):
|
|||
processor = get_processor(engine, engine_name)
|
||||
initialize_processor(processor)
|
||||
if processor is None:
|
||||
logger.error('Error get processor for engine %s', engine_name)
|
||||
engine.logger.error('Error get processor for engine %s', engine_name)
|
||||
else:
|
||||
PROCESSORS[engine_name] = processor
|
||||
|
|
|
@ -9,8 +9,8 @@ import threading
|
|||
from abc import abstractmethod, ABC
|
||||
from timeit import default_timer
|
||||
|
||||
from searx import logger
|
||||
from searx.engines import settings
|
||||
from searx import settings, logger
|
||||
from searx.engines import engines
|
||||
from searx.network import get_time_for_thread, get_network
|
||||
from searx.metrics import histogram_observe, counter_inc, count_exception, count_error
|
||||
from searx.exceptions import SearxEngineAccessDeniedException, SearxEngineResponseException
|
||||
|
@ -43,7 +43,7 @@ class SuspendedStatus:
|
|||
self.continuous_errors * settings['search']['ban_time_on_fail'])
|
||||
self.suspend_end_time = default_timer() + suspended_time
|
||||
self.suspend_reason = suspend_reason
|
||||
logger.debug('Suspend engine for %i seconds', suspended_time)
|
||||
logger.debug('Suspend for %i seconds', suspended_time)
|
||||
|
||||
def resume(self):
|
||||
with self.lock:
|
||||
|
@ -56,11 +56,12 @@ class SuspendedStatus:
|
|||
class EngineProcessor(ABC):
|
||||
"""Base classes used for all types of reqest processores."""
|
||||
|
||||
__slots__ = 'engine', 'engine_name', 'lock', 'suspended_status'
|
||||
__slots__ = 'engine', 'engine_name', 'lock', 'suspended_status', 'logger'
|
||||
|
||||
def __init__(self, engine, engine_name):
|
||||
self.engine = engine
|
||||
self.engine_name = engine_name
|
||||
self.logger = engines[engine_name].logger
|
||||
key = get_network(self.engine_name)
|
||||
key = id(key) if key else self.engine_name
|
||||
self.suspended_status = SUSPENDED_STATUS.setdefault(key, SuspendedStatus())
|
||||
|
@ -69,11 +70,11 @@ class EngineProcessor(ABC):
|
|||
try:
|
||||
self.engine.init(get_engine_from_settings(self.engine_name))
|
||||
except SearxEngineResponseException as exc:
|
||||
logger.warn('%s engine: Fail to initialize // %s', self.engine_name, exc)
|
||||
self.logger.warn('Fail to initialize // %s', exc)
|
||||
except Exception: # pylint: disable=broad-except
|
||||
logger.exception('%s engine: Fail to initialize', self.engine_name)
|
||||
self.logger.exception('Fail to initialize')
|
||||
else:
|
||||
logger.debug('%s engine: Initialized', self.engine_name)
|
||||
self.logger.debug('Initialized')
|
||||
|
||||
@property
|
||||
def has_initialize_function(self):
|
||||
|
|
|
@ -5,10 +5,8 @@
|
|||
|
||||
"""
|
||||
|
||||
from searx import logger
|
||||
from .abstract import EngineProcessor
|
||||
|
||||
logger = logger.getChild('searx.search.processor.offline')
|
||||
|
||||
class OfflineProcessor(EngineProcessor):
|
||||
"""Processor class used by ``offline`` engines"""
|
||||
|
@ -24,7 +22,7 @@ class OfflineProcessor(EngineProcessor):
|
|||
self.extend_container(result_container, start_time, search_results)
|
||||
except ValueError as e:
|
||||
# do not record the error
|
||||
logger.exception('engine {0} : invalid input : {1}'.format(self.engine_name, e))
|
||||
self.logger.exception('engine {0} : invalid input : {1}'.format(self.engine_name, e))
|
||||
except Exception as e: # pylint: disable=broad-except
|
||||
self.handle_exception(result_container, e)
|
||||
logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e))
|
||||
self.logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e))
|
||||
|
|
|
@ -10,7 +10,6 @@ import asyncio
|
|||
import httpx
|
||||
|
||||
import searx.network
|
||||
from searx import logger
|
||||
from searx.utils import gen_useragent
|
||||
from searx.exceptions import (
|
||||
SearxEngineAccessDeniedException,
|
||||
|
@ -20,7 +19,6 @@ from searx.exceptions import (
|
|||
from searx.metrics.error_recorder import count_error
|
||||
from .abstract import EngineProcessor
|
||||
|
||||
logger = logger.getChild('searx.search.processor.online')
|
||||
|
||||
def default_request_params():
|
||||
"""Default request parameters for ``online`` engines."""
|
||||
|
@ -146,31 +144,37 @@ class OnlineProcessor(EngineProcessor):
|
|||
except (httpx.TimeoutException, asyncio.TimeoutError) as e:
|
||||
# requests timeout (connect or read)
|
||||
self.handle_exception(result_container, e, suspend=True)
|
||||
logger.error("engine {0} : HTTP requests timeout"
|
||||
"(search duration : {1} s, timeout: {2} s) : {3}"
|
||||
.format(self.engine_name, default_timer() - start_time,
|
||||
timeout_limit,
|
||||
e.__class__.__name__))
|
||||
self.logger.error(
|
||||
"HTTP requests timeout (search duration : {0} s, timeout: {1} s) : {2}"
|
||||
.format(
|
||||
default_timer() - start_time,
|
||||
timeout_limit,
|
||||
e.__class__.__name__
|
||||
)
|
||||
)
|
||||
except (httpx.HTTPError, httpx.StreamError) as e:
|
||||
# other requests exception
|
||||
self.handle_exception(result_container, e, suspend=True)
|
||||
logger.exception("engine {0} : requests exception"
|
||||
"(search duration : {1} s, timeout: {2} s) : {3}"
|
||||
.format(self.engine_name, default_timer() - start_time,
|
||||
timeout_limit,
|
||||
e))
|
||||
self.logger.exception(
|
||||
"requests exception (search duration : {0} s, timeout: {1} s) : {2}"
|
||||
.format(
|
||||
default_timer() - start_time,
|
||||
timeout_limit,
|
||||
e
|
||||
)
|
||||
)
|
||||
except SearxEngineCaptchaException as e:
|
||||
self.handle_exception(result_container, e, suspend=True)
|
||||
logger.exception('engine {0} : CAPTCHA'.format(self.engine_name))
|
||||
self.logger.exception('CAPTCHA')
|
||||
except SearxEngineTooManyRequestsException as e:
|
||||
self.handle_exception(result_container, e, suspend=True)
|
||||
logger.exception('engine {0} : Too many requests'.format(self.engine_name))
|
||||
self.logger.exception('Too many requests')
|
||||
except SearxEngineAccessDeniedException as e:
|
||||
self.handle_exception(result_container, e, suspend=True)
|
||||
logger.exception('engine {0} : Searx is blocked'.format(self.engine_name))
|
||||
self.logger.exception('Searx is blocked')
|
||||
except Exception as e: # pylint: disable=broad-except
|
||||
self.handle_exception(result_container, e)
|
||||
logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e))
|
||||
self.logger.exception('exception : {0}'.format(e))
|
||||
|
||||
def get_default_tests(self):
|
||||
tests = {}
|
||||
|
|
|
@ -3,6 +3,7 @@ import os
|
|||
import aiounittest
|
||||
|
||||
os.environ['SEARX_DEBUG'] = '1'
|
||||
os.environ['SEARX_DEBUG_LOG_LEVEL'] = 'WARNING'
|
||||
os.environ['SEARX_DISABLE_ETC_SETTINGS'] = '1'
|
||||
os.environ.pop('SEARX_SETTINGS_PATH', None)
|
||||
|
||||
|
|
Loading…
Reference in New Issue