[mod] multithreading only in searx.search.* packages

it prepares the new architecture change,
everything about multithreading is moved into the searx.search.* packages

previously the call to the "init" function of the engines was done in searx.engines:
* the network was not set (requests were not sent through the configured proxy)
* the code had to be monkey patched to avoid HTTP requests during the tests
This commit is contained in:
Alexandre Flament 2021-05-05 13:08:54 +02:00
parent d36adfa59f
commit 8c1a65d32f
10 changed files with 85 additions and 65 deletions

View file

@ -11,9 +11,11 @@ __all__ = [
'OnlineProcessor',
'OnlineDictionaryProcessor',
'OnlineCurrencyProcessor',
'processors',
'PROCESSORS',
]
import threading
from searx import logger
import searx.engines as engines
@ -24,7 +26,7 @@ from .online_currency import OnlineCurrencyProcessor
from .abstract import EngineProcessor
logger = logger.getChild('search.processors')
processors = {}
PROCESSORS = {}
"""Cache of request processors, stored by *engine-name* (:py:func:`initialize`)"""
def get_processor_class(engine_type):
@ -34,6 +36,7 @@ def get_processor_class(engine_type):
return c
return None
def get_processor(engine, engine_name):
"""Return the processor instance that fits ``engine.engine_type`` (default: ``'online'``)"""
engine_type = getattr(engine, 'engine_type', 'online')
@ -42,12 +45,26 @@ def get_processor(engine, engine_name):
return processor_class(engine, engine_name)
return None
def initialize_processor(processor):
    """Initialize one processor.

    Start ``processor.initialize`` on a daemon thread when the processor
    reports ``has_initialize_function``. The thread is started and not
    joined, so this function returns without waiting for the engine's init.

    :param processor: processor to initialize, or ``None``. ``None`` is
        accepted and ignored because ``get_processor`` returns ``None`` when
        no processor class matches, and callers may pass that result on
        before checking it.
    """
    if processor is None:
        # Nothing to initialize; avoid AttributeError on the attribute lookup.
        return
    if processor.has_initialize_function:
        init_thread = threading.Thread(target=processor.initialize, daemon=True)
        init_thread.start()
def initialize(engine_list):
"""Initialize all engines and store a processor for each engine in :py:obj:`processors`."""
engines.initialize_engines(engine_list)
for engine_name, engine in engines.engines.items():
processor = get_processor(engine, engine_name)
if processor is None:
logger.error('Error get processor for engine %s', engine_name)
else:
processors[engine_name] = processor
"""Initialize all engines and store a processor for each engine in :py:obj:`PROCESSORS`."""
for engine_data in engine_list:
engine_name = engine_data['name']
engine = engines.engines.get(engine_name)
if engine:
processor = get_processor(engine, engine_name)
initialize_processor(processor)
if processor is None:
logger.error('Error get processor for engine %s', engine_name)
else:
PROCESSORS[engine_name] = processor

View file

@ -13,7 +13,8 @@ from searx import logger
from searx.engines import settings
from searx.network import get_time_for_thread, get_network
from searx.metrics import histogram_observe, counter_inc, count_exception, count_error
from searx.exceptions import SearxEngineAccessDeniedException
from searx.exceptions import SearxEngineAccessDeniedException, SearxEngineResponseException
from searx.utils import get_engine_from_settings
logger = logger.getChild('searx.search.processor')
SUSPENDED_STATUS = {}
@ -66,6 +67,20 @@ class EngineProcessor(ABC):
key = id(key) if key else self.engine_name
self.suspended_status = SUSPENDED_STATUS.setdefault(key, SuspendedStatus())
def initialize(self):
    """Call the engine's ``init`` function and log the outcome.

    ``init`` receives the value returned by
    ``get_engine_from_settings(self.engine_name)``. No exception derived
    from ``Exception`` propagates to the caller:

    * ``SearxEngineResponseException`` is logged as a warning,
    * any other ``Exception`` is logged with its traceback,
    * success is logged at debug level.
    """
    try:
        self.engine.init(get_engine_from_settings(self.engine_name))
    except SearxEngineResponseException as exc:
        # Logger.warn is a deprecated alias; use Logger.warning.
        logger.warning('%s engine: Fail to initialize // %s', self.engine_name, exc)
    except Exception:  # pylint: disable=broad-except
        logger.exception('%s engine: Fail to initialize', self.engine_name)
    else:
        logger.debug('%s engine: Initialized', self.engine_name)
@property
def has_initialize_function(self):
    """``True`` when the wrapped engine exposes an ``init`` attribute."""
    engine_has_init = hasattr(self.engine, 'init')
    return engine_has_init
def handle_exception(self, result_container, exception_or_message, suspend=False):
# update result_container
if isinstance(exception_or_message, BaseException):

View file

@ -5,7 +5,7 @@
"""
from time import time
from timeit import default_timer
import asyncio
import httpx
@ -40,6 +40,15 @@ class OnlineProcessor(EngineProcessor):
engine_type = 'online'
def initialize(self):
    """Set up the per-thread network context, then run the base initialization.

    Before delegating to the parent class, this sets the HTTP timeout for
    the current thread from ``self.engine.timeout``, resets the thread's
    accumulated HTTP time, and selects the network named after this engine.
    """
    network = searx.network
    started_at = default_timer()

    # timeout applied to all HTTP requests sent from this thread
    network.set_timeout_for_thread(self.engine.timeout, start_time=started_at)
    # start again from a zero HTTP total time
    network.reset_time_for_thread()
    # use the network configured under this engine's name
    network.set_context_network_name(self.engine_name)

    super().initialize()
def get_params(self, search_query, engine_category):
params = super().get_params(search_query, engine_category)
if params is None:
@ -139,7 +148,7 @@ class OnlineProcessor(EngineProcessor):
self.handle_exception(result_container, e, suspend=True)
logger.error("engine {0} : HTTP requests timeout"
"(search duration : {1} s, timeout: {2} s) : {3}"
.format(self.engine_name, time() - start_time,
.format(self.engine_name, default_timer() - start_time,
timeout_limit,
e.__class__.__name__))
except (httpx.HTTPError, httpx.StreamError) as e:
@ -147,7 +156,7 @@ class OnlineProcessor(EngineProcessor):
self.handle_exception(result_container, e, suspend=True)
logger.exception("engine {0} : requests exception"
"(search duration : {1} s, timeout: {2} s) : {3}"
.format(self.engine_name, time() - start_time,
.format(self.engine_name, default_timer() - start_time,
timeout_limit,
e))
except SearxEngineCaptchaException as e: