mirror of
https://github.com/searxng/searxng
synced 2024-01-01 19:24:07 +01:00
[enh] introduce Engine class
Previously engines were defined in modules, which were then stapled with logger, supported_languages & language_aliases via monkey patching in searx/engines/__init__.py. Monkey patching is obviously a bad practice since it confuses humans as well as static type checkers. But there were more problems: * there were no type hints for the method input and output types * the request method did not even make a clear distinction between input and output (abusing a single "params" dictionary for both) This commit introduces a new class-based architecture for engines, in a backwards-compatible manner: allowing old-style module engines to be used along with new-style class-based engines.
This commit is contained in:
parent
73e71508e4
commit
5dd28ff04b
5 changed files with 147 additions and 7 deletions
|
@ -48,12 +48,14 @@ Explanation of the :ref:`general engine configuration` shown in the table
|
||||||
|
|
||||||
{% for mod in engines %}
|
{% for mod in engines %}
|
||||||
|
|
||||||
|
{% set mod_name = mod['__name__'] or mod.__module__ %}
|
||||||
|
|
||||||
* - `{{mod.name}} <{{mod.about and mod.about.website}}>`_
|
* - `{{mod.name}} <{{mod.about and mod.about.website}}>`_
|
||||||
- ``!{{mod.shortcut}}``
|
- ``!{{mod.shortcut}}``
|
||||||
- {%- if 'searx.engines.' + mod.__name__ in documented_modules %}
|
- {%- if 'searx.engines.' + mod_name in documented_modules %}
|
||||||
:py:mod:`~searx.engines.{{mod.__name__}}`
|
:py:mod:`~searx.engines.{{mod_name}}`
|
||||||
{%- else %}
|
{%- else %}
|
||||||
:origin:`{{mod.__name__}} <searx/engines/{{mod.__name__}}.py>`
|
:origin:`{{mod_name}} <searx/engines/{{mod_name}}.py>`
|
||||||
{%- endif %}
|
{%- endif %}
|
||||||
- {{(mod.disabled and "y") or ""}}
|
- {{(mod.disabled and "y") or ""}}
|
||||||
{%- if mod.about and mod.about.language %}
|
{%- if mod.about and mod.about.language %}
|
||||||
|
|
100
searx/engine.py
Normal file
100
searx/engine.py
Normal file
|
@ -0,0 +1,100 @@
|
||||||
|
# pyright: strict
|
||||||
|
from logging import Logger
|
||||||
|
from typing import Iterable, List, NamedTuple, Optional, Dict, Union
|
||||||
|
from typing_extensions import Literal, TypedDict, NotRequired
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
from httpx import Response
|
||||||
|
|
||||||
|
|
||||||
|
class Engine:
    """Base class for engines implemented as classes rather than as modules.

    Only ``paging`` and ``time_range_support`` carry a default value here; the
    remaining attributes are declared for type checkers and have to be
    assigned before they are read.
    """

    #: engine categories (``None`` is a permitted value)
    categories: Optional[List[str]]
    #: paging flag, off unless overridden
    paging = False
    #: time range flag, off unless overridden
    time_range_support = False
    #: language codes supported by the engine
    supported_languages: List[str]
    #: mapping between language codes used as aliases
    language_aliases: Dict[str, str]
    #: descriptive metadata, see :class:`About`
    about: 'About'

    def __init__(self, logger: Logger) -> None:
        """Store ``logger`` on the instance as ``self.logger``."""
        self.logger = logger
|
||||||
|
|
||||||
|
|
||||||
|
class About(TypedDict, total=False):
|
||||||
|
website: str
|
||||||
|
wikidata_id: Optional[str]
|
||||||
|
official_api_documentation: Optional[str]
|
||||||
|
use_official_api: bool
|
||||||
|
require_api_key: bool
|
||||||
|
results: Literal["HTML", "JSON"]
|
||||||
|
language: NotRequired[str]
|
||||||
|
|
||||||
|
|
||||||
|
class OnlineEngine(Engine):
    """Engine that builds a request and turns the HTTP response into results.

    Both methods are meant to be overridden; the implementations given here
    only raise :class:`NotImplementedError`.
    """

    def request(self, query: str, ctx: 'QueryContext') -> 'OnlineRequest':
        """Return the request to perform for ``query`` in the context ``ctx``."""
        raise NotImplementedError

    def response(self, response: Response) -> List['Result']:
        """Return the list of results extracted from ``response``."""
        raise NotImplementedError
|
||||||
|
|
||||||
|
|
||||||
|
class QueryContext(NamedTuple):
    """Read-only search parameters handed to :meth:`OnlineEngine.request`."""

    #: current category
    category: str
    #: desired content safety (normal, moderate, strict)
    safesearch: Literal[0, 1, 2]
    #: current time range (if any)
    time_range: Optional[Literal['day', 'week', 'month', 'year']]
    #: current page number
    pageno: int
    #: specific language code like ``en_US``, or ``all`` if unspecified
    language: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class OnlineRequest:
    """Description of the HTTP request an online engine wants to perform."""

    #: requested URL
    url: str
    #: HTTP request method
    method: Literal['GET', 'POST'] = 'GET'
    #: HTTP headers
    headers: Optional[Dict[str, str]] = None
    #: data to be sent as the HTTP body
    data: Optional[Dict[str, str]] = None
    #: HTTP cookies
    cookies: Optional[Dict[str, str]] = None
    #: Assert that the TLS certificate is valid
    verify: bool = True
    #: follow redirects
    allow_redirects: bool = True
    #: maximum redirects, hard limit
    max_redirects: Optional[int] = None
    #: maximum redirects, soft limit. Record an error but don't stop the engine
    soft_max_redirects: Optional[int] = None
    #: raise an exception if the HTTP code of response is >= 300
    raise_for_httperror: bool = True

    def set_header(self, name: str, value: str) -> None:
        """Set the header ``name`` to ``value``.

        ``headers`` defaults to ``None``; the dictionary is created on the
        first call.
        """
        current = self.headers
        if current is None:
            # first header: create the dictionary and keep it on the instance
            current = self.headers = {}
        current[name] = value
|
||||||
|
|
||||||
|
|
||||||
|
# Item type of the list returned by ``OnlineEngine.response``: either a
# ``StandardResult`` or an ``InfoBox``.
Result = Union['StandardResult', 'InfoBox']
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class StandardResult:
    """Result made of a URL, a title and an optional content string."""

    #: result URL
    url: str
    #: result title
    title: str
    #: result content, empty by default
    content: str = ''
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class InfoBox(StandardResult):
    """Result that adds an optional image and a collection of links."""

    #: image source, ``None`` when there is none
    img_src: Optional[str] = None
    #: ``Link`` items attached to the infobox, empty by default
    links: Iterable['Link'] = ()
|
||||||
|
|
||||||
|
|
||||||
|
class Link(TypedDict):
    """Typed dictionary with the two string keys ``title`` and ``url``."""

    title: str
    url: str
|
|
@ -20,6 +20,11 @@ from searx import logger, settings
|
||||||
from searx.data import ENGINES_LANGUAGES
|
from searx.data import ENGINES_LANGUAGES
|
||||||
from searx.network import get
|
from searx.network import get
|
||||||
from searx.utils import load_module, gen_useragent, find_language_aliases
|
from searx.utils import load_module, gen_useragent, find_language_aliases
|
||||||
|
from searx.engine import Engine
|
||||||
|
|
||||||
|
_NEXTGEN_ENGINES = {
|
||||||
|
}
|
||||||
|
"""maps module names to class names for engines that are implemented using the new class-based approach"""
|
||||||
|
|
||||||
|
|
||||||
logger = logger.getChild('engines')
|
logger = logger.getChild('engines')
|
||||||
|
@ -121,6 +126,11 @@ def load_engine(engine_data: dict) -> Optional[ConfiguredEngine]:
|
||||||
logger.exception('Cannot load engine "{}"'.format(engine_module))
|
logger.exception('Cannot load engine "{}"'.format(engine_module))
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
if engine_data['engine'] in _NEXTGEN_ENGINES:
|
||||||
|
engine = getattr(engine, _NEXTGEN_ENGINES[engine_data['engine']])(
|
||||||
|
logger=logger.getChild(engine_name),
|
||||||
|
)
|
||||||
|
|
||||||
update_engine_attributes(engine, engine_data)
|
update_engine_attributes(engine, engine_data)
|
||||||
set_language_attributes(engine)
|
set_language_attributes(engine)
|
||||||
update_attributes_for_tor(engine)
|
update_attributes_for_tor(engine)
|
||||||
|
@ -204,6 +214,7 @@ def _get_supported_languages(engine: ConfiguredEngine) -> Collection[str]:
|
||||||
|
|
||||||
|
|
||||||
def set_language_attributes(engine: ConfiguredEngine):
|
def set_language_attributes(engine: ConfiguredEngine):
|
||||||
|
if not isinstance(engine, Engine):
|
||||||
engine.supported_languages = _get_supported_languages(engine)
|
engine.supported_languages = _get_supported_languages(engine)
|
||||||
|
|
||||||
# find custom aliases for non standard language codes
|
# find custom aliases for non standard language codes
|
||||||
|
|
|
@ -6,6 +6,7 @@ from typing import List, NamedTuple, Set
|
||||||
from urllib.parse import urlparse, unquote
|
from urllib.parse import urlparse, unquote
|
||||||
|
|
||||||
from searx import logger
|
from searx import logger
|
||||||
|
from searx.engine import InfoBox, StandardResult
|
||||||
from searx.engines import engines
|
from searx.engines import engines
|
||||||
from searx.metrics import histogram_observe, counter_add, count_error
|
from searx.metrics import histogram_observe, counter_add, count_error
|
||||||
|
|
||||||
|
@ -195,6 +196,17 @@ class ResultContainer:
|
||||||
standard_result_count = 0
|
standard_result_count = 0
|
||||||
error_msgs = set()
|
error_msgs = set()
|
||||||
for result in list(results):
|
for result in list(results):
|
||||||
|
if isinstance(result, InfoBox):
|
||||||
|
result = {
|
||||||
|
'infobox': result.title,
|
||||||
|
'id': result.url,
|
||||||
|
'content': result.content,
|
||||||
|
'img_src': result.img_src,
|
||||||
|
'urls': result.links,
|
||||||
|
}
|
||||||
|
elif isinstance(result, StandardResult):
|
||||||
|
result = result.__dict__
|
||||||
|
|
||||||
result['engine'] = engine_name
|
result['engine'] = engine_name
|
||||||
if 'suggestion' in result and self.on_result(result):
|
if 'suggestion' in result and self.on_result(result):
|
||||||
self.suggestions.add(result['suggestion'])
|
self.suggestions.add(result['suggestion'])
|
||||||
|
|
|
@ -17,6 +17,7 @@ from searx.exceptions import (
|
||||||
SearxEngineTooManyRequestsException,
|
SearxEngineTooManyRequestsException,
|
||||||
)
|
)
|
||||||
from searx.metrics.error_recorder import count_error
|
from searx.metrics.error_recorder import count_error
|
||||||
|
from searx.engine import OnlineEngine, QueryContext
|
||||||
from .abstract import EngineProcessor
|
from .abstract import EngineProcessor
|
||||||
|
|
||||||
|
|
||||||
|
@ -114,6 +115,20 @@ class OnlineProcessor(EngineProcessor):
|
||||||
def _search_basic(self, query, params):
|
def _search_basic(self, query, params):
|
||||||
# update request parameters dependent on
|
# update request parameters dependent on
|
||||||
# search-engine (contained in engines folder)
|
# search-engine (contained in engines folder)
|
||||||
|
if isinstance(self.engine, OnlineEngine):
|
||||||
|
params.update(
|
||||||
|
self.engine.request(
|
||||||
|
query,
|
||||||
|
QueryContext(
|
||||||
|
category=params['category'],
|
||||||
|
safesearch=params['safesearch'],
|
||||||
|
time_range=params['time_range'],
|
||||||
|
pageno=params['pageno'],
|
||||||
|
language=params['language'],
|
||||||
|
),
|
||||||
|
).__dict__
|
||||||
|
)
|
||||||
|
else:
|
||||||
self.engine.request(query, params)
|
self.engine.request(query, params)
|
||||||
|
|
||||||
# ignoring empty urls
|
# ignoring empty urls
|
||||||
|
|
Loading…
Add table
Reference in a new issue