mirror of
https://github.com/searxng/searxng
synced 2024-01-01 19:24:07 +01:00
[enh] introduce Engine class
Previously engines were defined in modules, which were then stapled with logger, supported_languages & language_aliases via monkey patching in searx/engines/__init__.py. Monkey patching is obviously a bad practice since it confuses humans as well as static type checkers. But there were more problems: * there were no type hints for the method input and output types * the request method did not even make a clear distinction between input and output (abusing a single "params" dictionary for both) This commit introduces a new class-based architecture for engines, in a backwards-compatible manner: allowing old-style module engines to be used along with new-style class-based engines.
This commit is contained in:
parent
73e71508e4
commit
5dd28ff04b
5 changed files with 147 additions and 7 deletions
|
@ -48,12 +48,14 @@ Explanation of the :ref:`general engine configuration` shown in the table
|
|||
|
||||
{% for mod in engines %}
|
||||
|
||||
{% set mod_name = mod['__name__'] or mod.__module__ %}
|
||||
|
||||
* - `{{mod.name}} <{{mod.about and mod.about.website}}>`_
|
||||
- ``!{{mod.shortcut}}``
|
||||
- {%- if 'searx.engines.' + mod.__name__ in documented_modules %}
|
||||
:py:mod:`~searx.engines.{{mod.__name__}}`
|
||||
- {%- if 'searx.engines.' + mod_name in documented_modules %}
|
||||
:py:mod:`~searx.engines.{{mod_name}}`
|
||||
{%- else %}
|
||||
:origin:`{{mod.__name__}} <searx/engines/{{mod.__name__}}.py>`
|
||||
:origin:`{{mod_name}} <searx/engines/{{mod_name}}.py>`
|
||||
{%- endif %}
|
||||
- {{(mod.disabled and "y") or ""}}
|
||||
{%- if mod.about and mod.about.language %}
|
||||
|
|
100
searx/engine.py
Normal file
100
searx/engine.py
Normal file
|
@ -0,0 +1,100 @@
|
|||
# pyright: strict
|
||||
from logging import Logger
|
||||
from typing import Iterable, List, NamedTuple, Optional, Dict, Union
|
||||
from typing_extensions import Literal, TypedDict, NotRequired
|
||||
from dataclasses import dataclass
|
||||
|
||||
from httpx import Response
|
||||
|
||||
|
||||
class Engine:
    """Base class for class-based ("new-style") engines.

    The class only declares the attributes an engine carries and stores the
    logger handed to the constructor; it implements no search behaviour
    itself.
    """

    # The four annotated-only attributes below (``categories``,
    # ``supported_languages``, ``language_aliases``, ``about``) are declared
    # for the type checker but never assigned in this class.
    # NOTE(review): presumably they are filled in by the engine loader or by
    # subclasses -- confirm against the loading code.
    categories: Optional[List[str]]

    # Feature flags; both are off unless a subclass overrides them.
    paging = False
    time_range_support = False

    supported_languages: List[str]
    language_aliases: Dict[str, str]
    about: 'About'

    def __init__(self, logger: Logger) -> None:
        """Keep a reference to *logger* on the instance as ``self.logger``."""
        self.logger = logger
|
||||
|
||||
|
||||
class About(TypedDict, total=False):
    """Typed dictionary describing the service behind an engine.

    Declared with ``total=False``, so a type checker treats every key as
    optional.
    """

    website: str
    wikidata_id: Optional[str]
    official_api_documentation: Optional[str]
    use_official_api: bool
    require_api_key: bool
    # Only the two literal values "HTML" and "JSON" are accepted.
    results: Literal["HTML", "JSON"]
    # NOTE(review): ``NotRequired`` is redundant while the class is declared
    # with ``total=False`` (all keys are already optional) -- confirm whether
    # ``total=False`` or the marker reflects the intended contract.
    language: NotRequired[str]
|
||||
|
||||
|
||||
class OnlineEngine(Engine):
    """Engine that answers a query by issuing an HTTP request.

    Subclasses are expected to override both methods; the implementations
    here only raise :py:class:`NotImplementedError`.
    """

    def request(self, query: str, ctx: 'QueryContext') -> 'OnlineRequest':
        """Build the outgoing request for *query*.

        :param query: the search terms.
        :param ctx: the :py:class:`QueryContext` of the current search.
        :returns: an :py:class:`OnlineRequest` describing the HTTP request.
        :raises NotImplementedError: always, unless overridden.
        """
        raise NotImplementedError()

    def response(self, response: Response) -> List['Result']:
        """Turn the HTTP *response* into a list of results.

        :param response: the ``httpx`` response object.
        :returns: a list of ``Result`` items.
        :raises NotImplementedError: always, unless overridden.
        """
        raise NotImplementedError()
|
||||
|
||||
|
||||
class QueryContext(NamedTuple):
    """Immutable, read-only inputs passed to an engine along with the query."""

    category: str
    """current category"""

    safesearch: Literal[0, 1, 2]
    """desired content safety (normal, moderate, strict)"""

    time_range: Optional[Literal['day', 'week', 'month', 'year']]
    """current time range (if any)"""

    pageno: int
    """current page number"""

    language: str
    """specific language code like ``en_US``, or ``all`` if unspecified"""
|
||||
|
||||
|
||||
@dataclass
class OnlineRequest:
    """Description of the HTTP request an engine wants to have sent.

    Only ``url`` is mandatory; every other field has a default.
    """

    url: str
    """requested URL"""

    method: Literal['GET', 'POST'] = 'GET'
    """HTTP request method"""

    headers: Optional[Dict[str, str]] = None
    """HTTP headers"""

    data: Optional[Dict[str, str]] = None
    """data to be sent as the HTTP body"""

    cookies: Optional[Dict[str, str]] = None
    """HTTP cookies"""

    verify: bool = True
    """Assert that the TLS certificate is valid"""

    allow_redirects: bool = True
    """follow redirects"""

    max_redirects: Optional[int] = None
    """maximum redirects, hard limit"""

    soft_max_redirects: Optional[int] = None
    """maximum redirects, soft limit. Record an error but don't stop the engine"""

    raise_for_httperror: bool = True
    """raise an exception if the HTTP code of response is >= 300"""

    def set_header(self, name: str, value: str) -> None:
        """Set the HTTP header *name* to *value*.

        ``headers`` defaults to ``None``, so the dictionary is created on
        first use; an already existing entry for *name* is overwritten.
        """
        if self.headers is None:
            self.headers = {}
        self.headers[name] = value
|
||||
|
||||
|
||||
Result = Union['StandardResult', 'InfoBox']
|
||||
|
||||
|
||||
@dataclass
class StandardResult:
    """A single search result: a URL, a title and optional text content."""

    url: str
    title: str
    # Empty string when the engine supplies no text for the result.
    content: str = ''
|
||||
|
||||
|
||||
@dataclass
class InfoBox(StandardResult):
    """A result that adds an optional image and a collection of links to the
    fields inherited from :py:class:`StandardResult`."""

    # Image URL; ``None`` when there is no image.
    img_src: Optional[str] = None
    # The default is an immutable empty tuple, so it is safe to share
    # between instances.
    links: Iterable['Link'] = ()
|
||||
|
||||
|
||||
class Link(TypedDict):
    """Typed dictionary for a titled hyperlink; both keys are required."""

    title: str
    url: str
|
|
@ -20,6 +20,11 @@ from searx import logger, settings
|
|||
from searx.data import ENGINES_LANGUAGES
|
||||
from searx.network import get
|
||||
from searx.utils import load_module, gen_useragent, find_language_aliases
|
||||
from searx.engine import Engine
|
||||
|
||||
_NEXTGEN_ENGINES = {
|
||||
}
|
||||
"""maps module names to class names for engines that are implemented using the new class-based approach"""
|
||||
|
||||
|
||||
logger = logger.getChild('engines')
|
||||
|
@ -121,6 +126,11 @@ def load_engine(engine_data: dict) -> Optional[ConfiguredEngine]:
|
|||
logger.exception('Cannot load engine "{}"'.format(engine_module))
|
||||
return None
|
||||
|
||||
if engine_data['engine'] in _NEXTGEN_ENGINES:
|
||||
engine = getattr(engine, _NEXTGEN_ENGINES[engine_data['engine']])(
|
||||
logger=logger.getChild(engine_name),
|
||||
)
|
||||
|
||||
update_engine_attributes(engine, engine_data)
|
||||
set_language_attributes(engine)
|
||||
update_attributes_for_tor(engine)
|
||||
|
@ -204,6 +214,7 @@ def _get_supported_languages(engine: ConfiguredEngine) -> Collection[str]:
|
|||
|
||||
|
||||
def set_language_attributes(engine: ConfiguredEngine):
|
||||
if not isinstance(engine, Engine):
|
||||
engine.supported_languages = _get_supported_languages(engine)
|
||||
|
||||
# find custom aliases for non standard language codes
|
||||
|
|
|
@ -6,6 +6,7 @@ from typing import List, NamedTuple, Set
|
|||
from urllib.parse import urlparse, unquote
|
||||
|
||||
from searx import logger
|
||||
from searx.engine import InfoBox, StandardResult
|
||||
from searx.engines import engines
|
||||
from searx.metrics import histogram_observe, counter_add, count_error
|
||||
|
||||
|
@ -195,6 +196,17 @@ class ResultContainer:
|
|||
standard_result_count = 0
|
||||
error_msgs = set()
|
||||
for result in list(results):
|
||||
if isinstance(result, InfoBox):
|
||||
result = {
|
||||
'infobox': result.title,
|
||||
'id': result.url,
|
||||
'content': result.content,
|
||||
'img_src': result.img_src,
|
||||
'urls': result.links,
|
||||
}
|
||||
elif isinstance(result, StandardResult):
|
||||
result = result.__dict__
|
||||
|
||||
result['engine'] = engine_name
|
||||
if 'suggestion' in result and self.on_result(result):
|
||||
self.suggestions.add(result['suggestion'])
|
||||
|
|
|
@ -17,6 +17,7 @@ from searx.exceptions import (
|
|||
SearxEngineTooManyRequestsException,
|
||||
)
|
||||
from searx.metrics.error_recorder import count_error
|
||||
from searx.engine import OnlineEngine, QueryContext
|
||||
from .abstract import EngineProcessor
|
||||
|
||||
|
||||
|
@ -114,6 +115,20 @@ class OnlineProcessor(EngineProcessor):
|
|||
def _search_basic(self, query, params):
|
||||
# update request parameters dependent on
|
||||
# search-engine (contained in engines folder)
|
||||
if isinstance(self.engine, OnlineEngine):
|
||||
params.update(
|
||||
self.engine.request(
|
||||
query,
|
||||
QueryContext(
|
||||
category=params['category'],
|
||||
safesearch=params['safesearch'],
|
||||
time_range=params['time_range'],
|
||||
pageno=params['pageno'],
|
||||
language=params['language'],
|
||||
),
|
||||
).__dict__
|
||||
)
|
||||
else:
|
||||
self.engine.request(query, params)
|
||||
|
||||
# ignoring empty urls
|
||||
|
|
Loading…
Add table
Reference in a new issue