forked from zaclys/searxng
[mod] replace engines_languages.json by engines_traits.json
Implementations of the *traits* of the engines. Engine's traits are fetched from the origin engine and stored in a JSON file in the *data folder*. Most often traits are languages and region codes and their mapping from SearXNG's representation to the representation in the origin search engine. To load traits from the persistence:: searx.enginelib.traits.EngineTraitsMap.from_data() For new traits new properties can be added to the class:: searx.enginelib.traits.EngineTraits .. hint:: Implementation is downward compatible to the deprecated *supported_languages method* from the vintage implementation. The vintage code is tagged as *deprecated* and can be removed when all engines have been ported to the *traits method*. Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
64fea2f9cb
commit
6e5f22e558
|
@ -17,7 +17,7 @@ jobs:
|
||||||
- update_currencies.py
|
- update_currencies.py
|
||||||
- update_external_bangs.py
|
- update_external_bangs.py
|
||||||
- update_firefox_version.py
|
- update_firefox_version.py
|
||||||
- update_languages.py
|
- update_engine_traits.py
|
||||||
- update_wikidata_units.py
|
- update_wikidata_units.py
|
||||||
- update_engine_descriptions.py
|
- update_engine_descriptions.py
|
||||||
steps:
|
steps:
|
||||||
|
|
|
@ -42,7 +42,7 @@ Explanation of the :ref:`general engine configuration` shown in the table
|
||||||
- Timeout
|
- Timeout
|
||||||
- Weight
|
- Weight
|
||||||
- Paging
|
- Paging
|
||||||
- Language
|
- Language, Region
|
||||||
- Safe search
|
- Safe search
|
||||||
- Time range
|
- Time range
|
||||||
|
|
||||||
|
|
|
@ -569,10 +569,13 @@ engine is shown. Most of the options have a default value or even are optional.
|
||||||
To disable by default the engine, but not deleting it. It will allow the user
|
To disable by default the engine, but not deleting it. It will allow the user
|
||||||
to manually activate it in the settings.
|
to manually activate it in the settings.
|
||||||
|
|
||||||
|
``inactive``: optional
|
||||||
|
Remove the engine from the settings (*disabled & removed*).
|
||||||
|
|
||||||
``language`` : optional
|
``language`` : optional
|
||||||
If you want to use another language for a specific engine, you can define it
|
If you want to use another language for a specific engine, you can define it
|
||||||
by using the full ISO code of language and country, like ``fr_FR``, ``en_US``,
|
by using the ISO code of language (and region), like ``fr``, ``en-US``,
|
||||||
``de_DE``.
|
``de-DE``.
|
||||||
|
|
||||||
``tokens`` : optional
|
``tokens`` : optional
|
||||||
A list of secret tokens to make this engine *private*, more details see
|
A list of secret tokens to make this engine *private*, more details see
|
||||||
|
|
|
@ -127,6 +127,10 @@ extensions = [
|
||||||
'notfound.extension', # https://github.com/readthedocs/sphinx-notfound-page
|
'notfound.extension', # https://github.com/readthedocs/sphinx-notfound-page
|
||||||
]
|
]
|
||||||
|
|
||||||
|
autodoc_default_options = {
|
||||||
|
'member-order': 'groupwise',
|
||||||
|
}
|
||||||
|
|
||||||
myst_enable_extensions = [
|
myst_enable_extensions = [
|
||||||
"replacements", "smartquotes"
|
"replacements", "smartquotes"
|
||||||
]
|
]
|
||||||
|
@ -135,6 +139,7 @@ suppress_warnings = ['myst.domains']
|
||||||
|
|
||||||
intersphinx_mapping = {
|
intersphinx_mapping = {
|
||||||
"python": ("https://docs.python.org/3/", None),
|
"python": ("https://docs.python.org/3/", None),
|
||||||
|
"babel" : ("https://babel.readthedocs.io/en/latest/", None),
|
||||||
"flask": ("https://flask.palletsprojects.com/", None),
|
"flask": ("https://flask.palletsprojects.com/", None),
|
||||||
"flask_babel": ("https://python-babel.github.io/flask-babel/", None),
|
"flask_babel": ("https://python-babel.github.io/flask-babel/", None),
|
||||||
# "werkzeug": ("https://werkzeug.palletsprojects.com/", None),
|
# "werkzeug": ("https://werkzeug.palletsprojects.com/", None),
|
||||||
|
|
|
@ -54,6 +54,7 @@ Engine File
|
||||||
- ``offline`` :ref:`[ref] <offline engines>`
|
- ``offline`` :ref:`[ref] <offline engines>`
|
||||||
- ``online_dictionary``
|
- ``online_dictionary``
|
||||||
- ``online_currency``
|
- ``online_currency``
|
||||||
|
- ``online_url_search``
|
||||||
======================= =========== ========================================================
|
======================= =========== ========================================================
|
||||||
|
|
||||||
.. _engine settings:
|
.. _engine settings:
|
||||||
|
@ -131,8 +132,10 @@ Passed Arguments (request)
|
||||||
These arguments can be used to construct the search query. Furthermore,
|
These arguments can be used to construct the search query. Furthermore,
|
||||||
parameters with default value can be redefined for special purposes.
|
parameters with default value can be redefined for special purposes.
|
||||||
|
|
||||||
|
.. _engine request online:
|
||||||
|
|
||||||
.. table:: If the ``engine_type`` is ``online``
|
.. table:: If the ``engine_type`` is :py:obj:`online
|
||||||
|
<searx.search.processors.online.OnlineProcessor.get_params>`
|
||||||
:width: 100%
|
:width: 100%
|
||||||
|
|
||||||
====================== ============== ========================================================================
|
====================== ============== ========================================================================
|
||||||
|
@ -149,12 +152,16 @@ parameters with default value can be redefined for special purposes.
|
||||||
safesearch int ``0``, between ``0`` and ``2`` (normal, moderate, strict)
|
safesearch int ``0``, between ``0`` and ``2`` (normal, moderate, strict)
|
||||||
time_range Optional[str] ``None``, can be ``day``, ``week``, ``month``, ``year``
|
time_range Optional[str] ``None``, can be ``day``, ``week``, ``month``, ``year``
|
||||||
pageno int current pagenumber
|
pageno int current pagenumber
|
||||||
language str specific language code like ``'en_US'``, or ``'all'`` if unspecified
|
searxng_locale str SearXNG's locale selected by user. Specific language code like
|
||||||
|
``'en'``, ``'en-US'``, or ``'all'`` if unspecified.
|
||||||
====================== ============== ========================================================================
|
====================== ============== ========================================================================
|
||||||
|
|
||||||
|
|
||||||
.. table:: If the ``engine_type`` is ``online_dictionary``, in addition to the
|
.. _engine request online_dictionary:
|
||||||
``online`` arguments:
|
|
||||||
|
.. table:: If the ``engine_type`` is :py:obj:`online_dictionary
|
||||||
|
<searx.search.processors.online_dictionary.OnlineDictionaryProcessor.get_params>`,
|
||||||
|
in addition to the :ref:`online <engine request online>` arguments:
|
||||||
:width: 100%
|
:width: 100%
|
||||||
|
|
||||||
====================== ============== ========================================================================
|
====================== ============== ========================================================================
|
||||||
|
@ -165,8 +172,11 @@ parameters with default value can be redefined for special purposes.
|
||||||
query str the text query without the languages
|
query str the text query without the languages
|
||||||
====================== ============== ========================================================================
|
====================== ============== ========================================================================
|
||||||
|
|
||||||
.. table:: If the ``engine_type`` is ``online_currency```, in addition to the
|
.. _engine request online_currency:
|
||||||
``online`` arguments:
|
|
||||||
|
.. table:: If the ``engine_type`` is :py:obj:`online_currency
|
||||||
|
<searx.search.processors.online_currency.OnlineCurrencyProcessor.get_params>`,
|
||||||
|
in addition to the :ref:`online <engine request online>` arguments:
|
||||||
:width: 100%
|
:width: 100%
|
||||||
|
|
||||||
====================== ============== ========================================================================
|
====================== ============== ========================================================================
|
||||||
|
@ -179,6 +189,26 @@ parameters with default value can be redefined for special purposes.
|
||||||
to_name str currency name
|
to_name str currency name
|
||||||
====================== ============== ========================================================================
|
====================== ============== ========================================================================
|
||||||
|
|
||||||
|
.. _engine request online_url_search:
|
||||||
|
|
||||||
|
.. table:: If the ``engine_type`` is :py:obj:`online_url_search
|
||||||
|
<searx.search.processors.online_url_search.OnlineUrlSearchProcessor.get_params>`,
|
||||||
|
in addition to the :ref:`online <engine request online>` arguments:
|
||||||
|
:width: 100%
|
||||||
|
|
||||||
|
====================== ============== ========================================================================
|
||||||
|
argument type default-value, information
|
||||||
|
====================== ============== ========================================================================
|
||||||
|
search_url dict URLs from the search query:
|
||||||
|
|
||||||
|
.. code:: python
|
||||||
|
|
||||||
|
{
|
||||||
|
'http': str,
|
||||||
|
'ftp': str,
|
||||||
|
'data:image': str
|
||||||
|
}
|
||||||
|
====================== ============== ========================================================================
|
||||||
|
|
||||||
Specify Request
|
Specify Request
|
||||||
---------------
|
---------------
|
||||||
|
|
|
@ -52,12 +52,12 @@ Scripts to update static data in :origin:`searx/data/`
|
||||||
:members:
|
:members:
|
||||||
|
|
||||||
|
|
||||||
``update_languages.py``
|
``update_engine_traits.py``
|
||||||
=======================
|
===========================
|
||||||
|
|
||||||
:origin:`[source] <searxng_extra/update/update_languages.py>`
|
:origin:`[source] <searxng_extra/update/update_engine_traits.py>`
|
||||||
|
|
||||||
.. automodule:: searxng_extra.update.update_languages
|
.. automodule:: searxng_extra.update.update_engine_traits
|
||||||
:members:
|
:members:
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,17 @@
|
||||||
|
.. _searx.enginelib:
|
||||||
|
|
||||||
|
============
|
||||||
|
Engine model
|
||||||
|
============
|
||||||
|
|
||||||
|
.. automodule:: searx.enginelib
|
||||||
|
:members:
|
||||||
|
|
||||||
|
.. _searx.enginelib.traits:
|
||||||
|
|
||||||
|
=============
|
||||||
|
Engine traits
|
||||||
|
=============
|
||||||
|
|
||||||
|
.. automodule:: searx.enginelib.traits
|
||||||
|
:members:
|
|
@ -1,8 +1,8 @@
|
||||||
.. _load_engines:
|
.. _searx.engines:
|
||||||
|
|
||||||
============
|
=================
|
||||||
Load Engines
|
SearXNG's engines
|
||||||
============
|
=================
|
||||||
|
|
||||||
.. automodule:: searx.engines
|
.. automodule:: searx.engines
|
||||||
:members:
|
:members:
|
||||||
|
|
|
@ -0,0 +1,47 @@
|
||||||
|
.. _searx.search.processors:
|
||||||
|
|
||||||
|
=================
|
||||||
|
Search processors
|
||||||
|
=================
|
||||||
|
|
||||||
|
.. contents:: Contents
|
||||||
|
:depth: 2
|
||||||
|
:local:
|
||||||
|
:backlinks: entry
|
||||||
|
|
||||||
|
|
||||||
|
Abstract processor class
|
||||||
|
========================
|
||||||
|
|
||||||
|
.. automodule:: searx.search.processors.abstract
|
||||||
|
:members:
|
||||||
|
|
||||||
|
Offline processor
|
||||||
|
=================
|
||||||
|
|
||||||
|
.. automodule:: searx.search.processors.offline
|
||||||
|
:members:
|
||||||
|
|
||||||
|
Online processor
|
||||||
|
================
|
||||||
|
|
||||||
|
.. automodule:: searx.search.processors.online
|
||||||
|
:members:
|
||||||
|
|
||||||
|
Online currency processor
|
||||||
|
=========================
|
||||||
|
|
||||||
|
.. automodule:: searx.search.processors.online_currency
|
||||||
|
:members:
|
||||||
|
|
||||||
|
Online Dictionary processor
|
||||||
|
===========================
|
||||||
|
|
||||||
|
.. automodule:: searx.search.processors.online_dictionary
|
||||||
|
:members:
|
||||||
|
|
||||||
|
Online URL search processor
|
||||||
|
===========================
|
||||||
|
|
||||||
|
.. automodule:: searx.search.processors.online_url_search
|
||||||
|
:members:
|
2
manage
2
manage
|
@ -63,7 +63,7 @@ PYLINT_SEARXNG_DISABLE_OPTION="\
|
||||||
I,C,R,\
|
I,C,R,\
|
||||||
W0105,W0212,W0511,W0603,W0613,W0621,W0702,W0703,W1401,\
|
W0105,W0212,W0511,W0603,W0613,W0621,W0702,W0703,W1401,\
|
||||||
E1136"
|
E1136"
|
||||||
PYLINT_ADDITIONAL_BUILTINS_FOR_ENGINES="supported_languages,language_aliases,logger,categories"
|
PYLINT_ADDITIONAL_BUILTINS_FOR_ENGINES="traits,supported_languages,language_aliases,logger,categories"
|
||||||
PYLINT_OPTIONS="-m pylint -j 0 --rcfile .pylintrc"
|
PYLINT_OPTIONS="-m pylint -j 0 --rcfile .pylintrc"
|
||||||
|
|
||||||
help() {
|
help() {
|
||||||
|
|
|
@ -12,7 +12,7 @@ from lxml import etree
|
||||||
from httpx import HTTPError
|
from httpx import HTTPError
|
||||||
|
|
||||||
from searx import settings
|
from searx import settings
|
||||||
from searx.data import ENGINES_LANGUAGES
|
from searx.engines import engines
|
||||||
from searx.network import get as http_get
|
from searx.network import get as http_get
|
||||||
from searx.exceptions import SearxEngineResponseException
|
from searx.exceptions import SearxEngineResponseException
|
||||||
|
|
||||||
|
@ -111,7 +111,7 @@ def seznam(query, _lang):
|
||||||
|
|
||||||
def startpage(query, lang):
|
def startpage(query, lang):
|
||||||
# startpage autocompleter
|
# startpage autocompleter
|
||||||
lui = ENGINES_LANGUAGES['startpage'].get(lang, 'english')
|
lui = engines['startpage'].supported_languages.get(lang, 'english') # vintage / deprecated
|
||||||
url = 'https://startpage.com/suggestions?{query}'
|
url = 'https://startpage.com/suggestions?{query}'
|
||||||
resp = get(url.format(query=urlencode({'q': query, 'segment': 'startpage.udog', 'lui': lui})))
|
resp = get(url.format(query=urlencode({'q': query, 'segment': 'startpage.udog', 'lui': lui})))
|
||||||
data = resp.json()
|
data = resp.json()
|
||||||
|
@ -177,12 +177,19 @@ backends = {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def search_autocomplete(backend_name, query, lang):
|
def search_autocomplete(backend_name, query, sxng_locale):
|
||||||
backend = backends.get(backend_name)
|
backend = backends.get(backend_name)
|
||||||
if backend is None:
|
if backend is None:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
if engines[backend_name].traits.data_type != "traits_v1":
|
||||||
|
# vintage / deprecated
|
||||||
|
if not sxng_locale or sxng_locale == 'all':
|
||||||
|
sxng_locale = 'en'
|
||||||
|
else:
|
||||||
|
sxng_locale = sxng_locale.split('-')[0]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return backend(query, lang)
|
return backend(query, sxng_locale)
|
||||||
except (HTTPError, SearxEngineResponseException):
|
except (HTTPError, SearxEngineResponseException):
|
||||||
return []
|
return []
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
"""
|
"""
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
'ENGINES_LANGUAGES',
|
'ENGINE_TRAITS',
|
||||||
'CURRENCIES',
|
'CURRENCIES',
|
||||||
'USER_AGENTS',
|
'USER_AGENTS',
|
||||||
'EXTERNAL_URLS',
|
'EXTERNAL_URLS',
|
||||||
|
@ -42,7 +42,6 @@ def ahmia_blacklist_loader():
|
||||||
return f.read().split()
|
return f.read().split()
|
||||||
|
|
||||||
|
|
||||||
ENGINES_LANGUAGES = _load('engines_languages.json')
|
|
||||||
CURRENCIES = _load('currencies.json')
|
CURRENCIES = _load('currencies.json')
|
||||||
USER_AGENTS = _load('useragents.json')
|
USER_AGENTS = _load('useragents.json')
|
||||||
EXTERNAL_URLS = _load('external_urls.json')
|
EXTERNAL_URLS = _load('external_urls.json')
|
||||||
|
@ -50,3 +49,4 @@ WIKIDATA_UNITS = _load('wikidata_units.json')
|
||||||
EXTERNAL_BANGS = _load('external_bangs.json')
|
EXTERNAL_BANGS = _load('external_bangs.json')
|
||||||
OSM_KEYS_TAGS = _load('osm_keys_tags.json')
|
OSM_KEYS_TAGS = _load('osm_keys_tags.json')
|
||||||
ENGINE_DESCRIPTIONS = _load('engine_descriptions.json')
|
ENGINE_DESCRIPTIONS = _load('engine_descriptions.json')
|
||||||
|
ENGINE_TRAITS = _load('engine_traits.json')
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,143 @@
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""Engine related implementations
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
The long term goal is to modularize all relevant implementations to the
|
||||||
|
engines here in this Python package. In addition to improved modularization,
|
||||||
|
this will also be necessary in part because the probability of circular
|
||||||
|
imports will increase due to the increased typification of implementations in
|
||||||
|
the future.
|
||||||
|
|
||||||
|
ToDo:
|
||||||
|
|
||||||
|
- move :py:obj:`searx.engines.load_engine` to a new module `searx.enginelib`.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
from typing import Union, Dict, List, Callable, TYPE_CHECKING
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from searx.enginelib import traits
|
||||||
|
|
||||||
|
|
||||||
|
class Engine: # pylint: disable=too-few-public-methods
|
||||||
|
"""Class of engine instances built from YAML settings.
|
||||||
|
|
||||||
|
Further documentation see :ref:`general engine configuration`.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
This class is currently never initialized and only used for type hinting.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Common options in the engine module
|
||||||
|
|
||||||
|
engine_type: str
|
||||||
|
"""Type of the engine (:origin:`searx/search/processors`)"""
|
||||||
|
|
||||||
|
paging: bool
|
||||||
|
"""Engine supports multiple pages."""
|
||||||
|
|
||||||
|
time_range_support: bool
|
||||||
|
"""Engine supports search time range."""
|
||||||
|
|
||||||
|
safesearch: bool
|
||||||
|
"""Engine supports SafeSearch"""
|
||||||
|
|
||||||
|
language_support: bool
|
||||||
|
"""Engine supports languages (locales) search."""
|
||||||
|
|
||||||
|
language: str
|
||||||
|
"""For an engine, when there is ``language: ...`` in the YAML settings the engine
|
||||||
|
does support only this one language:
|
||||||
|
|
||||||
|
.. code:: yaml
|
||||||
|
|
||||||
|
- name: google french
|
||||||
|
engine: google
|
||||||
|
language: fr
|
||||||
|
"""
|
||||||
|
|
||||||
|
region: str
|
||||||
|
"""For an engine, when there is ``region: ...`` in the YAML settings the engine
|
||||||
|
does support only this one region:
|
||||||
|
|
||||||
|
.. code:: yaml
|
||||||
|
|
||||||
|
- name: google belgium
|
||||||
|
engine: google
|
||||||
|
region: fr-BE
|
||||||
|
"""
|
||||||
|
|
||||||
|
fetch_traits: Callable
|
||||||
|
"""Function to fetch engine's traits from origin."""
|
||||||
|
|
||||||
|
traits: traits.EngineTraits
|
||||||
|
"""Traits of the engine."""
|
||||||
|
|
||||||
|
# settings.yml
|
||||||
|
|
||||||
|
categories: List[str]
|
||||||
|
"""Tabs, in which the engine is working."""
|
||||||
|
|
||||||
|
name: str
|
||||||
|
"""Name that will be used across SearXNG to define this engine. In settings, on
|
||||||
|
the result page .."""
|
||||||
|
|
||||||
|
engine: str
|
||||||
|
"""Name of the python file used to handle requests and responses to and from
|
||||||
|
this search engine (file name from :origin:`searx/engines` without
|
||||||
|
``.py``)."""
|
||||||
|
|
||||||
|
enable_http: bool
|
||||||
|
"""Enable HTTP (by default only HTTPS is enabled)."""
|
||||||
|
|
||||||
|
shortcut: str
|
||||||
|
"""Code used to execute bang requests (``!foo``)"""
|
||||||
|
|
||||||
|
timeout: float
|
||||||
|
"""Specific timeout for search-engine."""
|
||||||
|
|
||||||
|
display_error_messages: bool
|
||||||
|
"""Display error messages on the web UI."""
|
||||||
|
|
||||||
|
proxies: dict
|
||||||
|
"""Set proxies for a specific engine (YAML):
|
||||||
|
|
||||||
|
.. code:: yaml
|
||||||
|
|
||||||
|
proxies :
|
||||||
|
http: socks5://proxy:port
|
||||||
|
https: socks5://proxy:port
|
||||||
|
"""
|
||||||
|
|
||||||
|
disabled: bool
|
||||||
|
"""To disable by default the engine, but not deleting it. It will allow the
|
||||||
|
user to manually activate it in the settings."""
|
||||||
|
|
||||||
|
inactive: bool
|
||||||
|
"""Remove the engine from the settings (*disabled & removed*)."""
|
||||||
|
|
||||||
|
about: dict
|
||||||
|
"""Additional fields describing the engine.
|
||||||
|
|
||||||
|
.. code:: yaml
|
||||||
|
|
||||||
|
about:
|
||||||
|
website: https://example.com
|
||||||
|
wikidata_id: Q306656
|
||||||
|
official_api_documentation: https://example.com/api-doc
|
||||||
|
use_official_api: true
|
||||||
|
require_api_key: true
|
||||||
|
results: HTML
|
||||||
|
"""
|
||||||
|
|
||||||
|
# deprecated properties
|
||||||
|
|
||||||
|
_fetch_supported_languages: Callable # deprecated use fetch_traits
|
||||||
|
supported_languages: Union[List[str], Dict[str, str]] # deprecated use traits
|
||||||
|
language_aliases: Dict[str, str] # deprecated not needed when using traits
|
||||||
|
supported_languages_url: str # deprecated not needed when using traits
|
|
@ -0,0 +1,387 @@
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""Engine's traits are fetched from the origin engines and stored in a JSON file
|
||||||
|
in the *data folder*. Most often traits are languages and region codes and
|
||||||
|
their mapping from SearXNG's representation to the representation in the origin
|
||||||
|
search engine. For new traits new properties can be added to the class
|
||||||
|
:py:class:`EngineTraits`.
|
||||||
|
|
||||||
|
To load traits from the persistence :py:obj:`EngineTraitsMap.from_data` can be
|
||||||
|
used.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
import json
|
||||||
|
import dataclasses
|
||||||
|
from typing import Dict, Union, List, Callable, Optional, TYPE_CHECKING
|
||||||
|
from typing_extensions import Literal, Self
|
||||||
|
|
||||||
|
from babel.localedata import locale_identifiers
|
||||||
|
|
||||||
|
from searx import locales
|
||||||
|
from searx.data import data_dir, ENGINE_TRAITS
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from . import Engine
|
||||||
|
|
||||||
|
|
||||||
|
class EngineTraitsEncoder(json.JSONEncoder):
    """JSON encoder that is able to serialize :class:`EngineTraits` objects.

    Objects of any other type are handed over to the implementation of the
    base class, see :class:`json.JSONEncoder`.
    """

    def default(self, o):
        """Return the attribute dictionary of a :class:`EngineTraits` object."""
        if not isinstance(o, EngineTraits):
            # not one of ours: let the base class decide what to do
            return super().default(o)
        return o.__dict__
|
||||||
|
|
||||||
|
|
||||||
|
@dataclasses.dataclass
|
||||||
|
class EngineTraits:
|
||||||
|
"""The class is intended to be instantiated for each engine."""
|
||||||
|
|
||||||
|
regions: Dict[str, str] = dataclasses.field(default_factory=dict)
|
||||||
|
"""Maps SearXNG's internal representation of a region to the one of the engine.
|
||||||
|
|
||||||
|
SearXNG's internal representation can be parsed by babel and the value is
|
||||||
|
sent to the engine:
|
||||||
|
|
||||||
|
.. code:: python
|
||||||
|
|
||||||
|
regions ={
|
||||||
|
'fr-BE' : <engine's region name>,
|
||||||
|
}
|
||||||
|
|
||||||
|
for key, engine_region in regions.items():
|
||||||
|
searxng_region = babel.Locale.parse(key, sep='-')
|
||||||
|
...
|
||||||
|
"""
|
||||||
|
|
||||||
|
languages: Dict[str, str] = dataclasses.field(default_factory=dict)
|
||||||
|
"""Maps SearXNG's internal representation of a language to the one of the engine.
|
||||||
|
|
||||||
|
SearXNG's internal representation can be parsed by babel and the value is
|
||||||
|
sent to the engine:
|
||||||
|
|
||||||
|
.. code:: python
|
||||||
|
|
||||||
|
languages = {
|
||||||
|
'ca' : <engine's language name>,
|
||||||
|
}
|
||||||
|
|
||||||
|
for key, engine_lang in languages.items():
|
||||||
|
searxng_lang = babel.Locale.parse(key)
|
||||||
|
...
|
||||||
|
"""
|
||||||
|
|
||||||
|
all_locale: Optional[str] = None
|
||||||
|
"""To which locale value SearXNG's ``all`` language is mapped (shown as "Default
|
||||||
|
language").
|
||||||
|
"""
|
||||||
|
|
||||||
|
data_type: Literal['traits_v1', 'supported_languages'] = 'traits_v1'
|
||||||
|
"""Data type, default is 'traits_v1' for vintage use 'supported_languages'.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
For the transition period until the *fetch* functions of all the engines
|
||||||
|
are converted there will be the data_type 'supported_languages', which
|
||||||
|
maps the old logic unchanged 1:1.
|
||||||
|
|
||||||
|
Instances of data_type 'supported_languages' do not implement methods
|
||||||
|
like ``self.get_language(..)`` and ``self.get_region(..)``
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
custom: Dict[str, Dict] = dataclasses.field(default_factory=dict)
|
||||||
|
"""A place to store engine's custom traits, not related to the SearXNG core
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
def get_language(self, searxng_locale: str, default=None):
    """Return the engine's language string that *best fits* SearXNG's locale.

    :param searxng_locale: SearXNG's internal representation of the locale
      selected by the user.

    :param default: engine's default language

    The special value ``all`` is answered from ``self.all_locale`` when that
    is set (not ``None``).  Every other request is delegated to
    :py:obj:`locales.get_engine_locale`, which is called with the
    ``languages`` mapping of this object.
    """
    all_requested = searxng_locale == 'all'
    if not all_requested or self.all_locale is None:
        return locales.get_engine_locale(searxng_locale, self.languages, default=default)
    return self.all_locale
|
||||||
|
|
||||||
|
def get_region(self, searxng_locale: str, default=None):
    """Return the engine's region string that *best fits* SearXNG's locale.

    :param searxng_locale: SearXNG's internal representation of the locale
      selected by the user.

    :param default: engine's default region

    The special value ``all`` is answered from ``self.all_locale`` when that
    is set (not ``None``).  Every other request is delegated to
    :py:obj:`locales.get_engine_locale`, which is called with the ``regions``
    mapping of this object.
    """
    all_requested = searxng_locale == 'all'
    if not all_requested or self.all_locale is None:
        return locales.get_engine_locale(searxng_locale, self.regions, default=default)
    return self.all_locale
|
||||||
|
|
||||||
|
def is_locale_supported(self, searxng_locale: str) -> bool:
    """Tell whether the engine supports SearXNG's locale ``searxng_locale``.

    For traits of type ``traits_v1`` a *locale* (SearXNG's internal
    representation) is considered to be supported if the *region* or the
    *language* is supported by the engine.  For verification the methods
    :py:func:`self.get_region` and :py:func:`self.get_language` are used.

    For the deprecated type ``supported_languages`` the special value ``all``
    is always supported, any other locale is checked with
    ``searx.utils.match_language``.

    :param searxng_locale: SearXNG's internal representation of a locale.
    :raises TypeError: if ``self.data_type`` is neither ``traits_v1`` nor
      ``supported_languages``.
    """
    if self.data_type == 'traits_v1':
        return bool(self.get_region(searxng_locale) or self.get_language(searxng_locale))

    if self.data_type == 'supported_languages':  # vintage / deprecated
        # pylint: disable=import-outside-toplevel
        from searx.utils import match_language

        if searxng_locale == 'all':
            return True

        # ``language_aliases`` is not declared as a dataclass field, it only
        # exists when it has been set on the instance.  Fall back to "no
        # aliases" instead of failing with an AttributeError.
        aliases = getattr(self, 'language_aliases', {})
        return bool(match_language(searxng_locale, self.supported_languages, aliases, None))

    raise TypeError('engine traits of type %s is unknown' % self.data_type)
|
||||||
|
|
||||||
|
def copy(self):
    """Return a new :class:`EngineTraits` built from the dataclass fields of this object."""
    field_values = dataclasses.asdict(self)
    return EngineTraits(**field_values)
|
||||||
|
|
||||||
|
@classmethod
def fetch_traits(cls, engine: Engine) -> Union[Self, None]:
    """Build a traits object by calling ``fetch_traits(engine_traits)`` from the
    engine's namespace.

    :param engine: engine namespace that may provide a ``fetch_traits`` function.
    :return: a new instance of ``cls`` after the engine's function has been
      called with it, or ``None`` if the engine has no (truthy)
      ``fetch_traits`` attribute.
    """
    fetcher = getattr(engine, 'fetch_traits', None)
    if not fetcher:
        return None

    # the engine's function fills the properties of the fresh object in place
    engine_traits = cls()
    fetcher(engine_traits)
    return engine_traits
|
||||||
|
|
||||||
|
def set_traits(self, engine: Engine):
    """Set traits from this object in a :py:obj:`.Engine` namespace.

    Dispatches on ``self.data_type`` to ``self._set_traits_v1`` or to the
    deprecated ``self._set_supported_languages``.

    :param engine: engine instance built by :py:func:`searx.engines.load_engine`
    :raises TypeError: if ``self.data_type`` is neither ``traits_v1`` nor
      ``supported_languages``.
    """
    kind = self.data_type

    if kind == 'traits_v1':
        self._set_traits_v1(engine)
        return

    if kind == 'supported_languages':  # vintage / deprecated
        self._set_supported_languages(engine)
        return

    raise TypeError('engine traits of type %s is unknown' % self.data_type)
|
||||||
|
|
||||||
|
def _set_traits_v1(self, engine: Engine):
|
||||||
|
# For an engine, when there is `language: ...` in the YAML settings the engine
|
||||||
|
# does support only this one language (region)::
|
||||||
|
#
|
||||||
|
# - name: google italian
|
||||||
|
# engine: google
|
||||||
|
# language: it
|
||||||
|
# region: it-IT
|
||||||
|
|
||||||
|
traits = self.copy()
|
||||||
|
|
||||||
|
_msg = "settings.yml - engine: '%s' / %s: '%s' not supported"
|
||||||
|
|
||||||
|
languages = traits.languages
|
||||||
|
if hasattr(engine, 'language'):
|
||||||
|
if engine.language not in languages:
|
||||||
|
raise ValueError(_msg % (engine.name, 'language', engine.language))
|
||||||
|
traits.languages = {engine.language: languages[engine.language]}
|
||||||
|
|
||||||
|
regions = traits.regions
|
||||||
|
if hasattr(engine, 'region'):
|
||||||
|
if engine.region not in regions:
|
||||||
|
raise ValueError(_msg % (engine.name, 'region', engine.region))
|
||||||
|
traits.regions = {engine.region: regions[engine.region]}
|
||||||
|
|
||||||
|
engine.language_support = bool(traits.languages or traits.regions)
|
||||||
|
|
||||||
|
# set the copied & modified traits in engine's namespace
|
||||||
|
engine.traits = traits
|
||||||
|
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
# The code below is deprecated and can hopefully be deleted one day
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
supported_languages: Union[List[str], Dict[str, str]] = dataclasses.field(default_factory=dict)
|
||||||
|
"""deprecated: does not work for engines that do support languages based on a
|
||||||
|
region. With this type it is not guaranteed that the key values can be
|
||||||
|
parsed by :py:obj:`babel.Locale.parse`!
|
||||||
|
"""
|
||||||
|
|
||||||
|
# language_aliases: Dict[str, str] = dataclasses.field(default_factory=dict)
|
||||||
|
# """depricated: does not work for engines that do support languages based on a
|
||||||
|
# region. With this type it is not guaranteed that the key values can be
|
||||||
|
# parsed by :py:obj:`babel.Locale.parse`!
|
||||||
|
# """
|
||||||
|
|
||||||
|
BABEL_LANGS = [
|
||||||
|
lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0]
|
||||||
|
for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())
|
||||||
|
]
|
||||||
|
|
||||||
|
# def get_supported_language(self, searxng_locale, default=None): # vintage / deprecated
|
||||||
|
# """Return engine's language string that *best fits* to SearXNG's locale."""
|
||||||
|
# if searxng_locale == 'all' and self.all_locale is not None:
|
||||||
|
# return self.all_locale
|
||||||
|
# return locales.get_engine_locale(searxng_locale, self.supported_languages, default=default)
|
||||||
|
|
||||||
|
@classmethod  # vintage / deprecated
def fetch_supported_languages(cls, engine: Engine) -> Union[Self, None]:
    """DEPRECATED: Fetch the languages of the origin engine by calling the
    function ``_fetch_supported_languages`` from engine's namespace.

    :param engine: engine namespace; its ``supported_languages_url`` is
        requested and the response is handed to ``_fetch_supported_languages``.
    :return: a new traits object with ``data_type`` set to
        ``'supported_languages'`` or ``None`` if the engine has no
        ``_fetch_supported_languages`` function.
    """

    # pylint: disable=import-outside-toplevel
    from searx import network
    from searx.utils import gen_useragent

    fetcher = getattr(engine, '_fetch_supported_languages', None)
    if fetcher is None:
        return None

    # The headers has been moved here from commit 9b6ffed06: Some engines (at
    # least bing and startpage) return a different result list of supported
    # languages depending on the IP location where the HTTP request comes from.
    # The IP based results (from bing) can be avoided by setting a
    # 'Accept-Language' in the HTTP request.
    request_headers = {
        'User-Agent': gen_useragent(),
        'Accept-Language': "en-US,en;q=0.5",  # bing needs to set the English language
    }
    response = network.get(engine.supported_languages_url, headers=request_headers)

    languages = fetcher(response)
    if isinstance(languages, list):
        # only lists are sorted (in place), other types are stored as fetched
        languages.sort()

    result = cls()
    result.data_type = 'supported_languages'
    result.supported_languages = languages
    return result
|
||||||
|
|
||||||
|
def _set_supported_languages(self, engine: Engine):  # vintage / deprecated
    """DEPRECATED: Set ``supported_languages``, ``language_support``,
    ``language_aliases`` and ``traits`` in the namespace of the engine.

    The values are taken from a copy of this traits object.  If the engine
    has a ``language`` attribute, the supported languages of the copy are
    narrowed to this single language.

    :param engine: engine namespace to modify
    :raises ValueError: if ``engine.language`` is not one of the supported
        languages of this traits object.
    """
    engine_traits = self.copy()

    # pylint: disable=import-outside-toplevel
    from searx.utils import match_language

    if hasattr(engine, 'language'):
        if engine.language not in self.supported_languages:
            raise ValueError(
                "settings.yml - engine: '%s' / %s: '%s' not supported" % (engine.name, 'language', engine.language)
            )
        if isinstance(self.supported_languages, dict):
            narrowed = {engine.language: self.supported_languages[engine.language]}
        else:
            narrowed = [engine.language]
        engine_traits.supported_languages = narrowed

    engine.language_support = bool(engine_traits.supported_languages)
    engine.supported_languages = engine_traits.supported_languages

    # find custom aliases for non standard language codes
    aliases = {}
    for engine_lang in getattr(engine, 'language_aliases', {}):
        iso_lang = match_language(engine_lang, self.BABEL_LANGS, fallback=None)
        if not iso_lang or iso_lang == engine_lang:
            continue
        if engine_lang.startswith(iso_lang) or iso_lang in self.supported_languages:
            continue
        aliases[iso_lang] = engine_lang

    engine_traits.language_aliases = aliases  # pylint: disable=attribute-defined-outside-init
    engine.language_aliases = aliases

    # set the copied & modified traits in engine's namespace
    engine.traits = engine_traits
|
||||||
|
|
||||||
|
|
||||||
|
class EngineTraitsMap(Dict[str, EngineTraits]):
    """Dictionary of :class:`EngineTraits` objects, the key is the engine name."""

    ENGINE_TRAITS_FILE = (data_dir / 'engine_traits.json').resolve()
    """JSON file in which the :py:obj:`EngineTraitsMap` is persisted."""

    def save_data(self):
        """Dump this map as JSON into the file :py:obj:`self.ENGINE_TRAITS_FILE`."""
        with open(self.ENGINE_TRAITS_FILE, 'w', encoding='utf-8') as json_file:
            json.dump(self, json_file, indent=2, sort_keys=True, cls=EngineTraitsEncoder)

    @classmethod
    def from_data(cls) -> Self:
        """Build a :class:`EngineTraitsMap` from the items of :py:obj:`ENGINE_TRAITS`."""
        traits_map = cls()
        for engine_name, kwargs in ENGINE_TRAITS.items():
            traits_map[engine_name] = EngineTraits(**kwargs)
        return traits_map

    @classmethod
    def fetch_traits(cls, log: Callable) -> Self:
        """Fetch the traits of all engines in ``searx.engines.engines`` (in
        sorted order of the engine names).

        :param log: callable that is called with one message string per
            fetched set of traits.
        :return: a new map with an item for each engine for which traits
            and / or (deprecated) supported languages were fetched.
        """
        from searx import engines  # pylint: disable=cyclic-import, import-outside-toplevel

        traits_map = cls()

        for engine_name in sorted(engines.engines):
            engine = engines.engines[engine_name]

            traits = EngineTraits.fetch_traits(engine)
            if traits is not None:
                log("%-20s: SearXNG languages --> %s " % (engine_name, len(traits.languages)))
                log("%-20s: SearXNG regions --> %s" % (engine_name, len(traits.regions)))
                traits_map[engine_name] = traits

            # vintage / deprecated
            legacy_traits = EngineTraits.fetch_supported_languages(engine)
            if legacy_traits is None:
                continue

            log("%-20s: %s supported_languages (deprecated)" % (engine_name, len(legacy_traits.supported_languages)))
            if traits is None:
                traits_map[engine_name] = legacy_traits
            else:
                # merge the deprecated languages into the traits fetched above
                traits.supported_languages = legacy_traits.supported_languages
                traits_map[engine_name] = traits

        return traits_map

    def set_traits(self, engine: Engine):
        """Set traits in a :py:obj:`Engine` namespace.

        The traits are looked up by ``engine.name`` first and by
        ``engine.engine`` second.  If neither key exists, a new
        :class:`EngineTraits` of data type ``traits_v1`` is used.

        :param engine: engine instance build by :py:func:`searx.engines.load_engine`
        """
        if engine.name in self:
            selected = self[engine.name]
        elif engine.engine in self:
            # The key of the dictionary traits_map is the *engine name*
            # configured in settings.yml.  When multiple engines are configured
            # in settings.yml to use the same origin engine (python module)
            # these additional engines can use the languages from the origin
            # engine.  For this use the configured ``engine: ...`` from
            # settings.yml
            selected = self[engine.engine]
        else:
            selected = EngineTraits(data_type='traits_v1')

        selected.set_traits(engine)
|
|
@ -11,24 +11,22 @@ usage::
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import copy
|
import copy
|
||||||
from typing import Dict, List, Optional
|
|
||||||
|
|
||||||
from os.path import realpath, dirname
|
from os.path import realpath, dirname
|
||||||
from babel.localedata import locale_identifiers
|
|
||||||
from searx import logger, settings
|
|
||||||
from searx.data import ENGINES_LANGUAGES
|
|
||||||
from searx.network import get
|
|
||||||
from searx.utils import load_module, match_language, gen_useragent
|
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING, Dict, Optional
|
||||||
|
|
||||||
|
from searx import logger, settings
|
||||||
|
from searx.utils import load_module
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from searx.enginelib import Engine
|
||||||
|
|
||||||
logger = logger.getChild('engines')
|
logger = logger.getChild('engines')
|
||||||
ENGINE_DIR = dirname(realpath(__file__))
|
ENGINE_DIR = dirname(realpath(__file__))
|
||||||
BABEL_LANGS = [
|
|
||||||
lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0]
|
|
||||||
for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())
|
|
||||||
]
|
|
||||||
ENGINE_DEFAULT_ARGS = {
|
ENGINE_DEFAULT_ARGS = {
|
||||||
"engine_type": "online",
|
"engine_type": "online",
|
||||||
"inactive": False,
|
"inactive": False,
|
||||||
|
@ -36,8 +34,6 @@ ENGINE_DEFAULT_ARGS = {
|
||||||
"timeout": settings["outgoing"]["request_timeout"],
|
"timeout": settings["outgoing"]["request_timeout"],
|
||||||
"shortcut": "-",
|
"shortcut": "-",
|
||||||
"categories": ["general"],
|
"categories": ["general"],
|
||||||
"supported_languages": [],
|
|
||||||
"language_aliases": {},
|
|
||||||
"paging": False,
|
"paging": False,
|
||||||
"safesearch": False,
|
"safesearch": False,
|
||||||
"time_range_support": False,
|
"time_range_support": False,
|
||||||
|
@ -47,29 +43,13 @@ ENGINE_DEFAULT_ARGS = {
|
||||||
"send_accept_language_header": False,
|
"send_accept_language_header": False,
|
||||||
"tokens": [],
|
"tokens": [],
|
||||||
"about": {},
|
"about": {},
|
||||||
|
"supported_languages": [], # deprecated use traits
|
||||||
|
"language_aliases": {}, # deprecated not needed when using traits
|
||||||
}
|
}
|
||||||
# set automatically when an engine does not have any tab category
|
# set automatically when an engine does not have any tab category
|
||||||
OTHER_CATEGORY = 'other'
|
OTHER_CATEGORY = 'other'
|
||||||
|
|
||||||
|
|
||||||
class Engine: # pylint: disable=too-few-public-methods
|
|
||||||
"""This class is currently never initialized and only used for type hinting."""
|
|
||||||
|
|
||||||
name: str
|
|
||||||
engine: str
|
|
||||||
shortcut: str
|
|
||||||
categories: List[str]
|
|
||||||
supported_languages: List[str]
|
|
||||||
about: dict
|
|
||||||
inactive: bool
|
|
||||||
disabled: bool
|
|
||||||
language_support: bool
|
|
||||||
paging: bool
|
|
||||||
safesearch: bool
|
|
||||||
time_range_support: bool
|
|
||||||
timeout: float
|
|
||||||
|
|
||||||
|
|
||||||
# Defaults for the namespace of an engine module, see :py:func:`load_engine`
|
# Defaults for the namespace of an engine module, see :py:func:`load_engine`
|
||||||
|
|
||||||
categories = {'general': []}
|
categories = {'general': []}
|
||||||
|
@ -136,9 +116,15 @@ def load_engine(engine_data: dict) -> Optional[Engine]:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
update_engine_attributes(engine, engine_data)
|
update_engine_attributes(engine, engine_data)
|
||||||
set_language_attributes(engine)
|
|
||||||
update_attributes_for_tor(engine)
|
update_attributes_for_tor(engine)
|
||||||
|
|
||||||
|
# avoid cyclic imports
|
||||||
|
# pylint: disable=import-outside-toplevel
|
||||||
|
from searx.enginelib.traits import EngineTraitsMap
|
||||||
|
|
||||||
|
trait_map = EngineTraitsMap.from_data()
|
||||||
|
trait_map.set_traits(engine)
|
||||||
|
|
||||||
if not is_engine_active(engine):
|
if not is_engine_active(engine):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@ -190,60 +176,6 @@ def update_engine_attributes(engine: Engine, engine_data):
|
||||||
setattr(engine, arg_name, copy.deepcopy(arg_value))
|
setattr(engine, arg_name, copy.deepcopy(arg_value))
|
||||||
|
|
||||||
|
|
||||||
def set_language_attributes(engine: Engine):
|
|
||||||
# assign supported languages from json file
|
|
||||||
if engine.name in ENGINES_LANGUAGES:
|
|
||||||
engine.supported_languages = ENGINES_LANGUAGES[engine.name]
|
|
||||||
|
|
||||||
elif engine.engine in ENGINES_LANGUAGES:
|
|
||||||
# The key of the dictionary ENGINES_LANGUAGES is the *engine name*
|
|
||||||
# configured in settings.xml. When multiple engines are configured in
|
|
||||||
# settings.yml to use the same origin engine (python module) these
|
|
||||||
# additional engines can use the languages from the origin engine.
|
|
||||||
# For this use the configured ``engine: ...`` from settings.yml
|
|
||||||
engine.supported_languages = ENGINES_LANGUAGES[engine.engine]
|
|
||||||
|
|
||||||
if hasattr(engine, 'language'):
|
|
||||||
# For an engine, when there is `language: ...` in the YAML settings, the
|
|
||||||
# engine supports only one language, in this case
|
|
||||||
# engine.supported_languages should contains this value defined in
|
|
||||||
# settings.yml
|
|
||||||
if engine.language not in engine.supported_languages:
|
|
||||||
raise ValueError(
|
|
||||||
"settings.yml - engine: '%s' / language: '%s' not supported" % (engine.name, engine.language)
|
|
||||||
)
|
|
||||||
|
|
||||||
if isinstance(engine.supported_languages, dict):
|
|
||||||
engine.supported_languages = {engine.language: engine.supported_languages[engine.language]}
|
|
||||||
else:
|
|
||||||
engine.supported_languages = [engine.language]
|
|
||||||
|
|
||||||
# find custom aliases for non standard language codes
|
|
||||||
for engine_lang in engine.supported_languages:
|
|
||||||
iso_lang = match_language(engine_lang, BABEL_LANGS, fallback=None)
|
|
||||||
if (
|
|
||||||
iso_lang
|
|
||||||
and iso_lang != engine_lang
|
|
||||||
and not engine_lang.startswith(iso_lang)
|
|
||||||
and iso_lang not in engine.supported_languages
|
|
||||||
):
|
|
||||||
engine.language_aliases[iso_lang] = engine_lang
|
|
||||||
|
|
||||||
# language_support
|
|
||||||
engine.language_support = len(engine.supported_languages) > 0
|
|
||||||
|
|
||||||
# assign language fetching method if auxiliary method exists
|
|
||||||
if hasattr(engine, '_fetch_supported_languages'):
|
|
||||||
headers = {
|
|
||||||
'User-Agent': gen_useragent(),
|
|
||||||
'Accept-Language': "en-US,en;q=0.5", # bing needs to set the English language
|
|
||||||
}
|
|
||||||
engine.fetch_supported_languages = (
|
|
||||||
# pylint: disable=protected-access
|
|
||||||
lambda: engine._fetch_supported_languages(get(engine.supported_languages_url, headers=headers))
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def update_attributes_for_tor(engine: Engine) -> bool:
|
def update_attributes_for_tor(engine: Engine) -> bool:
|
||||||
if using_tor_proxy(engine) and hasattr(engine, 'onion_url'):
|
if using_tor_proxy(engine) and hasattr(engine, 'onion_url'):
|
||||||
engine.search_url = engine.onion_url + getattr(engine, 'search_path', '')
|
engine.search_url = engine.onion_url + getattr(engine, 'search_path', '')
|
||||||
|
|
|
@ -63,7 +63,7 @@ def search(query, request_params):
|
||||||
for row in result_list:
|
for row in result_list:
|
||||||
entry = {
|
entry = {
|
||||||
'query': query,
|
'query': query,
|
||||||
'language': request_params['language'],
|
'language': request_params['searxng_locale'],
|
||||||
'value': row.get("value"),
|
'value': row.get("value"),
|
||||||
# choose a result template or comment out to use the *default*
|
# choose a result template or comment out to use the *default*
|
||||||
'template': 'key-value.html',
|
'template': 'key-value.html',
|
||||||
|
|
|
@ -8,7 +8,7 @@ from typing import Set
|
||||||
import os
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
|
|
||||||
from babel import Locale
|
import babel
|
||||||
from babel.support import Translations
|
from babel.support import Translations
|
||||||
import babel.languages
|
import babel.languages
|
||||||
import babel.core
|
import babel.core
|
||||||
|
@ -134,7 +134,7 @@ def locales_initialize(directory=None):
|
||||||
flask_babel.get_translations = get_translations
|
flask_babel.get_translations = get_translations
|
||||||
|
|
||||||
for tag, descr in ADDITIONAL_TRANSLATIONS.items():
|
for tag, descr in ADDITIONAL_TRANSLATIONS.items():
|
||||||
locale = Locale.parse(LOCALE_BEST_MATCH[tag], sep='-')
|
locale = babel.Locale.parse(LOCALE_BEST_MATCH[tag], sep='-')
|
||||||
LOCALE_NAMES[tag] = descr
|
LOCALE_NAMES[tag] = descr
|
||||||
if locale.text_direction == 'rtl':
|
if locale.text_direction == 'rtl':
|
||||||
RTL_LOCALES.add(tag)
|
RTL_LOCALES.add(tag)
|
||||||
|
@ -142,7 +142,7 @@ def locales_initialize(directory=None):
|
||||||
for tag in LOCALE_BEST_MATCH:
|
for tag in LOCALE_BEST_MATCH:
|
||||||
descr = LOCALE_NAMES.get(tag)
|
descr = LOCALE_NAMES.get(tag)
|
||||||
if not descr:
|
if not descr:
|
||||||
locale = Locale.parse(tag, sep='-')
|
locale = babel.Locale.parse(tag, sep='-')
|
||||||
LOCALE_NAMES[tag] = get_locale_descr(locale, tag.replace('-', '_'))
|
LOCALE_NAMES[tag] = get_locale_descr(locale, tag.replace('-', '_'))
|
||||||
if locale.text_direction == 'rtl':
|
if locale.text_direction == 'rtl':
|
||||||
RTL_LOCALES.add(tag)
|
RTL_LOCALES.add(tag)
|
||||||
|
@ -154,12 +154,66 @@ def locales_initialize(directory=None):
|
||||||
tag = dirname.replace('_', '-')
|
tag = dirname.replace('_', '-')
|
||||||
descr = LOCALE_NAMES.get(tag)
|
descr = LOCALE_NAMES.get(tag)
|
||||||
if not descr:
|
if not descr:
|
||||||
locale = Locale.parse(dirname)
|
locale = babel.Locale.parse(dirname)
|
||||||
LOCALE_NAMES[tag] = get_locale_descr(locale, dirname)
|
LOCALE_NAMES[tag] = get_locale_descr(locale, dirname)
|
||||||
if locale.text_direction == 'rtl':
|
if locale.text_direction == 'rtl':
|
||||||
RTL_LOCALES.add(tag)
|
RTL_LOCALES.add(tag)
|
||||||
|
|
||||||
|
|
||||||
|
def region_tag(locale: babel.Locale) -> str:
    """Returns SearXNG's region tag from the locale (e.g. zh-TW , en-US).

    :param locale: locale whose ``language`` and ``territory`` are joined by
        a hyphen.
    :return: the region tag ``<language>-<territory>``
    :raises ValueError: if the locale has no territory.
    """
    if not locale.territory:
        # interpolate the locale, otherwise the message shows a literal '%s'
        raise ValueError('%s missed a territory' % (locale,))
    return locale.language + '-' + locale.territory
|
||||||
|
|
||||||
|
|
||||||
|
def language_tag(locale: babel.Locale) -> str:
    """Returns SearXNG's language tag from the locale.  If the locale has a
    script, the script name is appended with an underscore (e.g. en, zh_Hant).
    """
    if locale.script:
        return locale.language + '_' + locale.script
    return locale.language
|
||||||
|
|
||||||
|
|
||||||
|
def get_offical_locales(
    territory: str, languages=None, regional: bool = False, de_facto: bool = True
) -> Set[babel.Locale]:
    """Returns a set of :py:obj:`babel.Locale` built from the languages of
    :py:obj:`babel.languages.get_official_languages` and the territory.

    :param territory: The territory (country or region) code.

    :param languages: A list of language codes the languages from
      :py:obj:`babel.languages.get_official_languages` should be in
      (intersection, compared in lower case).  If this argument is ``None``
      (or empty), all official languages in this territory are used.

    :param regional: If the regional flag is set, then languages which are
      regionally official are also returned.

    :param de_facto: If the de_facto flag is set to `False`, then languages
      which are “de facto” official are not returned.

    """
    official = babel.languages.get_official_languages(territory, regional=regional, de_facto=de_facto)

    if languages:
        wanted = [code.lower() for code in languages]
        official = {code for code in official if code.lower() in wanted}

    locales = set()
    for code in official:
        try:
            locales.add(babel.Locale.parse(code + '_' + territory))
        except babel.UnknownLocaleError:
            # language / territory combinations unknown to babel are ignored
            pass

    return locales
|
||||||
|
|
||||||
|
|
||||||
def get_engine_locale(searxng_locale, engine_locales, default=None):
|
def get_engine_locale(searxng_locale, engine_locales, default=None):
|
||||||
"""Return engine's language (aka locale) string that best fits to argument
|
"""Return engine's language (aka locale) string that best fits to argument
|
||||||
``searxng_locale``.
|
``searxng_locale``.
|
||||||
|
@ -177,6 +231,10 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
|
||||||
...
|
...
|
||||||
'pl-PL' : 'pl_PL',
|
'pl-PL' : 'pl_PL',
|
||||||
'pt-PT' : 'pt_PT'
|
'pt-PT' : 'pt_PT'
|
||||||
|
..
|
||||||
|
'zh' : 'zh'
|
||||||
|
'zh_Hans' : 'zh'
|
||||||
|
'zh_Hant' : 'zh-classical'
|
||||||
}
|
}
|
||||||
|
|
||||||
.. hint::
|
.. hint::
|
||||||
|
@ -210,13 +268,13 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
|
||||||
engine.
|
engine.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
# pylint: disable=too-many-branches
|
# pylint: disable=too-many-branches, too-many-return-statements
|
||||||
|
|
||||||
engine_locale = engine_locales.get(searxng_locale)
|
engine_locale = engine_locales.get(searxng_locale)
|
||||||
|
|
||||||
if engine_locale is not None:
|
if engine_locale is not None:
|
||||||
# There was a 1:1 mapping (e.g. "fr-BE --> fr_BE" or "fr --> fr_FR"), no
|
# There was a 1:1 mapping (e.g. a region "fr-BE --> fr_BE" or a language
|
||||||
# need to narrow language nor territory.
|
# "zh --> zh"), no need to narrow language-script nor territory.
|
||||||
return engine_locale
|
return engine_locale
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -227,6 +285,12 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
|
||||||
except babel.core.UnknownLocaleError:
|
except babel.core.UnknownLocaleError:
|
||||||
return default
|
return default
|
||||||
|
|
||||||
|
searxng_lang = language_tag(locale)
|
||||||
|
engine_locale = engine_locales.get(searxng_lang)
|
||||||
|
if engine_locale is not None:
|
||||||
|
# There was a 1:1 mapping (e.g. "zh-HK --> zh_Hant" or "zh-CN --> zh_Hans")
|
||||||
|
return engine_locale
|
||||||
|
|
||||||
# SearXNG's selected locale is not supported by the engine ..
|
# SearXNG's selected locale is not supported by the engine ..
|
||||||
|
|
||||||
if locale.territory:
|
if locale.territory:
|
||||||
|
@ -247,10 +311,6 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
|
||||||
|
|
||||||
if locale.language:
|
if locale.language:
|
||||||
|
|
||||||
searxng_lang = locale.language
|
|
||||||
if locale.script:
|
|
||||||
searxng_lang += '_' + locale.script
|
|
||||||
|
|
||||||
terr_lang_dict = {}
|
terr_lang_dict = {}
|
||||||
for territory, langs in babel.core.get_global("territory_languages").items():
|
for territory, langs in babel.core.get_global("territory_languages").items():
|
||||||
if not langs.get(searxng_lang, {}).get('official_status'):
|
if not langs.get(searxng_lang, {}).get('official_status'):
|
||||||
|
|
|
@ -13,7 +13,7 @@ from typing import Iterable, Dict, List
|
||||||
import flask
|
import flask
|
||||||
|
|
||||||
from searx import settings, autocomplete
|
from searx import settings, autocomplete
|
||||||
from searx.engines import Engine
|
from searx.enginelib import Engine
|
||||||
from searx.plugins import Plugin
|
from searx.plugins import Plugin
|
||||||
from searx.locales import LOCALE_NAMES
|
from searx.locales import LOCALE_NAMES
|
||||||
from searx.webutils import VALID_LANGUAGE_CODE
|
from searx.webutils import VALID_LANGUAGE_CODE
|
||||||
|
|
|
@ -30,7 +30,10 @@ from .abstract import EngineProcessor
|
||||||
|
|
||||||
logger = logger.getChild('search.processors')
|
logger = logger.getChild('search.processors')
|
||||||
PROCESSORS: Dict[str, EngineProcessor] = {}
|
PROCESSORS: Dict[str, EngineProcessor] = {}
|
||||||
"""Cache request processores, stored by *engine-name* (:py:func:`initialize`)"""
|
"""Cache request processores, stored by *engine-name* (:py:func:`initialize`)
|
||||||
|
|
||||||
|
:meta hide-value:
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
def get_processor_class(engine_type):
|
def get_processor_class(engine_type):
|
||||||
|
|
|
@ -138,7 +138,8 @@ class EngineProcessor(ABC):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def get_params(self, search_query, engine_category):
|
def get_params(self, search_query, engine_category):
|
||||||
"""Returns a set of *request params* or ``None`` if request is not supported.
|
"""Returns a set of (see :ref:`request params <engine request arguments>`) or
|
||||||
|
``None`` if request is not supported.
|
||||||
|
|
||||||
Not supported conditions (``None`` is returned):
|
Not supported conditions (``None`` is returned):
|
||||||
|
|
||||||
|
@ -159,11 +160,20 @@ class EngineProcessor(ABC):
|
||||||
params['safesearch'] = search_query.safesearch
|
params['safesearch'] = search_query.safesearch
|
||||||
params['time_range'] = search_query.time_range
|
params['time_range'] = search_query.time_range
|
||||||
params['engine_data'] = search_query.engine_data.get(self.engine_name, {})
|
params['engine_data'] = search_query.engine_data.get(self.engine_name, {})
|
||||||
|
params['searxng_locale'] = search_query.lang
|
||||||
|
|
||||||
|
# deprecated / vintage --> use params['searxng_locale']
|
||||||
|
#
|
||||||
|
# Conditions related to engine's traits are implemented in engine.traits
|
||||||
|
# module. Don't do 'locale' decisions here in the abstract layer of the
|
||||||
|
# search processor, just pass the value from user's choice unchanged to
|
||||||
|
# the engine request.
|
||||||
|
|
||||||
if hasattr(self.engine, 'language') and self.engine.language:
|
if hasattr(self.engine, 'language') and self.engine.language:
|
||||||
params['language'] = self.engine.language
|
params['language'] = self.engine.language
|
||||||
else:
|
else:
|
||||||
params['language'] = search_query.lang
|
params['language'] = search_query.lang
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
|
|
|
@ -51,6 +51,9 @@ class OnlineProcessor(EngineProcessor):
|
||||||
super().initialize()
|
super().initialize()
|
||||||
|
|
||||||
def get_params(self, search_query, engine_category):
|
def get_params(self, search_query, engine_category):
|
||||||
|
"""Returns a set of :ref:`request params <engine request online>` or ``None``
|
||||||
|
if request is not supported.
|
||||||
|
"""
|
||||||
params = super().get_params(search_query, engine_category)
|
params = super().get_params(search_query, engine_category)
|
||||||
if params is None:
|
if params is None:
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -38,8 +38,8 @@ class OnlineCurrencyProcessor(OnlineProcessor):
|
||||||
engine_type = 'online_currency'
|
engine_type = 'online_currency'
|
||||||
|
|
||||||
def get_params(self, search_query, engine_category):
|
def get_params(self, search_query, engine_category):
|
||||||
"""Returns a set of *request params* or ``None`` if search query does not match
|
"""Returns a set of :ref:`request params <engine request online_currency>`
|
||||||
to :py:obj:`parser_re`."""
|
or ``None`` if search query does not match to :py:obj:`parser_re`."""
|
||||||
|
|
||||||
params = super().get_params(search_query, engine_category)
|
params = super().get_params(search_query, engine_category)
|
||||||
if params is None:
|
if params is None:
|
||||||
|
|
|
@ -18,8 +18,9 @@ class OnlineDictionaryProcessor(OnlineProcessor):
|
||||||
engine_type = 'online_dictionary'
|
engine_type = 'online_dictionary'
|
||||||
|
|
||||||
def get_params(self, search_query, engine_category):
|
def get_params(self, search_query, engine_category):
|
||||||
"""Returns a set of *request params* or ``None`` if search query does not match
|
"""Returns a set of :ref:`request params <engine request online_dictionary>` or
|
||||||
to :py:obj:`parser_re`."""
|
``None`` if search query does not match to :py:obj:`parser_re`.
|
||||||
|
"""
|
||||||
params = super().get_params(search_query, engine_category)
|
params = super().get_params(search_query, engine_category)
|
||||||
if params is None:
|
if params is None:
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -20,9 +20,10 @@ class OnlineUrlSearchProcessor(OnlineProcessor):
|
||||||
engine_type = 'online_url_search'
|
engine_type = 'online_url_search'
|
||||||
|
|
||||||
def get_params(self, search_query, engine_category):
|
def get_params(self, search_query, engine_category):
|
||||||
"""Returns a set of *request params* or ``None`` if search query does not match
|
"""Returns a set of :ref:`request params <engine request online>` or ``None`` if
|
||||||
to at least one of :py:obj:`re_search_urls`.
|
search query does not match to :py:obj:`re_search_urls`.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
params = super().get_params(search_query, engine_category)
|
params = super().get_params(search_query, engine_category)
|
||||||
if params is None:
|
if params is None:
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -907,16 +907,11 @@ def autocompleter():
|
||||||
# and there is a query part
|
# and there is a query part
|
||||||
if len(raw_text_query.autocomplete_list) == 0 and len(sug_prefix) > 0:
|
if len(raw_text_query.autocomplete_list) == 0 and len(sug_prefix) > 0:
|
||||||
|
|
||||||
# get language from cookie
|
# get SearXNG's locale and autocomplete backend from cookie
|
||||||
language = request.preferences.get_value('language')
|
sxng_locale = request.preferences.get_value('language')
|
||||||
if not language or language == 'all':
|
backend_name = request.preferences.get_value('autocomplete')
|
||||||
language = 'en'
|
|
||||||
else:
|
|
||||||
language = language.split('-')[0]
|
|
||||||
|
|
||||||
# run autocompletion
|
for result in search_autocomplete(backend_name, sug_prefix, sxng_locale):
|
||||||
raw_results = search_autocomplete(request.preferences.get_value('autocomplete'), sug_prefix, language)
|
|
||||||
for result in raw_results:
|
|
||||||
# attention: this loop will change raw_text_query object and this is
|
# attention: this loop will change raw_text_query object and this is
|
||||||
# the reason why the sug_prefix was stored before (see above)
|
# the reason why the sug_prefix was stored before (see above)
|
||||||
if result != sug_prefix:
|
if result != sug_prefix:
|
||||||
|
@ -1001,7 +996,9 @@ def preferences():
|
||||||
'rate80': rate80,
|
'rate80': rate80,
|
||||||
'rate95': rate95,
|
'rate95': rate95,
|
||||||
'warn_timeout': e.timeout > settings['outgoing']['request_timeout'],
|
'warn_timeout': e.timeout > settings['outgoing']['request_timeout'],
|
||||||
'supports_selected_language': _is_selected_language_supported(e, request.preferences),
|
'supports_selected_language': e.traits.is_locale_supported(
|
||||||
|
str(request.preferences.get_value('language') or 'all')
|
||||||
|
),
|
||||||
'result_count': result_count,
|
'result_count': result_count,
|
||||||
}
|
}
|
||||||
# end of stats
|
# end of stats
|
||||||
|
@ -1052,7 +1049,9 @@ def preferences():
|
||||||
# supports
|
# supports
|
||||||
supports = {}
|
supports = {}
|
||||||
for _, e in filtered_engines.items():
|
for _, e in filtered_engines.items():
|
||||||
supports_selected_language = _is_selected_language_supported(e, request.preferences)
|
supports_selected_language = e.traits.is_locale_supported(
|
||||||
|
str(request.preferences.get_value('language') or 'all')
|
||||||
|
)
|
||||||
safesearch = e.safesearch
|
safesearch = e.safesearch
|
||||||
time_range_support = e.time_range_support
|
time_range_support = e.time_range_support
|
||||||
for checker_test_name in checker_results.get(e.name, {}).get('errors', {}):
|
for checker_test_name in checker_results.get(e.name, {}).get('errors', {}):
|
||||||
|
@ -1099,16 +1098,6 @@ def preferences():
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _is_selected_language_supported(engine, preferences: Preferences): # pylint: disable=redefined-outer-name
|
|
||||||
language = preferences.get_value('language')
|
|
||||||
if language == 'all':
|
|
||||||
return True
|
|
||||||
x = match_language(
|
|
||||||
language, getattr(engine, 'supported_languages', []), getattr(engine, 'language_aliases', {}), None
|
|
||||||
)
|
|
||||||
return bool(x)
|
|
||||||
|
|
||||||
|
|
||||||
@app.route('/image_proxy', methods=['GET'])
|
@app.route('/image_proxy', methods=['GET'])
|
||||||
def image_proxy():
|
def image_proxy():
|
||||||
# pylint: disable=too-many-return-statements, too-many-branches
|
# pylint: disable=too-many-return-statements, too-many-branches
|
||||||
|
@ -1327,9 +1316,11 @@ def config():
|
||||||
if not request.preferences.validate_token(engine):
|
if not request.preferences.validate_token(engine):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
supported_languages = engine.supported_languages
|
_languages = engine.traits.languages.keys()
|
||||||
if isinstance(engine.supported_languages, dict):
|
if engine.traits.data_type == 'supported_languages': # vintage / deprecated
|
||||||
supported_languages = list(engine.supported_languages.keys())
|
_languages = engine.traits.supported_languages
|
||||||
|
if isinstance(_languages, dict):
|
||||||
|
_languages = _languages.keys()
|
||||||
|
|
||||||
_engines.append(
|
_engines.append(
|
||||||
{
|
{
|
||||||
|
@ -1339,7 +1330,8 @@ def config():
|
||||||
'enabled': not engine.disabled,
|
'enabled': not engine.disabled,
|
||||||
'paging': engine.paging,
|
'paging': engine.paging,
|
||||||
'language_support': engine.language_support,
|
'language_support': engine.language_support,
|
||||||
'supported_languages': supported_languages,
|
'languages': list(_languages),
|
||||||
|
'regions': list(engine.traits.regions.keys()),
|
||||||
'safesearch': engine.safesearch,
|
'safesearch': engine.safesearch,
|
||||||
'time_range_support': engine.time_range_support,
|
'time_range_support': engine.time_range_support,
|
||||||
'timeout': engine.timeout,
|
'timeout': engine.timeout,
|
||||||
|
|
|
@ -1,4 +1,6 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
import csv
|
import csv
|
||||||
|
@ -8,7 +10,7 @@ import re
|
||||||
import inspect
|
import inspect
|
||||||
import itertools
|
import itertools
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from typing import Iterable, List, Tuple, Dict
|
from typing import Iterable, List, Tuple, Dict, TYPE_CHECKING
|
||||||
|
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
from codecs import getincrementalencoder
|
from codecs import getincrementalencoder
|
||||||
|
@ -16,7 +18,10 @@ from codecs import getincrementalencoder
|
||||||
from flask_babel import gettext, format_date
|
from flask_babel import gettext, format_date
|
||||||
|
|
||||||
from searx import logger, settings
|
from searx import logger, settings
|
||||||
from searx.engines import Engine, OTHER_CATEGORY
|
from searx.engines import OTHER_CATEGORY
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from searx.enginelib import Engine
|
||||||
|
|
||||||
|
|
||||||
VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')
|
VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')
|
||||||
|
|
|
@ -1,19 +1,21 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# lint: pylint
|
# lint: pylint
|
||||||
|
|
||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
"""This script generates languages.py from intersecting each engine's supported
|
"""Update :py:obj:`searx.enginelib.traits.EngineTraitsMap` and :origin:`searx/languages.py`
|
||||||
languages.
|
|
||||||
|
|
||||||
Output files: :origin:`searx/data/engines_languages.json` and
|
:py:obj:`searx.enginelib.traits.EngineTraitsMap.ENGINE_TRAITS_FILE`:
|
||||||
:origin:`searx/languages.py` (:origin:`CI Update data ...
|
Persistence of engines traits, fetched from the engines.
|
||||||
<.github/workflows/data-update.yml>`).
|
|
||||||
|
:origin:`searx/languages.py`
|
||||||
|
Is generated from intersecting each engine's supported traits.
|
||||||
|
|
||||||
|
The script :origin:`searxng_extra/update/update_engine_traits.py` is called in
|
||||||
|
the :origin:`CI Update data ... <.github/workflows/data-update.yml>`
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# pylint: disable=invalid-name
|
# pylint: disable=invalid-name
|
||||||
from unicodedata import lookup
|
from unicodedata import lookup
|
||||||
import json
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from pprint import pformat
|
from pprint import pformat
|
||||||
from babel import Locale, UnknownLocaleError
|
from babel import Locale, UnknownLocaleError
|
||||||
|
@ -21,36 +23,26 @@ from babel.languages import get_global
|
||||||
from babel.core import parse_locale
|
from babel.core import parse_locale
|
||||||
|
|
||||||
from searx import settings, searx_dir
|
from searx import settings, searx_dir
|
||||||
|
from searx import network
|
||||||
from searx.engines import load_engines, engines
|
from searx.engines import load_engines, engines
|
||||||
from searx.network import set_timeout_for_thread
|
from searx.enginelib.traits import EngineTraitsMap
|
||||||
|
|
||||||
# Output files.
|
# Output files.
|
||||||
engines_languages_file = Path(searx_dir) / 'data' / 'engines_languages.json'
|
|
||||||
languages_file = Path(searx_dir) / 'languages.py'
|
languages_file = Path(searx_dir) / 'languages.py'
|
||||||
|
|
||||||
|
|
||||||
# Fetches supported languages for each engine and writes json file with those.
|
def fetch_traits_map():
|
||||||
def fetch_supported_languages():
|
"""Fetches supported languages for each engine and writes json file with those."""
|
||||||
set_timeout_for_thread(10.0)
|
network.set_timeout_for_thread(10.0)
|
||||||
|
|
||||||
engines_languages = {}
|
def log(msg):
|
||||||
names = list(engines)
|
print(msg)
|
||||||
names.sort()
|
|
||||||
|
|
||||||
for engine_name in names:
|
traits_map = EngineTraitsMap.fetch_traits(log=log)
|
||||||
if hasattr(engines[engine_name], 'fetch_supported_languages'):
|
print("fetched properties from %s engines" % len(traits_map))
|
||||||
engines_languages[engine_name] = engines[engine_name].fetch_supported_languages()
|
print("write json file: %s" % traits_map.ENGINE_TRAITS_FILE)
|
||||||
print("fetched %s languages from engine %s" % (len(engines_languages[engine_name]), engine_name))
|
traits_map.save_data()
|
||||||
if type(engines_languages[engine_name]) == list: # pylint: disable=unidiomatic-typecheck
|
return traits_map
|
||||||
engines_languages[engine_name] = sorted(engines_languages[engine_name])
|
|
||||||
|
|
||||||
print("fetched languages from %s engines" % len(engines_languages))
|
|
||||||
|
|
||||||
# write json file
|
|
||||||
with open(engines_languages_file, 'w', encoding='utf-8') as f:
|
|
||||||
json.dump(engines_languages, f, indent=2, sort_keys=True)
|
|
||||||
|
|
||||||
return engines_languages
|
|
||||||
|
|
||||||
|
|
||||||
# Get babel Locale object from lang_code if possible.
|
# Get babel Locale object from lang_code if possible.
|
||||||
|
@ -124,17 +116,43 @@ def get_territory_name(lang_code):
|
||||||
return country_name
|
return country_name
|
||||||
|
|
||||||
|
|
||||||
# Join all language lists.
|
def join_language_lists(traits_map: EngineTraitsMap):
|
||||||
def join_language_lists(engines_languages):
|
"""Join all languages of the engines into one list. The returned language list
|
||||||
language_list = {}
|
contains language codes (``zh``) and region codes (``zh-TW``). The codes can
|
||||||
for engine_name in engines_languages:
|
be parsed by babel::
|
||||||
for lang_code in engines_languages[engine_name]:
|
|
||||||
|
|
||||||
|
babel.Locale.parse(language_list[n])
|
||||||
|
"""
|
||||||
|
# pylint: disable=too-many-branches
|
||||||
|
language_list = {}
|
||||||
|
|
||||||
|
for eng_name, eng_traits in traits_map.items():
|
||||||
|
eng = engines[eng_name]
|
||||||
|
eng_codes = set()
|
||||||
|
|
||||||
|
if eng_traits.data_type == 'traits_v1':
|
||||||
|
# items of type 'traits_v1' do have regions & languages, the
|
||||||
|
# list of eng_codes should contain both.
|
||||||
|
eng_codes.update(eng_traits.regions.keys())
|
||||||
|
eng_codes.update(eng_traits.languages.keys())
|
||||||
|
|
||||||
|
elif eng_traits.data_type == 'supported_languages':
|
||||||
|
# vintage / deprecated
|
||||||
|
_codes = set()
|
||||||
|
if isinstance(eng_traits.supported_languages, dict):
|
||||||
|
_codes.update(eng_traits.supported_languages.keys())
|
||||||
|
elif isinstance(eng_traits.supported_languages, list):
|
||||||
|
_codes.update(eng_traits.supported_languages)
|
||||||
|
else:
|
||||||
|
raise TypeError('engine.supported_languages type %s is unknown' % type(eng_traits.supported_languages))
|
||||||
|
|
||||||
|
for lang_code in _codes:
|
||||||
# apply custom fixes if necessary
|
# apply custom fixes if necessary
|
||||||
if lang_code in getattr(engines[engine_name], 'language_aliases', {}).values():
|
if lang_code in getattr(eng, 'language_aliases', {}).values():
|
||||||
lang_code = next(
|
lang_code = next(lc for lc, alias in eng.language_aliases.items() if lang_code == alias)
|
||||||
lc for lc, alias in engines[engine_name].language_aliases.items() if lang_code == alias
|
eng_codes.add(lang_code)
|
||||||
)
|
|
||||||
|
for lang_code in eng_codes:
|
||||||
|
|
||||||
locale = get_locale(lang_code)
|
locale = get_locale(lang_code)
|
||||||
|
|
||||||
|
@ -149,10 +167,10 @@ def join_language_lists(engines_languages):
|
||||||
# get language's data from babel's Locale object
|
# get language's data from babel's Locale object
|
||||||
language_name = locale.get_language_name().title()
|
language_name = locale.get_language_name().title()
|
||||||
english_name = locale.english_name.split(' (')[0]
|
english_name = locale.english_name.split(' (')[0]
|
||||||
elif short_code in engines_languages['wikipedia']:
|
elif short_code in traits_map['wikipedia'].supported_languages:
|
||||||
# get language's data from wikipedia if not known by babel
|
# get language's data from wikipedia if not known by babel
|
||||||
language_name = engines_languages['wikipedia'][short_code]['name']
|
language_name = traits_map['wikipedia'].supported_languages[short_code]['name']
|
||||||
english_name = engines_languages['wikipedia'][short_code]['english_name']
|
english_name = traits_map['wikipedia'].supported_languages[short_code]['english_name']
|
||||||
else:
|
else:
|
||||||
language_name = None
|
language_name = None
|
||||||
english_name = None
|
english_name = None
|
||||||
|
@ -182,15 +200,15 @@ def join_language_lists(engines_languages):
|
||||||
}
|
}
|
||||||
|
|
||||||
# count engine for both language_country combination and language alone
|
# count engine for both language_country combination and language alone
|
||||||
language_list[short_code]['counter'].add(engine_name)
|
language_list[short_code]['counter'].add(eng_name)
|
||||||
if lang_code != short_code:
|
if lang_code != short_code:
|
||||||
language_list[short_code]['countries'][lang_code]['counter'].add(engine_name)
|
language_list[short_code]['countries'][lang_code]['counter'].add(eng_name)
|
||||||
|
|
||||||
return language_list
|
return language_list
|
||||||
|
|
||||||
|
|
||||||
# Filter language list so it only includes the most supported languages and countries
|
# Filter language list so it only includes the most supported languages and countries
|
||||||
def filter_language_list(all_languages):
|
def filter_language_list(joined_languages_map):
|
||||||
min_engines_per_lang = 12
|
min_engines_per_lang = 12
|
||||||
min_engines_per_country = 7
|
min_engines_per_country = 7
|
||||||
# pylint: disable=consider-using-dict-items, consider-iterating-dictionary
|
# pylint: disable=consider-using-dict-items, consider-iterating-dictionary
|
||||||
|
@ -198,6 +216,7 @@ def filter_language_list(all_languages):
|
||||||
engine_name
|
engine_name
|
||||||
for engine_name in engines.keys()
|
for engine_name in engines.keys()
|
||||||
if 'general' in engines[engine_name].categories
|
if 'general' in engines[engine_name].categories
|
||||||
|
and hasattr(engines[engine_name], 'supported_languages')
|
||||||
and engines[engine_name].supported_languages
|
and engines[engine_name].supported_languages
|
||||||
and not engines[engine_name].disabled
|
and not engines[engine_name].disabled
|
||||||
]
|
]
|
||||||
|
@ -205,7 +224,7 @@ def filter_language_list(all_languages):
|
||||||
# filter list to include only languages supported by most engines or all default general engines
|
# filter list to include only languages supported by most engines or all default general engines
|
||||||
filtered_languages = {
|
filtered_languages = {
|
||||||
code: lang
|
code: lang
|
||||||
for code, lang in all_languages.items()
|
for code, lang in joined_languages_map.items()
|
||||||
if (
|
if (
|
||||||
len(lang['counter']) >= min_engines_per_lang
|
len(lang['counter']) >= min_engines_per_lang
|
||||||
or all(main_engine in lang['counter'] for main_engine in main_engines)
|
or all(main_engine in lang['counter'] for main_engine in main_engines)
|
||||||
|
@ -214,8 +233,8 @@ def filter_language_list(all_languages):
|
||||||
|
|
||||||
def _copy_lang_data(lang, country_name=None):
|
def _copy_lang_data(lang, country_name=None):
|
||||||
new_dict = {}
|
new_dict = {}
|
||||||
new_dict['name'] = all_languages[lang]['name']
|
new_dict['name'] = joined_languages_map[lang]['name']
|
||||||
new_dict['english_name'] = all_languages[lang]['english_name']
|
new_dict['english_name'] = joined_languages_map[lang]['english_name']
|
||||||
if country_name:
|
if country_name:
|
||||||
new_dict['country_name'] = country_name
|
new_dict['country_name'] = country_name
|
||||||
return new_dict
|
return new_dict
|
||||||
|
@ -305,9 +324,13 @@ def write_languages_file(languages):
|
||||||
new_file.close()
|
new_file.close()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
def main():
|
||||||
load_engines(settings['engines'])
|
load_engines(settings['engines'])
|
||||||
_engines_languages = fetch_supported_languages()
|
traits_map = fetch_traits_map()
|
||||||
_all_languages = join_language_lists(_engines_languages)
|
joined_languages_map = join_language_lists(traits_map)
|
||||||
_filtered_languages = filter_language_list(_all_languages)
|
filtered_languages = filter_language_list(joined_languages_map)
|
||||||
write_languages_file(_filtered_languages)
|
write_languages_file(filtered_languages)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
Loading…
Reference in New Issue