forked from zaclys/searxng
		
	[mod] replace engines_languages.json by engines_traits.json
Implementations of the *traits* of the engines.
Engine's traits are fetched from the origin engine and stored in a JSON file in
the *data folder*.  Most often traits are languages and region codes and their
mapping from SearXNG's representation to the representation in the origin search
engine.
To load traits from the persistence::
    searx.enginelib.traits.EngineTraitsMap.from_data()
For new traits new properties can be added to the class::
    searx.enginelib.traits.EngineTraits
.. hint::
   Implementation is downward compatible to the deprecated *supported_languages
   method* from the vintage implementation.
   The vintage code is tagged as *deprecated* and can be removed when all engines
   have been ported to the *traits method*.
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
			
			
This commit is contained in:
		
							parent
							
								
									64fea2f9cb
								
							
						
					
					
						commit
						6e5f22e558
					
				
					 29 changed files with 5415 additions and 4593 deletions
				
			
		
							
								
								
									
										2
									
								
								.github/workflows/data-update.yml
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/workflows/data-update.yml
									
										
									
									
										vendored
									
									
								
							| 
						 | 
				
			
			@ -17,7 +17,7 @@ jobs:
 | 
			
		|||
          - update_currencies.py
 | 
			
		||||
          - update_external_bangs.py
 | 
			
		||||
          - update_firefox_version.py
 | 
			
		||||
          - update_languages.py
 | 
			
		||||
          - update_engine_traits.py
 | 
			
		||||
          - update_wikidata_units.py
 | 
			
		||||
          - update_engine_descriptions.py
 | 
			
		||||
    steps:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -42,7 +42,7 @@ Explanation of the :ref:`general engine configuration` shown in the table
 | 
			
		|||
        - Timeout
 | 
			
		||||
        - Weight
 | 
			
		||||
        - Paging
 | 
			
		||||
        - Language
 | 
			
		||||
        - Language, Region
 | 
			
		||||
        - Safe search
 | 
			
		||||
        - Time range
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -569,10 +569,13 @@ engine is shown.  Most of the options have a default value or even are optional.
 | 
			
		|||
  To disable by default the engine, but not deleting it.  It will allow the user
 | 
			
		||||
  to manually activate it in the settings.
 | 
			
		||||
 | 
			
		||||
``inactive``: optional
 | 
			
		||||
  Remove the engine from the settings (*disabled & removed*).
 | 
			
		||||
 | 
			
		||||
``language`` : optional
 | 
			
		||||
  If you want to use another language for a specific engine, you can define it
 | 
			
		||||
  by using the full ISO code of language and country, like ``fr_FR``, ``en_US``,
 | 
			
		||||
  ``de_DE``.
 | 
			
		||||
  by using the ISO code of language (and region), like ``fr``, ``en-US``,
 | 
			
		||||
  ``de-DE``.
 | 
			
		||||
 | 
			
		||||
``tokens`` : optional
 | 
			
		||||
  A list of secret tokens to make this engine *private*, more details see
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -127,6 +127,10 @@ extensions = [
 | 
			
		|||
    'notfound.extension',  # https://github.com/readthedocs/sphinx-notfound-page
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
autodoc_default_options = {
 | 
			
		||||
    'member-order': 'groupwise',
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
myst_enable_extensions = [
 | 
			
		||||
  "replacements", "smartquotes"
 | 
			
		||||
]
 | 
			
		||||
| 
						 | 
				
			
			@ -135,6 +139,7 @@ suppress_warnings = ['myst.domains']
 | 
			
		|||
 | 
			
		||||
intersphinx_mapping = {
 | 
			
		||||
    "python": ("https://docs.python.org/3/", None),
 | 
			
		||||
    "babel" : ("https://babel.readthedocs.io/en/latest/", None),
 | 
			
		||||
    "flask": ("https://flask.palletsprojects.com/", None),
 | 
			
		||||
    "flask_babel": ("https://python-babel.github.io/flask-babel/", None),
 | 
			
		||||
    # "werkzeug": ("https://werkzeug.palletsprojects.com/", None),
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -54,6 +54,7 @@ Engine File
 | 
			
		|||
                                       - ``offline`` :ref:`[ref] <offline engines>`
 | 
			
		||||
                                       - ``online_dictionary``
 | 
			
		||||
                                       - ``online_currency``
 | 
			
		||||
                                       - ``online_url_search``
 | 
			
		||||
   ======================= =========== ========================================================
 | 
			
		||||
 | 
			
		||||
.. _engine settings:
 | 
			
		||||
| 
						 | 
				
			
			@ -131,8 +132,10 @@ Passed Arguments (request)
 | 
			
		|||
These arguments can be used to construct the search query.  Furthermore,
 | 
			
		||||
parameters with default value can be redefined for special purposes.
 | 
			
		||||
 | 
			
		||||
.. _engine request online:
 | 
			
		||||
 | 
			
		||||
.. table:: If the ``engine_type`` is ``online``
 | 
			
		||||
.. table:: If the ``engine_type`` is :py:obj:`online
 | 
			
		||||
           <searx.search.processors.online.OnlineProcessor.get_params>`
 | 
			
		||||
   :width: 100%
 | 
			
		||||
 | 
			
		||||
   ====================== ============== ========================================================================
 | 
			
		||||
| 
						 | 
				
			
			@ -149,12 +152,16 @@ parameters with default value can be redefined for special purposes.
 | 
			
		|||
   safesearch             int            ``0``, between ``0`` and ``2`` (normal, moderate, strict)
 | 
			
		||||
   time_range             Optional[str]  ``None``, can be ``day``, ``week``, ``month``, ``year``
 | 
			
		||||
   pageno                 int            current pagenumber
 | 
			
		||||
   language               str            specific language code like ``'en_US'``, or ``'all'`` if unspecified
 | 
			
		||||
   searxng_locale         str            SearXNG's locale selected by user.  Specific language code like
 | 
			
		||||
                                         ``'en'``, ``'en-US'``, or ``'all'`` if unspecified.
 | 
			
		||||
   ====================== ============== ========================================================================
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
.. table:: If the ``engine_type`` is ``online_dictionary``, in addition to the
 | 
			
		||||
           ``online`` arguments:
 | 
			
		||||
.. _engine request online_dictionary:
 | 
			
		||||
 | 
			
		||||
.. table:: If the ``engine_type`` is :py:obj:`online_dictionary
 | 
			
		||||
           <searx.search.processors.online_dictionary.OnlineDictionaryProcessor.get_params>`,
 | 
			
		||||
           in addition to the :ref:`online <engine request online>` arguments:
 | 
			
		||||
   :width: 100%
 | 
			
		||||
 | 
			
		||||
   ====================== ============== ========================================================================
 | 
			
		||||
| 
						 | 
				
			
			@ -165,8 +172,11 @@ parameters with default value can be redefined for special purposes.
 | 
			
		|||
   query                  str            the text query without the languages
 | 
			
		||||
   ====================== ============== ========================================================================
 | 
			
		||||
 | 
			
		||||
.. table:: If the ``engine_type`` is ``online_currency```, in addition to the
 | 
			
		||||
           ``online`` arguments:
 | 
			
		||||
.. _engine request online_currency:
 | 
			
		||||
 | 
			
		||||
.. table:: If the ``engine_type`` is :py:obj:`online_currency
 | 
			
		||||
           <searx.search.processors.online_currency.OnlineCurrencyProcessor.get_params>`,
 | 
			
		||||
           in addition to the :ref:`online <engine request online>` arguments:
 | 
			
		||||
   :width: 100%
 | 
			
		||||
 | 
			
		||||
   ====================== ============== ========================================================================
 | 
			
		||||
| 
						 | 
				
			
			@ -179,6 +189,26 @@ parameters with default value can be redefined for special purposes.
 | 
			
		|||
   to_name                str            currency name
 | 
			
		||||
   ====================== ============== ========================================================================
 | 
			
		||||
 | 
			
		||||
.. _engine request online_url_search:
 | 
			
		||||
 | 
			
		||||
.. table:: If the ``engine_type`` is :py:obj:`online_url_search
 | 
			
		||||
           <searx.search.processors.online_url_search.OnlineUrlSearchProcessor.get_params>`,
 | 
			
		||||
           in addition to the :ref:`online <engine request online>` arguments:
 | 
			
		||||
   :width: 100%
 | 
			
		||||
 | 
			
		||||
   ====================== ============== ========================================================================
 | 
			
		||||
   argument               type           default-value, information
 | 
			
		||||
   ====================== ============== ========================================================================
 | 
			
		||||
   search_url             dict           URLs from the search query:
 | 
			
		||||
 | 
			
		||||
                                         .. code:: python
 | 
			
		||||
 | 
			
		||||
                                            {
 | 
			
		||||
                                              'http': str,
 | 
			
		||||
                                              'ftp': str,
 | 
			
		||||
                                              'data:image': str
 | 
			
		||||
                                            }
 | 
			
		||||
   ====================== ============== ========================================================================
 | 
			
		||||
 | 
			
		||||
Specify Request
 | 
			
		||||
---------------
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -52,12 +52,12 @@ Scripts to update static data in :origin:`searx/data/`
 | 
			
		|||
  :members:
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
``update_languages.py``
 | 
			
		||||
=======================
 | 
			
		||||
``update_engine_traits.py``
 | 
			
		||||
===========================
 | 
			
		||||
 | 
			
		||||
:origin:`[source] <searxng_extra/update/update_languages.py>`
 | 
			
		||||
:origin:`[source] <searxng_extra/update/update_engine_traits.py>`
 | 
			
		||||
 | 
			
		||||
.. automodule:: searxng_extra.update.update_languages
 | 
			
		||||
.. automodule:: searxng_extra.update.update_engine_traits
 | 
			
		||||
  :members:
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										17
									
								
								docs/src/searx.enginelib.rst
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										17
									
								
								docs/src/searx.enginelib.rst
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,17 @@
 | 
			
		|||
.. _searx.enginelib:
 | 
			
		||||
 | 
			
		||||
============
 | 
			
		||||
Engine model
 | 
			
		||||
============
 | 
			
		||||
 | 
			
		||||
.. automodule:: searx.enginelib
 | 
			
		||||
  :members:
 | 
			
		||||
 | 
			
		||||
.. _searx.enginelib.traits:
 | 
			
		||||
 | 
			
		||||
=============
 | 
			
		||||
Engine traits
 | 
			
		||||
=============
 | 
			
		||||
 | 
			
		||||
.. automodule:: searx.enginelib.traits
 | 
			
		||||
  :members:
 | 
			
		||||
| 
						 | 
				
			
			@ -1,8 +1,8 @@
 | 
			
		|||
.. _load_engines:
 | 
			
		||||
.. _searx.engines:
 | 
			
		||||
 | 
			
		||||
============
 | 
			
		||||
Load Engines
 | 
			
		||||
============
 | 
			
		||||
=================
 | 
			
		||||
SearXNG's engines
 | 
			
		||||
=================
 | 
			
		||||
 | 
			
		||||
.. automodule:: searx.engines
 | 
			
		||||
  :members:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										47
									
								
								docs/src/searx.search.processors.rst
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								docs/src/searx.search.processors.rst
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,47 @@
 | 
			
		|||
.. _searx.search.processors:
 | 
			
		||||
 | 
			
		||||
=================
 | 
			
		||||
Search processors
 | 
			
		||||
=================
 | 
			
		||||
 | 
			
		||||
.. contents:: Contents
 | 
			
		||||
   :depth: 2
 | 
			
		||||
   :local:
 | 
			
		||||
   :backlinks: entry
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Abstract processor class
 | 
			
		||||
========================
 | 
			
		||||
 | 
			
		||||
.. automodule:: searx.search.processors.abstract
 | 
			
		||||
  :members:
 | 
			
		||||
 | 
			
		||||
Offline processor
 | 
			
		||||
=================
 | 
			
		||||
 | 
			
		||||
.. automodule:: searx.search.processors.offline
 | 
			
		||||
  :members:
 | 
			
		||||
 | 
			
		||||
Online processor
 | 
			
		||||
================
 | 
			
		||||
 | 
			
		||||
.. automodule:: searx.search.processors.online
 | 
			
		||||
  :members:
 | 
			
		||||
 | 
			
		||||
Online currency processor
 | 
			
		||||
=========================
 | 
			
		||||
 | 
			
		||||
.. automodule:: searx.search.processors.online_currency
 | 
			
		||||
  :members:
 | 
			
		||||
 | 
			
		||||
Online Dictionary processor
 | 
			
		||||
===========================
 | 
			
		||||
 | 
			
		||||
.. automodule:: searx.search.processors.online_dictionary
 | 
			
		||||
  :members:
 | 
			
		||||
 | 
			
		||||
Online URL search processor
 | 
			
		||||
===========================
 | 
			
		||||
 | 
			
		||||
.. automodule:: searx.search.processors.online_url_search
 | 
			
		||||
  :members:
 | 
			
		||||
							
								
								
									
										2
									
								
								manage
									
										
									
									
									
								
							
							
						
						
									
										2
									
								
								manage
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -63,7 +63,7 @@ PYLINT_SEARXNG_DISABLE_OPTION="\
 | 
			
		|||
I,C,R,\
 | 
			
		||||
W0105,W0212,W0511,W0603,W0613,W0621,W0702,W0703,W1401,\
 | 
			
		||||
E1136"
 | 
			
		||||
PYLINT_ADDITIONAL_BUILTINS_FOR_ENGINES="supported_languages,language_aliases,logger,categories"
 | 
			
		||||
PYLINT_ADDITIONAL_BUILTINS_FOR_ENGINES="traits,supported_languages,language_aliases,logger,categories"
 | 
			
		||||
PYLINT_OPTIONS="-m pylint -j 0 --rcfile .pylintrc"
 | 
			
		||||
 | 
			
		||||
help() {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -12,7 +12,7 @@ from lxml import etree
 | 
			
		|||
from httpx import HTTPError
 | 
			
		||||
 | 
			
		||||
from searx import settings
 | 
			
		||||
from searx.data import ENGINES_LANGUAGES
 | 
			
		||||
from searx.engines import engines
 | 
			
		||||
from searx.network import get as http_get
 | 
			
		||||
from searx.exceptions import SearxEngineResponseException
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -111,7 +111,7 @@ def seznam(query, _lang):
 | 
			
		|||
 | 
			
		||||
def startpage(query, lang):
 | 
			
		||||
    # startpage autocompleter
 | 
			
		||||
    lui = ENGINES_LANGUAGES['startpage'].get(lang, 'english')
 | 
			
		||||
    lui = engines['startpage'].supported_languages.get(lang, 'english')  # vintage / deprecated
 | 
			
		||||
    url = 'https://startpage.com/suggestions?{query}'
 | 
			
		||||
    resp = get(url.format(query=urlencode({'q': query, 'segment': 'startpage.udog', 'lui': lui})))
 | 
			
		||||
    data = resp.json()
 | 
			
		||||
| 
						 | 
				
			
			@ -177,12 +177,19 @@ backends = {
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def search_autocomplete(backend_name, query, lang):
 | 
			
		||||
def search_autocomplete(backend_name, query, sxng_locale):
 | 
			
		||||
    backend = backends.get(backend_name)
 | 
			
		||||
    if backend is None:
 | 
			
		||||
        return []
 | 
			
		||||
 | 
			
		||||
    if engines[backend_name].traits.data_type != "traits_v1":
 | 
			
		||||
        # vintage / deprecated
 | 
			
		||||
        if not sxng_locale or sxng_locale == 'all':
 | 
			
		||||
            sxng_locale = 'en'
 | 
			
		||||
        else:
 | 
			
		||||
            sxng_locale = sxng_locale.split('-')[0]
 | 
			
		||||
 | 
			
		||||
    try:
 | 
			
		||||
        return backend(query, lang)
 | 
			
		||||
        return backend(query, sxng_locale)
 | 
			
		||||
    except (HTTPError, SearxEngineResponseException):
 | 
			
		||||
        return []
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -7,7 +7,7 @@
 | 
			
		|||
"""
 | 
			
		||||
 | 
			
		||||
__all__ = [
 | 
			
		||||
    'ENGINES_LANGUAGES',
 | 
			
		||||
    'ENGINE_TRAITS',
 | 
			
		||||
    'CURRENCIES',
 | 
			
		||||
    'USER_AGENTS',
 | 
			
		||||
    'EXTERNAL_URLS',
 | 
			
		||||
| 
						 | 
				
			
			@ -42,7 +42,6 @@ def ahmia_blacklist_loader():
 | 
			
		|||
        return f.read().split()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
ENGINES_LANGUAGES = _load('engines_languages.json')
 | 
			
		||||
CURRENCIES = _load('currencies.json')
 | 
			
		||||
USER_AGENTS = _load('useragents.json')
 | 
			
		||||
EXTERNAL_URLS = _load('external_urls.json')
 | 
			
		||||
| 
						 | 
				
			
			@ -50,3 +49,4 @@ WIKIDATA_UNITS = _load('wikidata_units.json')
 | 
			
		|||
EXTERNAL_BANGS = _load('external_bangs.json')
 | 
			
		||||
OSM_KEYS_TAGS = _load('osm_keys_tags.json')
 | 
			
		||||
ENGINE_DESCRIPTIONS = _load('engine_descriptions.json')
 | 
			
		||||
ENGINE_TRAITS = _load('engine_traits.json')
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										4534
									
								
								searx/data/engine_traits.json
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										4534
									
								
								searx/data/engine_traits.json
									
										
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										143
									
								
								searx/enginelib/__init__.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										143
									
								
								searx/enginelib/__init__.py
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,143 @@
 | 
			
		|||
# SPDX-License-Identifier: AGPL-3.0-or-later
 | 
			
		||||
# lint: pylint
 | 
			
		||||
"""Engine related implementations
 | 
			
		||||
 | 
			
		||||
.. note::
 | 
			
		||||
 | 
			
		||||
   The long term goal is to modularize all relevant implementations to the
 | 
			
		||||
   engines here in this Python package.  In addition to improved modularization,
 | 
			
		||||
   this will also be necessary in part because the probability of circular
 | 
			
		||||
   imports will increase due to the increased typification of implementations in
 | 
			
		||||
   the future.
 | 
			
		||||
 | 
			
		||||
   ToDo:
 | 
			
		||||
 | 
			
		||||
   - move :py:obj:`searx.engines.load_engine` to a new module `searx.enginelib`.
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
from __future__ import annotations
 | 
			
		||||
from typing import Union, Dict, List, Callable, TYPE_CHECKING
 | 
			
		||||
 | 
			
		||||
if TYPE_CHECKING:
 | 
			
		||||
    from searx.enginelib import traits
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Engine:  # pylint: disable=too-few-public-methods
 | 
			
		||||
    """Class of engine instances built from YAML settings.
 | 
			
		||||
 | 
			
		||||
    Further documentation see :ref:`general engine configuration`.
 | 
			
		||||
 | 
			
		||||
    .. hint::
 | 
			
		||||
 | 
			
		||||
       This class is currently never initialized and only used for type hinting.
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    # Common options in the engine module
 | 
			
		||||
 | 
			
		||||
    engine_type: str
 | 
			
		||||
    """Type of the engine (:origin:`searx/search/processors`)"""
 | 
			
		||||
 | 
			
		||||
    paging: bool
 | 
			
		||||
    """Engine supports multiple pages."""
 | 
			
		||||
 | 
			
		||||
    time_range_support: bool
 | 
			
		||||
    """Engine supports search time range."""
 | 
			
		||||
 | 
			
		||||
    safesearch: bool
 | 
			
		||||
    """Engine supports SafeSearch"""
 | 
			
		||||
 | 
			
		||||
    language_support: bool
 | 
			
		||||
    """Engine supports languages (locales) search."""
 | 
			
		||||
 | 
			
		||||
    language: str
 | 
			
		||||
    """For an engine, when there is ``language: ...`` in the YAML settings the engine
 | 
			
		||||
    does support only this one language:
 | 
			
		||||
 | 
			
		||||
    .. code:: yaml
 | 
			
		||||
 | 
			
		||||
      - name: google french
 | 
			
		||||
        engine: google
 | 
			
		||||
        language: fr
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    region: str
 | 
			
		||||
    """For an engine, when there is ``region: ...`` in the YAML settings the engine
 | 
			
		||||
    does support only this one region:
 | 
			
		||||
 | 
			
		||||
    .. code:: yaml
 | 
			
		||||
 | 
			
		||||
      - name: google belgium
 | 
			
		||||
        engine: google
 | 
			
		||||
        region: fr-BE
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    fetch_traits: Callable
 | 
			
		||||
    """Function to fetch engine's traits from origin."""
 | 
			
		||||
 | 
			
		||||
    traits: traits.EngineTraits
 | 
			
		||||
    """Traits of the engine."""
 | 
			
		||||
 | 
			
		||||
    # settings.yml
 | 
			
		||||
 | 
			
		||||
    categories: List[str]
 | 
			
		||||
    """Tabs, in which the engine is working."""
 | 
			
		||||
 | 
			
		||||
    name: str
 | 
			
		||||
    """Name that will be used across SearXNG to define this engine.  In settings, on
 | 
			
		||||
    the result page .."""
 | 
			
		||||
 | 
			
		||||
    engine: str
 | 
			
		||||
    """Name of the python file used to handle requests and responses to and from
 | 
			
		||||
    this search engine (file name from :origin:`searx/engines` without
 | 
			
		||||
    ``.py``)."""
 | 
			
		||||
 | 
			
		||||
    enable_http: bool
 | 
			
		||||
    """Enable HTTP (by default only HTTPS is enabled)."""
 | 
			
		||||
 | 
			
		||||
    shortcut: str
 | 
			
		||||
    """Code used to execute bang requests (``!foo``)"""
 | 
			
		||||
 | 
			
		||||
    timeout: float
 | 
			
		||||
    """Specific timeout for search-engine."""
 | 
			
		||||
 | 
			
		||||
    display_error_messages: bool
 | 
			
		||||
    """Display error messages on the web UI."""
 | 
			
		||||
 | 
			
		||||
    proxies: dict
 | 
			
		||||
    """Set proxies for a specific engine (YAML):
 | 
			
		||||
 | 
			
		||||
    .. code:: yaml
 | 
			
		||||
 | 
			
		||||
       proxies :
 | 
			
		||||
         http:  socks5://proxy:port
 | 
			
		||||
         https: socks5://proxy:port
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    disabled: bool
 | 
			
		||||
    """To disable by default the engine, but not deleting it.  It will allow the
 | 
			
		||||
    user to manually activate it in the settings."""
 | 
			
		||||
 | 
			
		||||
    inactive: bool
 | 
			
		||||
    """Remove the engine from the settings (*disabled & removed*)."""
 | 
			
		||||
 | 
			
		||||
    about: dict
 | 
			
		||||
    """Additional fields describing the engine.
 | 
			
		||||
 | 
			
		||||
    .. code:: yaml
 | 
			
		||||
 | 
			
		||||
       about:
 | 
			
		||||
          website: https://example.com
 | 
			
		||||
          wikidata_id: Q306656
 | 
			
		||||
          official_api_documentation: https://example.com/api-doc
 | 
			
		||||
          use_official_api: true
 | 
			
		||||
          require_api_key: true
 | 
			
		||||
          results: HTML
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    # deprecated properties
 | 
			
		||||
 | 
			
		||||
    _fetch_supported_languages: Callable  # deprecated use fetch_traits
 | 
			
		||||
    supported_languages: Union[List[str], Dict[str, str]]  # deprecated use traits
 | 
			
		||||
    language_aliases: Dict[str, str]  # deprecated not needed when using traits
 | 
			
		||||
    supported_languages_url: str  # deprecated not needed when using traits
 | 
			
		||||
							
								
								
									
										387
									
								
								searx/enginelib/traits.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										387
									
								
								searx/enginelib/traits.py
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,387 @@
 | 
			
		|||
# SPDX-License-Identifier: AGPL-3.0-or-later
 | 
			
		||||
# lint: pylint
 | 
			
		||||
"""Engine's traits are fetched from the origin engines and stored in a JSON file
 | 
			
		||||
in the *data folder*.  Most often traits are languages and region codes and
 | 
			
		||||
their mapping from SearXNG's representation to the representation in the origin
 | 
			
		||||
search engine.  For new traits new properties can be added to the class
 | 
			
		||||
:py:class:`EngineTraits`.
 | 
			
		||||
 | 
			
		||||
To load traits from the persistence :py:obj:`EngineTraitsMap.from_data` can be
 | 
			
		||||
used.
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
from __future__ import annotations
 | 
			
		||||
import json
 | 
			
		||||
import dataclasses
 | 
			
		||||
from typing import Dict, Union, List, Callable, Optional, TYPE_CHECKING
 | 
			
		||||
from typing_extensions import Literal, Self
 | 
			
		||||
 | 
			
		||||
from babel.localedata import locale_identifiers
 | 
			
		||||
 | 
			
		||||
from searx import locales
 | 
			
		||||
from searx.data import data_dir, ENGINE_TRAITS
 | 
			
		||||
 | 
			
		||||
if TYPE_CHECKING:
 | 
			
		||||
    from . import Engine
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class EngineTraitsEncoder(json.JSONEncoder):
 | 
			
		||||
    """Encodes :class:`EngineTraits` to a serializable object, see
 | 
			
		||||
    :class:`json.JSONEncoder`."""
 | 
			
		||||
 | 
			
		||||
    def default(self, o):
 | 
			
		||||
        """Return dictionary of a :class:`EngineTraits` object."""
 | 
			
		||||
        if isinstance(o, EngineTraits):
 | 
			
		||||
            return o.__dict__
 | 
			
		||||
        return super().default(o)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@dataclasses.dataclass
class EngineTraits:
    """The class is intended to be instantiated for each engine."""

    regions: Dict[str, str] = dataclasses.field(default_factory=dict)
    """Maps SearXNG's internal representation of a region to the one of the engine.

    SearXNG's internal representation can be parsed by babel and the value is
    sent to the engine:

    .. code:: python

       regions = {
           'fr-BE' : <engine's region name>,
       }

       for key, engine_region in regions.items():
          searxng_region = babel.Locale.parse(key, sep='-')
          ...
    """

    languages: Dict[str, str] = dataclasses.field(default_factory=dict)
    """Maps SearXNG's internal representation of a language to the one of the engine.

    SearXNG's internal representation can be parsed by babel and the value is
    sent to the engine:

    .. code:: python

       languages = {
           'ca' : <engine's language name>,
       }

       for key, engine_lang in languages.items():
          searxng_lang = babel.Locale.parse(key)
          ...
    """

    all_locale: Optional[str] = None
    """To which locale value SearXNG's ``all`` language is mapped (shown as
    "Default language").
    """

    data_type: Literal['traits_v1', 'supported_languages'] = 'traits_v1'
    """Data type, default is 'traits_v1' for vintage use 'supported_languages'.

    .. hint::

       For the transition period until the *fetch* functions of all the engines
       are converted there will be the data_type 'supported_languages', which
       maps the old logic unchanged 1:1.

       Instances of data_type 'supported_languages' do not implement methods
       like ``self.get_language(..)`` and ``self.get_region(..)``

    """

    custom: Dict[str, Dict] = dataclasses.field(default_factory=dict)
    """A place to store engine's custom traits, not related to the SearXNG core."""

    def get_language(self, searxng_locale: str, default=None):
        """Return engine's language string that *best fits* to SearXNG's locale.

        :param searxng_locale: SearXNG's internal representation of locale
          selected by the user.

        :param default: engine's default language

        The *best fits* rules are implemented in
        :py:obj:`locales.get_engine_locale`.  Except for the special value ``all``
        which is determined from :py:obj:`EngineTraits.all_locale`.
        """
        if searxng_locale == 'all' and self.all_locale is not None:
            return self.all_locale
        return locales.get_engine_locale(searxng_locale, self.languages, default=default)

    def get_region(self, searxng_locale: str, default=None):
        """Return engine's region string that best fits to SearXNG's locale.

        :param searxng_locale: SearXNG's internal representation of locale
          selected by the user.

        :param default: engine's default region

        The *best fits* rules are implemented in
        :py:obj:`locales.get_engine_locale`.  Except for the special value ``all``
        which is determined from :py:obj:`EngineTraits.all_locale`.
        """
        if searxng_locale == 'all' and self.all_locale is not None:
            return self.all_locale
        return locales.get_engine_locale(searxng_locale, self.regions, default=default)

    def is_locale_supported(self, searxng_locale: str) -> bool:
        """A *locale* (SearXNG's internal representation) is considered to be supported
        by the engine if the *region* or the *language* is supported by the
        engine.  For verification the functions :py:func:`self.get_region` and
        :py:func:`self.get_language` are used.

        For the deprecated data_type ``supported_languages`` the locale ``all``
        is always supported, every other locale is checked by
        ``searx.utils.match_language``.

        :param searxng_locale: SearXNG's internal representation of locale
          selected by the user.

        :raises TypeError: if :py:obj:`EngineTraits.data_type` is unknown.
        """
        if self.data_type == 'traits_v1':
            return bool(self.get_region(searxng_locale) or self.get_language(searxng_locale))

        if self.data_type == 'supported_languages':  # vintage / deprecated
            # pylint: disable=import-outside-toplevel
            from searx.utils import match_language

            if searxng_locale == 'all':
                return True

            # ``language_aliases`` is not a dataclass field, it is only attached
            # to the copy created in ``_set_supported_languages``.  Fall back to
            # "no aliases" instead of raising an AttributeError on instances
            # that never passed through that method.
            aliases = getattr(self, 'language_aliases', {})
            x = match_language(searxng_locale, self.supported_languages, aliases, None)
            return bool(x)

        raise TypeError('engine traits of type %s is unknown' % self.data_type)

    def copy(self):
        """Create a copy of the dataclass object.

        Only the dataclass fields are copied (:py:obj:`dataclasses.asdict`),
        attributes attached to the instance afterwards are not part of the copy.
        """
        return EngineTraits(**dataclasses.asdict(self))

    @classmethod
    def fetch_traits(cls, engine: Engine) -> Union[Self, None]:
        """Call a function ``fetch_traits(engine_traits)`` from engines namespace to fetch
        and set properties from the origin engine in the object ``engine_traits``.  If
        function does not exist, ``None`` is returned.
        """

        fetch_traits = getattr(engine, 'fetch_traits', None)
        engine_traits = None

        if fetch_traits:
            engine_traits = cls()
            fetch_traits(engine_traits)
        return engine_traits

    def set_traits(self, engine: Engine):
        """Set traits from self object in a :py:obj:`.Engine` namespace.

        :param engine: engine instance build by :py:func:`searx.engines.load_engine`

        :raises TypeError: if :py:obj:`EngineTraits.data_type` is unknown.
        """

        if self.data_type == 'traits_v1':
            self._set_traits_v1(engine)

        elif self.data_type == 'supported_languages':  # vintage / deprecated
            self._set_supported_languages(engine)

        else:
            raise TypeError('engine traits of type %s is unknown' % self.data_type)

    def _set_traits_v1(self, engine: Engine):
        """Set a (narrowed) copy of the traits in the namespace of ``engine``.

        :raises ValueError: if the ``language`` or ``region`` configured for the
          engine is not part of the traits.
        """
        # For an engine, when there is `language: ...` in the YAML settings the engine
        # does support only this one language (region)::
        #
        #   - name: google italian
        #     engine: google
        #     language: it
        #     region: it-IT

        traits = self.copy()

        _msg = "settings.yml - engine: '%s' / %s: '%s' not supported"

        languages = traits.languages
        if hasattr(engine, 'language'):
            if engine.language not in languages:
                raise ValueError(_msg % (engine.name, 'language', engine.language))
            traits.languages = {engine.language: languages[engine.language]}

        regions = traits.regions
        if hasattr(engine, 'region'):
            if engine.region not in regions:
                raise ValueError(_msg % (engine.name, 'region', engine.region))
            traits.regions = {engine.region: regions[engine.region]}

        engine.language_support = bool(traits.languages or traits.regions)

        # set the copied & modified traits in engine's namespace
        engine.traits = traits

    # -------------------------------------------------------------------------
    # The code below is deprecated and can hopefully be deleted one day
    # -------------------------------------------------------------------------

    supported_languages: Union[List[str], Dict[str, str]] = dataclasses.field(default_factory=dict)
    """deprecated: does not work for engines that do support languages based on a
    region.  With this type it is not guaranteed that the key values can be
    parsed by :py:obj:`babel.Locale.parse`!
    """

    # NOTE: ``language_aliases`` is not a dataclass field (it is not part of
    # the constructor arguments), it is attached as a plain attribute in
    # ``_set_supported_languages``.

    BABEL_LANGS = [
        lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0]
        for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())
    ]

    @classmethod  # vintage / deprecated
    def fetch_supported_languages(cls, engine: Engine) -> Union[Self, None]:
        """DEPRECATED: Calls a function ``_fetch_supported_languages`` from engine's
        namespace to fetch languages from the origin engine.  If function does
        not exist, ``None`` is returned.
        """

        # pylint: disable=import-outside-toplevel
        from searx import network
        from searx.utils import gen_useragent

        fetch_languages = getattr(engine, '_fetch_supported_languages', None)
        if fetch_languages is None:
            return None

        # The headers has been moved here from commit 9b6ffed06: Some engines (at
        # least bing and startpage) return a different result list of supported
        # languages depending on the IP location where the HTTP request comes from.
        # The IP based results (from bing) can be avoided by setting a
        # 'Accept-Language' in the HTTP request.

        headers = {
            'User-Agent': gen_useragent(),
            'Accept-Language': "en-US,en;q=0.5",  # bing needs to set the English language
        }
        resp = network.get(engine.supported_languages_url, headers=headers)
        supported_languages = fetch_languages(resp)
        if isinstance(supported_languages, list):
            supported_languages.sort()

        engine_traits = cls()
        engine_traits.data_type = 'supported_languages'
        engine_traits.supported_languages = supported_languages
        return engine_traits

    def _set_supported_languages(self, engine: Engine):  # vintage / deprecated
        """DEPRECATED: Set a (narrowed) copy of the traits and the vintage
        attributes ``supported_languages``, ``language_aliases`` and
        ``language_support`` in the namespace of ``engine``.

        :raises ValueError: if the ``language`` configured for the engine is not
          in the supported languages.
        """
        traits = self.copy()

        # pylint: disable=import-outside-toplevel
        from searx.utils import match_language

        _msg = "settings.yml - engine: '%s' / %s: '%s' not supported"

        if hasattr(engine, 'language'):
            if engine.language not in self.supported_languages:
                raise ValueError(_msg % (engine.name, 'language', engine.language))

            if isinstance(self.supported_languages, dict):
                traits.supported_languages = {engine.language: self.supported_languages[engine.language]}
            else:
                traits.supported_languages = [engine.language]

        engine.language_support = bool(traits.supported_languages)
        engine.supported_languages = traits.supported_languages

        # find custom aliases for non standard language codes
        traits.language_aliases = {}  # pylint: disable=attribute-defined-outside-init

        for engine_lang in getattr(engine, 'language_aliases', {}):
            iso_lang = match_language(engine_lang, self.BABEL_LANGS, fallback=None)
            if (
                iso_lang
                and iso_lang != engine_lang
                and not engine_lang.startswith(iso_lang)
                and iso_lang not in self.supported_languages
            ):
                traits.language_aliases[iso_lang] = engine_lang

        engine.language_aliases = traits.language_aliases

        # set the copied & modified traits in engine's namespace
        engine.traits = traits
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class EngineTraitsMap(Dict[str, EngineTraits]):
    """Dictionary that maps an engine name to its :class:`EngineTraits`."""

    ENGINE_TRAITS_FILE = (data_dir / 'engine_traits.json').resolve()
    """File in which the :py:obj:`EngineTraitsMap` is persisted."""

    def save_data(self):
        """Write this map as JSON into the file :py:obj:`self.ENGINE_TRAITS_FILE`."""
        with open(self.ENGINE_TRAITS_FILE, 'w', encoding='utf-8') as json_file:
            json.dump(self, json_file, indent=2, sort_keys=True, cls=EngineTraitsEncoder)

    @classmethod
    def from_data(cls) -> Self:
        """Build a :class:`EngineTraitsMap` from the content of :py:obj:`ENGINE_TRAITS`."""
        traits_map = cls()
        for engine_name, traits_kwargs in ENGINE_TRAITS.items():
            traits_map[engine_name] = EngineTraits(**traits_kwargs)
        return traits_map

    @classmethod
    def fetch_traits(cls, log: Callable) -> Self:
        """Fetch the traits of all loaded engines (in alphabetical order) from
        their origin and return them in a new :class:`EngineTraitsMap`.

        :param log: callable that is called with a progress message (string).
        """
        from searx import engines  # pylint: disable=cyclic-import, import-outside-toplevel

        traits_map = cls()

        for engine_name in sorted(engines.engines):
            engine = engines.engines[engine_name]

            traits = EngineTraits.fetch_traits(engine)
            if traits is not None:
                log("%-20s: SearXNG languages --> %s " % (engine_name, len(traits.languages)))
                log("%-20s: SearXNG regions   --> %s" % (engine_name, len(traits.regions)))
                traits_map[engine_name] = traits

            # vintage / deprecated
            vintage_traits = EngineTraits.fetch_supported_languages(engine)
            if vintage_traits is None:
                continue

            log("%-20s: %s supported_languages (deprecated)" % (engine_name, len(vintage_traits.supported_languages)))
            if traits is None:
                # the engine offers only the vintage fetch function
                traits_map[engine_name] = vintage_traits
            else:
                # the engine offers both: add the vintage languages to the traits
                traits.supported_languages = vintage_traits.supported_languages
                traits_map[engine_name] = traits

        return traits_map

    def set_traits(self, engine: Engine):
        """Set traits in a :py:obj:`Engine` namespace.

        :param engine: engine instance build by :py:func:`searx.engines.load_engine`
        """

        if engine.name in self:
            engine_traits = self[engine.name]

        elif engine.engine in self:
            # The keys of this map are the *engine names* configured in
            # settings.yml.  When multiple engines are configured in
            # settings.yml to use the same origin engine (python module), these
            # additional engines use the traits of the origin engine, looked up
            # by the configured ``engine: ...`` value.
            engine_traits = self[engine.engine]

        else:
            # nothing persisted for this engine: use empty traits
            engine_traits = EngineTraits(data_type='traits_v1')

        engine_traits.set_traits(engine)
 | 
			
		||||
| 
						 | 
				
			
			@ -11,24 +11,22 @@ usage::
 | 
			
		|||
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
from __future__ import annotations
 | 
			
		||||
 | 
			
		||||
import sys
 | 
			
		||||
import copy
 | 
			
		||||
from typing import Dict, List, Optional
 | 
			
		||||
 | 
			
		||||
from os.path import realpath, dirname
 | 
			
		||||
from babel.localedata import locale_identifiers
 | 
			
		||||
from searx import logger, settings
 | 
			
		||||
from searx.data import ENGINES_LANGUAGES
 | 
			
		||||
from searx.network import get
 | 
			
		||||
from searx.utils import load_module, match_language, gen_useragent
 | 
			
		||||
 | 
			
		||||
from typing import TYPE_CHECKING, Dict, Optional
 | 
			
		||||
 | 
			
		||||
from searx import logger, settings
 | 
			
		||||
from searx.utils import load_module
 | 
			
		||||
 | 
			
		||||
if TYPE_CHECKING:
 | 
			
		||||
    from searx.enginelib import Engine
 | 
			
		||||
 | 
			
		||||
logger = logger.getChild('engines')
 | 
			
		||||
ENGINE_DIR = dirname(realpath(__file__))
 | 
			
		||||
BABEL_LANGS = [
 | 
			
		||||
    lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0]
 | 
			
		||||
    for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())
 | 
			
		||||
]
 | 
			
		||||
ENGINE_DEFAULT_ARGS = {
 | 
			
		||||
    "engine_type": "online",
 | 
			
		||||
    "inactive": False,
 | 
			
		||||
| 
						 | 
				
			
			@ -36,8 +34,6 @@ ENGINE_DEFAULT_ARGS = {
 | 
			
		|||
    "timeout": settings["outgoing"]["request_timeout"],
 | 
			
		||||
    "shortcut": "-",
 | 
			
		||||
    "categories": ["general"],
 | 
			
		||||
    "supported_languages": [],
 | 
			
		||||
    "language_aliases": {},
 | 
			
		||||
    "paging": False,
 | 
			
		||||
    "safesearch": False,
 | 
			
		||||
    "time_range_support": False,
 | 
			
		||||
| 
						 | 
				
			
			@ -47,29 +43,13 @@ ENGINE_DEFAULT_ARGS = {
 | 
			
		|||
    "send_accept_language_header": False,
 | 
			
		||||
    "tokens": [],
 | 
			
		||||
    "about": {},
 | 
			
		||||
    "supported_languages": [],  # deprecated use traits
 | 
			
		||||
    "language_aliases": {},  # deprecated not needed when using traits
 | 
			
		||||
}
 | 
			
		||||
# set automatically when an engine does not have any tab category
 | 
			
		||||
OTHER_CATEGORY = 'other'
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Engine:  # pylint: disable=too-few-public-methods
    """This class is currently never initialized and only used for type hinting.

    It only declares the names and types of attributes that code in this file
    reads from or writes to an engine object.
    """

    # engine name as configured in settings.yml
    name: str
    # value of ``engine: ...`` from settings.yml, the origin engine (python module)
    engine: str
    shortcut: str
    categories: List[str]
    # NOTE(review): ``set_language_attributes`` also handles a dict here, the
    # ``List[str]`` annotation may be too narrow -- confirm.
    supported_languages: List[str]
    about: dict
    inactive: bool
    disabled: bool
    # set by ``set_language_attributes``: True if ``supported_languages`` is not empty
    language_support: bool
    paging: bool
    safesearch: bool
    time_range_support: bool
    timeout: float
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Defaults for the namespace of an engine module, see :py:func:`load_engine`
 | 
			
		||||
 | 
			
		||||
categories = {'general': []}
 | 
			
		||||
| 
						 | 
				
			
			@ -136,9 +116,15 @@ def load_engine(engine_data: dict) -> Optional[Engine]:
 | 
			
		|||
        return None
 | 
			
		||||
 | 
			
		||||
    update_engine_attributes(engine, engine_data)
 | 
			
		||||
    set_language_attributes(engine)
 | 
			
		||||
    update_attributes_for_tor(engine)
 | 
			
		||||
 | 
			
		||||
    # avoid cyclic imports
 | 
			
		||||
    # pylint: disable=import-outside-toplevel
 | 
			
		||||
    from searx.enginelib.traits import EngineTraitsMap
 | 
			
		||||
 | 
			
		||||
    trait_map = EngineTraitsMap.from_data()
 | 
			
		||||
    trait_map.set_traits(engine)
 | 
			
		||||
 | 
			
		||||
    if not is_engine_active(engine):
 | 
			
		||||
        return None
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -190,60 +176,6 @@ def update_engine_attributes(engine: Engine, engine_data):
 | 
			
		|||
            setattr(engine, arg_name, copy.deepcopy(arg_value))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def set_language_attributes(engine: Engine):
    """Initialize the language related attributes in the namespace of ``engine``.

    - ``supported_languages`` is taken from ``ENGINES_LANGUAGES`` and narrowed
      to ``engine.language`` if a language is configured for the engine,
    - ``language_aliases`` is extended by aliases for non standard language codes,
    - ``language_support`` is set and
    - ``fetch_supported_languages`` is installed when the engine implements
      ``_fetch_supported_languages``.

    :raises ValueError: if the configured ``engine.language`` is not one of the
      supported languages.
    """
    # The keys of ENGINES_LANGUAGES are the *engine names* configured in
    # settings.yml.  When multiple engines are configured to use the same
    # origin engine (python module), these additional engines use the languages
    # of the origin engine, looked up by the configured ``engine: ...`` value.
    if engine.name in ENGINES_LANGUAGES:
        engine.supported_languages = ENGINES_LANGUAGES[engine.name]
    elif engine.engine in ENGINES_LANGUAGES:
        engine.supported_languages = ENGINES_LANGUAGES[engine.engine]

    if hasattr(engine, 'language'):
        # With `language: ...` in the YAML settings the engine supports only
        # this one language: narrow supported_languages to that value.
        configured = engine.language
        available = engine.supported_languages
        if configured not in available:
            raise ValueError(
                "settings.yml - engine: '%s' / language: '%s' not supported" % (engine.name, configured)
            )

        if isinstance(available, dict):
            engine.supported_languages = {configured: available[configured]}
        else:
            engine.supported_languages = [configured]

    # find custom aliases for non standard language codes
    for engine_lang in engine.supported_languages:
        iso_lang = match_language(engine_lang, BABEL_LANGS, fallback=None)
        if not iso_lang or iso_lang == engine_lang:
            continue
        if engine_lang.startswith(iso_lang) or iso_lang in engine.supported_languages:
            continue
        engine.language_aliases[iso_lang] = engine_lang

    engine.language_support = len(engine.supported_languages) > 0

    # assign language fetching method if auxiliary method exists
    if not hasattr(engine, '_fetch_supported_languages'):
        return

    headers = {
        'User-Agent': gen_useragent(),
        'Accept-Language': "en-US,en;q=0.5",  # bing needs to set the English language
    }

    def fetch_supported_languages():
        # pylint: disable=protected-access
        return engine._fetch_supported_languages(get(engine.supported_languages_url, headers=headers))

    engine.fetch_supported_languages = fetch_supported_languages
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def update_attributes_for_tor(engine: Engine) -> bool:
 | 
			
		||||
    if using_tor_proxy(engine) and hasattr(engine, 'onion_url'):
 | 
			
		||||
        engine.search_url = engine.onion_url + getattr(engine, 'search_path', '')
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -63,7 +63,7 @@ def search(query, request_params):
 | 
			
		|||
    for row in result_list:
 | 
			
		||||
        entry = {
 | 
			
		||||
            'query': query,
 | 
			
		||||
            'language': request_params['language'],
 | 
			
		||||
            'language': request_params['searxng_locale'],
 | 
			
		||||
            'value': row.get("value"),
 | 
			
		||||
            # choose a result template or comment out to use the *default*
 | 
			
		||||
            'template': 'key-value.html',
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -8,7 +8,7 @@ from typing import Set
 | 
			
		|||
import os
 | 
			
		||||
import pathlib
 | 
			
		||||
 | 
			
		||||
from babel import Locale
 | 
			
		||||
import babel
 | 
			
		||||
from babel.support import Translations
 | 
			
		||||
import babel.languages
 | 
			
		||||
import babel.core
 | 
			
		||||
| 
						 | 
				
			
			@ -134,7 +134,7 @@ def locales_initialize(directory=None):
 | 
			
		|||
    flask_babel.get_translations = get_translations
 | 
			
		||||
 | 
			
		||||
    for tag, descr in ADDITIONAL_TRANSLATIONS.items():
 | 
			
		||||
        locale = Locale.parse(LOCALE_BEST_MATCH[tag], sep='-')
 | 
			
		||||
        locale = babel.Locale.parse(LOCALE_BEST_MATCH[tag], sep='-')
 | 
			
		||||
        LOCALE_NAMES[tag] = descr
 | 
			
		||||
        if locale.text_direction == 'rtl':
 | 
			
		||||
            RTL_LOCALES.add(tag)
 | 
			
		||||
| 
						 | 
				
			
			@ -142,7 +142,7 @@ def locales_initialize(directory=None):
 | 
			
		|||
    for tag in LOCALE_BEST_MATCH:
 | 
			
		||||
        descr = LOCALE_NAMES.get(tag)
 | 
			
		||||
        if not descr:
 | 
			
		||||
            locale = Locale.parse(tag, sep='-')
 | 
			
		||||
            locale = babel.Locale.parse(tag, sep='-')
 | 
			
		||||
            LOCALE_NAMES[tag] = get_locale_descr(locale, tag.replace('-', '_'))
 | 
			
		||||
            if locale.text_direction == 'rtl':
 | 
			
		||||
                RTL_LOCALES.add(tag)
 | 
			
		||||
| 
						 | 
				
			
			@ -154,12 +154,66 @@ def locales_initialize(directory=None):
 | 
			
		|||
        tag = dirname.replace('_', '-')
 | 
			
		||||
        descr = LOCALE_NAMES.get(tag)
 | 
			
		||||
        if not descr:
 | 
			
		||||
            locale = Locale.parse(dirname)
 | 
			
		||||
            locale = babel.Locale.parse(dirname)
 | 
			
		||||
            LOCALE_NAMES[tag] = get_locale_descr(locale, dirname)
 | 
			
		||||
            if locale.text_direction == 'rtl':
 | 
			
		||||
                RTL_LOCALES.add(tag)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def region_tag(locale: babel.Locale) -> str:
    """Returns SearXNG's region tag from the locale (e.g. zh-TW , en-US).

    :param locale: locale to build the tag from (language and territory).

    :raises ValueError: if the locale has no territory.
    """
    if not locale.territory:
        # interpolate the locale, otherwise the message contains a literal '%s'
        raise ValueError('%s missed a territory' % (locale,))
    return locale.language + '-' + locale.territory
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def language_tag(locale: babel.Locale) -> str:
    """Returns SearXNG's language tag of the locale.  If the locale has a
    script, the script name is appended to the language (e.g. en, zh_Hant).
    """
    if not locale.script:
        return locale.language
    return locale.language + '_' + locale.script
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_offical_locales(
    territory: str, languages=None, regional: bool = False, de_facto: bool = True
) -> Set[babel.Locale]:
    """Returns a set of :py:obj:`babel.Locale` with languages from
    :py:obj:`babel.languages.get_official_languages`.

    :param territory: The territory (country or region) code.

    :param languages: A list of language codes the languages from
      :py:obj:`babel.languages.get_official_languages` should be in
      (intersection, compared case-insensitively).  If this argument is
      ``None`` (or empty), all official languages in this territory are used.

    :param regional: If the regional flag is set, then languages which are
      regionally official are also returned.

    :param de_facto: If the de_facto flag is set to `False`, then languages
      which are “de facto” official are not returned.

    Languages for which babel does not know a locale in this territory are
    silently skipped.
    """
    official_langs = babel.languages.get_official_languages(territory, regional=regional, de_facto=de_facto)

    if languages:
        wanted = [code.lower() for code in languages]
        official_langs = {code for code in official_langs if code.lower() in wanted}

    found_locales = set()
    for lang_code in official_langs:
        try:
            found_locales.add(babel.Locale.parse(lang_code + '_' + territory))
        except babel.UnknownLocaleError:
            continue

    return found_locales
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_engine_locale(searxng_locale, engine_locales, default=None):
 | 
			
		||||
    """Return engine's language (aka locale) string that best fits to argument
 | 
			
		||||
    ``searxng_locale``.
 | 
			
		||||
| 
						 | 
				
			
			@ -177,6 +231,10 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
 | 
			
		|||
          ...
 | 
			
		||||
          'pl-PL'          : 'pl_PL',
 | 
			
		||||
          'pt-PT'          : 'pt_PT'
 | 
			
		||||
          ..
 | 
			
		||||
          'zh'             : 'zh'
 | 
			
		||||
          'zh_Hans'        : 'zh'
 | 
			
		||||
          'zh_Hant'        : 'zh-classical'
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    .. hint::
 | 
			
		||||
| 
						 | 
				
			
			@ -210,13 +268,13 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
 | 
			
		|||
      engine.
 | 
			
		||||
 | 
			
		||||
    """
 | 
			
		||||
    # pylint: disable=too-many-branches
 | 
			
		||||
    # pylint: disable=too-many-branches, too-many-return-statements
 | 
			
		||||
 | 
			
		||||
    engine_locale = engine_locales.get(searxng_locale)
 | 
			
		||||
 | 
			
		||||
    if engine_locale is not None:
 | 
			
		||||
        # There was a 1:1 mapping (e.g. "fr-BE --> fr_BE" or "fr --> fr_FR"), no
 | 
			
		||||
        # need to narrow language nor territory.
 | 
			
		||||
        # There was a 1:1 mapping (e.g. a region "fr-BE --> fr_BE" or a language
 | 
			
		||||
        # "zh --> zh"), no need to narrow language-script nor territory.
 | 
			
		||||
        return engine_locale
 | 
			
		||||
 | 
			
		||||
    try:
 | 
			
		||||
| 
						 | 
				
			
			@ -227,6 +285,12 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
 | 
			
		|||
        except babel.core.UnknownLocaleError:
 | 
			
		||||
            return default
 | 
			
		||||
 | 
			
		||||
    searxng_lang = language_tag(locale)
 | 
			
		||||
    engine_locale = engine_locales.get(searxng_lang)
 | 
			
		||||
    if engine_locale is not None:
 | 
			
		||||
        # There was a 1:1 mapping (e.g. "zh-HK --> zh_Hant" or "zh-CN --> zh_Hans")
 | 
			
		||||
        return engine_locale
 | 
			
		||||
 | 
			
		||||
    # SearXNG's selected locale is not supported by the engine ..
 | 
			
		||||
 | 
			
		||||
    if locale.territory:
 | 
			
		||||
| 
						 | 
				
			
			@ -247,10 +311,6 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
 | 
			
		|||
 | 
			
		||||
    if locale.language:
 | 
			
		||||
 | 
			
		||||
        searxng_lang = locale.language
 | 
			
		||||
        if locale.script:
 | 
			
		||||
            searxng_lang += '_' + locale.script
 | 
			
		||||
 | 
			
		||||
        terr_lang_dict = {}
 | 
			
		||||
        for territory, langs in babel.core.get_global("territory_languages").items():
 | 
			
		||||
            if not langs.get(searxng_lang, {}).get('official_status'):
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -13,7 +13,7 @@ from typing import Iterable, Dict, List
 | 
			
		|||
import flask
 | 
			
		||||
 | 
			
		||||
from searx import settings, autocomplete
 | 
			
		||||
from searx.engines import Engine
 | 
			
		||||
from searx.enginelib import Engine
 | 
			
		||||
from searx.plugins import Plugin
 | 
			
		||||
from searx.locales import LOCALE_NAMES
 | 
			
		||||
from searx.webutils import VALID_LANGUAGE_CODE
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -30,7 +30,10 @@ from .abstract import EngineProcessor
 | 
			
		|||
 | 
			
		||||
logger = logger.getChild('search.processors')
 | 
			
		||||
PROCESSORS: Dict[str, EngineProcessor] = {}
 | 
			
		||||
"""Cache request processores, stored by *engine-name* (:py:func:`initialize`)"""
 | 
			
		||||
"""Cache request processores, stored by *engine-name* (:py:func:`initialize`)
 | 
			
		||||
 | 
			
		||||
:meta hide-value:
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_processor_class(engine_type):
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -138,7 +138,8 @@ class EngineProcessor(ABC):
 | 
			
		|||
        return False
 | 
			
		||||
 | 
			
		||||
    def get_params(self, search_query, engine_category):
 | 
			
		||||
        """Returns a set of *request params* or ``None`` if request is not supported.
 | 
			
		||||
        """Returns a set of (see :ref:`request params <engine request arguments>`) or
 | 
			
		||||
        ``None`` if request is not supported.
 | 
			
		||||
 | 
			
		||||
        Not supported conditions (``None`` is returned):
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -159,11 +160,20 @@ class EngineProcessor(ABC):
 | 
			
		|||
        params['safesearch'] = search_query.safesearch
 | 
			
		||||
        params['time_range'] = search_query.time_range
 | 
			
		||||
        params['engine_data'] = search_query.engine_data.get(self.engine_name, {})
 | 
			
		||||
        params['searxng_locale'] = search_query.lang
 | 
			
		||||
 | 
			
		||||
        # deprecated / vintage --> use params['searxng_locale']
 | 
			
		||||
        #
 | 
			
		||||
        # Conditions related to engine's traits are implemented in engine.traits
 | 
			
		||||
        # module. Don't do 'locale' decisions here in the abstract layer of the
 | 
			
		||||
        # search processor, just pass the value from user's choice unchanged to
 | 
			
		||||
        # the engine request.
 | 
			
		||||
 | 
			
		||||
        if hasattr(self.engine, 'language') and self.engine.language:
 | 
			
		||||
            params['language'] = self.engine.language
 | 
			
		||||
        else:
 | 
			
		||||
            params['language'] = search_query.lang
 | 
			
		||||
 | 
			
		||||
        return params
 | 
			
		||||
 | 
			
		||||
    @abstractmethod
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -51,6 +51,9 @@ class OnlineProcessor(EngineProcessor):
 | 
			
		|||
        super().initialize()
 | 
			
		||||
 | 
			
		||||
    def get_params(self, search_query, engine_category):
 | 
			
		||||
        """Returns a set of :ref:`request params <engine request online>` or ``None``
 | 
			
		||||
        if request is not supported.
 | 
			
		||||
        """
 | 
			
		||||
        params = super().get_params(search_query, engine_category)
 | 
			
		||||
        if params is None:
 | 
			
		||||
            return None
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -38,8 +38,8 @@ class OnlineCurrencyProcessor(OnlineProcessor):
 | 
			
		|||
    engine_type = 'online_currency'
 | 
			
		||||
 | 
			
		||||
    def get_params(self, search_query, engine_category):
 | 
			
		||||
        """Returns a set of *request params* or ``None`` if search query does not match
 | 
			
		||||
        to :py:obj:`parser_re`."""
 | 
			
		||||
        """Returns a set of :ref:`request params <engine request online_currency>`
 | 
			
		||||
        or ``None`` if search query does not match to :py:obj:`parser_re`."""
 | 
			
		||||
 | 
			
		||||
        params = super().get_params(search_query, engine_category)
 | 
			
		||||
        if params is None:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -18,8 +18,9 @@ class OnlineDictionaryProcessor(OnlineProcessor):
 | 
			
		|||
    engine_type = 'online_dictionary'
 | 
			
		||||
 | 
			
		||||
    def get_params(self, search_query, engine_category):
 | 
			
		||||
        """Returns a set of *request params* or ``None`` if search query does not match
 | 
			
		||||
        to :py:obj:`parser_re`."""
 | 
			
		||||
        """Returns a set of :ref:`request params <engine request online_dictionary>` or
 | 
			
		||||
        ``None`` if search query does not match to :py:obj:`parser_re`.
 | 
			
		||||
        """
 | 
			
		||||
        params = super().get_params(search_query, engine_category)
 | 
			
		||||
        if params is None:
 | 
			
		||||
            return None
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -20,9 +20,10 @@ class OnlineUrlSearchProcessor(OnlineProcessor):
 | 
			
		|||
    engine_type = 'online_url_search'
 | 
			
		||||
 | 
			
		||||
    def get_params(self, search_query, engine_category):
 | 
			
		||||
        """Returns a set of *request params* or ``None`` if search query does not match
 | 
			
		||||
        to at least one of :py:obj:`re_search_urls`.
 | 
			
		||||
        """Returns a set of :ref:`request params <engine request online>` or ``None`` if
 | 
			
		||||
        search query does not match to :py:obj:`re_search_urls`.
 | 
			
		||||
        """
 | 
			
		||||
 | 
			
		||||
        params = super().get_params(search_query, engine_category)
 | 
			
		||||
        if params is None:
 | 
			
		||||
            return None
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -907,16 +907,11 @@ def autocompleter():
 | 
			
		|||
    # and there is a query part
 | 
			
		||||
    if len(raw_text_query.autocomplete_list) == 0 and len(sug_prefix) > 0:
 | 
			
		||||
 | 
			
		||||
        # get language from cookie
 | 
			
		||||
        language = request.preferences.get_value('language')
 | 
			
		||||
        if not language or language == 'all':
 | 
			
		||||
            language = 'en'
 | 
			
		||||
        else:
 | 
			
		||||
            language = language.split('-')[0]
 | 
			
		||||
        # get SearXNG's locale and autocomplete backend from cookie
 | 
			
		||||
        sxng_locale = request.preferences.get_value('language')
 | 
			
		||||
        backend_name = request.preferences.get_value('autocomplete')
 | 
			
		||||
 | 
			
		||||
        # run autocompletion
 | 
			
		||||
        raw_results = search_autocomplete(request.preferences.get_value('autocomplete'), sug_prefix, language)
 | 
			
		||||
        for result in raw_results:
 | 
			
		||||
        for result in search_autocomplete(backend_name, sug_prefix, sxng_locale):
 | 
			
		||||
            # attention: this loop will change raw_text_query object and this is
 | 
			
		||||
            # the reason why the sug_prefix was stored before (see above)
 | 
			
		||||
            if result != sug_prefix:
 | 
			
		||||
| 
						 | 
				
			
			@ -1001,7 +996,9 @@ def preferences():
 | 
			
		|||
            'rate80': rate80,
 | 
			
		||||
            'rate95': rate95,
 | 
			
		||||
            'warn_timeout': e.timeout > settings['outgoing']['request_timeout'],
 | 
			
		||||
            'supports_selected_language': _is_selected_language_supported(e, request.preferences),
 | 
			
		||||
            'supports_selected_language': e.traits.is_locale_supported(
 | 
			
		||||
                str(request.preferences.get_value('language') or 'all')
 | 
			
		||||
            ),
 | 
			
		||||
            'result_count': result_count,
 | 
			
		||||
        }
 | 
			
		||||
    # end of stats
 | 
			
		||||
| 
						 | 
				
			
			@ -1052,7 +1049,9 @@ def preferences():
 | 
			
		|||
    # supports
 | 
			
		||||
    supports = {}
 | 
			
		||||
    for _, e in filtered_engines.items():
 | 
			
		||||
        supports_selected_language = _is_selected_language_supported(e, request.preferences)
 | 
			
		||||
        supports_selected_language = e.traits.is_locale_supported(
 | 
			
		||||
            str(request.preferences.get_value('language') or 'all')
 | 
			
		||||
        )
 | 
			
		||||
        safesearch = e.safesearch
 | 
			
		||||
        time_range_support = e.time_range_support
 | 
			
		||||
        for checker_test_name in checker_results.get(e.name, {}).get('errors', {}):
 | 
			
		||||
| 
						 | 
				
			
			@ -1099,16 +1098,6 @@ def preferences():
 | 
			
		|||
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _is_selected_language_supported(engine, preferences: Preferences):  # pylint: disable=redefined-outer-name
 | 
			
		||||
    language = preferences.get_value('language')
 | 
			
		||||
    if language == 'all':
 | 
			
		||||
        return True
 | 
			
		||||
    x = match_language(
 | 
			
		||||
        language, getattr(engine, 'supported_languages', []), getattr(engine, 'language_aliases', {}), None
 | 
			
		||||
    )
 | 
			
		||||
    return bool(x)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@app.route('/image_proxy', methods=['GET'])
 | 
			
		||||
def image_proxy():
 | 
			
		||||
    # pylint: disable=too-many-return-statements, too-many-branches
 | 
			
		||||
| 
						 | 
				
			
			@ -1327,9 +1316,11 @@ def config():
 | 
			
		|||
        if not request.preferences.validate_token(engine):
 | 
			
		||||
            continue
 | 
			
		||||
 | 
			
		||||
        supported_languages = engine.supported_languages
 | 
			
		||||
        if isinstance(engine.supported_languages, dict):
 | 
			
		||||
            supported_languages = list(engine.supported_languages.keys())
 | 
			
		||||
        _languages = engine.traits.languages.keys()
 | 
			
		||||
        if engine.traits.data_type == 'supported_languages':  # vintage / deprecated
 | 
			
		||||
            _languages = engine.traits.supported_languages
 | 
			
		||||
            if isinstance(_languages, dict):
 | 
			
		||||
                _languages = _languages.keys()
 | 
			
		||||
 | 
			
		||||
        _engines.append(
 | 
			
		||||
            {
 | 
			
		||||
| 
						 | 
				
			
			@ -1339,7 +1330,8 @@ def config():
 | 
			
		|||
                'enabled': not engine.disabled,
 | 
			
		||||
                'paging': engine.paging,
 | 
			
		||||
                'language_support': engine.language_support,
 | 
			
		||||
                'supported_languages': supported_languages,
 | 
			
		||||
                'languages': list(_languages),
 | 
			
		||||
                'regions': list(engine.traits.regions.keys()),
 | 
			
		||||
                'safesearch': engine.safesearch,
 | 
			
		||||
                'time_range_support': engine.time_range_support,
 | 
			
		||||
                'timeout': engine.timeout,
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,4 +1,6 @@
 | 
			
		|||
# -*- coding: utf-8 -*-
 | 
			
		||||
from __future__ import annotations
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import pathlib
 | 
			
		||||
import csv
 | 
			
		||||
| 
						 | 
				
			
			@ -8,7 +10,7 @@ import re
 | 
			
		|||
import inspect
 | 
			
		||||
import itertools
 | 
			
		||||
from datetime import datetime, timedelta
 | 
			
		||||
from typing import Iterable, List, Tuple, Dict
 | 
			
		||||
from typing import Iterable, List, Tuple, Dict, TYPE_CHECKING
 | 
			
		||||
 | 
			
		||||
from io import StringIO
 | 
			
		||||
from codecs import getincrementalencoder
 | 
			
		||||
| 
						 | 
				
			
			@ -16,7 +18,10 @@ from codecs import getincrementalencoder
 | 
			
		|||
from flask_babel import gettext, format_date
 | 
			
		||||
 | 
			
		||||
from searx import logger, settings
 | 
			
		||||
from searx.engines import Engine, OTHER_CATEGORY
 | 
			
		||||
from searx.engines import OTHER_CATEGORY
 | 
			
		||||
 | 
			
		||||
if TYPE_CHECKING:
 | 
			
		||||
    from searx.enginelib import Engine
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,19 +1,21 @@
 | 
			
		|||
#!/usr/bin/env python
 | 
			
		||||
# lint: pylint
 | 
			
		||||
 | 
			
		||||
# SPDX-License-Identifier: AGPL-3.0-or-later
 | 
			
		||||
"""This script generates languages.py from intersecting each engine's supported
 | 
			
		||||
languages.
 | 
			
		||||
"""Update :py:obj:`searx.enginelib.traits.EngineTraitsMap` and :origin:`searx/languages.py`
 | 
			
		||||
 | 
			
		||||
Output files: :origin:`searx/data/engines_languages.json` and
 | 
			
		||||
:origin:`searx/languages.py` (:origin:`CI Update data ...
 | 
			
		||||
<.github/workflows/data-update.yml>`).
 | 
			
		||||
:py:obj:`searx.enginelib.traits.EngineTraitsMap.ENGINE_TRAITS_FILE`:
 | 
			
		||||
  Persistence of engines traits, fetched from the engines.
 | 
			
		||||
 | 
			
		||||
:origin:`searx/languages.py`
 | 
			
		||||
  Is generated from intersecting each engine's supported traits.
 | 
			
		||||
 | 
			
		||||
The script :origin:`searxng_extra/update/update_engine_traits.py` is called in
 | 
			
		||||
the :origin:`CI Update data ... <.github/workflows/data-update.yml>`
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
# pylint: disable=invalid-name
 | 
			
		||||
from unicodedata import lookup
 | 
			
		||||
import json
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
from pprint import pformat
 | 
			
		||||
from babel import Locale, UnknownLocaleError
 | 
			
		||||
| 
						 | 
				
			
			@ -21,36 +23,26 @@ from babel.languages import get_global
 | 
			
		|||
from babel.core import parse_locale
 | 
			
		||||
 | 
			
		||||
from searx import settings, searx_dir
 | 
			
		||||
from searx import network
 | 
			
		||||
from searx.engines import load_engines, engines
 | 
			
		||||
from searx.network import set_timeout_for_thread
 | 
			
		||||
from searx.enginelib.traits import EngineTraitsMap
 | 
			
		||||
 | 
			
		||||
# Output files.
 | 
			
		||||
engines_languages_file = Path(searx_dir) / 'data' / 'engines_languages.json'
 | 
			
		||||
languages_file = Path(searx_dir) / 'languages.py'
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Fetches supported languages for each engine and writes json file with those.
 | 
			
		||||
def fetch_supported_languages():
 | 
			
		||||
    set_timeout_for_thread(10.0)
 | 
			
		||||
def fetch_traits_map():
 | 
			
		||||
    """Fetchs supported languages for each engine and writes json file with those."""
 | 
			
		||||
    network.set_timeout_for_thread(10.0)
 | 
			
		||||
 | 
			
		||||
    engines_languages = {}
 | 
			
		||||
    names = list(engines)
 | 
			
		||||
    names.sort()
 | 
			
		||||
    def log(msg):
 | 
			
		||||
        print(msg)
 | 
			
		||||
 | 
			
		||||
    for engine_name in names:
 | 
			
		||||
        if hasattr(engines[engine_name], 'fetch_supported_languages'):
 | 
			
		||||
            engines_languages[engine_name] = engines[engine_name].fetch_supported_languages()
 | 
			
		||||
            print("fetched %s languages from engine %s" % (len(engines_languages[engine_name]), engine_name))
 | 
			
		||||
            if type(engines_languages[engine_name]) == list:  # pylint: disable=unidiomatic-typecheck
 | 
			
		||||
                engines_languages[engine_name] = sorted(engines_languages[engine_name])
 | 
			
		||||
 | 
			
		||||
    print("fetched languages from %s engines" % len(engines_languages))
 | 
			
		||||
 | 
			
		||||
    # write json file
 | 
			
		||||
    with open(engines_languages_file, 'w', encoding='utf-8') as f:
 | 
			
		||||
        json.dump(engines_languages, f, indent=2, sort_keys=True)
 | 
			
		||||
 | 
			
		||||
    return engines_languages
 | 
			
		||||
    traits_map = EngineTraitsMap.fetch_traits(log=log)
 | 
			
		||||
    print("fetched properties from %s engines" % len(traits_map))
 | 
			
		||||
    print("write json file: %s" % traits_map.ENGINE_TRAITS_FILE)
 | 
			
		||||
    traits_map.save_data()
 | 
			
		||||
    return traits_map
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Get babel Locale object from lang_code if possible.
 | 
			
		||||
| 
						 | 
				
			
			@ -124,17 +116,43 @@ def get_territory_name(lang_code):
 | 
			
		|||
    return country_name
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Join all language lists.
 | 
			
		||||
def join_language_lists(engines_languages):
 | 
			
		||||
    language_list = {}
 | 
			
		||||
    for engine_name in engines_languages:
 | 
			
		||||
        for lang_code in engines_languages[engine_name]:
 | 
			
		||||
def join_language_lists(traits_map: EngineTraitsMap):
 | 
			
		||||
    """Join all languages of the engines into one list.  The returned language list
 | 
			
		||||
    contains language codes (``zh``) and region codes (``zh-TW``).  The codes can
 | 
			
		||||
    be parsed by babel::
 | 
			
		||||
 | 
			
		||||
            # apply custom fixes if necessary
 | 
			
		||||
            if lang_code in getattr(engines[engine_name], 'language_aliases', {}).values():
 | 
			
		||||
                lang_code = next(
 | 
			
		||||
                    lc for lc, alias in engines[engine_name].language_aliases.items() if lang_code == alias
 | 
			
		||||
                )
 | 
			
		||||
      babel.Locale.parse(language_list[n])
 | 
			
		||||
    """
 | 
			
		||||
    # pylint: disable=too-many-branches
 | 
			
		||||
    language_list = {}
 | 
			
		||||
 | 
			
		||||
    for eng_name, eng_traits in traits_map.items():
 | 
			
		||||
        eng = engines[eng_name]
 | 
			
		||||
        eng_codes = set()
 | 
			
		||||
 | 
			
		||||
        if eng_traits.data_type == 'traits_v1':
 | 
			
		||||
            # items of type 'engine_traits' do have regions & languages, the
 | 
			
		||||
            # list of eng_codes should contain both.
 | 
			
		||||
            eng_codes.update(eng_traits.regions.keys())
 | 
			
		||||
            eng_codes.update(eng_traits.languages.keys())
 | 
			
		||||
 | 
			
		||||
        elif eng_traits.data_type == 'supported_languages':
 | 
			
		||||
            # vintage / deprecated
 | 
			
		||||
            _codes = set()
 | 
			
		||||
            if isinstance(eng_traits.supported_languages, dict):
 | 
			
		||||
                _codes.update(eng_traits.supported_languages.keys())
 | 
			
		||||
            elif isinstance(eng_traits.supported_languages, list):
 | 
			
		||||
                _codes.update(eng_traits.supported_languages)
 | 
			
		||||
            else:
 | 
			
		||||
                raise TypeError('engine.supported_languages type %s is unknown' % type(eng_traits.supported_languages))
 | 
			
		||||
 | 
			
		||||
            for lang_code in _codes:
 | 
			
		||||
                # apply custom fixes if necessary
 | 
			
		||||
                if lang_code in getattr(eng, 'language_aliases', {}).values():
 | 
			
		||||
                    lang_code = next(lc for lc, alias in eng.language_aliases.items() if lang_code == alias)
 | 
			
		||||
                eng_codes.add(lang_code)
 | 
			
		||||
 | 
			
		||||
        for lang_code in eng_codes:
 | 
			
		||||
 | 
			
		||||
            locale = get_locale(lang_code)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -149,10 +167,10 @@ def join_language_lists(engines_languages):
 | 
			
		|||
                    # get language's data from babel's Locale object
 | 
			
		||||
                    language_name = locale.get_language_name().title()
 | 
			
		||||
                    english_name = locale.english_name.split(' (')[0]
 | 
			
		||||
                elif short_code in engines_languages['wikipedia']:
 | 
			
		||||
                elif short_code in traits_map['wikipedia'].supported_languages:
 | 
			
		||||
                    # get language's data from wikipedia if not known by babel
 | 
			
		||||
                    language_name = engines_languages['wikipedia'][short_code]['name']
 | 
			
		||||
                    english_name = engines_languages['wikipedia'][short_code]['english_name']
 | 
			
		||||
                    language_name = traits_map['wikipedia'].supported_languages[short_code]['name']
 | 
			
		||||
                    english_name = traits_map['wikipedia'].supported_languages[short_code]['english_name']
 | 
			
		||||
                else:
 | 
			
		||||
                    language_name = None
 | 
			
		||||
                    english_name = None
 | 
			
		||||
| 
						 | 
				
			
			@ -182,15 +200,15 @@ def join_language_lists(engines_languages):
 | 
			
		|||
                }
 | 
			
		||||
 | 
			
		||||
            # count engine for both language_country combination and language alone
 | 
			
		||||
            language_list[short_code]['counter'].add(engine_name)
 | 
			
		||||
            language_list[short_code]['counter'].add(eng_name)
 | 
			
		||||
            if lang_code != short_code:
 | 
			
		||||
                language_list[short_code]['countries'][lang_code]['counter'].add(engine_name)
 | 
			
		||||
                language_list[short_code]['countries'][lang_code]['counter'].add(eng_name)
 | 
			
		||||
 | 
			
		||||
    return language_list
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Filter language list so it only includes the most supported languages and countries
 | 
			
		||||
def filter_language_list(all_languages):
 | 
			
		||||
def filter_language_list(joined_languages_map):
 | 
			
		||||
    min_engines_per_lang = 12
 | 
			
		||||
    min_engines_per_country = 7
 | 
			
		||||
    # pylint: disable=consider-using-dict-items, consider-iterating-dictionary
 | 
			
		||||
| 
						 | 
				
			
			@ -198,6 +216,7 @@ def filter_language_list(all_languages):
 | 
			
		|||
        engine_name
 | 
			
		||||
        for engine_name in engines.keys()
 | 
			
		||||
        if 'general' in engines[engine_name].categories
 | 
			
		||||
        and hasattr(engines[engine_name], 'supported_languages')
 | 
			
		||||
        and engines[engine_name].supported_languages
 | 
			
		||||
        and not engines[engine_name].disabled
 | 
			
		||||
    ]
 | 
			
		||||
| 
						 | 
				
			
			@ -205,7 +224,7 @@ def filter_language_list(all_languages):
 | 
			
		|||
    # filter list to include only languages supported by most engines or all default general engines
 | 
			
		||||
    filtered_languages = {
 | 
			
		||||
        code: lang
 | 
			
		||||
        for code, lang in all_languages.items()
 | 
			
		||||
        for code, lang in joined_languages_map.items()
 | 
			
		||||
        if (
 | 
			
		||||
            len(lang['counter']) >= min_engines_per_lang
 | 
			
		||||
            or all(main_engine in lang['counter'] for main_engine in main_engines)
 | 
			
		||||
| 
						 | 
				
			
			@ -214,8 +233,8 @@ def filter_language_list(all_languages):
 | 
			
		|||
 | 
			
		||||
    def _copy_lang_data(lang, country_name=None):
 | 
			
		||||
        new_dict = {}
 | 
			
		||||
        new_dict['name'] = all_languages[lang]['name']
 | 
			
		||||
        new_dict['english_name'] = all_languages[lang]['english_name']
 | 
			
		||||
        new_dict['name'] = joined_languages_map[lang]['name']
 | 
			
		||||
        new_dict['english_name'] = joined_languages_map[lang]['english_name']
 | 
			
		||||
        if country_name:
 | 
			
		||||
            new_dict['country_name'] = country_name
 | 
			
		||||
        return new_dict
 | 
			
		||||
| 
						 | 
				
			
			@ -305,9 +324,13 @@ def write_languages_file(languages):
 | 
			
		|||
        new_file.close()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
 | 
			
		||||
def main():
 | 
			
		||||
    load_engines(settings['engines'])
 | 
			
		||||
    _engines_languages = fetch_supported_languages()
 | 
			
		||||
    _all_languages = join_language_lists(_engines_languages)
 | 
			
		||||
    _filtered_languages = filter_language_list(_all_languages)
 | 
			
		||||
    write_languages_file(_filtered_languages)
 | 
			
		||||
    traits_map = fetch_traits_map()
 | 
			
		||||
    joined_languages_map = join_language_lists(traits_map)
 | 
			
		||||
    filtered_languages = filter_language_list(joined_languages_map)
 | 
			
		||||
    write_languages_file(filtered_languages)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
 | 
			
		||||
    main()
 | 
			
		||||
		Loading…
	
	Add table
		
		Reference in a new issue