Merge pull request #2269 from return42/locale-revision

Revision of the locale- and language- handling in SearXNG
2023-03-29 09:47:21 +02:00 · 2023-03-29 09:47:21 +02:00 · f950119ca8
commit f950119ca8
parent 64fea2f9cb 6f9e678346
75 changed files with 7823 additions and 6414 deletions
--- a/.github/workflows/data-update.yml
+++ b/.github/workflows/data-update.yml
@ -17,7 +17,7 @@ jobs:
          - update_currencies.py
          - update_external_bangs.py
          - update_firefox_version.py
-          - update_languages.py
+          - update_engine_traits.py
          - update_wikidata_units.py
          - update_engine_descriptions.py
    steps:
--- a/docs/admin/engines/configured_engines.rst
+++ b/docs/admin/engines/configured_engines.rst
@ -42,7 +42,7 @@ Explanation of the :ref:`general engine configuration` shown in the table
        - Timeout
        - Weight
        - Paging
-        - Language
+        - Language, Region
        - Safe search
        - Time range
--- a/docs/admin/engines/settings.rst
+++ b/docs/admin/engines/settings.rst
@ -569,10 +569,13 @@ engine is shown.  Most of the options have a default value or even are optional.
  To disable by default the engine, but not deleting it.  It will allow the user
  to manually activate it in the settings.
 ``inactive``: optional
  Remove the engine from the settings (*disabled & removed*).
 ``language`` : optional
  If you want to use another language for a specific engine, you can define it
-  by using the full ISO code of language and country, like ``fr_FR``, ``en_US``,
+  by using the ISO code of language (and region), like ``fr``, ``en-US``,
-  ``de_DE``.
+  ``de-DE``.
 ``tokens`` : optional
  A list of secret tokens to make this engine *private*, more details see
--- a/docs/conf.py
+++ b/docs/conf.py
@ -127,6 +127,10 @@ extensions = [
    'notfound.extension',  # https://github.com/readthedocs/sphinx-notfound-page
 ]
 autodoc_default_options = {
    'member-order': 'groupwise',
 }
 myst_enable_extensions = [
  "replacements", "smartquotes"
 ]
@ -135,6 +139,7 @@ suppress_warnings = ['myst.domains']
 intersphinx_mapping = {
    "python": ("https://docs.python.org/3/", None),
    "babel" : ("https://babel.readthedocs.io/en/latest/", None),
    "flask": ("https://flask.palletsprojects.com/", None),
    "flask_babel": ("https://python-babel.github.io/flask-babel/", None),
    # "werkzeug": ("https://werkzeug.palletsprojects.com/", None),
--- a/docs/dev/engine_overview.rst
+++ b/docs/dev/engine_overview.rst
@ -54,6 +54,7 @@ Engine File
                                       - ``offline`` :ref:`[ref] <offline engines>`
                                       - ``online_dictionary``
                                       - ``online_currency``
                                       - ``online_url_search``
   ======================= =========== ========================================================
 .. _engine settings:
@ -131,8 +132,10 @@ Passed Arguments (request)
 These arguments can be used to construct the search query.  Furthermore,
 parameters with default value can be redefined for special purposes.
 .. _engine request online:
-.. table:: If the ``engine_type`` is ``online``
+.. table:: If the ``engine_type`` is :py:obj:`online
           <searx.search.processors.online.OnlineProcessor.get_params>`
   :width: 100%
   ====================== ============== ========================================================================
@ -149,12 +152,16 @@ parameters with default value can be redefined for special purposes.
   safesearch             int            ``0``, between ``0`` and ``2`` (normal, moderate, strict)
   time_range             Optional[str]  ``None``, can be ``day``, ``week``, ``month``, ``year``
   pageno                 int            current page number
-   language               str            specific language code like ``'en_US'``, or ``'all'`` if unspecified
+   searxng_locale         str            SearXNG's locale selected by user.  Specific language code like
                                         ``'en'``, ``'en-US'``, or ``'all'`` if unspecified.
   ====================== ============== ========================================================================
-.. table:: If the ``engine_type`` is ``online_dictionary``, in addition to the
+.. _engine request online_dictionary:
-           ``online`` arguments:
+
 .. table:: If the ``engine_type`` is :py:obj:`online_dictionary
           <searx.search.processors.online_dictionary.OnlineDictionaryProcessor.get_params>`,
           in addition to the :ref:`online <engine request online>` arguments:
   :width: 100%
   ====================== ============== ========================================================================
@ -165,8 +172,11 @@ parameters with default value can be redefined for special purposes.
   query                  str            the text query without the languages
   ====================== ============== ========================================================================
-.. table:: If the ``engine_type`` is ``online_currency```, in addition to the
+.. _engine request online_currency:
-           ``online`` arguments:
+
 .. table:: If the ``engine_type`` is :py:obj:`online_currency
           <searx.search.processors.online_currency.OnlineCurrencyProcessor.get_params>`,
           in addition to the :ref:`online <engine request online>` arguments:
   :width: 100%
   ====================== ============== ========================================================================
@ -179,6 +189,26 @@ parameters with default value can be redefined for special purposes.
   to_name                str            currency name
   ====================== ============== ========================================================================
 .. _engine request online_url_search:
 .. table:: If the ``engine_type`` is :py:obj:`online_url_search
           <searx.search.processors.online_url_search.OnlineUrlSearchProcessor.get_params>`,
           in addition to the :ref:`online <engine request online>` arguments:
   :width: 100%
   ====================== ============== ========================================================================
   argument               type           default-value, information
   ====================== ============== ========================================================================
   search_url             dict           URLs from the search query:
                                         .. code:: python
                                            {
                                              'http': str,
                                              'ftp': str,
                                              'data:image': str
                                            }
   ====================== ============== ========================================================================
 Specify Request
 ---------------
--- a/docs/dev/searxng_extra/update.rst
+++ b/docs/dev/searxng_extra/update.rst
@ -52,12 +52,12 @@ Scripts to update static data in :origin:`searx/data/`
  :members:
-``update_languages.py``
+``update_engine_traits.py``
-=======================
+===========================
-:origin:`[source] <searxng_extra/update/update_languages.py>`
+:origin:`[source] <searxng_extra/update/update_engine_traits.py>`
-.. automodule:: searxng_extra.update.update_languages
+.. automodule:: searxng_extra.update.update_engine_traits
  :members:
--- a/docs/src/searx.engine.archlinux.rst
+++ b/docs/src/searx.engine.archlinux.rst
@ -0,0 +1,9 @@
 .. _archlinux engine:
 ==========
 Arch Linux
 ==========
 .. automodule:: searx.engines.archlinux
  :members:
--- a/docs/src/searx.engine.dailymotion.rst
+++ b/docs/src/searx.engine.dailymotion.rst
@ -0,0 +1,8 @@
 .. _dailymotion engine:
 ===========
 Dailymotion
 ===========
 .. automodule:: searx.engines.dailymotion
  :members:
--- a/docs/src/searx.engine.duckduckgo.rst
+++ b/docs/src/searx.engine.duckduckgo.rst
@ -0,0 +1,22 @@
 .. _duckduckgo engines:
 ==================
 DuckDuckGo engines
 ==================
 .. contents:: Contents
   :depth: 2
   :local:
   :backlinks: entry
 .. automodule:: searx.engines.duckduckgo
   :members:
 .. automodule:: searx.engines.duckduckgo_images
   :members:
 .. automodule:: searx.engines.duckduckgo_definitions
   :members:
 .. automodule:: searx.engines.duckduckgo_weather
   :members:
--- a/docs/src/searx.enginelib.rst
+++ b/docs/src/searx.enginelib.rst
@ -0,0 +1,17 @@
 .. _searx.enginelib:
 ============
 Engine model
 ============
 .. automodule:: searx.enginelib
  :members:
 .. _searx.enginelib.traits:
 =============
 Engine traits
 =============
 .. automodule:: searx.enginelib.traits
  :members:
--- a/docs/src/searx.engines.bing.rst
+++ b/docs/src/searx.engines.bing.rst
@ -0,0 +1,43 @@
 .. _bing engines:
 ============
 Bing Engines
 ============
 .. contents:: Contents
   :depth: 2
   :local:
   :backlinks: entry
 .. _bing web engine:
 Bing WEB
 ========
 .. automodule:: searx.engines.bing
  :members:
 .. _bing images engine:
 Bing Images
 ===========
 .. automodule:: searx.engines.bing_images
  :members:
 .. _bing videos engine:
 Bing Videos
 ===========
 .. automodule:: searx.engines.bing_videos
  :members:
 .. _bing news engine:
 Bing News
 =========
 .. automodule:: searx.engines.bing_news
  :members:
--- a/docs/src/searx.engines.google.rst
+++ b/docs/src/searx.engines.google.rst
@ -12,15 +12,21 @@ Google Engines
 .. _google API:
-google API
+Google API
 ==========
 .. _Query Parameter Definitions:
   https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
 SearXNG's implementation of the Google API is mainly done in
 :py:obj:`get_google_info <searx.engines.google.get_google_info>`.
 For detailed description of the *REST-full* API see: `Query Parameter
-Definitions`_.  Not all parameters can be appied and some engines are *special*
+Definitions`_.  The linked API documentation can sometimes be helpful during
-(e.g. :ref:`google news engine`).
+reverse engineering.  However, we cannot use it in the freely accessible WEB
 services; not all parameters can be applied and some engines are more *special*
 than others (e.g. :ref:`google news engine`).
 .. _google web engine:
@ -30,6 +36,13 @@ Google WEB
 .. automodule:: searx.engines.google
  :members:
 .. _google autocomplete:
 Google Autocomplete
 ====================
 .. autofunction:: searx.autocomplete.google_complete
 .. _google images engine:
 Google Images
@ -53,3 +66,11 @@ Google News
 .. automodule:: searx.engines.google_news
  :members:
 .. _google scholar engine:
 Google Scholar
 ==============
 .. automodule:: searx.engines.google_scholar
  :members:
--- a/docs/src/searx.engines.peertube.rst
+++ b/docs/src/searx.engines.peertube.rst
@ -0,0 +1,27 @@
 .. _peertube engines:
 ================
 Peertube Engines
 ================
 .. contents:: Contents
   :depth: 2
   :local:
   :backlinks: entry
 .. _peertube video engine:
 Peertube Video
 ==============
 .. automodule:: searx.engines.peertube
  :members:
 .. _sepiasearch engine:
 SepiaSearch
 ===========
 .. automodule:: searx.engines.sepiasearch
  :members:
--- a/docs/src/searx.engines.rst
+++ b/docs/src/searx.engines.rst
@ -1,8 +1,8 @@
-.. _load_engines:
+.. _searx.engines:
-============
+=================
-Load Engines
+SearXNG's engines
-============
+=================
 .. automodule:: searx.engines
  :members:
--- a/docs/src/searx.engines.startpage.rst
+++ b/docs/src/searx.engines.startpage.rst
@ -0,0 +1,13 @@
 .. _startpage engines:
 =================
 Startpage engines
 =================
 .. contents:: Contents
   :depth: 2
   :local:
   :backlinks: entry
 .. automodule:: searx.engines.startpage
   :members:
--- a/docs/src/searx.engines.wikipedia.rst
+++ b/docs/src/searx.engines.wikipedia.rst
@ -0,0 +1,27 @@
 .. _wikimedia engines:
 =========
 Wikimedia
 =========
 .. contents:: Contents
   :depth: 2
   :local:
   :backlinks: entry
 .. _wikipedia engine:
 Wikipedia
 =========
 .. automodule:: searx.engines.wikipedia
  :members:
 .. _wikidata engine:
 Wikidata
 =========
 .. automodule:: searx.engines.wikidata
  :members:
--- a/docs/src/searx.locales.rst
+++ b/docs/src/searx.locales.rst
@ -4,5 +4,17 @@
 Locales
 =======
 .. contents:: Contents
   :depth: 2
   :local:
   :backlinks: entry
 .. automodule:: searx.locales
  :members:
 SearXNG's locale codes
 ======================
 .. automodule:: searx.sxng_locales
  :members:
--- a/docs/src/searx.search.processors.rst
+++ b/docs/src/searx.search.processors.rst
@ -0,0 +1,47 @@
 .. _searx.search.processors:
 =================
 Search processors
 =================
 .. contents:: Contents
   :depth: 2
   :local:
   :backlinks: entry
 Abstract processor class
 ========================
 .. automodule:: searx.search.processors.abstract
  :members:
 Offline processor
 =================
 .. automodule:: searx.search.processors.offline
  :members:
 Online processor
 ================
 .. automodule:: searx.search.processors.online
  :members:
 Online currency processor
 =========================
 .. automodule:: searx.search.processors.online_currency
  :members:
 Online Dictionary processor
 ===========================
 .. automodule:: searx.search.processors.online_dictionary
  :members:
 Online URL search processor
 ===========================
 .. automodule:: searx.search.processors.online_url_search
  :members:
--- a/2
+++ b/2
@ -63,7 +63,7 @@ PYLINT_SEARXNG_DISABLE_OPTION="\
 I,C,R,\
 W0105,W0212,W0511,W0603,W0613,W0621,W0702,W0703,W1401,\
 E1136"
-PYLINT_ADDITIONAL_BUILTINS_FOR_ENGINES="supported_languages,language_aliases,logger,categories"
+PYLINT_ADDITIONAL_BUILTINS_FOR_ENGINES="traits,supported_languages,language_aliases,logger,categories"
 PYLINT_OPTIONS="-m pylint -j 0 --rcfile .pylintrc"
 help() {
--- a/requirements.txt
+++ b/requirements.txt
@ -1,5 +1,5 @@
 certifi==2022.12.7
-babel==2.11.0
+babel==2.12.1
 flask-babel==3.0.1
 flask==2.2.3
 jinja2==3.1.2
--- a/searx/autocomplete.py
+++ b/searx/autocomplete.py
@ -5,20 +5,20 @@
 """
 # pylint: disable=use-dict-literal
-from json import loads
+import json
 from urllib.parse import urlencode
-from lxml import etree
+import lxml
 from httpx import HTTPError
 from searx import settings
-from searx.data import ENGINES_LANGUAGES
+from searx.engines import (
    engines,
    google,
 )
 from searx.network import get as http_get
 from searx.exceptions import SearxEngineResponseException
 # a fetch_supported_languages() for XPath engines isn't available right now
 # _brave = ENGINES_LANGUAGES['brave'].keys()
 def get(*args, **kwargs):
    if 'timeout' not in kwargs:
@ -55,34 +55,58 @@ def dbpedia(query, _lang):
    results = []
    if response.ok:
-        dom = etree.fromstring(response.content)
+        dom = lxml.etree.fromstring(response.content)
        results = dom.xpath('//Result/Label//text()')
    return results
-def duckduckgo(query, _lang):
+def duckduckgo(query, sxng_locale):
-    # duckduckgo autocompleter
+    """Autocomplete from DuckDuckGo. Supports DuckDuckGo's languages"""
    url = 'https://ac.duckduckgo.com/ac/?{0}&type=list'
-    resp = loads(get(url.format(urlencode(dict(q=query)))).text)
+    traits = engines['duckduckgo'].traits
-    if len(resp) > 1:
+    args = {
-        return resp[1]
+        'q': query,
-    return []
+        'kl': traits.get_region(sxng_locale, traits.all_locale),
    }
    url = 'https://duckduckgo.com/ac/?type=list&' + urlencode(args)
    resp = get(url)
    ret_val = []
    if resp.ok:
        j = resp.json()
        if len(j) > 1:
            ret_val = j[1]
    return ret_val
-def google(query, lang):
+def google_complete(query, sxng_locale):
-    # google autocompleter
+    """Autocomplete from Google.  Supports Google's languages and subdomains
-    autocomplete_url = 'https://suggestqueries.google.com/complete/search?client=toolbar&'
+    (:py:obj:`searx.engines.google.get_google_info`) by using the async REST
    API::
-    response = get(autocomplete_url + urlencode(dict(hl=lang, q=query)))
+        https://{subdomain}/complete/search?{args}
    """
    google_info = google.get_google_info({'searxng_locale': sxng_locale}, engines['google'].traits)
    url = 'https://{subdomain}/complete/search?{args}'
    args = urlencode(
        {
            'q': query,
            'client': 'gws-wiz',
            'hl': google_info['params']['hl'],
        }
    )
    results = []
-
+    resp = get(url.format(subdomain=google_info['subdomain'], args=args))
-    if response.ok:
+    if resp.ok:
-        dom = etree.fromstring(response.text)
+        json_txt = resp.text[resp.text.find('[') : resp.text.find(']', -3) + 1]
-        results = dom.xpath('//suggestion/@data')
+        data = json.loads(json_txt)
-
+        for item in data[0]:
            results.append(lxml.html.fromstring(item[0]).text_content())
    return results
@ -109,9 +133,9 @@ def seznam(query, _lang):
    ]
-def startpage(query, lang):
+def startpage(query, sxng_locale):
-    # startpage autocompleter
+    """Autocomplete from Startpage. Supports Startpage's languages"""
-    lui = ENGINES_LANGUAGES['startpage'].get(lang, 'english')
+    lui = engines['startpage'].traits.get_language(sxng_locale, 'english')
    url = 'https://startpage.com/suggestions?{query}'
    resp = get(url.format(query=urlencode({'q': query, 'segment': 'startpage.udog', 'lui': lui})))
    data = resp.json()
@ -122,20 +146,20 @@ def swisscows(query, _lang):
    # swisscows autocompleter
    url = 'https://swisscows.ch/api/suggest?{query}&itemsCount=5'
-    resp = loads(get(url.format(query=urlencode({'query': query}))).text)
+    resp = json.loads(get(url.format(query=urlencode({'query': query}))).text)
    return resp
-def qwant(query, lang):
+def qwant(query, sxng_locale):
-    # qwant autocompleter (additional parameter : lang=en_en&count=xxx )
+    """Autocomplete from Qwant. Supports Qwant's regions."""
    url = 'https://api.qwant.com/api/suggest?{query}'
    resp = get(url.format(query=urlencode({'q': query, 'lang': lang})))
    results = []
    locale = engines['qwant'].traits.get_region(sxng_locale, 'en_US')
    url = 'https://api.qwant.com/v3/suggest?{query}'
    resp = get(url.format(query=urlencode({'q': query, 'locale': locale, 'version': '2'})))
    if resp.ok:
-        data = loads(resp.text)
+        data = resp.json()
        if data['status'] == 'success':
            for item in data['data']['items']:
                results.append(item['value'])
@ -143,21 +167,38 @@ def qwant(query, lang):
    return results
-def wikipedia(query, lang):
+def wikipedia(query, sxng_locale):
-    # wikipedia autocompleter
+    """Autocomplete from Wikipedia. Supports Wikipedia's languages (aka netloc)."""
-    url = 'https://' + lang + '.wikipedia.org/w/api.php?action=opensearch&{0}&limit=10&namespace=0&format=json'
+    results = []
    eng_traits = engines['wikipedia'].traits
    wiki_lang = eng_traits.get_language(sxng_locale, 'en')
    wiki_netloc = eng_traits.custom['wiki_netloc'].get(wiki_lang, 'en.wikipedia.org')
-    resp = loads(get(url.format(urlencode(dict(search=query)))).text)
+    url = 'https://{wiki_netloc}/w/api.php?{args}'
-    if len(resp) > 1:
+    args = urlencode(
-        return resp[1]
+        {
-    return []
+            'action': 'opensearch',
            'format': 'json',
            'formatversion': '2',
            'search': query,
            'namespace': '0',
            'limit': '10',
        }
    )
    resp = get(url.format(args=args, wiki_netloc=wiki_netloc))
    if resp.ok:
        data = resp.json()
        if len(data) > 1:
            results = data[1]
    return results
 def yandex(query, _lang):
    # yandex autocompleter
    url = "https://suggest.yandex.com/suggest-ff.cgi?{0}"
-    resp = loads(get(url.format(urlencode(dict(part=query)))).text)
+    resp = json.loads(get(url.format(urlencode(dict(part=query)))).text)
    if len(resp) > 1:
        return resp[1]
    return []
@ -166,7 +207,7 @@ def yandex(query, _lang):
 backends = {
    'dbpedia': dbpedia,
    'duckduckgo': duckduckgo,
-    'google': google,
+    'google': google_complete,
    'seznam': seznam,
    'startpage': startpage,
    'swisscows': swisscows,
@ -177,12 +218,11 @@ backends = {
 }
-def search_autocomplete(backend_name, query, lang):
+def search_autocomplete(backend_name, query, sxng_locale):
    backend = backends.get(backend_name)
    if backend is None:
        return []
    try:
-        return backend(query, lang)
+        return backend(query, sxng_locale)
    except (HTTPError, SearxEngineResponseException):
        return []
--- a/searx/data/init.py
+++ b/searx/data/init.py
@ -7,7 +7,7 @@
 """
 __all__ = [
-    'ENGINES_LANGUAGES',
+    'ENGINE_TRAITS',
    'CURRENCIES',
    'USER_AGENTS',
    'EXTERNAL_URLS',
@ -42,7 +42,6 @@ def ahmia_blacklist_loader():
        return f.read().split()
 ENGINES_LANGUAGES = _load('engines_languages.json')
 CURRENCIES = _load('currencies.json')
 USER_AGENTS = _load('useragents.json')
 EXTERNAL_URLS = _load('external_urls.json')
@ -50,3 +49,4 @@ WIKIDATA_UNITS = _load('wikidata_units.json')
 EXTERNAL_BANGS = _load('external_bangs.json')
 OSM_KEYS_TAGS = _load('osm_keys_tags.json')
 ENGINE_DESCRIPTIONS = _load('engine_descriptions.json')
 ENGINE_TRAITS = _load('engine_traits.json')
--- a/searx/data/engine_traits.json
+++ b/searx/data/engine_traits.json
--- a/searx/data/engines_languages.json
+++ b/searx/data/engines_languages.json
--- a/searx/enginelib/init.py
+++ b/searx/enginelib/init.py
@ -0,0 +1,136 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
 """Engine related implementations
 .. note::
   The long term goal is to modularize all relevant implementations to the
   engines here in this Python package.  In addition to improved modularization,
   this will also be necessary in part because the probability of circular
   imports will increase due to the increased typification of implementations in
   the future.
   ToDo:
   - move :py:obj:`searx.engines.load_engine` to a new module `searx.enginelib`.
 """
 from __future__ import annotations
 from typing import Union, Dict, List, Callable, TYPE_CHECKING
 if TYPE_CHECKING:
    from searx.enginelib import traits
 class Engine:  # pylint: disable=too-few-public-methods
    """Class of engine instances built from YAML settings.
    Further documentation see :ref:`general engine configuration`.
    .. hint::
       This class is currently never initialized and only used for type hinting.
    """
    # Common options in the engine module
    engine_type: str
    """Type of the engine (:origin:`searx/search/processors`)"""
    paging: bool
    """Engine supports multiple pages."""
    time_range_support: bool
    """Engine supports search time range."""
    safesearch: bool
    """Engine supports SafeSearch"""
    language_support: bool
    """Engine supports languages (locales) search."""
    language: str
    """For an engine, when there is ``language: ...`` in the YAML settings the engine
    does support only this one language:
    .. code:: yaml
      - name: google french
        engine: google
        language: fr
    """
    region: str
    """For an engine, when there is ``region: ...`` in the YAML settings the engine
    does support only this one region:
    .. code:: yaml
      - name: google belgium
        engine: google
        region: fr-BE
    """
    fetch_traits: Callable
    """Function to fetch engine's traits from origin."""
    traits: traits.EngineTraits
    """Traits of the engine."""
    # settings.yml
    categories: List[str]
    """Tabs, in which the engine is working."""
    name: str
    """Name that will be used across SearXNG to define this engine.  In settings, on
    the result page .."""
    engine: str
    """Name of the python file used to handle requests and responses to and from
    this search engine (file name from :origin:`searx/engines` without
    ``.py``)."""
    enable_http: bool
    """Enable HTTP (by default only HTTPS is enabled)."""
    shortcut: str
    """Code used to execute bang requests (``!foo``)"""
    timeout: float
    """Specific timeout for search-engine."""
    display_error_messages: bool
    """Display error messages on the web UI."""
    proxies: dict
    """Set proxies for a specific engine (YAML):
    .. code:: yaml
       proxies :
         http:  socks5://proxy:port
         https: socks5://proxy:port
    """
    disabled: bool
    """To disable by default the engine, but not deleting it.  It will allow the
    user to manually activate it in the settings."""
    inactive: bool
    """Remove the engine from the settings (*disabled & removed*)."""
    about: dict
    """Additional fields describing the engine.
    .. code:: yaml
       about:
          website: https://example.com
          wikidata_id: Q306656
          official_api_documentation: https://example.com/api-doc
          use_official_api: true
          require_api_key: true
          results: HTML
    """
--- a/searx/enginelib/traits.py
+++ b/searx/enginelib/traits.py
@ -0,0 +1,250 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
 """Engine's traits are fetched from the origin engines and stored in a JSON file
 in the *data folder*.  Most often traits are languages and region codes and
 their mapping from SearXNG's representation to the representation in the origin
 search engine.  For new traits new properties can be added to the class
 :py:class:`EngineTraits`.
 To load traits from the persistence :py:obj:`EngineTraitsMap.from_data` can be
 used.
 """
 from __future__ import annotations
 import json
 import dataclasses
 from typing import Dict, Union, Callable, Optional, TYPE_CHECKING
 from typing_extensions import Literal, Self
 from searx import locales
 from searx.data import data_dir, ENGINE_TRAITS
 if TYPE_CHECKING:
    from . import Engine
 class EngineTraitsEncoder(json.JSONEncoder):
    """JSON encoder that knows how to serialize :class:`EngineTraits` objects,
    see :class:`json.JSONEncoder`."""
    def default(self, o):
        """Return the attribute dictionary of a :class:`EngineTraits` object.

        Any other object is passed on to the base class implementation.
        """
        if not isinstance(o, EngineTraits):
            return super().default(o)
        return o.__dict__
@dataclasses.dataclass
 class EngineTraits:
    """The class is intended to be instantiated for each engine."""
    regions: Dict[str, str] = dataclasses.field(default_factory=dict)
    """Maps SearXNG's internal representation of a region to the one of the engine.
    SearXNG's internal representation can be parsed by babel and the value is
    sent to the engine:
    .. code:: python
       regions = {
           'fr-BE' : <engine's region name>,
       }
       for key, engine_region in regions.items():
          searxng_region = babel.Locale.parse(key, sep='-')
          ...
    """
    languages: Dict[str, str] = dataclasses.field(default_factory=dict)
    """Maps SearXNG's internal representation of a language to the one of the engine.
    SearXNG's internal representation can be parsed by babel and the value is
    sent to the engine:
    .. code:: python
       languages = {
           'ca' : <engine's language name>,
       }
       for key, engine_lang in languages.items():
          searxng_lang = babel.Locale.parse(key)
          ...
    """
    all_locale: Optional[str] = None
    """To which locale value SearXNG's ``all`` language is mapped (shown as "Default
    language").
    """
    data_type: Literal['traits_v1'] = 'traits_v1'
    """Data type, default is 'traits_v1'.
    """
    custom: Dict[str, Dict] = dataclasses.field(default_factory=dict)
    """A place to store engine's custom traits, not related to the SearXNG core
    """
    def get_language(self, searxng_locale: str, default=None):
        """Return engine's language string that *best fits* to SearXNG's locale.
        :param searxng_locale: SearXNG's internal representation of locale
          selected by the user.
        :param default: engine's default language
        The *best fits* rules are implemented in
        :py:obj:`locales.get_engine_locale`.  Except for the special value ``all``
        which is determined from :py:obj`EngineTraits.all_language`.
        """
        if searxng_locale == 'all' and self.all_locale is not None:
            return self.all_locale
        return locales.get_engine_locale(searxng_locale, self.languages, default=default)
    def get_region(self, searxng_locale: str, default=None):
        """Return engine's region string that best fits to SearXNG's locale.
        :param searxng_locale: SearXNG's internal representation of locale
          selected by the user.
        :param default: engine's default region
        The *best fits* rules are implemented in
        :py:obj:`locales.get_engine_locale`.  Except for the special value ``all``
        which is determined from :py:obj`EngineTraits.all_language`.
        """
        if searxng_locale == 'all' and self.all_locale is not None:
            return self.all_locale
        return locales.get_engine_locale(searxng_locale, self.regions, default=default)
    def is_locale_supported(self, searxng_locale: str) -> bool:
        """A *locale* (SearXNG's internal representation) is considered to be supported
        by the engine if the *region* or the *language* is supported by the
        engine.  For verification the functions :py:func:`self.get_region` and
        :py:func:`self.get_region` are used.
        """
        if self.data_type == 'traits_v1':
            return bool(self.get_region(searxng_locale) or self.get_language(searxng_locale))
        raise TypeError('engine traits of type %s is unknown' % self.data_type)
    def copy(self):
        """Create a copy of the dataclass object."""
        return EngineTraits(**dataclasses.asdict(self))
    @classmethod
    def fetch_traits(cls, engine: Engine) -> Union[Self, None]:
        """Call a function ``fetch_traits(engine_traits)`` from engines namespace to fetch
        and set properties from the origin engine in the object ``engine_traits``.  If
        function does not exists, ``None`` is returned.
        """
        fetch_traits = getattr(engine, 'fetch_traits', None)
        engine_traits = None
        if fetch_traits:
            engine_traits = cls()
            fetch_traits(engine_traits)
        return engine_traits
    def set_traits(self, engine: Engine):
        """Set traits from self object in a :py:obj:`.Engine` namespace.
        :param engine: engine instance build by :py:func:`searx.engines.load_engine`
        """
        if self.data_type == 'traits_v1':
            self._set_traits_v1(engine)
        else:
            raise TypeError('engine traits of type %s is unknown' % self.data_type)
    def _set_traits_v1(self, engine: Engine):
        # For an engine, when there is `language: ...` in the YAML settings the engine
        # does support only this one language (region)::
        #
        #   - name: google italian
        #     engine: google
        #     language: it
        #     region: it-IT
        traits = self.copy()
        _msg = "settings.yml - engine: '%s' / %s: '%s' not supported"
        languages = traits.languages
        if hasattr(engine, 'language'):
            if engine.language not in languages:
                raise ValueError(_msg % (engine.name, 'language', engine.language))
            traits.languages = {engine.language: languages[engine.language]}
        regions = traits.regions
        if hasattr(engine, 'region'):
            if engine.region not in regions:
                raise ValueError(_msg % (engine.name, 'region', engine.region))
            traits.regions = {engine.region: regions[engine.region]}
        engine.language_support = bool(traits.languages or traits.regions)
        # set the copied & modified traits in engine's namespace
        engine.traits = traits
 class EngineTraitsMap(Dict[str, EngineTraits]):
    """A python dictionary to map :class:`EngineTraits` by engine name."""
    ENGINE_TRAITS_FILE = (data_dir / 'engine_traits.json').resolve()
    """File with persistence of the :py:obj:`EngineTraitsMap`."""
    def save_data(self):
        """Store the EngineTraitsMap in the file :py:obj:`self.ENGINE_TRAITS_FILE`.

        The map is written as JSON (indented, keys sorted) using
        :class:`EngineTraitsEncoder`.
        """
        with open(self.ENGINE_TRAITS_FILE, 'w', encoding='utf-8') as traits_file:
            json.dump(self, traits_file, cls=EngineTraitsEncoder, sort_keys=True, indent=2)
    @classmethod
    def from_data(cls) -> Self:
        """Instantiate a :class:`EngineTraitsMap` object from :py:obj:`ENGINE_TRAITS`.

        Each value of :py:obj:`ENGINE_TRAITS` is passed as keyword arguments to
        :class:`EngineTraits`.
        """
        traits_map = cls()
        for engine_name, raw_traits in ENGINE_TRAITS.items():
            traits_map[engine_name] = EngineTraits(**raw_traits)
        return traits_map
    @classmethod
    def fetch_traits(cls, log: Callable) -> Self:
        """Build a :class:`EngineTraitsMap` by fetching the traits of the engines
        in ``searx.engines.engines``.

        The engines are processed in sorted order of their names.  Engines for
        which :py:obj:`EngineTraits.fetch_traits` returns ``None`` are left out.

        :param log: callable that is invoked with one message string for the
          number of languages and one for the number of regions of each
          fetched engine.
        """
        from searx import engines  # pylint: disable=cyclic-import, import-outside-toplevel

        engine_names = sorted(engines.engines)
        traits_map = cls()
        for engine_name in engine_names:
            fetched = EngineTraits.fetch_traits(engines.engines[engine_name])
            if fetched is None:
                continue
            log("%-20s: SearXNG languages --> %s " % (engine_name, len(fetched.languages)))
            log("%-20s: SearXNG regions   --> %s" % (engine_name, len(fetched.regions)))
            traits_map[engine_name] = fetched
        return traits_map
    def set_traits(self, engine: Engine):
        """Set traits in a :py:obj:`Engine` namespace.

        The traits are looked up by ``engine.name`` first and by
        ``engine.engine`` second.  If neither is a key of this map, empty
        ``traits_v1`` traits are used.

        :param engine: engine instance built by :py:func:`searx.engines.load_engine`
        """
        if engine.name in self:
            engine_traits = self[engine.name]
        elif engine.engine in self:
            # The key of the dictionary traits_map is the *engine name*
            # configured in settings.yml.  When multiple engines are configured
            # in settings.yml to use the same origin engine (python module)
            # these additional engines can use the languages from the origin
            # engine.  For this use the configured ``engine: ...`` from
            # settings.yml
            engine_traits = self[engine.engine]
        else:
            engine_traits = EngineTraits(data_type='traits_v1')
        engine_traits.set_traits(engine)
--- a/searx/engines/init.py
+++ b/searx/engines/init.py
@ -11,24 +11,22 @@ usage::
 """
 from __future__ import annotations
 import sys
 import copy
 from typing import Dict, List, Optional
 from os.path import realpath, dirname
 from babel.localedata import locale_identifiers
 from searx import logger, settings
 from searx.data import ENGINES_LANGUAGES
 from searx.network import get
 from searx.utils import load_module, match_language, gen_useragent
 from typing import TYPE_CHECKING, Dict, Optional
 from searx import logger, settings
 from searx.utils import load_module
 if TYPE_CHECKING:
    from searx.enginelib import Engine
 logger = logger.getChild('engines')
 ENGINE_DIR = dirname(realpath(__file__))
 BABEL_LANGS = [
    lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0]
    for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())
 ]
 ENGINE_DEFAULT_ARGS = {
    "engine_type": "online",
    "inactive": False,
@ -36,8 +34,6 @@ ENGINE_DEFAULT_ARGS = {
    "timeout": settings["outgoing"]["request_timeout"],
    "shortcut": "-",
    "categories": ["general"],
    "supported_languages": [],
    "language_aliases": {},
    "paging": False,
    "safesearch": False,
    "time_range_support": False,
@ -52,24 +48,6 @@ ENGINE_DEFAULT_ARGS = {
 OTHER_CATEGORY = 'other'
 class Engine:  # pylint: disable=too-few-public-methods
    """This class is currently never initialized and only used for type hinting."""
    # engine name; used as lookup key into ENGINES_LANGUAGES (see
    # set_language_attributes)
    name: str
    # value of ``engine: ...`` from settings.yml, i.e. the origin engine (python
    # module); used as second lookup key into ENGINES_LANGUAGES
    engine: str
    shortcut: str
    categories: List[str]
    # assigned from ENGINES_LANGUAGES in set_language_attributes
    # NOTE(review): set_language_attributes also handles a dict here, the
    # annotation only covers the list case -- confirm
    supported_languages: List[str]
    about: dict
    inactive: bool
    disabled: bool
    # True when supported_languages is not empty (see set_language_attributes)
    language_support: bool
    paging: bool
    safesearch: bool
    time_range_support: bool
    timeout: float
 # Defaults for the namespace of an engine module, see :py:func:`load_engine`
 categories = {'general': []}
@ -136,9 +114,15 @@ def load_engine(engine_data: dict) -> Optional[Engine]:
        return None
    update_engine_attributes(engine, engine_data)
    set_language_attributes(engine)
    update_attributes_for_tor(engine)
    # avoid cyclic imports
    # pylint: disable=import-outside-toplevel
    from searx.enginelib.traits import EngineTraitsMap
    trait_map = EngineTraitsMap.from_data()
    trait_map.set_traits(engine)
    if not is_engine_active(engine):
        return None
@ -190,60 +174,6 @@ def update_engine_attributes(engine: Engine, engine_data):
            setattr(engine, arg_name, copy.deepcopy(arg_value))
 def set_language_attributes(engine: Engine):
    """Set the language related attributes in the namespace of an engine.

    - ``supported_languages`` is taken from :py:obj:`ENGINES_LANGUAGES`, looked
      up by ``engine.name`` first and ``engine.engine`` second.
    - If the engine has a ``language`` attribute, ``supported_languages`` is
      reduced to this single language.
    - ``language_aliases`` is extended by aliases for non standard language
      codes and ``language_support`` is set.
    - If the engine has a ``_fetch_supported_languages`` function, a
      ``fetch_supported_languages`` callable is attached that requests
      ``engine.supported_languages_url`` and passes the response to it.

    :param engine: engine namespace to update in place
    :raises ValueError: if ``engine.language`` is not in
      ``engine.supported_languages``
    """
    # assign supported languages from json file
    if engine.name in ENGINES_LANGUAGES:
        engine.supported_languages = ENGINES_LANGUAGES[engine.name]
    elif engine.engine in ENGINES_LANGUAGES:
        # The key of the dictionary ENGINES_LANGUAGES is the *engine name*
        # configured in settings.yml.  When multiple engines are configured in
        # settings.yml to use the same origin engine (python module) these
        # additional engines can use the languages from the origin engine.
        # For this use the configured ``engine: ...`` from settings.yml
        engine.supported_languages = ENGINES_LANGUAGES[engine.engine]
    if hasattr(engine, 'language'):
        # For an engine, when there is `language: ...` in the YAML settings, the
        # engine supports only one language, in this case
        # engine.supported_languages should contain this value defined in
        # settings.yml
        if engine.language not in engine.supported_languages:
            raise ValueError(
                "settings.yml - engine: '%s' / language: '%s' not supported" % (engine.name, engine.language)
            )
        # keep the container type (dict or list), but reduce it to the one
        # configured language
        if isinstance(engine.supported_languages, dict):
            engine.supported_languages = {engine.language: engine.supported_languages[engine.language]}
        else:
            engine.supported_languages = [engine.language]
    # find custom aliases for non standard language codes
    for engine_lang in engine.supported_languages:
        iso_lang = match_language(engine_lang, BABEL_LANGS, fallback=None)
        # register an alias only if the matched code really differs from the
        # engine's code and is not itself one of the engine's languages
        if (
            iso_lang
            and iso_lang != engine_lang
            and not engine_lang.startswith(iso_lang)
            and iso_lang not in engine.supported_languages
        ):
            engine.language_aliases[iso_lang] = engine_lang
    # language_support: True if at least one language is supported
    engine.language_support = len(engine.supported_languages) > 0
    # assign language fetching method if auxiliary method exists
    if hasattr(engine, '_fetch_supported_languages'):
        headers = {
            'User-Agent': gen_useragent(),
            'Accept-Language': "en-US,en;q=0.5",  # bing needs to set the English language
        }
        # the HTTP request is deferred until fetch_supported_languages() is called
        engine.fetch_supported_languages = (
            # pylint: disable=protected-access
            lambda: engine._fetch_supported_languages(get(engine.supported_languages_url, headers=headers))
        )
 def update_attributes_for_tor(engine: Engine) -> bool:
    if using_tor_proxy(engine) and hasattr(engine, 'onion_url'):
        engine.search_url = engine.onion_url + getattr(engine, 'search_path', '')
--- a/searx/engines/archlinux.py
+++ b/searx/engines/archlinux.py
@ -1,15 +1,32 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
 """
- Arch Linux Wiki
+Arch Linux Wiki
 ~~~~~~~~~~~~~~~
 This implementation does not use an official API: MediaWiki provides an API, but
 Arch Wiki blocks access to it.
 API: Mediawiki provides API, but Arch Wiki blocks access to it
 """
-from urllib.parse import urlencode, urljoin
+from typing import TYPE_CHECKING
-from lxml import html
+from urllib.parse import urlencode, urljoin, urlparse
 import lxml
 import babel
 from searx import network
 from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
 from searx.enginelib.traits import EngineTraits
 from searx.locales import language_tag
 if TYPE_CHECKING:
    import logging
    logger: logging.Logger
 traits: EngineTraits
 # about
 about = {
    "website": 'https://wiki.archlinux.org/',
    "wikidata_id": 'Q101445877',
@ -22,125 +39,113 @@ about = {
 # engine dependent config
 categories = ['it', 'software wikis']
 paging = True
-base_url = 'https://wiki.archlinux.org'
+main_wiki = 'wiki.archlinux.org'
 # xpath queries
 xpath_results = '//ul[@class="mw-search-results"]/li'
 xpath_link = './/div[@class="mw-search-result-heading"]/a'
 # cut 'en' from 'en-US', 'de' from 'de-CH', and so on
 def locale_to_lang_code(locale):
    """Return the part of *locale* before the first ``-`` (``'en'`` for ``'en-US'``).

    A value without ``-`` is returned unchanged.
    """
    lang_code, _sep, _rest = locale.partition('-')
    return lang_code
 # wikis for some languages were moved off from the main site, we need to make
 # requests to correct URLs to be able to get results in those languages
 lang_urls = {
    # fmt: off
    'all': {
        'base': 'https://wiki.archlinux.org',
        'search': '/index.php?title=Special:Search&offset={offset}&{query}'
    },
    'de': {
        'base': 'https://wiki.archlinux.de',
        'search': '/index.php?title=Spezial:Suche&offset={offset}&{query}'
    },
    'fr': {
        'base': 'https://wiki.archlinux.fr',
        'search': '/index.php?title=Spécial:Recherche&offset={offset}&{query}'
    },
    'ja': {
        'base': 'https://wiki.archlinuxjp.org',
        'search': '/index.php?title=特別:検索&offset={offset}&{query}'
    },
    'ro': {
        'base': 'http://wiki.archlinux.ro',
        'search': '/index.php?title=Special:Căutare&offset={offset}&{query}'
    },
    'tr': {
        'base': 'http://archtr.org/wiki',
        'search': '/index.php?title=Özel:Ara&offset={offset}&{query}'
    }
    # fmt: on
 }
 # get base & search URLs for selected language
 def get_lang_urls(language):
    """Return the ``lang_urls`` entry of *language*, or the ``'all'`` entry if
    there is none for this language."""
    key = language if language in lang_urls else 'all'
    return lang_urls[key]
 # Language names to build search requests for
 # those languages which are hosted on the main site.
 main_langs = {
    'ar': 'العربية',
    'bg': 'Български',
    'cs': 'Česky',
    'da': 'Dansk',
    'el': 'Ελληνικά',
    'es': 'Español',
    'he': 'עברית',
    'hr': 'Hrvatski',
    'hu': 'Magyar',
    'it': 'Italiano',
    'ko': '한국어',
    'lt': 'Lietuviškai',
    'nl': 'Nederlands',
    'pl': 'Polski',
    'pt': 'Português',
    'ru': 'Русский',
    # 'Slovenský' is the Slovak language name, its ISO 639-1 code is ``sk``
    # (``sl`` is Slovenian, for which no name is listed here)
    'sk': 'Slovenský',
    'th': 'ไทย',
    'uk': 'Українська',
    'zh': '简体中文',
 }
 supported_languages = dict(lang_urls, **main_langs)
 # do search-request
 def request(query, params):
    # translate the locale (e.g. 'en-US') to language code ('en')
    language = locale_to_lang_code(params['language'])
-    # if our language is hosted on the main site, we need to add its name
+    sxng_lang = params['searxng_locale'].split('-')[0]
-    # to the query in order to narrow the results to that language
+    netloc = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki)
-    if language in main_langs:
+    title = traits.custom['title'].get(sxng_lang, 'Special:Search')
-        query += ' (' + main_langs[language] + ')'
+    base_url = 'https://' + netloc + '/index.php?'
    # prepare the request parameters
    query = urlencode({'search': query})
    offset = (params['pageno'] - 1) * 20
-    # get request URLs for our language of choice
+    if netloc == main_wiki:
-    urls = get_lang_urls(language)
+        eng_lang: str = traits.get_language(sxng_lang, 'English')
-    search_url = urls['base'] + urls['search']
+        query += ' (' + eng_lang + ')'
    elif netloc == 'wiki.archlinuxcn.org':
        base_url = 'https://' + netloc + '/wzh/index.php?'
-    params['url'] = search_url.format(query=query, offset=offset)
+    args = {
        'search': query,
        'title': title,
        'limit': 20,
        'offset': offset,
        'profile': 'default',
    }
    params['url'] = base_url + urlencode(args)
    return params
 # get response from search-request
 def response(resp):
    # get the base URL for the language in which request was made
    language = locale_to_lang_code(resp.search_params['language'])
    base_url = get_lang_urls(language)['base']
    results = []
    dom = lxml.html.fromstring(resp.text)
-    dom = html.fromstring(resp.text)
+    # get the base URL for the language in which request was made
    sxng_lang = resp.search_params['searxng_locale'].split('-')[0]
    netloc = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki)
    base_url = 'https://' + netloc + '/index.php?'
-    # parse results
+    for result in eval_xpath_list(dom, '//ul[@class="mw-search-results"]/li'):
-    for result in eval_xpath_list(dom, xpath_results):
+        link = eval_xpath_getindex(result, './/div[@class="mw-search-result-heading"]/a', 0)
-        link = eval_xpath_getindex(result, xpath_link, 0)
+        content = extract_text(result.xpath('.//div[@class="searchresult"]'))
-        href = urljoin(base_url, link.attrib.get('href'))
+        results.append(
-        title = extract_text(link)
+            {
-
+                'url': urljoin(base_url, link.get('href')),
-        results.append({'url': href, 'title': title})
+                'title': extract_text(link),
                'content': content,
            }
        )
    return results
 def fetch_traits(engine_traits: EngineTraits):
    """Fetch languages from the Arch Linux Wiki.  The location of the Wiki
    address of a language is mapped in a :py:obj:`custom field
    <searx.enginelib.traits.EngineTraits.custom>` (``wiki_netloc``).  Depending
    on the location, the ``title`` argument in the request is translated.

    .. code:: python

       "custom": {
         "wiki_netloc": {
           "de": "wiki.archlinux.de",
            # ...
           "zh": "wiki.archlinuxcn.org"
         },
         "title": {
           "de": "Spezial:Suche",
            # ...
           "zh": "Special:\u641c\u7d22"
         },
       },

    :param engine_traits: traits object that is filled in place
      (``languages`` and the ``custom`` fields ``wiki_netloc`` / ``title``)
    """
    engine_traits.custom['wiki_netloc'] = {}
    engine_traits.custom['title'] = {}
    # title of the search page in the wikis that are hosted off the main site
    title_map = {
        'de': 'Spezial:Suche',
        'fa': 'ویژه:جستجو',
        'ja': '特別:検索',
        'zh': 'Special:搜索',
    }
    resp = network.get('https://wiki.archlinux.org/')
    if not resp.ok:
        print("ERROR: response from wiki.archlinux.org is not OK.")
    dom = lxml.html.fromstring(resp.text)
    for a in eval_xpath_list(dom, "//a[@class='interlanguage-link-target']"):
        sxng_tag = language_tag(babel.Locale.parse(a.get('lang'), sep='-'))
        # zh_Hans --> zh
        sxng_tag = sxng_tag.split('_')[0]
        netloc = urlparse(a.get('href')).netloc
        if netloc != 'wiki.archlinux.org':
            # language is hosted on an external wiki: a translated search
            # title is required, languages without one are skipped
            title = title_map.get(sxng_tag)
            if not title:
                print("ERROR: title tag from %s (%s) is unknown" % (netloc, sxng_tag))
                continue
            engine_traits.custom['wiki_netloc'][sxng_tag] = netloc
            engine_traits.custom['title'][sxng_tag] = title
        eng_tag = extract_text(eval_xpath_list(a, ".//span"))
        engine_traits.languages[sxng_tag] = eng_tag
    # English is always registered, independent of the interlanguage links
    engine_traits.languages['en'] = 'English'
--- a/searx/engines/bing.py
+++ b/searx/engines/bing.py
@ -1,16 +1,53 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
-"""Bing (Web)
+"""This is the implementation of the Bing-WEB engine. Some of this
 implementations are shared by other engines:
 - :ref:`bing images engine`
 - :ref:`bing news engine`
 - :ref:`bing videos engine`
 On the `preference page`_ Bing offers a lot of languages and regions (see section
 'Search results languages' and 'Country/region').  However, the abundant choice
 does not correspond to reality, where Bing has a full-text indexer only for a
 limited number of languages.  By example: you can select a language like Māori
 but you never get a result in this language.
 What comes a bit closer to the truth are the `search-APIs`_ but they don't seem
 to be completely correct either (if you take a closer look you will find some
 inaccuracies there too):
 - :py:obj:`searx.engines.bing.bing_traits_url`
 - :py:obj:`searx.engines.bing_videos.bing_traits_url`
 - :py:obj:`searx.engines.bing_images.bing_traits_url`
 - :py:obj:`searx.engines.bing_news.bing_traits_url`
 .. _preference page: https://www.bing.com/account/general
 .. _search-APIs: https://learn.microsoft.com/en-us/bing/search-apis/
 - https://github.com/searx/searx/issues/2019#issuecomment-648227442
 """
-# pylint: disable=too-many-branches
+# pylint: disable=too-many-branches, invalid-name
 from typing import TYPE_CHECKING
 import datetime
 import re
-from urllib.parse import urlencode, urlparse, parse_qs
+import uuid
 from urllib.parse import urlencode
 from lxml import html
-from searx.utils import eval_xpath, extract_text, eval_xpath_list, match_language, eval_xpath_getindex
+import babel
-from searx.network import multi_requests, Request
+import babel.languages
 from searx.utils import eval_xpath, extract_text, eval_xpath_list, eval_xpath_getindex
 from searx import network
 from searx.locales import language_tag, region_tag
 from searx.enginelib.traits import EngineTraits
 if TYPE_CHECKING:
    import logging
    logger: logging.Logger
 traits: EngineTraits
 about = {
    "website": 'https://www.bing.com',
@ -21,56 +58,124 @@ about = {
    "results": 'HTML',
 }
 send_accept_language_header = True
 """Bing tries to guess user's language and territory from the HTTP
 Accept-Language.  Optionally the user can select a search-language (can be
 different to the UI language) and a region (market code)."""
 # engine dependent config
 categories = ['general', 'web']
 paging = True
-time_range_support = False
+time_range_support = True
-safesearch = False
+safesearch = True
-send_accept_language_header = True
+safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'}  # cookie: ADLT=STRICT
 supported_languages_url = 'https://www.bing.com/account/general'
 language_aliases = {}
-# search-url
+base_url = 'https://www.bing.com/search'
-base_url = 'https://www.bing.com/'
+"""Bing (Web) search URL"""
-# initial query:     https://www.bing.com/search?q=foo&search=&form=QBLH
+bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/reference/market-codes'
-inital_query = 'search?{query}&search=&form=QBLH'
+"""Bing (Web) search API description"""
 # following queries: https://www.bing.com/search?q=foo&search=&first=11&FORM=PERE
 page_query = 'search?{query}&search=&first={offset}&FORM=PERE'
 def _get_offset_from_pageno(pageno):
    return (pageno - 1) * 10 + 1
 def set_bing_cookies(params, engine_language, engine_region, SID):
    """Store the cookies ``_EDGE_V``, ``_EDGE_S``, ``_EDGE_CD`` and
    ``SRCHHPGUSR`` in ``params['cookies']``.

    :param params: request parameters; ``params['cookies']`` is updated in
      place and ``params['safesearch']`` selects the ``ADLT`` value.
    :param engine_language: language tag (lower-cased for ``ui`` / ``u``, used
      as is for ``SRCHLANG``).
    :param engine_region: region / market tag (lower-cased for ``mkt`` / ``m``).
    :param SID: value of the ``SID`` field in the ``_EDGE_S`` cookie.
    """
    cookies = params['cookies']
    cookies['_EDGE_V'] = '1'

    region_lc = engine_region.lower()
    language_lc = engine_language.lower()

    # _EDGE_S: F=1&SID=3A5253BD6BCA609509B741876AF961CA&mkt=zh-tw
    edge_s_fields = ['F=1', 'SID=%s' % SID, 'mkt=%s' % region_lc, 'ui=%s' % language_lc]
    cookies['_EDGE_S'] = '&'.join(edge_s_fields)
    logger.debug("cookie _EDGE_S=%s", cookies['_EDGE_S'])

    # "_EDGE_CD": "m=zh-tw",
    edge_cd_fields = [
        'm=%s' % region_lc,  # search region: zh-cn
        'u=%s' % language_lc,  # UI: en-us
    ]
    cookies['_EDGE_CD'] = '&'.join(edge_cd_fields) + ';'
    logger.debug("cookie _EDGE_CD=%s", cookies['_EDGE_CD'])

    # Trying to set ADLT cookie here seems not to have any effect, I assume
    # there is some age verification by a cookie (and/or session ID) needed,
    # to disable the SafeSearch.
    adlt = safesearch_types.get(params['safesearch'], 'DEMOTE')
    cookies['SRCHHPGUSR'] = '&'.join(['SRCHLANG=%s' % engine_language, 'ADLT=%s' % adlt])
    logger.debug("cookie SRCHHPGUSR=%s", cookies['SRCHHPGUSR'])
 def request(query, params):
    """Assemble a Bing-Web request."""
-    offset = _get_offset_from_pageno(params.get('pageno', 1))
+    engine_region = traits.get_region(params['searxng_locale'], 'en-US')
    engine_language = traits.get_language(params['searxng_locale'], 'en')
-    # logger.debug("params['pageno'] --> %s", params.get('pageno'))
+    SID = uuid.uuid1().hex.upper()
-    # logger.debug("          offset --> %s", offset)
+    CVID = uuid.uuid1().hex.upper()
-    search_string = page_query
+    set_bing_cookies(params, engine_language, engine_region, SID)
    if offset == 1:
        search_string = inital_query
-    if params['language'] == 'all':
+    # build URL query
-        lang = 'EN'
+    # ---------------
    else:
        lang = match_language(params['language'], supported_languages, language_aliases)
-    query = 'language:{} {}'.format(lang.split('-')[0].upper(), query)
+    # query term
    page = int(params.get('pageno', 1))
    query_params = {
        # fmt: off
        'q': query,
        'pq': query,
        'cvid': CVID,
        'qs': 'n',
        'sp': '-1'
        # fmt: on
    }
-    search_path = search_string.format(query=urlencode({'q': query}), offset=offset)
+    # page
-
+    if page > 1:
-    if offset > 1:
+        referer = base_url + '?' + urlencode(query_params)
        referer = base_url + inital_query.format(query=urlencode({'q': query}))
        params['headers']['Referer'] = referer
        logger.debug("headers.Referer --> %s", referer)
-    params['url'] = base_url + search_path
+    query_params['first'] = _get_offset_from_pageno(page)
-    params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
+
    if page == 2:
        query_params['FORM'] = 'PERE'
    elif page > 2:
        query_params['FORM'] = 'PERE%s' % (page - 2)
    filters = ''
    if params['time_range']:
        query_params['filt'] = 'custom'
        if params['time_range'] == 'day':
            filters = 'ex1:"ez1"'
        elif params['time_range'] == 'week':
            filters = 'ex1:"ez2"'
        elif params['time_range'] == 'month':
            filters = 'ex1:"ez3"'
        elif params['time_range'] == 'year':
            epoch_1970 = datetime.date(1970, 1, 1)
            today_no = (datetime.date.today() - epoch_1970).days
            filters = 'ex1:"ez5_%s_%s"' % (today_no - 365, today_no)
    params['url'] = base_url + '?' + urlencode(query_params)
    if filters:
        params['url'] = params['url'] + '&filters=' + filters
    return params
@ -107,7 +212,8 @@ def response(resp):
            url_cite = extract_text(eval_xpath(result, './/div[@class="b_attribution"]/cite'))
            # Bing can shorten the URL either at the end or in the middle of the string
            if (
-                url_cite.startswith('https://')
+                url_cite
                and url_cite.startswith('https://')
                and '…' not in url_cite
                and '...' not in url_cite
                and '›' not in url_cite
@ -127,9 +233,9 @@ def response(resp):
    # resolve all Bing redirections in parallel
    request_list = [
-        Request.get(u, allow_redirects=False, headers=resp.search_params['headers']) for u in url_to_resolve
+        network.Request.get(u, allow_redirects=False, headers=resp.search_params['headers']) for u in url_to_resolve
    ]
-    response_list = multi_requests(request_list)
+    response_list = network.multi_requests(request_list)
    for i, redirect_response in enumerate(response_list):
        if not isinstance(redirect_response, Exception):
            results[url_to_resolve_index[i]]['url'] = redirect_response.headers['location']
@ -157,27 +263,71 @@ def response(resp):
    return results
-# get supported languages from their site
+def fetch_traits(engine_traits: EngineTraits):
-def _fetch_supported_languages(resp):
+    """Fetch languages and regions from Bing-Web."""
-    lang_tags = set()
+    xpath_market_codes = '//table[1]/tbody/tr/td[3]'
    # xpath_country_codes = '//table[2]/tbody/tr/td[2]'
    xpath_language_codes = '//table[3]/tbody/tr/td[2]'
    _fetch_traits(engine_traits, bing_traits_url, xpath_language_codes, xpath_market_codes)
 def _fetch_traits(engine_traits: EngineTraits, url: str, xpath_language_codes: str, xpath_market_codes: str):
    # insert alias to map from a language (zh) to a language + script (zh_Hans)
    engine_traits.languages['zh'] = 'zh-hans'
    resp = network.get(url)
    if not resp.ok:
        print("ERROR: response from peertube is not OK.")
    dom = html.fromstring(resp.text)
    lang_links = eval_xpath(dom, '//div[@id="language-section"]//li')
-    for _li in lang_links:
+    map_lang = {'jp': 'ja'}
    for td in eval_xpath(dom, xpath_language_codes):
        eng_lang = td.text
-        href = eval_xpath(_li, './/@href')[0]
+        if eng_lang in ('en-gb', 'pt-br'):
-        (_scheme, _netloc, _path, _params, query, _fragment) = urlparse(href)
+            # language 'en' is already in the list and a language 'en-gb' can't
-        query = parse_qs(query, keep_blank_values=True)
+            # be handled in SearXNG, same with pt-br which is covered by pt-pt.
            continue
-        # fmt: off
+        babel_lang = map_lang.get(eng_lang, eng_lang).replace('-', '_')
-        setlang = query.get('setlang', [None, ])[0]
+        try:
-        # example: 'mn-Cyrl-MN' --> '['mn', 'Cyrl-MN']
+            sxng_tag = language_tag(babel.Locale.parse(babel_lang))
-        lang, nation = (setlang.split('-', maxsplit=1) + [None,])[:2]  # fmt: skip
+        except babel.UnknownLocaleError:
-        # fmt: on
+            print("ERROR: language (%s) is unknown by babel" % (eng_lang))
            continue
        conflict = engine_traits.languages.get(sxng_tag)
        if conflict:
            if conflict != eng_lang:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_lang))
            continue
        engine_traits.languages[sxng_tag] = eng_lang
-        tag = lang + '-' + nation if nation else lang
+    map_region = {
-        lang_tags.add(tag)
+        'en-ID': 'id_ID',
        'no-NO': 'nb_NO',
    }
-    return list(lang_tags)
+    for td in eval_xpath(dom, xpath_market_codes):
        eng_region = td.text
        babel_region = map_region.get(eng_region, eng_region).replace('-', '_')
        if eng_region == 'en-WW':
            engine_traits.all_locale = eng_region
            continue
        try:
            sxng_tag = region_tag(babel.Locale.parse(babel_region))
        except babel.UnknownLocaleError:
            print("ERROR: region (%s) is unknown by babel" % (eng_region))
            continue
        conflict = engine_traits.regions.get(sxng_tag)
        if conflict:
            if conflict != eng_region:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_region))
            continue
        engine_traits.regions[sxng_tag] = eng_region
--- a/searx/engines/bing_images.py
+++ b/searx/engines/bing_images.py
@ -1,20 +1,30 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
-"""Bing (Images)
+"""Bing-Images: description see :py:obj:`searx.engines.bing`.
 """
 # pylint: disable=invalid-name
-from json import loads
+
 from typing import TYPE_CHECKING
 import uuid
 import json
 from urllib.parse import urlencode
 from lxml import html
-from searx.utils import match_language
+from searx.enginelib.traits import EngineTraits
-from searx.engines.bing import language_aliases
+from searx.engines.bing import (
-from searx.engines.bing import (  # pylint: disable=unused-import
+    set_bing_cookies,
-    _fetch_supported_languages,
+    _fetch_traits,
    supported_languages_url,
 )
 from searx.engines.bing import send_accept_language_header  # pylint: disable=unused-import
 if TYPE_CHECKING:
    import logging
    logger: logging.Logger
 traits: EngineTraits
 # about
 about = {
@ -31,77 +41,92 @@ categories = ['images', 'web']
 paging = True
 safesearch = True
 time_range_support = True
 send_accept_language_header = True
 supported_languages_url = 'https://www.bing.com/account/general'
 number_of_results = 28
-# search-url
+base_url = 'https://www.bing.com/images/async'
-base_url = 'https://www.bing.com/'
+"""Bing (Images) search URL"""
-search_string = (
+
 bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-image-search/reference/market-codes'
 """Bing (Images) search API description"""
 time_map = {
    # fmt: off
-    'images/search'
+    'day': 60 * 24,
-    '?{query}'
+    'week': 60 * 24 * 7,
-    '&count={count}'
+    'month': 60 * 24 * 31,
-    '&first={first}'
+    'year': 60 * 24 * 365,
    '&tsc=ImageHoverTitle'
    # fmt: on
-)
+}
 time_range_string = '&qft=+filterui:age-lt{interval}'
 time_range_dict = {'day': '1440', 'week': '10080', 'month': '43200', 'year': '525600'}
 # safesearch definitions
 safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'}
 # do search-request
 def request(query, params):
-    offset = ((params['pageno'] - 1) * number_of_results) + 1
+    """Assemble a Bing-Image request."""
-    search_path = search_string.format(query=urlencode({'q': query}), count=number_of_results, first=offset)
+    engine_region = traits.get_region(params['searxng_locale'], 'en-US')
    engine_language = traits.get_language(params['searxng_locale'], 'en')
-    language = match_language(params['language'], supported_languages, language_aliases).lower()
+    SID = uuid.uuid1().hex.upper()
    set_bing_cookies(params, engine_language, engine_region, SID)
-    params['cookies']['SRCHHPGUSR'] = 'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
+    # build URL query
    # - example: https://www.bing.com/images/async?q=foo&first=155&count=35
-    params['cookies']['_EDGE_S'] = 'mkt=' + language + '&ui=' + language + '&F=1'
+    query_params = {
        # fmt: off
        'q': query,
        'async' : 'content',
        # to simplify the page count lets use the default of 35 images per page
        'first' : (int(params.get('pageno', 1)) - 1) * 35 + 1,
        'count' : 35,
        # fmt: on
    }
-    params['url'] = base_url + search_path
+    # time range
-    if params['time_range'] in time_range_dict:
+    # - example: one year (525600 minutes) 'qft=+filterui:age-lt525600'
-        params['url'] += time_range_string.format(interval=time_range_dict[params['time_range']])
+
    if params['time_range']:
        query_params['qft'] = 'filterui:age-lt%s' % time_map[params['time_range']]
    params['url'] = base_url + '?' + urlencode(query_params)
    return params
 # get response from search-request
 def response(resp):
-    results = []
+    """Get response from Bing-Images"""
    results = []
    dom = html.fromstring(resp.text)
-    # parse results
+    for result in dom.xpath('//ul[contains(@class, "dgControl_list")]/li'):
    for result in dom.xpath('//div[@class="imgpt"]'):
        img_format = result.xpath('./div[contains(@class, "img_info")]/span/text()')[0]
        # Microsoft seems to experiment with this code so don't make the path too specific,
        # just catch the text section for the first anchor in img_info assuming this to be
        # the originating site.
        source = result.xpath('./div[contains(@class, "img_info")]//a/text()')[0]
-        m = loads(result.xpath('./a/@m')[0])
+        metadata = result.xpath('.//a[@class="iusc"]/@m')
        if not metadata:
            continue
-        # strip 'Unicode private use area' highlighting, they render to Tux
+        metadata = json.loads(result.xpath('.//a[@class="iusc"]/@m')[0])
-        # the Linux penguin and a standing diamond on my machine...
+        title = ' '.join(result.xpath('.//div[@class="infnmpt"]//a/text()')).strip()
-        title = m.get('t', '').replace('\ue000', '').replace('\ue001', '')
+        img_format = ' '.join(result.xpath('.//div[@class="imgpt"]/div/span/text()')).strip()
        source = ' '.join(result.xpath('.//div[@class="imgpt"]//div[@class="lnkw"]//a/text()')).strip()
        results.append(
            {
                'template': 'images.html',
-                'url': m['purl'],
+                'url': metadata['purl'],
-                'thumbnail_src': m['turl'],
+                'thumbnail_src': metadata['turl'],
-                'img_src': m['murl'],
+                'img_src': metadata['murl'],
-                'content': '',
+                'content': metadata['desc'],
                'title': title,
                'source': source,
                'img_format': img_format,
            }
        )
    return results
 def fetch_traits(engine_traits: EngineTraits):
    """Fetch languages and regions from Bing-Images.

    Delegates to ``_fetch_traits`` with this module's ``bing_traits_url`` and
    the XPath expressions of the market code and language code table cells.
    """
    xpath_market_codes = '//table[1]/tbody/tr/td[3]'
    # xpath_country_codes = '//table[2]/tbody/tr/td[2]'
    xpath_language_codes = '//table[3]/tbody/tr/td[2]'
    _fetch_traits(engine_traits, bing_traits_url, xpath_language_codes, xpath_market_codes)
--- a/searx/engines/bing_news.py
+++ b/searx/engines/bing_news.py
@ -1,24 +1,30 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
-"""Bing (News)
+"""Bing-News: description see :py:obj:`searx.engines.bing`.
 """
-from urllib.parse import (
+# pylint: disable=invalid-name
-    urlencode,
+
-    urlparse,
+from typing import TYPE_CHECKING
-    parse_qsl,
+import uuid
-    quote,
+from urllib.parse import urlencode
-)
+
-from datetime import datetime
+from lxml import html
-from dateutil import parser
+
-from lxml import etree
+from searx.enginelib.traits import EngineTraits
-from lxml.etree import XPath
+from searx.engines.bing import (
-from searx.utils import match_language, eval_xpath_getindex
+    set_bing_cookies,
-from searx.engines.bing import (  # pylint: disable=unused-import
+    _fetch_traits,
    language_aliases,
    _fetch_supported_languages,
    supported_languages_url,
 )
 from searx.engines.bing import send_accept_language_header  # pylint: disable=unused-import
 if TYPE_CHECKING:
    import logging
    logger: logging.Logger
 traits: EngineTraits
 # about
 about = {
@ -34,108 +40,111 @@ about = {
 categories = ['news']
 paging = True
 time_range_support = True
-send_accept_language_header = True
+time_map = {
    'day': '4',
    'week': '8',
    'month': '9',
 }
 """A string '4' means *last hour*. We use *last hour* for ``day`` here since the
 difference between *last day* and *last week* in the result list is only marginal.
 """
-# search-url
+base_url = 'https://www.bing.com/news/infinitescrollajax'
-base_url = 'https://www.bing.com/'
+"""Bing (News) search URL"""
 search_string = 'news/search?{query}&first={offset}&format=RSS'
 search_string_with_time = 'news/search?{query}&first={offset}&qft=interval%3d"{interval}"&format=RSS'
 time_range_dict = {'day': '7', 'week': '8', 'month': '9'}
 bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-news-search/reference/market-codes'
 """Bing (News) search API description"""
-def url_cleanup(url_string):
+mkt_alias = {
-    """remove click"""
+    'zh': 'en-WW',
-
+    'zh-CN': 'en-WW',
-    parsed_url = urlparse(url_string)
+}
-    if parsed_url.netloc == 'www.bing.com' and parsed_url.path == '/news/apiclick.aspx':
+"""Bing News has an official market code 'zh-CN' but we won't get a result with
-        query = dict(parse_qsl(parsed_url.query))
+this market code.  For 'zh' and 'zh-CN' we better use the *Worldwide aggregate*
-        url_string = query.get('url', None)
+market code (en-WW).
-    return url_string
+"""
 def image_url_cleanup(url_string):
    """Normalize a Bing thumbnail URL.
    URLs of the form ``http://*bing.com/th?id=...`` are rewritten to
    ``https://www.bing.com/th?id=...``.  Every other URL, including a ``/th``
    URL without a usable ``id`` argument, is returned unchanged.
    """
    parsed_url = urlparse(url_string)
    if parsed_url.netloc.endswith('bing.com') and parsed_url.path == '/th':
        # parse_qsl() drops blank values, so 'id' may be absent here; passing
        # None to quote() would raise a TypeError.
        image_id = dict(parse_qsl(parsed_url.query)).get('id')
        if image_id is not None:
            url_string = "https://www.bing.com/th?id=" + quote(image_id)
    return url_string
 def _get_url(query, language, offset, time_range):
    """Build the RSS search URL for *query*, market *language* and *offset*.
    The interval filter is only added when *time_range* is a key of
    ``time_range_dict``.
    """
    with_interval = time_range in time_range_dict
    # e.g. q=...&setmkt=de-de
    encoded_query = urlencode({'q': query, 'setmkt': language})
    if not with_interval:
        return base_url + search_string.format(query=encoded_query, offset=offset)
    return base_url + search_string_with_time.format(
        query=encoded_query,
        offset=offset,
        interval=time_range_dict[time_range],
    )
 def request(query, params):
    """Assemble a Bing-News request.
    Calls ``set_bing_cookies`` with the engine language and region and builds
    the request URL from ``base_url`` with paging and, if requested, the time
    range filter (``qft``).
    """
-    if params['time_range'] and params['time_range'] not in time_range_dict:
+    sxng_locale = params['searxng_locale']
-        return params
+    engine_region = traits.get_region(mkt_alias.get(sxng_locale, sxng_locale), traits.all_locale)
    engine_language = traits.get_language(sxng_locale, 'en')
-    offset = (params['pageno'] - 1) * 10 + 1
+    SID = uuid.uuid1().hex.upper()
-    if params['language'] == 'all':
+    set_bing_cookies(params, engine_language, engine_region, SID)
-        language = 'en-US'
+
-    else:
+    # build URL query
-        language = match_language(params['language'], supported_languages, language_aliases)
+    #
-    params['url'] = _get_url(query, language, offset, params['time_range'])
+    # example: https://www.bing.com/news/infinitescrollajax?q=london&first=1
    query_params = {
        # fmt: off
        'q': query,
        'InfiniteScroll': 1,
        # to simplify the page count let's use the default of 10 news items per page
        'first' : (int(params.get('pageno', 1)) - 1) * 10 + 1,
        # fmt: on
    }
    if params['time_range']:
        # results in qft=interval="7" -- urlencode() adds the 'qft=' key itself,
        # so the value must not repeat it
        query_params['qft'] = 'interval="%s"' % time_map.get(params['time_range'], '9')
    params['url'] = base_url + '?' + urlencode(query_params)
    return params
 def response(resp):
-
+    """Get response from Bing-News"""
    results = []
    rss = etree.fromstring(resp.content)
    namespaces = rss.nsmap
-    for item in rss.xpath('./channel/item'):
+    if not resp.ok or not resp.text:
-        # url / title / content
+        return results
        url = url_cleanup(eval_xpath_getindex(item, './link/text()', 0, default=None))
        title = eval_xpath_getindex(item, './title/text()', 0, default=url)
        content = eval_xpath_getindex(item, './description/text()', 0, default='')
-        # publishedDate
+    dom = html.fromstring(resp.text)
        publishedDate = eval_xpath_getindex(item, './pubDate/text()', 0, default=None)
        try:
            publishedDate = parser.parse(publishedDate, dayfirst=False)
        except TypeError:
            publishedDate = datetime.now()
        except ValueError:
            publishedDate = datetime.now()
-        # thumbnail
+    for newsitem in dom.xpath('//div[contains(@class, "newsitem")]'):
-        thumbnail = eval_xpath_getindex(item, XPath('./News:Image/text()', namespaces=namespaces), 0, default=None)
+
-        if thumbnail is not None:
+        url = newsitem.xpath('./@url')[0]
-            thumbnail = image_url_cleanup(thumbnail)
+        title = ' '.join(newsitem.xpath('.//div[@class="caption"]//a[@class="title"]/text()')).strip()
        content = ' '.join(newsitem.xpath('.//div[@class="snippet"]/text()')).strip()
        thumbnail = None
        author = newsitem.xpath('./@data-author')[0]
        metadata = ' '.join(newsitem.xpath('.//div[@class="source"]/span/text()')).strip()
        img_src = newsitem.xpath('.//a[@class="imagelink"]//img/@src')
        if img_src:
            thumbnail = 'https://www.bing.com/' + img_src[0]
        # append result
        if thumbnail is not None:
        results.append(
-                {'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content, 'img_src': thumbnail}
+            {
                'url': url,
                'title': title,
                'content': content,
                'img_src': thumbnail,
                'author': author,
                'metadata': metadata,
            }
        )
        else:
            results.append({'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content})
    return results
 def fetch_traits(engine_traits: EngineTraits):
    """Fetch the languages and regions supported by Bing-News.
    According to the :py:obj:`description <searx.engines.bing_news.bing_traits_url>`
    the first table lists the *"query parameter when calling the Video Search
    API"*, so the market codes are read from the fourth table ("News Category
    API markets") instead.
    """
    # NOTE: country codes ('//table[2]/tbody/tr/td[2]') are not evaluated.
    language_xpath = '//table[3]/tbody/tr/td[2]'
    market_xpath = '//table[4]/tbody/tr/td[3]'
    _fetch_traits(engine_traits, bing_traits_url, language_xpath, market_xpath)
--- a/searx/engines/bing_videos.py
+++ b/searx/engines/bing_videos.py
@ -1,21 +1,30 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
-"""Bing (Videos)
+"""Bing-Videos: description see :py:obj:`searx.engines.bing`.
 """
 # pylint: disable=invalid-name
-from json import loads
+from typing import TYPE_CHECKING
 import uuid
 import json
 from urllib.parse import urlencode
 from lxml import html
-from searx.utils import match_language
+from searx.enginelib.traits import EngineTraits
-from searx.engines.bing import language_aliases
+from searx.engines.bing import (
-
+    set_bing_cookies,
-from searx.engines.bing import (  # pylint: disable=unused-import
+    _fetch_traits,
    _fetch_supported_languages,
    supported_languages_url,
 )
 from searx.engines.bing import send_accept_language_header  # pylint: disable=unused-import
 if TYPE_CHECKING:
    import logging
    logger: logging.Logger
 traits: EngineTraits
 about = {
    "website": 'https://www.bing.com/videos',
@ -26,65 +35,76 @@ about = {
    "results": 'HTML',
 }
 # engine dependent config
 categories = ['videos', 'web']
 paging = True
 safesearch = True
 time_range_support = True
 send_accept_language_header = True
 number_of_results = 28
-base_url = 'https://www.bing.com/'
+base_url = 'https://www.bing.com/videos/asyncv2'
-search_string = (
+"""Bing (Videos) async search URL."""
 bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-video-search/reference/market-codes'
 """Bing (Video) search API description"""
 time_map = {
    # fmt: off
-    'videos/search'
+    'day': 60 * 24,
-    '?{query}'
+    'week': 60 * 24 * 7,
-    '&count={count}'
+    'month': 60 * 24 * 31,
-    '&first={first}'
+    'year': 60 * 24 * 365,
    '&scope=video'
    '&FORM=QBLH'
    # fmt: on
-)
+}
 time_range_string = '&qft=+filterui:videoage-lt{interval}'
 time_range_dict = {'day': '1440', 'week': '10080', 'month': '43200', 'year': '525600'}
 # safesearch definitions
 safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'}
 # do search-request
 def request(query, params):
-    offset = ((params['pageno'] - 1) * number_of_results) + 1
+    """Assemble a Bing-Video request."""
-    search_path = search_string.format(query=urlencode({'q': query}), count=number_of_results, first=offset)
+    engine_region = traits.get_region(params['searxng_locale'], 'en-US')
    engine_language = traits.get_language(params['searxng_locale'], 'en')
-    # safesearch cookie
+    SID = uuid.uuid1().hex.upper()
-    params['cookies']['SRCHHPGUSR'] = 'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
+    set_bing_cookies(params, engine_language, engine_region, SID)
-    # language cookie
+    # build URL query
-    language = match_language(params['language'], supported_languages, language_aliases).lower()
+    #
-    params['cookies']['_EDGE_S'] = 'mkt=' + language + '&F=1'
+    # example: https://www.bing.com/videos/asyncv2?q=foo&async=content&first=1&count=35
-    # query and paging
+    query_params = {
-    params['url'] = base_url + search_path
+        # fmt: off
        'q': query,
        'async' : 'content',
        # to simplify the page count let's use the default of 35 videos per page
        'first' : (int(params.get('pageno', 1)) - 1) * 35 + 1,
        'count' : 35,
        # fmt: on
    }
    # time range
-    if params['time_range'] in time_range_dict:
+    #
-        params['url'] += time_range_string.format(interval=time_range_dict[params['time_range']])
+    # example: one week (10080 minutes) '&qft= filterui:videoage-lt10080'  '&form=VRFLTR'
    if params['time_range']:
        query_params['form'] = 'VRFLTR'
        query_params['qft'] = ' filterui:videoage-lt%s' % time_map[params['time_range']]
    params['url'] = base_url + '?' + urlencode(query_params)
    return params
 # get response from search-request
 def response(resp):
    """Get response from Bing-Video"""
    results = []
    dom = html.fromstring(resp.text)
-    for result in dom.xpath('//div[@class="dg_u"]/div[contains(@class, "mc_vtvc")]'):
+    for result in dom.xpath('//div[@class="dg_u"]//div[contains(@id, "mc_vtvc_video")]'):
-        metadata = loads(result.xpath('.//div[@class="vrhdata"]/@vrhm')[0])
+        metadata = json.loads(result.xpath('.//div[@class="vrhdata"]/@vrhm')[0])
        info = ' - '.join(result.xpath('.//div[@class="mc_vtvc_meta_block"]//span/text()')).strip()
        content = '{0} - {1}'.format(metadata['du'], info)
-        thumbnail = '{0}th?id={1}'.format(base_url, metadata['thid'])
+        thumbnail = result.xpath('.//div[contains(@class, "mc_vtvc_th")]//img/@src')[0]
        results.append(
            {
                'url': metadata['murl'],
@ -96,3 +116,13 @@ def response(resp):
        )
    return results
 def fetch_traits(engine_traits: EngineTraits):
    """Fetch the languages and regions supported by Bing-Videos.
    The XPath selectors for the language table and the market table are
    handed, together with ``bing_traits_url``, to ``_fetch_traits``.
    """
    # NOTE: country codes ('//table[2]/tbody/tr/td[2]') are not evaluated.
    language_xpath = '//table[3]/tbody/tr/td[2]'
    market_xpath = '//table[1]/tbody/tr/td[3]'
    _fetch_traits(engine_traits, bing_traits_url, language_xpath, market_xpath)
--- a/searx/engines/dailymotion.py
+++ b/searx/engines/dailymotion.py
@ -1,17 +1,35 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
-"""Dailymotion (Videos)
+# lint: pylint
 """
 Dailymotion (Videos)
 ~~~~~~~~~~~~~~~~~~~~
 .. _REST GET: https://developers.dailymotion.com/tools/
 .. _Global API Parameters: https://developers.dailymotion.com/api/#global-parameters
 .. _Video filters API: https://developers.dailymotion.com/api/#video-filters
 .. _Fields selection: https://developers.dailymotion.com/api/#fields-selection
 """
-from typing import Set
+from typing import TYPE_CHECKING
 from datetime import datetime, timedelta
 from urllib.parse import urlencode
 import time
 import babel
 from searx.exceptions import SearxEngineAPIException
-from searx.network import raise_for_httperror
+from searx import network
 from searx.utils import html_to_text
 from searx.locales import region_tag, language_tag
 from searx.enginelib.traits import EngineTraits
 if TYPE_CHECKING:
    import logging
    logger: logging.Logger
 traits: EngineTraits
 # about
 about = {
@ -37,11 +55,24 @@ time_delta_dict = {
 }
 safesearch = True
-safesearch_params = {2: '&is_created_for_kids=true', 1: '&is_created_for_kids=true', 0: ''}
+safesearch_params = {
    2: {'is_created_for_kids': 'true'},
    1: {'is_created_for_kids': 'true'},
    0: {},
 }
 """True if this video is "Created for Kids" / intends to target an audience
 under the age of 16 (``is_created_for_kids`` in `Video filters API`_ )
 """
-# search-url
+family_filter_map = {
-# - https://developers.dailymotion.com/tools/
+    2: 'true',
-# - https://www.dailymotion.com/doc/api/obj-video.html
+    1: 'true',
    0: 'false',
 }
 """By default, the family filter is turned on. Setting this parameter to
 ``false`` will stop filtering-out explicit content from searches and global
 contexts (``family_filter`` in `Global API Parameters`_ ).
 """
 result_fields = [
    'allow_embed',
@ -53,27 +84,21 @@ result_fields = [
    'thumbnail_360_url',
    'id',
 ]
-search_url = (
+"""`Fields selection`_, by default, a few fields are returned. To request more
-    'https://api.dailymotion.com/videos?'
+specific fields, the ``fields`` parameter is used with the list of fields
-    'fields={fields}&password_protected={password_protected}&private={private}&sort={sort}&limit={limit}'
+SearXNG needs in the response to build a video result list.
-).format(
+"""
-    fields=','.join(result_fields),
+
-    password_protected='false',
+search_url = 'https://api.dailymotion.com/videos?'
-    private='false',
+"""URL to retrieve a list of videos.
-    sort='relevance',
+
-    limit=number_of_results,
+- `REST GET`_
-)
+- `Global API Parameters`_
 - `Video filters API`_
 """
 iframe_src = "https://www.dailymotion.com/embed/video/{video_id}"
-
+"""URL template to embed video in SearXNG's result list."""
 # The request query filters by 'languages' & 'country', therefore instead of
 # fetching only languages we need to fetch locales.
 supported_languages_url = 'https://api.dailymotion.com/locales'
 supported_languages_iso639: Set[str] = set()
 def init(_engine_settings):
    """Collect the language part (the text before ``_``) of every supported locale.
    The result is stored in the module-level ``supported_languages_iso639`` set.
    """
    global supported_languages_iso639
    supported_languages_iso639 = {locale_code.split('_')[0] for locale_code in supported_languages}
 def request(query, params):
@ -81,34 +106,42 @@ def request(query, params):
    if not query:
        return False
-    language = params['language']
+    eng_region = traits.get_region(params['searxng_locale'], 'en_US')
-    if language == 'all':
+    eng_lang = traits.get_language(params['searxng_locale'], 'en')
        language = 'en-US'
    locale = babel.Locale.parse(language, sep='-')
-    language_iso639 = locale.language
+    args = {
    if locale.language not in supported_languages_iso639:
        language_iso639 = 'en'
    query_args = {
        'search': query,
-        'languages': language_iso639,
+        'family_filter': family_filter_map.get(params['safesearch'], 'false'),
        'thumbnail_ratio': 'original',  # original|widescreen|square
        # https://developers.dailymotion.com/api/#video-filters
        'languages': eng_lang,
        'page': params['pageno'],
        'password_protected': 'false',
        'private': 'false',
        'sort': 'relevance',
        'limit': number_of_results,
        'fields': ','.join(result_fields),
    }
-    if locale.territory:
+    args.update(safesearch_params.get(params['safesearch'], {}))
-        localization = locale.language + '_' + locale.territory
+
-        if localization in supported_languages:
+    # Don't add localization and country arguments if the user does select a
-            query_args['country'] = locale.territory
+    # language (:de, :en, ..)
    if len(params['searxng_locale'].split('-')) > 1:
        # https://developers.dailymotion.com/api/#global-parameters
        args['localization'] = eng_region
        args['country'] = eng_region.split('_')[1]
        # Insufficient rights for the `ams_country' parameter of route `GET /videos'
        # 'ams_country': eng_region.split('_')[1],
    time_delta = time_delta_dict.get(params["time_range"])
    if time_delta:
        created_after = datetime.now() - time_delta
-        query_args['created_after'] = datetime.timestamp(created_after)
+        args['created_after'] = datetime.timestamp(created_after)
-    query_str = urlencode(query_args)
+    query_str = urlencode(args)
-    params['url'] = search_url + '&' + query_str + safesearch_params.get(params['safesearch'], '')
+    params['url'] = search_url + query_str
    params['raise_for_httperror'] = False
    return params
@ -123,7 +156,7 @@ def response(resp):
    if 'error' in search_res:
        raise SearxEngineAPIException(search_res['error'].get('message'))
-    raise_for_httperror(resp)
+    network.raise_for_httperror(resp)
    # parse results
    for res in search_res.get('list', []):
@ -167,7 +200,53 @@ def response(resp):
    return results
-# get supported languages from their site
+def fetch_traits(engine_traits: EngineTraits):
-def _fetch_supported_languages(resp):
+    """Fetch locales & languages from dailymotion.
-    response_json = resp.json()
+
-    return [item['locale'] for item in response_json['list']]
+    Locales fetched from `api/locales <https://api.dailymotion.com/locales>`_.
    There are duplications in the locale codes returned from Dailymotion which
    can be ignored::
      en_EN --> en_GB, en_US
      ar_AA --> ar_EG, ar_AE, ar_SA
    The language list `api/languages <https://api.dailymotion.com/languages>`_
    contains over 7000 *languages* codes (see PR1071_).  We use only those
    language codes that are used in the locales.
    .. _PR1071: https://github.com/searxng/searxng/pull/1071
    """
    resp = network.get('https://api.dailymotion.com/locales')
    if not resp.ok:
        print("ERROR: response from dailymotion/locales is not OK.")
    for item in resp.json()['list']:
        eng_tag = item['locale']
        if eng_tag in ('en_EN', 'ar_AA'):
            continue
        try:
            sxng_tag = region_tag(babel.Locale.parse(eng_tag))
        except babel.UnknownLocaleError:
            print("ERROR: item unknown --> %s" % item)
            continue
        conflict = engine_traits.regions.get(sxng_tag)
        if conflict:
            if conflict != eng_tag:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
            continue
        engine_traits.regions[sxng_tag] = eng_tag
    locale_lang_list = [x.split('_')[0] for x in engine_traits.regions.values()]
    resp = network.get('https://api.dailymotion.com/languages')
    if not resp.ok:
        print("ERROR: response from dailymotion/languages is not OK.")
    for item in resp.json()['list']:
        eng_tag = item['code']
        if eng_tag in locale_lang_list:
            sxng_tag = language_tag(babel.Locale.parse(eng_tag))
            engine_traits.languages[sxng_tag] = eng_tag
--- a/searx/engines/demo_offline.py
+++ b/searx/engines/demo_offline.py
@ -63,7 +63,7 @@ def search(query, request_params):
    for row in result_list:
        entry = {
            'query': query,
-            'language': request_params['language'],
+            'language': request_params['searxng_locale'],
            'value': row.get("value"),
            # choose a result template or comment out to use the *default*
            'template': 'key-value.html',
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@ -1,71 +1,207 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
-"""DuckDuckGo Lite
+"""
 DuckDuckGo Lite
 ~~~~~~~~~~~~~~~
 """
-from json import loads
+from typing import TYPE_CHECKING
-
+from urllib.parse import urlencode
-from lxml.html import fromstring
+import json
 import babel
 import lxml.html
 from searx import (
    network,
    locales,
    redislib,
 )
 from searx import redisdb
 from searx.utils import (
    dict_subset,
    eval_xpath,
    eval_xpath_getindex,
    extract_text,
    match_language,
 )
-from searx.network import get
+from searx.enginelib.traits import EngineTraits
 from searx.exceptions import SearxEngineAPIException
 if TYPE_CHECKING:
    import logging
    logger: logging.Logger
 traits: EngineTraits
 # about
 about = {
    "website": 'https://lite.duckduckgo.com/lite/',
    "wikidata_id": 'Q12805',
    "official_api_documentation": 'https://duckduckgo.com/api',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
 }
 send_accept_language_header = True
 """DuckDuckGo-Lite tries to guess user's prefered language from the HTTP
 ``Accept-Language``.  Optionally the user can select a region filter (but not a
 language).
 """
 # engine dependent config
 categories = ['general', 'web']
 paging = True
 supported_languages_url = 'https://duckduckgo.com/util/u588.js'
 time_range_support = True
-send_accept_language_header = True
+safesearch = True  # user can't select but the results are filtered
-language_aliases = {
+url = 'https://lite.duckduckgo.com/lite/'
-    'ar-SA': 'ar-XA',
+# url_ping = 'https://duckduckgo.com/t/sl_l'
    'es-419': 'es-XL',
    'ja': 'jp-JP',
    'ko': 'kr-KR',
    'sl-SI': 'sl-SL',
    'zh-TW': 'tzh-TW',
    'zh-HK': 'tzh-HK',
 }
 time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
 form_data = {'v': 'l', 'api': 'd.js', 'o': 'json'}
 # search-url
 url = 'https://lite.duckduckgo.com/lite/'
 url_ping = 'https://duckduckgo.com/t/sl_l'
-# match query's language to a region code that duckduckgo will accept
+def cache_vqd(query, value):
-def get_region_code(lang, lang_list=None):
+    """Caches a ``vqd`` value from a query.
    if lang == 'all':
        return None
-    lang_code = match_language(lang, lang_list or [], language_aliases, 'wt-WT')
+    The vqd value depends on the query string and is needed for the follow up
-    lang_parts = lang_code.split('-')
+    pages or the images loaded by a XMLHttpRequest:
-    # country code goes first
+    - DuckDuckGo Web: `https://links.duckduckgo.com/d.js?q=...&vqd=...`
-    return lang_parts[1].lower() + '-' + lang_parts[0].lower()
+    - DuckDuckGo Images: `https://duckduckgo.com/i.js?q=...&vqd=...`
    """
    c = redisdb.client()
    if c:
        logger.debug("cache vqd value: %s", value)
        key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query)
        c.set(key, value, ex=600)
 def get_vqd(query, headers):
    """Return the ``vqd`` token that belongs to *query*.
    A token cached by :py:obj:`cache_vqd` is preferred.  Without a cached token
    the query is sent to DuckDuckGo, the token is extracted from the response
    text and handed to :py:obj:`cache_vqd`.  Raises
    :py:obj:`SearxEngineAPIException` if the response carries no ``vqd='``
    marker.
    """
    redis_client = redisdb.client()
    if redis_client:
        cached = redis_client.get('SearXNG_ddg_vqd' + redislib.secret_hash(query))
        if cached:
            vqd = cached.decode('utf-8')
            logger.debug("re-use cached vqd value: %s", vqd)
            return vqd
    query_url = 'https://duckduckgo.com/?' + urlencode({'q': query}) + '&iar=images'
    page = network.get(query_url, headers=headers).text
    _, marker, tail = page.partition('vqd=\'')
    if not marker:
        raise SearxEngineAPIException('Request failed')
    # the token ends at the next single quote; find() yields -1 when the quote
    # is missing and the slice then drops the last character
    vqd = tail[: tail.find('\'')]
    logger.debug("new vqd value: %s", vqd)
    cache_vqd(query, vqd)
    return vqd
 def get_ddg_lang(eng_traits: EngineTraits, sxng_locale, default='en_US'):
    """Return DuckDuckGo's language identifier for SearXNG's locale.
    DuckDuckGo defines its languages by region codes (see
    :py:obj:`fetch_traits`).  The identifier is looked up in
    ``eng_traits.custom['lang_region']`` first, the fallback is
    ``eng_traits.get_language(sxng_locale, default)``.
    Region and language of a DDG service are determined by:
    .. code:: python
       eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
       eng_lang = get_ddg_lang(traits, params['searxng_locale'])
    It might be confusing, but the ``l`` value of the cookie is what SearXNG
    calls the *region*:
    .. code:: python
        # !ddi paris :es-AR --> {'ad': 'es_AR', 'ah': 'ar-es', 'l': 'ar-es'}
        params['cookies']['ad'] = eng_lang
        params['cookies']['ah'] = eng_region
        params['cookies']['l'] = eng_region
    .. hint::
       `DDG-lite <https://lite.duckduckgo.com/lite>`__ does not offer a language
       selection, the user can only select a region (``eng_region`` from the
       example above).  DDG-lite stores the selected region in a cookie::
         params['cookies']['kl'] = eng_region  # 'ar-es'
    """
    lang_region = eng_traits.custom['lang_region']
    fallback = eng_traits.get_language(sxng_locale, default)
    return lang_region.get(sxng_locale, fallback)
 # Overrides for DuckDuckGo region codes (the keys), looked up by ``fetch_traits``
 # for every region code it reads from DuckDuckGo.  An entry with the value
 # 'skip' makes ``fetch_traits`` ignore that region.  The other values look like
 # locale identifiers (language_TERRITORY) -- TODO confirm against the part of
 # ``fetch_traits`` that consumes them.
 ddg_reg_map = {
    'tw-tzh': 'zh_TW',
    'hk-tzh': 'zh_HK',
    'ct-ca': 'skip',  # ct-ca and es-ca both map to ca_ES
    'es-ca': 'ca_ES',
    'id-en': 'id_ID',
    'no-no': 'nb_NO',
    'jp-jp': 'ja_JP',
    'kr-kr': 'ko_KR',
    'xa-ar': 'ar_SA',
    'sl-sl': 'sl_SI',
    'th-en': 'th_TH',
    'vn-en': 'vi_VN',
 }
 # Special handling for DuckDuckGo language codes (the keys).
 # NOTE(review): the consumer of this table is not visible here; presumably
 # 'lang_region' marks codes that end up in ``custom['lang_region']`` (read by
 # ``get_ddg_lang``) and 'skip' marks codes to be ignored -- confirm in
 # ``fetch_traits``.
 ddg_lang_map = {
    # use ar --> ar_EG (Egypt's arabic)
    "ar_DZ": 'lang_region',
    "ar_JO": 'lang_region',
    "ar_SA": 'lang_region',
    # use bn --> bn_BD
    'bn_IN': 'lang_region',
    # use de --> de_DE
    'de_CH': 'lang_region',
    # use en --> en_US,
    'en_AU': 'lang_region',
    'en_CA': 'lang_region',
    'en_GB': 'lang_region',
    # Esperanto
    'eo_XX': 'eo',
    # use es --> es_ES,
    'es_AR': 'lang_region',
    'es_CL': 'lang_region',
    'es_CO': 'lang_region',
    'es_CR': 'lang_region',
    'es_EC': 'lang_region',
    'es_MX': 'lang_region',
    'es_PE': 'lang_region',
    'es_UY': 'lang_region',
    'es_VE': 'lang_region',
    # use fr --> fr_FR
    'fr_CA': 'lang_region',
    'fr_CH': 'lang_region',
    'fr_BE': 'lang_region',
    # use nl --> nl_NL
    'nl_BE': 'lang_region',
    # use pt --> pt_PT
    'pt_BR': 'lang_region',
    # skip these languages
    'od_IN': 'skip',
    'io_XX': 'skip',
    'tokipona_XX': 'skip',
 }
 def request(query, params):
    eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
    # eng_lang = get_ddg_lang(traits, params['searxng_locale'])
    params['url'] = url
    params['method'] = 'POST'
    params['data']['q'] = query
    # The API is not documented, so we do some reverse engineering and emulate
@ -88,23 +224,19 @@ def request(query, params):
        params['data']['s'] = offset
        params['data']['dc'] = offset + 1
    # request needs a vqd argument
    params['data']['vqd'] = get_vqd(query, params["headers"])
    # initial page does not have additional data in the input form
    if params['pageno'] > 1:
        # request the second page (and more pages) needs 'o' and 'api' arguments
        params['data']['o'] = 'json'
        params['data']['api'] = 'd.js'
-    # initial page does not have additional data in the input form
+        params['data']['o'] = form_data.get('o', 'json')
-    if params['pageno'] > 2:
+        params['data']['api'] = form_data.get('api', 'd.js')
-        # request the third page (and more pages) some more arguments
+        params['data']['nextParams'] = form_data.get('nextParams', '')
-        params['data']['nextParams'] = ''
+        params['data']['v'] = form_data.get('v', 'l')
        params['data']['v'] = ''
        params['data']['vqd'] = ''
-    region_code = get_region_code(params['language'], supported_languages)
+    params['data']['kl'] = eng_region
-    if region_code:
+    params['cookies']['kl'] = eng_region
        params['data']['kl'] = region_code
        params['cookies']['kl'] = region_code
    params['data']['df'] = ''
    if params['time_range'] in time_range_dict:
@ -116,26 +248,40 @@ def request(query, params):
    return params
 # get response from search-request
 def response(resp):
    headers_ping = dict_subset(resp.request.headers, ['User-Agent', 'Accept-Encoding', 'Accept', 'Cookie'])
    get(url_ping, headers=headers_ping)
    if resp.status_code == 303:
        return []
    results = []
-    doc = fromstring(resp.text)
+    doc = lxml.html.fromstring(resp.text)
    result_table = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table')
-    if not len(result_table) >= 3:
+
    if len(result_table) == 2:
        # some locales (at least China) does not have a "next page" button and
        # the layout of the HTML tables is different.
        result_table = result_table[1]
    elif not len(result_table) >= 3:
        # no more results
        return []
    else:
        result_table = result_table[2]
        # update form data from response
        form = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table//input/..')
        if len(form):
            form = form[0]
            form_data['v'] = eval_xpath(form, '//input[@name="v"]/@value')[0]
            form_data['api'] = eval_xpath(form, '//input[@name="api"]/@value')[0]
            form_data['o'] = eval_xpath(form, '//input[@name="o"]/@value')[0]
            logger.debug('form_data: %s', form_data)
            value = eval_xpath(form, '//input[@name="vqd"]/@value')[0]
            query = resp.search_params['data']['q']
            cache_vqd(query, value)
    tr_rows = eval_xpath(result_table, './/tr')
    # In the last <tr> is the form of the 'previous/next page' links
    tr_rows = tr_rows[:-1]
@ -172,15 +318,105 @@ def response(resp):
    return results
-# get supported languages from their site
+def fetch_traits(engine_traits: EngineTraits):
-def _fetch_supported_languages(resp):
+    """Fetch languages & regions from DuckDuckGo.
-    # response is a js file with regions as an embedded object
+    SearXNG's ``all`` locale maps DuckDuckGo's "All regions" (``wt-wt``).
-    response_page = resp.text
+    DuckDuckGo's language "Browsers preferred language" (``wt_WT``) makes no
-    response_page = response_page[response_page.find('regions:{') + 8 :]
+    sense in a SearXNG request since SearXNG's ``all`` will not add a
-    response_page = response_page[: response_page.find('}') + 1]
+    ``Accept-Language`` HTTP header.  The value in ``engine_traits.all_locale``
    is ``wt-wt`` (the region).
-    regions_json = loads(response_page)
+    Besides regions DuckDuckGo also defines its languages by region codes.  For
-    supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys())
+    example these are the English languages in DuckDuckGo:
-    return list(supported_languages)
+    - en_US
    - en_AU
    - en_CA
    - en_GB
    The function :py:obj:`get_ddg_lang` evaluates DuckDuckGo's language from
    SearXNG's locale.
    """
    # pylint: disable=too-many-branches, too-many-statements
    # fetch regions
    engine_traits.all_locale = 'wt-wt'
    # updated from u588 to u661 / should be updated automatically?
    resp = network.get('https://duckduckgo.com/util/u661.js')
    if not resp.ok:
        print("ERROR: response from DuckDuckGo is not OK.")
    pos = resp.text.find('regions:{') + 8
    js_code = resp.text[pos:]
    pos = js_code.find('}') + 1
    regions = json.loads(js_code[:pos])
    for eng_tag, name in regions.items():
        if eng_tag == 'wt-wt':
            engine_traits.all_locale = 'wt-wt'
            continue
        region = ddg_reg_map.get(eng_tag)
        if region == 'skip':
            continue
        if not region:
            eng_territory, eng_lang = eng_tag.split('-')
            region = eng_lang + '_' + eng_territory.upper()
        try:
            sxng_tag = locales.region_tag(babel.Locale.parse(region))
        except babel.UnknownLocaleError:
            print("ERROR: %s (%s) -> %s is unknown by babel" % (name, eng_tag, region))
            continue
        conflict = engine_traits.regions.get(sxng_tag)
        if conflict:
            if conflict != eng_tag:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
            continue
        engine_traits.regions[sxng_tag] = eng_tag
    # fetch languages
    engine_traits.custom['lang_region'] = {}
    pos = resp.text.find('languages:{') + 10
    js_code = resp.text[pos:]
    pos = js_code.find('}') + 1
    js_code = '{"' + js_code[1:pos].replace(':', '":').replace(',', ',"')
    languages = json.loads(js_code)
    for eng_lang, name in languages.items():
        if eng_lang == 'wt_WT':
            continue
        babel_tag = ddg_lang_map.get(eng_lang, eng_lang)
        if babel_tag == 'skip':
            continue
        try:
            if babel_tag == 'lang_region':
                sxng_tag = locales.region_tag(babel.Locale.parse(eng_lang))
                engine_traits.custom['lang_region'][sxng_tag] = eng_lang
                continue
            sxng_tag = locales.language_tag(babel.Locale.parse(babel_tag))
        except babel.UnknownLocaleError:
            print("ERROR: language %s (%s) is unknown by babel" % (name, eng_lang))
            continue
        conflict = engine_traits.languages.get(sxng_tag)
        if conflict:
            if conflict != eng_lang:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_lang))
            continue
        engine_traits.languages[sxng_tag] = eng_lang
--- a/searx/engines/duckduckgo_definitions.py
+++ b/searx/engines/duckduckgo_definitions.py
@ -1,22 +1,33 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
-"""DuckDuckGo (Instant Answer API)
+"""
 DuckDuckGo Instant Answer API
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 The `DDG-API <https://duckduckgo.com/api>`__ is no longer documented but from
 reverse engineering we can see that some services (e.g. instant answers) are
 still in use by the DDG search engine.
 As far as we can say the *instant answers* API does not support languages, or at
 least we could not find out how language support should work.  It seems that
 most of the features are based on English terms.
 """
-import json
+from typing import TYPE_CHECKING
 from urllib.parse import urlencode, urlparse, urljoin
 from lxml import html
 from searx.data import WIKIDATA_UNITS
-from searx.engines.duckduckgo import language_aliases
+from searx.utils import extract_text, html_to_text, get_string_replaces_function
 from searx.engines.duckduckgo import (  # pylint: disable=unused-import
    _fetch_supported_languages,
    supported_languages_url,
 )
 from searx.utils import extract_text, html_to_text, match_language, get_string_replaces_function
 from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom
 if TYPE_CHECKING:
    import logging
    logger: logging.Logger
 # about
 about = {
    "website": 'https://duckduckgo.com/',
@ -37,7 +48,7 @@ replace_http_by_https = get_string_replaces_function({'http:': 'https:'})
 def is_broken_text(text):
-    """duckduckgo may return something like "<a href="xxxx">http://somewhere Related website<a/>"
+    """duckduckgo may return something like ``<a href="xxxx">http://somewhere Related website<a/>``
    The href URL is broken, the "Related website" may contain some HTML.
@ -62,8 +73,6 @@ def result_to_text(text, htmlResult):
 def request(query, params):
    params['url'] = URL.format(query=urlencode({'q': query}))
    language = match_language(params['language'], supported_languages, language_aliases)
    language = language.split('-')[0]
    return params
@ -71,7 +80,7 @@ def response(resp):
    # pylint: disable=too-many-locals, too-many-branches, too-many-statements
    results = []
-    search_res = json.loads(resp.text)
+    search_res = resp.json()
    # search_res.get('Entity') possible values (not exhaustive) :
    # * continent / country / department / location / waterfall
@ -235,7 +244,7 @@ def unit_to_str(unit):
 def area_to_str(area):
-    """parse {'unit': 'http://www.wikidata.org/entity/Q712226', 'amount': '+20.99'}"""
+    """parse ``{'unit': 'https://www.wikidata.org/entity/Q712226', 'amount': '+20.99'}``"""
    unit = unit_to_str(area.get('unit'))
    if unit is not None:
        try:
--- a/searx/engines/duckduckgo_images.py
+++ b/searx/engines/duckduckgo_images.py
@ -1,26 +1,30 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """
- DuckDuckGo (Images)
+DuckDuckGo Images
 ~~~~~~~~~~~~~~~~~
 """
-from json import loads
+from typing import TYPE_CHECKING
 from urllib.parse import urlencode
-from searx.exceptions import SearxEngineAPIException
+
-from searx.engines.duckduckgo import get_region_code
+from searx.engines.duckduckgo import fetch_traits  # pylint: disable=unused-import
-from searx.engines.duckduckgo import (  # pylint: disable=unused-import
+from searx.engines.duckduckgo import (
-    _fetch_supported_languages,
+    get_ddg_lang,
-    supported_languages_url,
+    get_vqd,
 )
-from searx.network import get
+from searx.enginelib.traits import EngineTraits
 if TYPE_CHECKING:
    import logging
    logger: logging.Logger
 traits: EngineTraits
 # about
 about = {
    "website": 'https://duckduckgo.com/',
    "wikidata_id": 'Q12805',
    "official_api_documentation": {
        'url': 'https://duckduckgo.com/api',
        'comment': 'but images are not supported',
    },
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON (site requires js to get images)',
@ -32,70 +36,64 @@ paging = True
 safesearch = True
 send_accept_language_header = True
-# search-url
+safesearch_cookies = {0: '-2', 1: None, 2: '1'}
-images_url = 'https://duckduckgo.com/i.js?{query}&s={offset}&p={safesearch}&o=json&vqd={vqd}'
+safesearch_args = {0: '1', 1: None, 2: '1'}
 site_url = 'https://duckduckgo.com/?{query}&iar=images&iax=1&ia=images'
 # run query in site to get vqd number needed for requesting images
 # TODO: find a way to get this number without an extra request (is it a hash of the query?)
 def get_vqd(query, headers):
    query_url = site_url.format(query=urlencode({'q': query}))
    res = get(query_url, headers=headers)
    content = res.text
    if content.find('vqd=\'') == -1:
        raise SearxEngineAPIException('Request failed')
    vqd = content[content.find('vqd=\'') + 5 :]
    vqd = vqd[: vqd.find('\'')]
    return vqd
 # do search-request
 def request(query, params):
    # to avoid running actual external requests when testing
    if 'is_test' not in params:
        vqd = get_vqd(query, params['headers'])
    else:
        vqd = '12345'
-    offset = (params['pageno'] - 1) * 50
+    eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
    eng_lang = get_ddg_lang(traits, params['searxng_locale'])
-    safesearch = params['safesearch'] - 1
+    args = {
        'q': query,
        'o': 'json',
        # 'u': 'bing',
        'l': eng_region,
        'vqd': get_vqd(query, params["headers"]),
    }
-    region_code = get_region_code(params['language'], lang_list=supported_languages)
+    if params['pageno'] > 1:
-    if region_code:
+        args['s'] = (params['pageno'] - 1) * 100
-        params['url'] = images_url.format(
+
-            query=urlencode({'q': query, 'l': region_code}), offset=offset, safesearch=safesearch, vqd=vqd
+    params['cookies']['ad'] = eng_lang  # zh_CN
-        )
+    params['cookies']['ah'] = eng_region  # "us-en,de-de"
-    else:
+    params['cookies']['l'] = eng_region  # "hk-tzh"
-        params['url'] = images_url.format(query=urlencode({'q': query}), offset=offset, safesearch=safesearch, vqd=vqd)
+    logger.debug("cookies: %s", params['cookies'])
    safe_search = safesearch_cookies.get(params['safesearch'])
    if safe_search is not None:
        params['cookies']['p'] = safe_search  # "-2", "1"
    safe_search = safesearch_args.get(params['safesearch'])
    if safe_search is not None:
        args['p'] = safe_search  # "-1", "1"
    args = urlencode(args)
    params['url'] = 'https://duckduckgo.com/i.js?{args}&f={f}'.format(args=args, f=',,,,,')
    params['headers']['Accept'] = 'application/json, text/javascript, */*; q=0.01'
    params['headers']['Referer'] = 'https://duckduckgo.com/'
    params['headers']['X-Requested-With'] = 'XMLHttpRequest'
    logger.debug("headers: %s", params['headers'])
    return params
 # get response from search-request
 def response(resp):
    results = []
    res_json = resp.json()
    content = resp.text
    res_json = loads(content)
    # parse results
    for result in res_json['results']:
        title = result['title']
        url = result['url']
        thumbnail = result['thumbnail']
        image = result['image']
        # append result
        results.append(
            {
                'template': 'images.html',
-                'title': title,
+                'title': result['title'],
                'content': '',
-                'thumbnail_src': thumbnail,
+                'thumbnail_src': result['thumbnail'],
-                'img_src': image,
+                'img_src': result['image'],
-                'url': url,
+                'url': result['url'],
                'img_format': '%s x %s' % (result['width'], result['height']),
                'source': result['source'],
            }
        )
--- a/searx/engines/duckduckgo_weather.py
+++ b/searx/engines/duckduckgo_weather.py
@ -1,13 +1,29 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
-"""DuckDuckGo Weather"""
+"""
 DuckDuckGo Weather
 ~~~~~~~~~~~~~~~~~~
 """
 from typing import TYPE_CHECKING
 from json import loads
 from urllib.parse import quote
 from datetime import datetime
 from flask_babel import gettext
 from searx.engines.duckduckgo import fetch_traits  # pylint: disable=unused-import
 from searx.engines.duckduckgo import get_ddg_lang
 from searx.enginelib.traits import EngineTraits
 if TYPE_CHECKING:
    import logging
    logger: logging.Logger
 traits: EngineTraits
 about = {
    "website": 'https://duckduckgo.com/',
    "wikidata_id": 'Q12805',
@ -17,9 +33,11 @@ about = {
    "results": "JSON",
 }
-categories = ["others"]
+send_accept_language_header = True
-url = "https://duckduckgo.com/js/spice/forecast/{query}/{lang}"
+# engine dependent config
 categories = ["others"]
 URL = "https://duckduckgo.com/js/spice/forecast/{query}/{lang}"
 def generate_condition_table(condition):
@ -72,8 +90,17 @@ def generate_day_table(day):
 def request(query, params):
    params["url"] = url.format(query=quote(query), lang=params['language'].split('-')[0])
    eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
    eng_lang = get_ddg_lang(traits, params['searxng_locale'])
    # !ddw paris :es-AR --> {'ad': 'es_AR', 'ah': 'ar-es', 'l': 'ar-es'}
    params['cookies']['ad'] = eng_lang
    params['cookies']['ah'] = eng_region
    params['cookies']['l'] = eng_region
    logger.debug("cookies: %s", params['cookies'])
    params["url"] = URL.format(query=quote(query), lang=eng_lang.split('_')[0])
    return params
--- a/searx/engines/gentoo.py
+++ b/searx/engines/gentoo.py
@ -25,6 +25,7 @@ base_url = 'https://wiki.gentoo.org'
 # xpath queries
 xpath_results = '//ul[@class="mw-search-results"]/li'
 xpath_link = './/div[@class="mw-search-result-heading"]/a'
 xpath_content = './/div[@class="searchresult"]'
 # cut 'en' from 'en-US', 'de' from 'de-CH', and so on
@ -77,8 +78,6 @@ main_langs = {
    'uk': 'Українська',
    'zh': '简体中文',
 }
 supported_languages = dict(lang_urls, **main_langs)
 # do search-request
 def request(query, params):
@ -118,7 +117,8 @@ def response(resp):
        link = result.xpath(xpath_link)[0]
        href = urljoin(base_url, link.attrib.get('href'))
        title = extract_text(link)
        content = extract_text(result.xpath(xpath_content))
-        results.append({'url': href, 'title': title})
+        results.append({'url': href, 'title': title, 'content': content})
    return results
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@ -1,34 +1,39 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
-"""This is the implementation of the google WEB engine.  Some of this
+"""This is the implementation of the Google WEB engine.  Some of this
-implementations are shared by other engines:
+implementations (mainly the :py:obj:`get_google_info`) are shared by other
 engines:
 - :ref:`google images engine`
 - :ref:`google news engine`
 - :ref:`google videos engine`
-
+- :ref:`google scholar engine`
-The google WEB engine itself has a special setup option:
+- :ref:`google autocomplete`
 .. code:: yaml
  - name: google
    ...
    use_mobile_ui: false
 ``use_mobile_ui``: (default: ``false``)
  Enables to use *mobile endpoint* to bypass the google blocking (see
  :issue:`159`).  On the mobile UI of Google Search, the button :guilabel:`More
  results` is not affected by Google rate limiting and we can still do requests
  while actively blocked by the original Google search.  By activating
  ``use_mobile_ui`` this behavior is simulated by adding the parameter
  ``async=use_ac:true,_fmt:pc`` to the :py:func:`request`.
 """
 from typing import TYPE_CHECKING
 import re
 from urllib.parse import urlencode
 from lxml import html
-from searx.utils import match_language, extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
+import babel
 import babel.core
 import babel.languages
 from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
 from searx.locales import language_tag, region_tag, get_offical_locales
 from searx import network
 from searx.exceptions import SearxEngineCaptchaException
 from searx.enginelib.traits import EngineTraits
 if TYPE_CHECKING:
    import logging
    logger: logging.Logger
 traits: EngineTraits
 # about
 about = {
@ -45,64 +50,6 @@ categories = ['general', 'web']
 paging = True
 time_range_support = True
 safesearch = True
 send_accept_language_header = True
 use_mobile_ui = False
 supported_languages_url = 'https://www.google.com/preferences?#languages'
 # based on https://en.wikipedia.org/wiki/List_of_Google_domains and tests
 google_domains = {
    'BG': 'google.bg',  # Bulgaria
    'CZ': 'google.cz',  # Czech Republic
    'DE': 'google.de',  # Germany
    'DK': 'google.dk',  # Denmark
    'AT': 'google.at',  # Austria
    'CH': 'google.ch',  # Switzerland
    'GR': 'google.gr',  # Greece
    'AU': 'google.com.au',  # Australia
    'CA': 'google.ca',  # Canada
    'GB': 'google.co.uk',  # United Kingdom
    'ID': 'google.co.id',  # Indonesia
    'IE': 'google.ie',  # Ireland
    'IN': 'google.co.in',  # India
    'MY': 'google.com.my',  # Malaysia
    'NZ': 'google.co.nz',  # New Zealand
    'PH': 'google.com.ph',  # Philippines
    'SG': 'google.com.sg',  # Singapore
    'US': 'google.com',  # United States (google.us) redirects to .com
    'ZA': 'google.co.za',  # South Africa
    'AR': 'google.com.ar',  # Argentina
    'CL': 'google.cl',  # Chile
    'ES': 'google.es',  # Spain
    'MX': 'google.com.mx',  # Mexico
    'EE': 'google.ee',  # Estonia
    'FI': 'google.fi',  # Finland
    'BE': 'google.be',  # Belgium
    'FR': 'google.fr',  # France
    'IL': 'google.co.il',  # Israel
    'HR': 'google.hr',  # Croatia
    'HU': 'google.hu',  # Hungary
    'IT': 'google.it',  # Italy
    'JP': 'google.co.jp',  # Japan
    'KR': 'google.co.kr',  # South Korea
    'LT': 'google.lt',  # Lithuania
    'LV': 'google.lv',  # Latvia
    'NO': 'google.no',  # Norway
    'NL': 'google.nl',  # Netherlands
    'PL': 'google.pl',  # Poland
    'BR': 'google.com.br',  # Brazil
    'PT': 'google.pt',  # Portugal
    'RO': 'google.ro',  # Romania
    'RU': 'google.ru',  # Russia
    'SK': 'google.sk',  # Slovakia
    'SI': 'google.si',  # Slovenia
    'SE': 'google.se',  # Sweden
    'TH': 'google.co.th',  # Thailand
    'TR': 'google.com.tr',  # Turkey
    'UA': 'google.com.ua',  # Ukraine
    'CN': 'google.com.hk',  # There is no google.cn, we use .com.hk for zh-CN
    'HK': 'google.com.hk',  # Hong Kong
    'TW': 'google.com.tw',  # Taiwan
 }
 time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
@ -112,50 +59,50 @@ filter_mapping = {0: 'off', 1: 'medium', 2: 'high'}
 # specific xpath variables
 # ------------------------
-results_xpath = './/div[@data-sokoban-container]'
+results_xpath = './/div[contains(@jscontroller, "SC7lYd")]'
 title_xpath = './/a/h3[1]'
 href_xpath = './/a[h3]/@href'
-content_xpath = './/div[@data-content-feature=1]'
+content_xpath = './/div[@data-sncf]'
 # google *sections* are no usual *results*, we ignore them
 g_section_with_header = './g-section-with-header'
 # Suggestions are links placed in a *card-section*, we extract only the text
 # from the links not the links itself.
 suggestion_xpath = '//div[contains(@class, "EIaa9b")]//a'
 # UI_ASYNC = 'use_ac:true,_fmt:html' # returns a HTTP 500 when user search for
 #                                    # celebrities like '!google natasha allegri'
 #                                    # or '!google chris evans'
 UI_ASYNC = 'use_ac:true,_fmt:prog'
 """Format of the response from UI's async request."""
-def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
+
-    """Composing various language properties for the google engines.
+def get_google_info(params, eng_traits):
    """Composing various (language) properties for the google engines (:ref:`google
    API`).
    This function is called by the various google engines (:ref:`google web
    engine`, :ref:`google images engine`, :ref:`google news engine` and
    :ref:`google videos engine`).
-    :param dict param: request parameters of the engine
+    :param dict param: Request parameters of the engine.  At least
        a ``searxng_locale`` key should be in the dictionary.
-    :param list lang_list: list of supported languages of the engine
+    :param eng_traits: Engine's traits fetched from google preferences
-        :py:obj:`ENGINES_LANGUAGES[engine-name] <searx.data.ENGINES_LANGUAGES>`
+        (:py:obj:`searx.enginelib.traits.EngineTraits`)
    :param dict lang_list: custom aliases for non standard language codes
        (used when calling :py:func:`searx.utils.match_language`)
    :param bool supported_any_language: When a language is not specified, the
        language interpretation is left up to Google to decide how the search
        results should be delivered.  This argument is ``True`` for the google
        engine and ``False`` for the other engines (google-images, -news,
        -scholar, -videos).
    :rtype: dict
    :returns:
        Py-Dictionary with the key/value pairs:
        language:
-            Return value from :py:func:`searx.utils.match_language`
+            The language code that is used by google (e.g. ``lang_en`` or
            ``lang_zh-TW``)
        country:
-            The country code (e.g. US, AT, CA, FR, DE ..)
+            The country code that is used by google (e.g. ``US`` or ``TW``)
        locale:
            An instance of :py:obj:`babel.core.Locale` built from the
            ``searxng_locale`` value.
        subdomain:
            Google subdomain :py:obj:`google_domains` that fits to the country
@ -165,52 +112,67 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
            Py-Dictionary with additional request arguments (can be passed to
            :py:func:`urllib.parse.urlencode`).
            - ``hl`` parameter: specifies the interface language of user interface.
            - ``lr`` parameter: restricts search results to documents written in
              a particular language.
            - ``cr`` parameter: restricts search results to documents
              originating in a particular country.
            - ``ie`` parameter: sets the character encoding scheme that should
              be used to interpret the query string ('utf8').
            - ``oe`` parameter: sets the character encoding scheme that should
              be used to decode the XML result ('utf8').
        headers:
            Py-Dictionary with additional HTTP headers (can be passed to
            request's headers)
            - ``Accept: '*/*``
    """
    ret_val = {
        'language': None,
        'country': None,
        'subdomain': None,
        'params': {},
        'headers': {},
        'cookies': {},
        'locale': None,
    }
-    # language ...
+    sxng_locale = params.get('searxng_locale', 'all')
    try:
        locale = babel.Locale.parse(sxng_locale, sep='-')
    except babel.core.UnknownLocaleError:
        locale = None
-    _lang = params['language']
+    eng_lang = eng_traits.get_language(sxng_locale, 'lang_en')
-    _any_language = _lang.lower() == 'all'
+    lang_code = eng_lang.split('_')[-1]  # lang_zh-TW --> zh-TW / lang_en --> en
-    if _any_language:
+    country = eng_traits.get_region(sxng_locale, eng_traits.all_locale)
        _lang = 'en-US'
    language = match_language(_lang, lang_list, custom_aliases)
    ret_val['language'] = language
-    # country ...
+    # Test zh_hans & zh_hant --> in the topmost links of the result lists of
    # TW and HK you should find a wiktionary.org zh_hant link.  In the result
    # list of zh-CN there should be no hant link, instead you should find
    # zh.m.wikipedia.org/zh somewhere near the top.
-    _l = _lang.split('-')
+    # '!go 日 :zh-TW' --> https://zh.m.wiktionary.org/zh-hant/%E6%97%A5
-    if len(_l) == 2:
+    # '!go 日 :zh-CN' --> https://zh.m.wikipedia.org/zh/%E6%97%A5
-        country = _l[1]
+
-    else:
+    ret_val['language'] = eng_lang
        country = _l[0].upper()
        if country == 'EN':
            country = 'US'
    ret_val['country'] = country
-
+    ret_val['locale'] = locale
-    # subdomain ...
+    ret_val['subdomain'] = eng_traits.custom['supported_domains'].get(country.upper(), 'www.google.com')
    ret_val['subdomain'] = 'www.' + google_domains.get(country.upper(), 'google.com')
    # params & headers
    lang_country = '%s-%s' % (language, country)  # (en-US, en-EN, de-DE, de-AU, fr-FR ..)
    # hl parameter:
-    #   https://developers.google.com/custom-search/docs/xml_results#hlsp The
+    #   The hl parameter specifies the interface language (host language) of
-    # Interface Language:
+    #   your user interface. To improve the performance and the quality of your
    #   search results, you are strongly encouraged to set this parameter
    #   explicitly.
    #   https://developers.google.com/custom-search/docs/xml_results#hlsp
    # The Interface Language:
    #   https://developers.google.com/custom-search/docs/xml_results_appendices#interfaceLanguages
-    ret_val['params']['hl'] = lang_list.get(lang_country, language)
+    ret_val['params']['hl'] = lang_code
    # lr parameter:
    #   The lr (language restrict) parameter restricts search results to
@ -218,22 +180,72 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
    #   https://developers.google.com/custom-search/docs/xml_results#lrsp
    #   Language Collection Values:
    #   https://developers.google.com/custom-search/docs/xml_results_appendices#languageCollections
    if _any_language and supported_any_language:
        # interpretation is left up to Google (based on whoogle)
    #
-        # - add parameter ``source=lnt``
+    # To select 'all' languages an empty 'lr' value is used.
-        # - don't use parameter ``lr``
+    #
-        # - don't add a ``Accept-Language`` HTTP header.
+    # Different to other google services, Google Scholar supports selecting more
    # than one language. The languages are separated by a pipe '|' (logical OR).
    # For example: &lr=lang_zh-TW%7Clang_de selects articles written in
    # traditional Chinese OR German language.
-        ret_val['params']['source'] = 'lnt'
+    ret_val['params']['lr'] = eng_lang
    if sxng_locale == 'all':
        ret_val['params']['lr'] = ''
-    else:
+    # cr parameter:
    #   The cr parameter restricts search results to documents originating in a
    #   particular country.
    #   https://developers.google.com/custom-search/docs/xml_results#crsp
-        # restricts search results to documents written in a particular
+    ret_val['params']['cr'] = 'country' + country
-        # language.
+    if sxng_locale == 'all':
-        ret_val['params']['lr'] = "lang_" + lang_list.get(lang_country, language)
+        ret_val['params']['cr'] = ''
    # gl parameter: (mandatory for Google News)
    #   The gl parameter value is a two-letter country code. For WebSearch
    #   results, the gl parameter boosts search results whose country of origin
    #   matches the parameter value. See the Country Codes section for a list of
    #   valid values.
    #   Specifying a gl parameter value in WebSearch requests should improve the
    #   relevance of results. This is particularly true for international
    #   customers and, even more specifically, for customers in English-speaking
    #   countries other than the United States.
    #   https://developers.google.com/custom-search/docs/xml_results#glsp
    ret_val['params']['gl'] = country
    # ie parameter:
    #   The ie parameter sets the character encoding scheme that should be used
    #   to interpret the query string. The default ie value is latin1.
    #   https://developers.google.com/custom-search/docs/xml_results#iesp
    ret_val['params']['ie'] = 'utf8'
    # oe parameter:
    #   The oe parameter sets the character encoding scheme that should be used
    #   to decode the XML result. The default oe value is latin1.
    #   https://developers.google.com/custom-search/docs/xml_results#oesp
    ret_val['params']['oe'] = 'utf8'
    # num parameter:
    #   The num parameter identifies the number of search results to return.
    #   The default num value is 10, and the maximum value is 20. If you request
    #   more than 20 results, only 20 results will be returned.
    #   https://developers.google.com/custom-search/docs/xml_results#numsp
    # HINT: seems to have no effect (tested in google WEB & Images)
    # ret_val['params']['num'] = 20
    # HTTP headers
    ret_val['headers']['Accept'] = '*/*'
    # Cookies
    # - https://github.com/searxng/searxng/pull/1679#issuecomment-1235432746
    # - https://github.com/searxng/searxng/issues/1555
    ret_val['cookies']['CONSENT'] = "YES+"
    return ret_val
@ -245,33 +257,34 @@ def detect_google_sorry(resp):
 def request(query, params):
    """Google search request"""
-
+    # pylint: disable=line-too-long
    offset = (params['pageno'] - 1) * 10
-
+    google_info = get_google_info(params, traits)
    lang_info = get_lang_info(params, supported_languages, language_aliases, True)
    additional_parameters = {}
    if use_mobile_ui:
        additional_parameters = {
            'asearch': 'arc',
            'async': 'use_ac:true,_fmt:prog',
        }
    # https://www.google.de/search?q=corona&hl=de&lr=lang_de&start=0&tbs=qdr%3Ad&safe=medium
    query_url = (
        'https://'
-        + lang_info['subdomain']
+        + google_info['subdomain']
        + '/search'
        + "?"
        + urlencode(
            {
                'q': query,
-                **lang_info['params'],
+                **google_info['params'],
                'ie': "utf8",
                'oe': "utf8",
                'start': offset,
                'filter': '0',
-                **additional_parameters,
+                'start': offset,
                # 'vet': '12ahUKEwik3ZbIzfn7AhXMX_EDHbUDBh0QxK8CegQIARAC..i',
                # 'ved': '2ahUKEwik3ZbIzfn7AhXMX_EDHbUDBh0Q_skCegQIARAG',
                # 'cs' : 1,
                # 'sa': 'N',
                # 'yv': 3,
                # 'prmd': 'vin',
                # 'ei': 'GASaY6TxOcy_xc8PtYeY6AE',
                # 'sa': 'N',
                # 'sstk': 'AcOHfVkD7sWCSAheZi-0tx_09XDO55gTWY0JNq3_V26cNN-c8lfD45aZYPI8s_Bqp8s57AHz5pxchDtAGCA_cikAWSjy9kw3kgg'
                # formerly known as use_mobile_ui
                'asearch': 'arc',
                'async': UI_ASYNC,
            }
        )
    )
@ -282,25 +295,38 @@ def request(query, params):
        query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]})
    params['url'] = query_url
-    params['cookies']['CONSENT'] = "YES+"
+    params['cookies'] = google_info['cookies']
-    params['headers'].update(lang_info['headers'])
+    params['headers'].update(google_info['headers'])
    if use_mobile_ui:
        params['headers']['Accept'] = '*/*'
    else:
        params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
    return params
 # =26;[3,"dimg_ZNMiZPCqE4apxc8P3a2tuAQ_137"]a87;data:image/jpeg;base64,/9j/4AAQSkZJRgABA
 # ...6T+9Nl4cnD+gr9OK8I56/tX3l86nWYw//2Q==26;
 RE_DATA_IMAGE = re.compile(r'"(dimg_[^"]*)"[^;]*;(data:image[^;]*;[^;]*);')
 def _parse_data_images(dom):
    data_image_map = {}
    for img_id, data_image in RE_DATA_IMAGE.findall(dom.text_content()):
        end_pos = data_image.rfind('=')
        if end_pos > 0:
            data_image = data_image[: end_pos + 1]
        data_image_map[img_id] = data_image
    logger.debug('data:image objects --> %s', list(data_image_map.keys()))
    return data_image_map
 def response(resp):
    """Get response from google's search request"""
-
+    # pylint: disable=too-many-branches, too-many-statements
    detect_google_sorry(resp)
    results = []
    # convert the text to dom
    dom = html.fromstring(resp.text)
    data_image_map = _parse_data_images(dom)
    # results --> answer
    answer_list = eval_xpath(dom, '//div[contains(@class, "LGOjhe")]')
    if answer_list:
@ -309,25 +335,9 @@ def response(resp):
    else:
        logger.debug("did not find 'answer'")
        # results --> number_of_results
        if not use_mobile_ui:
            try:
                _txt = eval_xpath_getindex(dom, '//div[@id="result-stats"]//text()', 0)
                _digit = ''.join([n for n in _txt if n.isdigit()])
                number_of_results = int(_digit)
                results.append({'number_of_results': number_of_results})
            except Exception as e:  # pylint: disable=broad-except
                logger.debug("did not 'number_of_results'")
                logger.error(e, exc_info=True)
    # parse results
-    for result in eval_xpath_list(dom, results_xpath):
+    for result in eval_xpath_list(dom, results_xpath):  # pylint: disable=too-many-nested-blocks
        # google *sections*
        if extract_text(eval_xpath(result, g_section_with_header)):
            logger.debug("ignoring <g-section-with-header>")
            continue
        try:
            title_tag = eval_xpath_getindex(result, title_xpath, 0, default=None)
@ -336,16 +346,30 @@ def response(resp):
                logger.debug('ignoring item from the result_xpath list: missing title')
                continue
            title = extract_text(title_tag)
            url = eval_xpath_getindex(result, href_xpath, 0, None)
            if url is None:
                logger.debug('ignoring item from the result_xpath list: missing url of title "%s"', title)
                continue
-            content = extract_text(eval_xpath_getindex(result, content_xpath, 0, default=None), allow_none=True)
+
-            if content is None:
+            content_nodes = eval_xpath(result, content_xpath)
            content = extract_text(content_nodes)
            if not content:
                logger.debug('ignoring item from the result_xpath list: missing content of title "%s"', title)
                continue
-            logger.debug('add link to results: %s', title)
+            img_src = content_nodes[0].xpath('.//img/@src')
-            results.append({'url': url, 'title': title, 'content': content})
+            if img_src:
                img_src = img_src[0]
                if img_src.startswith('data:image'):
                    img_id = content_nodes[0].xpath('.//img/@id')
                    if img_id:
                        img_src = data_image_map.get(img_id[0])
            else:
                img_src = None
            results.append({'url': url, 'title': title, 'content': content, 'img_src': img_src})
        except Exception as e:  # pylint: disable=broad-except
            logger.error(e, exc_info=True)
@ -361,15 +385,107 @@ def response(resp):
 # get supported languages from their site
-def _fetch_supported_languages(resp):
+
-    ret_val = {}
+
 # Google country codes that are not mapped to a region.  The language code(s)
 # noted in parentheses are the official language(s) of the country.
 skip_countries = [
    # official language of google-country not in google-languages
    'AL',  # Albania (sq)
    'AZ',  # Azerbaijan (az)
    'BD',  # Bangladesh (bn)
    'BN',  # Brunei Darussalam (ms)
    'BT',  # Bhutan (dz)
    'ET',  # Ethiopia (am)
    'GE',  # Georgia (ka, os)
    'GL',  # Greenland (kl)
    'KH',  # Cambodia (km)
    'LA',  # Laos (lo)
    'LK',  # Sri Lanka (si, ta)
    'ME',  # Montenegro (sr)
    'MK',  # North Macedonia (mk, sq)
    'MM',  # Myanmar (my)
    'MN',  # Mongolia (mn)
    'MV',  # Maldives (dv) // dv_MV is unknown by babel
    'MY',  # Malaysia (ms)
    'NP',  # Nepal (ne)
    'TJ',  # Tajikistan (tg)
    'TM',  # Turkmenistan (tk)
    'UZ',  # Uzbekistan (uz)
 ]
 def fetch_traits(engine_traits: EngineTraits, add_domains: bool = True):
    """Fetch languages from Google."""
    # pylint: disable=import-outside-toplevel, too-many-branches
    engine_traits.custom['supported_domains'] = {}
    resp = network.get('https://www.google.com/preferences')
    if not resp.ok:
        raise RuntimeError("Response from Google's preferences is not OK.")
    dom = html.fromstring(resp.text)
-    radio_buttons = eval_xpath_list(dom, '//*[@id="langSec"]//input[@name="lr"]')
+    # supported language codes
-    for x in radio_buttons:
+    lang_map = {'no': 'nb'}
-        name = x.get("data-name")
+    for x in eval_xpath_list(dom, '//*[@id="langSec"]//input[@name="lr"]'):
        code = x.get("value").split('_')[-1]
        ret_val[code] = {"name": name}
-    return ret_val
+        eng_lang = x.get("value").split('_')[-1]
        try:
            locale = babel.Locale.parse(lang_map.get(eng_lang, eng_lang), sep='-')
        except babel.UnknownLocaleError:
            print("ERROR: %s -> %s is unknown by babel" % (x.get("data-name"), eng_lang))
            continue
        sxng_lang = language_tag(locale)
        conflict = engine_traits.languages.get(sxng_lang)
        if conflict:
            if conflict != eng_lang:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_lang, conflict, eng_lang))
            continue
        engine_traits.languages[sxng_lang] = 'lang_' + eng_lang
    # alias languages
    engine_traits.languages['zh'] = 'lang_zh-CN'
    # supported region codes
    for x in eval_xpath_list(dom, '//*[@name="region"]/..//input[@name="region"]'):
        eng_country = x.get("value")
        if eng_country in skip_countries:
            continue
        if eng_country == 'ZZ':
            engine_traits.all_locale = 'ZZ'
            continue
        sxng_locales = get_offical_locales(eng_country, engine_traits.languages.keys(), regional=True)
        if not sxng_locales:
            print("ERROR: can't map from google country %s (%s) to a babel region." % (x.get('data-name'), eng_country))
            continue
        for sxng_locale in sxng_locales:
            engine_traits.regions[region_tag(sxng_locale)] = eng_country
    # alias regions
    engine_traits.regions['zh-CN'] = 'HK'
    # supported domains
    if add_domains:
        resp = network.get('https://www.google.com/supported_domains')
        if not resp.ok:
            raise RuntimeError("Response from https://www.google.com/supported_domains is not OK.")
        for domain in resp.text.split():
            domain = domain.strip()
            if not domain or domain in [
                '.google.com',
            ]:
                continue
            region = domain.split('.')[-1].upper()
            engine_traits.custom['supported_domains'][region] = 'www' + domain
            if region == 'HK':
                # There is no google.cn, we use .com.hk for zh-CN
                engine_traits.custom['supported_domains']['CN'] = 'www' + domain
--- a/searx/engines/google_images.py
+++ b/searx/engines/google_images.py
@ -1,31 +1,38 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
-"""This is the implementation of the google images engine using the google
+"""This is the implementation of the Google Images engine using the internal
-internal API used the Google Go Android app.
+Google API used by the Google Go Android app.
 This internal API offers results in
- JSON (_fmt:json)
+- JSON (``_fmt:json``)
- Protobuf (_fmt:pb)
+- Protobuf_ (``_fmt:pb``)
- Protobuf compressed? (_fmt:pc)
+- Protobuf_ compressed? (``_fmt:pc``)
- HTML (_fmt:html)
+- HTML (``_fmt:html``)
- Protobuf encoded in JSON (_fmt:jspb).
+- Protobuf_ encoded in JSON (``_fmt:jspb``).
 .. _Protobuf: https://en.wikipedia.org/wiki/Protocol_Buffers
 """
 from typing import TYPE_CHECKING
 from urllib.parse import urlencode
 from json import loads
 from searx.engines.google import fetch_traits  # pylint: disable=unused-import
 from searx.engines.google import (
-    get_lang_info,
+    get_google_info,
    time_range_dict,
    detect_google_sorry,
 )
-# pylint: disable=unused-import
+if TYPE_CHECKING:
-from searx.engines.google import supported_languages_url, _fetch_supported_languages
+    import logging
    from searx.enginelib.traits import EngineTraits
    logger: logging.Logger
    traits: EngineTraits
 # pylint: enable=unused-import
 # about
 about = {
@ -40,7 +47,6 @@ about = {
 # engine dependent config
 categories = ['images', 'web']
 paging = True
 use_locale_domain = True
 time_range_support = True
 safesearch = True
 send_accept_language_header = True
@ -51,20 +57,18 @@ filter_mapping = {0: 'images', 1: 'active', 2: 'active'}
 def request(query, params):
    """Google-Image search request"""
-    lang_info = get_lang_info(params, supported_languages, language_aliases, False)
+    google_info = get_google_info(params, traits)
    query_url = (
        'https://'
-        + lang_info['subdomain']
+        + google_info['subdomain']
        + '/search'
        + "?"
        + urlencode(
            {
                'q': query,
                'tbm': "isch",
-                **lang_info['params'],
+                **google_info['params'],
                'ie': "utf8",
                'oe': "utf8",
                'asearch': 'isch',
                'async': '_fmt:json,p:1,ijn:' + str(params['pageno']),
            }
@ -77,9 +81,8 @@ def request(query, params):
        query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]})
    params['url'] = query_url
-    params['headers'].update(lang_info['headers'])
+    params['cookies'] = google_info['cookies']
-    params['headers']['User-Agent'] = 'NSTN/3.60.474802233.release Dalvik/2.1.0 (Linux; U; Android 12; US) gzip'
+    params['headers'].update(google_info['headers'])
    params['headers']['Accept'] = '*/*'
    return params
@ -111,7 +114,11 @@ def response(resp):
        copyright_notice = item["result"].get('iptc', {}).get('copyright_notice')
        if copyright_notice:
-            result_item['source'] += ' / ' + copyright_notice
+            result_item['source'] += ' | ' + copyright_notice
        freshness_date = item["result"].get("freshness_date")
        if freshness_date:
            result_item['source'] += ' | ' + freshness_date
        file_size = item.get('gsa', {}).get('file_size')
        if file_size:
--- a/searx/engines/google_news.py
+++ b/searx/engines/google_news.py
@ -1,24 +1,40 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
-"""This is the implementation of the google news engine.  The google news API
+"""This is the implementation of the Google News engine.
 ignores some parameters from the common :ref:`google API`:
- num_ : the number of search results is ignored
+Google News has a different region handling compared to Google WEB.
 - the ``ceid`` argument has to be set (:py:obj:`ceid_list`)
 - the hl_ argument has to be set correctly (and different to Google WEB)
 - the gl_ argument is mandatory
 If one of these arguments is not set correctly, the request is redirected to
 CONSENT dialog::
  https://consent.google.com/m?continue=
 The google news API ignores some parameters from the common :ref:`google API`:
 - num_ : the number of search results is ignored / there is no paging all
  results for a query term are in the first response.
 - save_ : is ignored / Google-News results are always *SafeSearch*
 .. _hl: https://developers.google.com/custom-search/docs/xml_results#hlsp
 .. _gl: https://developers.google.com/custom-search/docs/xml_results#glsp
 .. _num: https://developers.google.com/custom-search/docs/xml_results#numsp
 .. _save: https://developers.google.com/custom-search/docs/xml_results#safesp
 """
-# pylint: disable=invalid-name
+from typing import TYPE_CHECKING
 import binascii
 import re
 from urllib.parse import urlencode
 from base64 import b64decode
 from lxml import html
 import babel
 from searx import locales
 from searx.utils import (
    eval_xpath,
    eval_xpath_list,
@ -26,18 +42,19 @@ from searx.utils import (
    extract_text,
 )
-# pylint: disable=unused-import
+from searx.engines.google import fetch_traits as _fetch_traits  # pylint: disable=unused-import
 from searx.engines.google import (
-    supported_languages_url,
+    get_google_info,
    _fetch_supported_languages,
 )
 # pylint: enable=unused-import
 from searx.engines.google import (
    get_lang_info,
    detect_google_sorry,
 )
 from searx.enginelib.traits import EngineTraits
 if TYPE_CHECKING:
    import logging
    logger: logging.Logger
 traits: EngineTraits
 # about
 about = {
@ -49,70 +66,77 @@ about = {
    "results": 'HTML',
 }
 # compared to other google engines google-news has a different time range
 # support.  The time range is included in the search term.
 time_range_dict = {
    'day': 'when:1d',
    'week': 'when:7d',
    'month': 'when:1m',
    'year': 'when:1y',
 }
 # engine dependent config
 categories = ['news']
 paging = False
-use_locale_domain = True
+time_range_support = False
 time_range_support = True
 # Google-News results are always *SafeSearch*. Option 'safesearch' is set to
 # False here, otherwise checker will report safesearch-errors::
 #
 #  safesearch : results are identitical for safesearch=0 and safesearch=2
-safesearch = False
+safesearch = True
-send_accept_language_header = True
+# send_accept_language_header = True
 def request(query, params):
    """Google-News search request"""
-    lang_info = get_lang_info(params, supported_languages, language_aliases, False)
+    sxng_locale = params.get('searxng_locale', 'en-US')
    ceid = locales.get_engine_locale(sxng_locale, traits.custom['ceid'], default='US:en')
    google_info = get_google_info(params, traits)
    google_info['subdomain'] = 'news.google.com'  # google news has only one domain
-    # google news has only one domain
+    ceid_region, ceid_lang = ceid.split(':')
-    lang_info['subdomain'] = 'news.google.com'
+    ceid_lang, ceid_suffix = (
        ceid_lang.split('-')
        + [
            None,
        ]
    )[:2]
-    ceid = "%s:%s" % (lang_info['country'], lang_info['language'])
+    google_info['params']['hl'] = ceid_lang
-    # google news redirects en to en-US
+    if ceid_suffix and ceid_suffix not in ['Hans', 'Hant']:
    if lang_info['params']['hl'] == 'en':
        lang_info['params']['hl'] = 'en-US'
-    # Very special to google-news compared to other google engines, the time
+        if ceid_region.lower() == ceid_lang:
-    # range is included in the search term.
+            google_info['params']['hl'] = ceid_lang + '-' + ceid_region
-    if params['time_range']:
+        else:
-        query += ' ' + time_range_dict[params['time_range']]
+            google_info['params']['hl'] = ceid_lang + '-' + ceid_suffix
    elif ceid_region.lower() != ceid_lang:
        if ceid_region in ['AT', 'BE', 'CH', 'IL', 'SA', 'IN', 'BD', 'PT']:
            google_info['params']['hl'] = ceid_lang
        else:
            google_info['params']['hl'] = ceid_lang + '-' + ceid_region
    google_info['params']['lr'] = 'lang_' + ceid_lang.split('-')[0]
    google_info['params']['gl'] = ceid_region
    query_url = (
        'https://'
-        + lang_info['subdomain']
+        + google_info['subdomain']
-        + '/search'
+        + "/search?"
-        + "?"
+        + urlencode(
-        + urlencode({'q': query, **lang_info['params'], 'ie': "utf8", 'oe': "utf8", 'gl': lang_info['country']})
+            {
                'q': query,
                **google_info['params'],
            }
        )
        # ceid includes a ':' character which must not be urlencoded
        + ('&ceid=%s' % ceid)
-    )  # ceid includes a ':' character which must not be urlencoded
+    )
    params['url'] = query_url
-
+    params['cookies'] = google_info['cookies']
-    params['cookies']['CONSENT'] = "YES+"
+    params['headers'].update(google_info['headers'])
    params['headers'].update(lang_info['headers'])
    params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
    return params
 def response(resp):
    """Get response from google's search request"""
    results = []
    detect_google_sorry(resp)
    # convert the text to dom
@ -152,8 +176,8 @@ def response(resp):
        # The pub_date is mostly a string like 'yesterday', not a real
        # timezone date or time.  Therefore we can't use publishedDate.
-        pub_date = extract_text(eval_xpath(result, './article/div[1]/div[1]/time'))
+        pub_date = extract_text(eval_xpath(result, './article//time'))
-        pub_origin = extract_text(eval_xpath(result, './article/div[1]/div[1]/a'))
+        pub_origin = extract_text(eval_xpath(result, './article//a[@data-n-tid]'))
        content = ' / '.join([x for x in [pub_origin, pub_date] if x])
@ -174,3 +198,127 @@ def response(resp):
    # return results
    return results
 # Each entry has the form '<REGION>:<language>'.  The language part may carry a
 # suffix after a '-' (as in 'es-419', 'pt-150', 'zh-Hans' or 'zh-Hant').
 ceid_list = [
    'AE:ar',
    'AR:es-419',
    'AT:de',
    'AU:en',
    'BD:bn',
    'BE:fr',
    'BE:nl',
    'BG:bg',
    'BR:pt-419',
    'BW:en',
    'CA:en',
    'CA:fr',
    'CH:de',
    'CH:fr',
    'CL:es-419',
    'CN:zh-Hans',
    'CO:es-419',
    'CU:es-419',
    'CZ:cs',
    'DE:de',
    'EG:ar',
    'ES:es',
    'ET:en',
    'FR:fr',
    'GB:en',
    'GH:en',
    'GR:el',
    'HK:zh-Hant',
    'HU:hu',
    'ID:en',
    'ID:id',
    'IE:en',
    'IL:en',
    'IL:he',
    'IN:bn',
    'IN:en',
    'IN:hi',
    'IN:ml',
    'IN:mr',
    'IN:ta',
    'IN:te',
    'IT:it',
    'JP:ja',
    'KE:en',
    'KR:ko',
    'LB:ar',
    'LT:lt',
    'LV:en',
    'LV:lv',
    'MA:fr',
    'MX:es-419',
    'MY:en',
    'NA:en',
    'NG:en',
    'NL:nl',
    'NO:no',
    'NZ:en',
    'PE:es-419',
    'PH:en',
    'PK:en',
    'PL:pl',
    'PT:pt-150',
    'RO:ro',
    'RS:sr',
    'RU:ru',
    'SA:ar',
    'SE:sv',
    'SG:en',
    'SI:sl',
    'SK:sk',
    'SN:fr',
    'TH:th',
    'TR:tr',
    'TW:zh-Hant',
    'TZ:en',
    'UA:ru',
    'UA:uk',
    'UG:en',
    'US:en',
    'US:es-419',
    'VE:es-419',
    'VN:vi',
    'ZA:en',
    'ZW:en',
 ]
 """List of region/language combinations supported by Google News.  Values of the
 ``ceid`` argument of the Google News REST API."""
 # ceid values from ceid_list that fetch_traits() leaves out of the
 # engine_traits.custom['ceid'] mapping.
 _skip_values = [
    'ET:en',  # english (ethiopia)
    'ID:en',  # english (indonesia)
    'LV:en',  # english (latvia)
 ]
 # Explicit ceid --> locale mapping; fetch_traits() consults it before falling
 # back to the locale string assembled as '<lang>-<region>'.
 _ceid_locale_map = {'NO:no': 'nb-NO'}
 def fetch_traits(engine_traits: EngineTraits):
    """Fill ``engine_traits`` for Google News.

    First the traits are fetched by ``_fetch_traits`` (called with
    ``add_domains=False``).  Afterwards ``engine_traits.custom['ceid']`` is
    built: every item of :py:obj:`ceid_list` that is not listed in
    ``_skip_values`` is parsed by babel and stored under the region tag of the
    resulting locale.  Items babel does not know are reported by a printed
    error message and left out.
    """
    _fetch_traits(engine_traits, add_domains=False)

    ceid_map = {}
    engine_traits.custom['ceid'] = ceid_map

    for ceid in (item for item in ceid_list if item not in _skip_values):
        region, lang = ceid.split(':')

        # strip suffixes like '-419' or '-150', but keep the script suffixes
        # 'Hant' / 'Hans'
        parts = lang.split('-')
        if len(parts) > 1 and parts[1] not in ('Hant', 'Hans'):
            lang = parts[0]

        sxng_locale = _ceid_locale_map.get(ceid, lang + '-' + region)
        try:
            locale = babel.Locale.parse(sxng_locale, sep='-')
        except babel.UnknownLocaleError:
            print("ERROR: %s -> %s is unknown by babel" % (ceid, sxng_locale))
            continue

        ceid_map[locales.region_tag(locale)] = ceid
--- a/searx/engines/google_scholar.py
+++ b/searx/engines/google_scholar.py
@ -1,19 +1,18 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
-"""Google (Scholar)
+"""This is the implementation of the Google Scholar engine.
-For detailed description of the *REST-full* API see: `Query Parameter
+Compared to other Google services the Scholar engine has a simple GET REST-API
-Definitions`_.
+and there does not exist an `async` API.  Even though the API is slightly vintage we
-
+can make use of the :ref:`google API` to assemble the arguments of the GET
-.. _Query Parameter Definitions:
+request.
   https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
 """
-# pylint: disable=invalid-name
+from typing import TYPE_CHECKING
 from typing import Optional
 from urllib.parse import urlencode
 from datetime import datetime
 from typing import Optional
 from lxml import html
 from searx.utils import (
@ -23,19 +22,21 @@ from searx.utils import (
    extract_text,
 )
 from searx.exceptions import SearxEngineCaptchaException
 from searx.engines.google import fetch_traits  # pylint: disable=unused-import
 from searx.engines.google import (
-    get_lang_info,
+    get_google_info,
    time_range_dict,
    detect_google_sorry,
 )
 from searx.enginelib.traits import EngineTraits
-# pylint: disable=unused-import
+if TYPE_CHECKING:
-from searx.engines.google import (
+    import logging
    supported_languages_url,
    _fetch_supported_languages,
 )
-# pylint: enable=unused-import
+    logger: logging.Logger
 traits: EngineTraits
 # about
 about = {
@ -51,53 +52,62 @@ about = {
 categories = ['science', 'scientific publications']
 paging = True
 language_support = True
 use_locale_domain = True
 time_range_support = True
 safesearch = False
 send_accept_language_header = True
-def time_range_url(params):
+def time_range_args(params):
-    """Returns a URL query component for a google-Scholar time range based on
+    """Returns a dictionary with a time range arguments based on
-    ``params['time_range']``.  Google-Scholar does only support ranges in years.
+    ``params['time_range']``.
-    To have any effect, all the Searx ranges (*day*, *week*, *month*, *year*)
+
-    are mapped to *year*.  If no range is set, an empty string is returned.
+    Google Scholar supports a detailed search by year.  Searching by *last
-    Example::
+    month* or *last week* (as offered by SearXNG) is uncommon for scientific
    publications and is not supported by Google Scholar.
    To limit the result list when the users selects a range, all the SearXNG
    ranges (*day*, *week*, *month*, *year*) are mapped to *year*.  If no range
    is set an empty dictionary of arguments is returned.  Example;  when
    user selects a time range (current year minus one in 2022):
    .. code:: python
        { 'as_ylo' : 2021 }
        &as_ylo=2019
    """
-    # as_ylo=2016&as_yhi=2019
+    ret_val = {}
    ret_val = ''
    if params['time_range'] in time_range_dict:
-        ret_val = urlencode({'as_ylo': datetime.now().year - 1})
+        ret_val['as_ylo'] = datetime.now().year - 1
-    return '&' + ret_val
+    return ret_val
 def detect_google_captcha(dom):
    """Raise a :py:obj:`SearxEngineCaptchaException` when ``dom`` contains
    Google Scholar's own *not a Robot* dialog.

    In case of a CAPTCHA, Google Scholar does not redirect to
    ``sorry.google.com``; the dialog is looked up by the form with the id
    ``gs_captcha_f``.
    """
    captcha_forms = eval_xpath(dom, "//form[@id='gs_captcha_f']")
    if captcha_forms:
        raise SearxEngineCaptchaException()
 def request(query, params):
    """Google-Scholar search request"""
-    offset = (params['pageno'] - 1) * 10
+    google_info = get_google_info(params, traits)
    lang_info = get_lang_info(params, supported_languages, language_aliases, False)
    # subdomain is: scholar.google.xy
-    lang_info['subdomain'] = lang_info['subdomain'].replace("www.", "scholar.")
+    google_info['subdomain'] = google_info['subdomain'].replace("www.", "scholar.")
-    query_url = (
+    args = {
-        'https://'
+        'q': query,
-        + lang_info['subdomain']
+        **google_info['params'],
-        + '/scholar'
+        'start': (params['pageno'] - 1) * 10,
-        + "?"
+        'as_sdt': '2007',  # include patents / to disable set '0,5'
-        + urlencode({'q': query, **lang_info['params'], 'ie': "utf8", 'oe': "utf8", 'start': offset})
+        'as_vis': '0',  # include citations / to disable set '1'
-    )
+    }
    args.update(time_range_args(params))
-    query_url += time_range_url(params)
+    params['url'] = 'https://' + google_info['subdomain'] + '/scholar?' + urlencode(args)
-    params['url'] = query_url
+    params['cookies'] = google_info['cookies']
-
+    params['headers'].update(google_info['headers'])
    params['cookies']['CONSENT'] = "YES+"
    params['headers'].update(lang_info['headers'])
    params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
    # params['google_subdomain'] = subdomain
    return params
@ -138,19 +148,15 @@ def parse_gs_a(text: Optional[str]):
 def response(resp):  # pylint: disable=too-many-locals
-    """Get response from google's search request"""
+    """Parse response from Google Scholar"""
    results = []
    detect_google_sorry(resp)
    # which subdomain ?
    # subdomain = resp.search_params.get('google_subdomain')
    # convert the text to dom
    dom = html.fromstring(resp.text)
    detect_google_captcha(dom)
    # parse results
-    for result in eval_xpath_list(dom, '//div[@data-cid]'):
+    for result in eval_xpath_list(dom, '//div[@data-rp]'):
        title = extract_text(eval_xpath(result, './/h3[1]//a'))
@ -158,7 +164,7 @@ def response(resp):  # pylint: disable=too-many-locals
            # this is a [ZITATION] block
            continue
-        pub_type = extract_text(eval_xpath(result, './/span[@class="gs_ct1"]'))
+        pub_type = extract_text(eval_xpath(result, './/span[@class="gs_ctg2"]'))
        if pub_type:
            pub_type = pub_type[1:-1].lower()
--- a/searx/engines/google_videos.py
+++ b/searx/engines/google_videos.py
@ -1,6 +1,6 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
-"""This is the implementation of the google videos engine.
+"""This is the implementation of the Google Videos engine.
 .. admonition:: Content-Security-Policy (CSP)
@ -14,9 +14,8 @@
 """
-# pylint: disable=invalid-name
+from typing import TYPE_CHECKING
 import re
 from urllib.parse import urlencode
 from lxml import html
@ -27,20 +26,22 @@ from searx.utils import (
    extract_text,
 )
 from searx.engines.google import fetch_traits  # pylint: disable=unused-import
 from searx.engines.google import (
-    get_lang_info,
+    get_google_info,
    time_range_dict,
    filter_mapping,
    g_section_with_header,
    title_xpath,
    suggestion_xpath,
    detect_google_sorry,
 )
 from searx.enginelib.traits import EngineTraits
-# pylint: disable=unused-import
+if TYPE_CHECKING:
-from searx.engines.google import supported_languages_url, _fetch_supported_languages
+    import logging
-# pylint: enable=unused-import
+    logger: logging.Logger
 traits: EngineTraits
 # about
 about = {
@ -55,70 +56,32 @@ about = {
 # engine dependent config
 categories = ['videos', 'web']
-paging = False
+paging = True
 language_support = True
 use_locale_domain = True
 time_range_support = True
 safesearch = True
 send_accept_language_header = True
 # Cache of compiled regular expressions, keyed by the pattern string.
 RE_CACHE = {}
 def _re(regexpr):
    """Return the compiled regular expression for the pattern ``regexpr``.

    The compiled pattern is stored in :py:obj:`RE_CACHE`, so the pattern is
    compiled only on the first call; later calls with the same pattern string
    return the cached object.
    """
    pattern = RE_CACHE.get(regexpr)
    if pattern is None:
        # Compile on a cache miss only.  Passing ``re.compile(regexpr)`` as the
        # default of ``dict.get`` would evaluate it on every call.
        pattern = RE_CACHE[regexpr] = re.compile(regexpr)
    return pattern
 def scrap_out_thumbs_src(dom):
    """Scrap out thumbnail URLs from the ``google.ldi={...}`` <script> tags.

    Every <script> tag of ``dom`` whose content includes ``google.ldi={`` is
    searched for ``"dimg_<n>":"http..."`` pairs.  In the URL the escaped
    unicode sequences for ``=`` and ``&`` are replaced by the plain characters.

    Returns a dictionary that maps the image id (``dimg_<n>``) to its URL.
    """
    ret_val = {}
    thumb_name = 'dimg_'
    # "dimg_35":"https://i.ytimg.c....",
    thumb_re = _re('(' + thumb_name + '[0-9]*)":"(http[^"]*)')
    for script in eval_xpath_list(dom, '//script[contains(., "google.ldi={")]'):
        for k, v in thumb_re.findall(script.text):
            v = v.replace(r'\u003d', '=')
            v = v.replace(r'\u0026', '&')
            ret_val[k] = v
    logger.debug("found %s imgdata for: %s", thumb_name, ret_val.keys())
    return ret_val
 def scrap_out_thumbs(dom):
    """Scrap out inline thumbnail data from the ``_setImagesSrc`` <script> tags.

    For every <script> tag of ``dom`` whose content includes ``_setImagesSrc``
    the first ``s='...'`` value is taken as image data and assigned to each
    ``dimg_<n>`` id that occurs in the same script.  Scripts without such a
    value are skipped.

    Returns a dictionary that maps the image id to the image data.
    """
    prefix = 'dimg_'
    thumbs = {}
    for node in eval_xpath_list(dom, '//script[contains(., "_setImagesSrc")]'):
        source = node.text
        # var s='data:image/jpeg;base64, ...'
        payloads = _re("s='([^']*)").findall(source)
        if payloads:
            # At least the equal sign in the URL needs to be decoded
            decoded = payloads[0].replace(r"\x3d", "=")
            # var ii=['dimg_17']
            for thumb_id in _re(r"(%s\d+)" % prefix).findall(source):
                thumbs[thumb_id] = decoded
    logger.debug("found %s imgdata for: %s", prefix, thumbs.keys())
    return thumbs
 def request(query, params):
    """Google-Video search request"""
-    lang_info = get_lang_info(params, supported_languages, language_aliases, False)
+    google_info = get_google_info(params, traits)
    query_url = (
        'https://'
-        + lang_info['subdomain']
+        + google_info['subdomain']
        + '/search'
        + "?"
-        + urlencode({'q': query, 'tbm': "vid", **lang_info['params'], 'ie': "utf8", 'oe': "utf8"})
+        + urlencode(
            {
                'q': query,
                'tbm': "vid",
                'start': 10 * params['pageno'],
                **google_info['params'],
                'asearch': 'arc',
                'async': 'use_ac:true,_fmt:html',
            }
        )
    )
    if params['time_range'] in time_range_dict:
@ -127,9 +90,8 @@ def request(query, params):
        query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]})
    params['url'] = query_url
-    params['cookies']['CONSENT'] = "YES+"
+    params['cookies'] = google_info['cookies']
-    params['headers'].update(lang_info['headers'])
+    params['headers'].update(google_info['headers'])
    params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
    return params
@ -141,43 +103,30 @@ def response(resp):
    # convert the text to dom
    dom = html.fromstring(resp.text)
    vidthumb_imgdata = scrap_out_thumbs(dom)
    thumbs_src = scrap_out_thumbs_src(dom)
    logger.debug(str(thumbs_src))
    # parse results
    for result in eval_xpath_list(dom, '//div[contains(@class, "g ")]'):
-        # ignore google *sections*
+        img_src = eval_xpath_getindex(result, './/img/@src', 0, None)
-        if extract_text(eval_xpath(result, g_section_with_header)):
+        if img_src is None:
            logger.debug("ignoring <g-section-with-header>")
            continue
-        # ingnore articles without an image id / e.g. news articles
+        title = extract_text(eval_xpath_getindex(result, './/a/h3[1]', 0))
-        img_id = eval_xpath_getindex(result, './/g-img/img/@id', 0, default=None)
+        url = eval_xpath_getindex(result, './/a/h3[1]/../@href', 0)
        if img_id is None:
            logger.error("no img_id found in item %s (news article?)", len(results) + 1)
            continue
        img_src = vidthumb_imgdata.get(img_id, None)
        if not img_src:
            img_src = thumbs_src.get(img_id, "")
        title = extract_text(eval_xpath_getindex(result, title_xpath, 0))
        url = eval_xpath_getindex(result, './/div[@class="dXiKIc"]//a/@href', 0)
        length = extract_text(eval_xpath(result, './/div[contains(@class, "P7xzyf")]/span/span'))
        c_node = eval_xpath_getindex(result, './/div[@class="Uroaid"]', 0)
        content = extract_text(c_node)
-        pub_info = extract_text(eval_xpath(result, './/div[@class="Zg1NU"]'))
+        pub_info = extract_text(eval_xpath(result, './/div[@class="P7xzyf"]'))
        length = extract_text(eval_xpath(result, './/div[@class="J1mWY"]'))
        results.append(
            {
                'url': url,
                'title': title,
                'content': content,
                'length': length,
                'author': pub_info,
                'thumbnail': img_src,
                'length': length,
                'template': 'videos.html',
            }
        )
--- a/searx/engines/peertube.py
+++ b/searx/engines/peertube.py
@ -1,18 +1,30 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
-"""
+# lint: pylint
- peertube (Videos)
+"""Peertube and :py:obj:`SepiaSearch <searx.engines.sepiasearch>` do share
 (more or less) the same REST API and the schema of the JSON result is identical.
 """
-from json import loads
+import re
 from datetime import datetime
 from urllib.parse import urlencode
-from searx.utils import html_to_text
+from datetime import datetime
 from dateutil.parser import parse
 from dateutil.relativedelta import relativedelta
 import babel
 from searx import network
 from searx.locales import language_tag
 from searx.utils import html_to_text
 from searx.enginelib.traits import EngineTraits
 traits: EngineTraits
 # about
 about = {
    # pylint: disable=line-too-long
    "website": 'https://joinpeertube.org',
    "wikidata_id": 'Q50938515',
-    "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html',
+    "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
@ -22,66 +34,155 @@ about = {
 categories = ["videos"]
 paging = True
 base_url = "https://peer.tube"
-supported_languages_url = 'https://peer.tube/api/v1/videos/languages'
+"""Base URL of the Peertube instance.  A list of instances is available at:
 - https://instances.joinpeertube.org/instances
 """
 time_range_support = True
 time_range_table = {
    'day': relativedelta(),
    'week': relativedelta(weeks=-1),
    'month': relativedelta(months=-1),
    'year': relativedelta(years=-1),
 }
 safesearch = True
 safesearch_table = {0: 'both', 1: 'false', 2: 'false'}
 def minute_to_hm(minute):
    if isinstance(minute, int):
        return "%d:%02d" % (divmod(minute, 60))
    return None
 # do search-request
 def request(query, params):
-    sanitized_url = base_url.rstrip("/")
+    """Assemble request for the Peertube API"""
-    pageno = (params["pageno"] - 1) * 15
+
-    search_url = sanitized_url + "/api/v1/search/videos/?pageno={pageno}&{query}"
+    if not query:
-    query_dict = {"search": query}
+        return False
-    language = params["language"].split("-")[0]
+
-    if "all" != language and language in supported_languages:
+    # eng_region = traits.get_region(params['searxng_locale'], 'en_US')
-        query_dict["languageOneOf"] = language
+    eng_lang = traits.get_language(params['searxng_locale'], None)
-    params["url"] = search_url.format(query=urlencode(query_dict), pageno=pageno)
+
    params['url'] = (
        base_url.rstrip("/")
        + "/api/v1/search/videos?"
        + urlencode(
            {
                'search': query,
                'searchTarget': 'search-index',  # Vidiversum
                'resultType': 'videos',
                'start': (params['pageno'] - 1) * 10,
                'count': 10,
                # -createdAt: sort by date descending / createdAt: date ascending
                'sort': '-match',  # sort by *match descending*
                'nsfw': safesearch_table[params['safesearch']],
            }
        )
    )
    if eng_lang is not None:
        params['url'] += '&languageOneOf[]=' + eng_lang
        params['url'] += '&boostLanguages[]=' + eng_lang
    if params['time_range'] in time_range_table:
        time = datetime.now().date() + time_range_table[params['time_range']]
        params['url'] += '&startDate=' + time.isoformat()
    return params
 def _get_offset_from_pageno(pageno):
    return (pageno - 1) * 15 + 1
 # get response from search-request
 def response(resp):
-    sanitized_url = base_url.rstrip("/")
+    return video_response(resp)
 def video_response(resp):
    """Parse video response from SepiaSearch and Peertube instances."""
    results = []
-    search_res = loads(resp.text)
+    json_data = resp.json()
-    # return empty array if there are no results
+    if 'data' not in json_data:
    if "data" not in search_res:
        return []
-    # parse results
+    for result in json_data['data']:
-    for res in search_res["data"]:
+        metadata = [
-        title = res["name"]
+            x
-        url = sanitized_url + "/videos/watch/" + res["uuid"]
+            for x in [
-        description = res["description"]
+                result.get('channel', {}).get('displayName'),
-        if description:
+                result.get('channel', {}).get('name') + '@' + result.get('channel', {}).get('host'),
-            content = html_to_text(res["description"])
+                ', '.join(result.get('tags', [])),
-        else:
+            ]
-            content = ""
+            if x
-        thumbnail = sanitized_url + res["thumbnailPath"]
+        ]
        publishedDate = datetime.strptime(res["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
        results.append(
            {
-                "template": "videos.html",
+                'url': result['url'],
-                "url": url,
+                'title': result['name'],
-                "title": title,
+                'content': html_to_text(result.get('description') or ''),
-                "content": content,
+                'author': result.get('account', {}).get('displayName'),
-                "publishedDate": publishedDate,
+                'length': minute_to_hm(result.get('duration')),
-                "iframe_src": sanitized_url + res["embedPath"],
+                'template': 'videos.html',
-                "thumbnail": thumbnail,
+                'publishedDate': parse(result['publishedAt']),
                'iframe_src': result.get('embedUrl'),
                'thumbnail': result.get('thumbnailUrl') or result.get('previewUrl'),
                'metadata': ' | '.join(metadata),
            }
        )
    # return results
    return results
-def _fetch_supported_languages(resp):
+def fetch_traits(engine_traits: EngineTraits):
-    videolanguages = resp.json()
+    """Fetch languages from peertube's search-index source code.
-    peertube_languages = list(videolanguages.keys())
+
-    return peertube_languages
+    See videoLanguages_ in commit `8ed5c729 - Refactor and redesign client`_
    .. _8ed5c729 - Refactor and redesign client:
       https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729
    .. _videoLanguages:
       https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729#3d8747f9a60695c367c70bb64efba8f403721fad_0_291
    """
    resp = network.get(
        'https://framagit.org/framasoft/peertube/search-index/-/raw/master/client/src/components/Filters.vue',
        # the response from search-index repository is very slow
        timeout=60,
    )
    if not resp.ok:
        print("ERROR: response from peertube is not OK.")
        return
    js_lang = re.search(r"videoLanguages \(\)[^\n]+(.*?)\]", resp.text, re.DOTALL)
    if not js_lang:
        print("ERROR: can't determine languages from peertube")
        return
    for lang in re.finditer(r"\{ id: '([a-z]+)', label:", js_lang.group(1)):
        try:
            eng_tag = lang.group(1)
            if eng_tag == 'oc':
                # Occitan is not known by babel, its closest relative is Catalan
                # but 'ca' is already in the list of engine_traits.languages -->
                # 'oc' will be ignored.
                continue
            sxng_tag = language_tag(babel.Locale.parse(eng_tag))
        except babel.UnknownLocaleError:
            print("ERROR: %s is unknown by babel" % eng_tag)
            continue
        conflict = engine_traits.languages.get(sxng_tag)
        if conflict:
            if conflict != eng_tag:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
            continue
        engine_traits.languages[sxng_tag] = eng_tag
    engine_traits.languages['zh_Hans'] = 'zh'
    engine_traits.languages['zh_Hant'] = 'zh'
--- a/searx/engines/qwant.py
+++ b/searx/engines/qwant.py
@ -34,7 +34,9 @@ import babel
 from searx.exceptions import SearxEngineAPIException
 from searx.network import raise_for_httperror
-from searx.locales import get_engine_locale
+from searx.enginelib.traits import EngineTraits
 traits: EngineTraits
 # about
 about = {
@ -49,7 +51,6 @@ about = {
 # engine dependent config
 categories = []
 paging = True
 supported_languages_url = about['website']
 qwant_categ = None  # web|news|images|videos
 safesearch = True
@ -95,7 +96,7 @@ def request(query, params):
    )
    # add qwant's locale
-    q_locale = get_engine_locale(params['language'], supported_languages, default='en_US')
+    q_locale = traits.get_region(params["searxng_locale"], default='en_US')
    params['url'] += '&locale=' + q_locale
    # add safesearch option
@ -243,15 +244,20 @@ def response(resp):
    return results
-def _fetch_supported_languages(resp):
+def fetch_traits(engine_traits: EngineTraits):
    # pylint: disable=import-outside-toplevel
    from searx import network
    from searx.locales import region_tag
    resp = network.get(about['website'])
    text = resp.text
    text = text[text.find('INITIAL_PROPS') :]
    text = text[text.find('{') : text.find('</script>')]
    q_initial_props = loads(text)
    q_locales = q_initial_props.get('locales')
-    q_valid_locales = []
+    eng_tag_list = set()
    for country, v in q_locales.items():
        for lang in v['langs']:
@ -261,25 +267,18 @@ def _fetch_supported_languages(resp):
                # qwant-news does not support all locales from qwant-web:
                continue
-            q_valid_locales.append(_locale)
+            eng_tag_list.add(_locale)
-    supported_languages = {}
+    for eng_tag in eng_tag_list:
    for q_locale in q_valid_locales:
        try:
-            locale = babel.Locale.parse(q_locale, sep='_')
+            sxng_tag = region_tag(babel.Locale.parse(eng_tag, sep='_'))
-        except babel.core.UnknownLocaleError:
+        except babel.UnknownLocaleError:
-            print("ERROR: can't determine babel locale of quant's locale %s" % q_locale)
+            print("ERROR: can't determine babel locale of quant's locale %s" % eng_tag)
            continue
-        # note: supported_languages (dict)
+        conflict = engine_traits.regions.get(sxng_tag)
-        #
+        if conflict:
-        #   dict's key is a string build up from a babel.Locale object / the
+            if conflict != eng_tag:
-        #   notation 'xx-XX' (and 'xx') conforms to SearXNG's locale (and
+                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
-        #   language) notation and dict's values are the locale strings used by
+            continue
-        #   the engine.
+        engine_traits.regions[sxng_tag] = eng_tag
        searxng_locale = locale.language + '-' + locale.territory  # --> params['language']
        supported_languages[searxng_locale] = q_locale
    return supported_languages
--- a/searx/engines/sepiasearch.py
+++ b/searx/engines/sepiasearch.py
@ -1,70 +1,80 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
-"""
+# lint: pylint
- SepiaSearch (Videos)
+"""SepiaSearch uses the same languages as :py:obj:`Peertube
 <searx.engines.peertube>` and the response is identical to the response from the
 peertube engines.
 """
-from json import loads
+from typing import TYPE_CHECKING
-from dateutil import parser, relativedelta
+
 from urllib.parse import urlencode
 from datetime import datetime
-# about
+from searx.engines.peertube import fetch_traits  # pylint: disable=unused-import
 from searx.engines.peertube import (
    # pylint: disable=unused-import
    video_response,
    safesearch_table,
    time_range_table,
 )
 from searx.enginelib.traits import EngineTraits
 if TYPE_CHECKING:
    import logging
    logger: logging.Logger
 traits: EngineTraits
 about = {
    # pylint: disable=line-too-long
    "website": 'https://sepiasearch.org',
    "wikidata_id": None,
-    "official_api_documentation": "https://framagit.org/framasoft/peertube/search-index/-/tree/master/server/controllers/api",  # NOQA
+    "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
 }
 # engine dependent config
 categories = ['videos']
 paging = True
 base_url = 'https://sepiasearch.org'
 time_range_support = True
 safesearch = True
 supported_languages = [
    # fmt: off
    'en', 'fr', 'ja', 'eu', 'ca', 'cs', 'eo', 'el',
    'de', 'it', 'nl', 'es', 'oc', 'gd', 'zh', 'pt',
    'sv', 'pl', 'fi', 'ru'
    # fmt: on
 ]
 base_url = 'https://sepiasearch.org/api/v1/search/videos'
 safesearch_table = {0: 'both', 1: 'false', 2: 'false'}
 time_range_table = {
    'day': relativedelta.relativedelta(),
    'week': relativedelta.relativedelta(weeks=-1),
    'month': relativedelta.relativedelta(months=-1),
    'year': relativedelta.relativedelta(years=-1),
 }
 def minute_to_hm(minute):
    if isinstance(minute, int):
        return "%d:%02d" % (divmod(minute, 60))
    return None
 def request(query, params):
    """Assemble request for the SepiaSearch API"""
    if not query:
        return False
    # eng_region = traits.get_region(params['searxng_locale'], 'en_US')
    eng_lang = traits.get_language(params['searxng_locale'], None)
    params['url'] = (
-        base_url
+        base_url.rstrip("/")
-        + '?'
+        + "/api/v1/search/videos?"
        + urlencode(
            {
                'search': query,
                'start': (params['pageno'] - 1) * 10,
                'count': 10,
-                'sort': '-match',
+                # -createdAt: sort by date descending / createdAt: date ascending
                'sort': '-match',  # sort by *match descending*
                'nsfw': safesearch_table[params['safesearch']],
            }
        )
    )
-    language = params['language'].split('-')[0]
+    if eng_lang is not None:
-    if language in supported_languages:
+        params['url'] += '&languageOneOf[]=' + eng_lang
-        params['url'] += '&languageOneOf[]=' + language
+        params['url'] += '&boostLanguages[]=' + eng_lang
    if params['time_range'] in time_range_table:
        time = datetime.now().date() + time_range_table[params['time_range']]
        params['url'] += '&startDate=' + time.isoformat()
@ -73,34 +83,4 @@ def request(query, params):
 def response(resp):
-    results = []
+    return video_response(resp)
    search_results = loads(resp.text)
    if 'data' not in search_results:
        return []
    for result in search_results['data']:
        title = result['name']
        content = result['description']
        thumbnail = result['thumbnailUrl']
        publishedDate = parser.parse(result['publishedAt'])
        author = result.get('account', {}).get('displayName')
        length = minute_to_hm(result.get('duration'))
        url = result['url']
        results.append(
            {
                'url': url,
                'title': title,
                'content': content,
                'author': author,
                'length': length,
                'template': 'videos.html',
                'publishedDate': publishedDate,
                'iframe_src': result.get('embedUrl'),
                'thumbnail': thumbnail,
            }
        )
    return results
--- a/searx/engines/startpage.py
+++ b/searx/engines/startpage.py
@ -1,28 +1,108 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
-"""Startpage (Web)
+"""Startpage's language & region selectors are a mess ..
 .. _startpage regions:
 Startpage regions
 =================
 In the list of regions there are tags we need to map to common region tags::
  pt-BR_BR --> pt_BR
  zh-CN_CN --> zh_Hans_CN
  zh-TW_TW --> zh_Hant_TW
  zh-TW_HK --> zh_Hant_HK
  en-GB_GB --> en_GB
 and there is at least one tag with a three letter language tag (ISO 639-2)::
  fil_PH --> fil_PH
 The locale code ``no_NO`` from Startpage does not exist and is mapped to
 ``nb-NO``::
    babel.core.UnknownLocaleError: unknown locale 'no_NO'
 For reference see languages-subtag at iana; ``no`` is the macrolanguage [1]_ and
 W3C recommends subtag over macrolanguage [2]_.
 .. [1] `iana: language-subtag-registry
   <https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry>`_ ::
      type: language
      Subtag: nb
      Description: Norwegian Bokmål
      Added: 2005-10-16
      Suppress-Script: Latn
      Macrolanguage: no
 .. [2]
   Use macrolanguages with care.  Some language subtags have a Scope field set to
   macrolanguage, i.e. this primary language subtag encompasses a number of more
   specific primary language subtags in the registry.  ...  As we recommended for
   the collection subtags mentioned above, in most cases you should try to use
   the more specific subtags ... `W3: The primary language subtag
   <https://www.w3.org/International/questions/qa-choosing-language-tags#langsubtag>`_
 .. _startpage languages:
 Startpage languages
 ===================
 :py:obj:`send_accept_language_header`:
  The displayed name in Startpage's settings page depends on the location of the
  IP when ``Accept-Language`` HTTP header is unset.  In :py:obj:`fetch_traits`
  we use::
    'Accept-Language': "en-US,en;q=0.5",
    ..
  to get uniform names independent from the IP.
 .. _startpage categories:
 Startpage categories
 ====================
 Startpage's category (for Web-search, News, Videos, ..) is set by
 :py:obj:`startpage_categ` in  settings.yml::
  - name: startpage
    engine: startpage
    startpage_categ: web
    ...
 .. hint::
   The default category is ``web`` .. and other categories than ``web`` are not
   yet implemented.
 """
 from typing import TYPE_CHECKING
 from collections import OrderedDict
 import re
 from time import time
 from urllib.parse import urlencode
 from unicodedata import normalize, combining
 from time import time
 from datetime import datetime, timedelta
-from dateutil import parser
+import dateutil.parser
-from lxml import html
+import lxml.html
-from babel import Locale
+import babel
 from babel.localedata import locale_identifiers
-from searx.network import get
+from searx import network
-from searx.utils import extract_text, eval_xpath, match_language
+from searx.utils import extract_text, eval_xpath, gen_useragent
-from searx.exceptions import (
+from searx.exceptions import SearxEngineCaptchaException
-    SearxEngineResponseException,
+from searx.locales import region_tag
-    SearxEngineCaptchaException,
+from searx.enginelib.traits import EngineTraits
 )
 if TYPE_CHECKING:
    import logging
    logger: logging.Logger
 traits: EngineTraits
 # about
 about = {
@ -34,18 +114,28 @@ about = {
    "results": 'HTML',
 }
 startpage_categ = 'web'
 """Startpage's category, visit :ref:`startpage categories`.
 """
 send_accept_language_header = True
 """Startpage tries to guess user's language and territory from the HTTP
 ``Accept-Language``.  Optionally the user can select a search-language (can be
 different to the UI language) and a region filter.
 """
 # engine dependent config
 categories = ['general', 'web']
 # there is a mechanism to block "bot" search
 # (probably the parameter qid), require
 # storing of qid's between multiple search-calls
 paging = True
-supported_languages_url = 'https://www.startpage.com/do/settings'
+time_range_support = True
 safesearch = True
 time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
 safesearch_dict = {0: '0', 1: '1', 2: '1'}
 # search-url
-base_url = 'https://startpage.com/'
+base_url = 'https://www.startpage.com'
-search_url = base_url + 'sp/search?'
+search_url = base_url + '/sp/search'
 # specific xpath variables
 # ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]
@ -53,92 +143,193 @@ search_url = base_url + 'sp/search?'
 results_xpath = '//div[@class="w-gl__result__main"]'
 link_xpath = './/a[@class="w-gl__result-title result-link"]'
 content_xpath = './/p[@class="w-gl__description"]'
 search_form_xpath = '//form[@id="search"]'
 """XPath of Startpage's origin search form
 .. code: html
    <form action="/sp/search" method="post">
      <input type="text" name="query"  value="" ..>
      <input type="hidden" name="t" value="device">
      <input type="hidden" name="lui" value="english">
      <input type="hidden" name="sc" value="Q7Mt5TRqowKB00">
      <input type="hidden" name="cat" value="web">
      <input type="hidden" class="abp" id="abp-input" name="abp" value="1">
    </form>
 """
 # timestamp of the last fetch of 'sc' code
 sc_code_ts = 0
 sc_code = ''
 sc_code_cache_sec = 30
 """Time in seconds the sc-code is cached in memory :py:obj:`get_sc_code`."""
-def raise_captcha(resp):
+def get_sc_code(searxng_locale, params):
    """Get an actual ``sc`` argument from Startpage's search form (HTML page).
-    if str(resp.url).startswith('https://www.startpage.com/sp/captcha'):
+    Startpage puts a ``sc`` argument on every HTML :py:obj:`search form
-        raise SearxEngineCaptchaException()
+    <search_form_xpath>`.  Without this argument Startpage considers the request
    is from a bot.  We do not know what is encoded in the value of the ``sc``
    argument, but it seems to be a kind of a *time-stamp*.
-
+    Startpage's search form generates a new sc-code on each request.  This
-def get_sc_code(headers):
+    function scrapes a new sc-code from Startpage's home page every
-    """Get an actual `sc` argument from startpage's home page.
+    :py:obj:`sc_code_cache_sec` seconds.
    Startpage puts a `sc` argument on every link.  Without this argument
    startpage considers the request is from a bot.  We do not know what is
    encoded in the value of the `sc` argument, but it seems to be a kind of a
    *time-stamp*.  This *time-stamp* is valid for a few hours.
    This function scrap a new *time-stamp* from startpage's home page every hour
    (3000 sec).
    """
    global sc_code_ts, sc_code  # pylint: disable=global-statement
-    if time() > (sc_code_ts + 3000):
+    if sc_code and (time() < (sc_code_ts + sc_code_cache_sec)):
-        logger.debug("query new sc time-stamp ...")
+        logger.debug("get_sc_code: reuse '%s'", sc_code)
        return sc_code
-        resp = get(base_url, headers=headers)
+    headers = {**params['headers']}
-        raise_captcha(resp)
+    headers['Origin'] = base_url
-        dom = html.fromstring(resp.text)
+    headers['Referer'] = base_url + '/'
    # headers['Connection'] = 'keep-alive'
    # headers['Accept-Encoding'] = 'gzip, deflate, br'
    # headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8'
    # headers['User-Agent'] = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:105.0) Gecko/20100101 Firefox/105.0'
    # add Accept-Language header
    if searxng_locale == 'all':
        searxng_locale = 'en-US'
    locale = babel.Locale.parse(searxng_locale, sep='-')
    if send_accept_language_header:
        ac_lang = locale.language
        if locale.territory:
            ac_lang = "%s-%s,%s;q=0.9,*;q=0.5" % (
                locale.language,
                locale.territory,
                locale.language,
            )
        headers['Accept-Language'] = ac_lang
    get_sc_url = base_url + '/?sc=%s' % (sc_code)
    logger.debug("query new sc time-stamp ... %s", get_sc_url)
    logger.debug("headers: %s", headers)
    resp = network.get(get_sc_url, headers=headers)
    # ?? x = network.get('https://www.startpage.com/sp/cdn/images/filter-chevron.svg', headers=headers)
    # ?? https://www.startpage.com/sp/cdn/images/filter-chevron.svg
    # ?? ping-back URL: https://www.startpage.com/sp/pb?sc=TLsB0oITjZ8F21
    if str(resp.url).startswith('https://www.startpage.com/sp/captcha'):
        raise SearxEngineCaptchaException(
            message="get_sc_code: got redirected to https://www.startpage.com/sp/captcha",
        )
    dom = lxml.html.fromstring(resp.text)
    try:
-            # <input type="hidden" name="sc" value="...">
+        sc_code = eval_xpath(dom, search_form_xpath + '//input[@name="sc"]/@value')[0]
            sc_code = eval_xpath(dom, '//input[@name="sc"]/@value')[0]
    except IndexError as exc:
-            # suspend startpage API --> https://github.com/searxng/searxng/pull/695
+        logger.debug("suspend startpage API --> https://github.com/searxng/searxng/pull/695")
-            raise SearxEngineResponseException(
+        raise SearxEngineCaptchaException(
-                suspended_time=7 * 24 * 3600, message="PR-695: query new sc time-stamp failed!"
+            message="get_sc_code: [PR-695] query new sc time-stamp failed! (%s)" % resp.url,
        ) from exc
    sc_code_ts = time()
-        logger.debug("new value is: %s", sc_code)
+    logger.debug("get_sc_code: new value is: %s", sc_code)
    return sc_code
 # do search-request
 def request(query, params):
    """Assemble a Startpage request.
-    # pylint: disable=line-too-long
+    To avoid CAPTCHA we need to send a well formed HTTP POST request with a
-    # The format string from Startpage's FFox add-on [1]::
+    cookie.  We need to form a request that is identical to the request build by
-    #
+    Startpage's search form:
    #     https://www.startpage.com/do/dsearch?query={searchTerms}&cat=web&pl=ext-ff&language=__MSG_extensionUrlLanguage__&extVersion=1.3.0
    #
    # [1] https://addons.mozilla.org/en-US/firefox/addon/startpage-private-search/
    - in the cookie the **region** is selected
    - in the HTTP POST data the **language** is selected
    Additionally the arguments from Startpage's search form need to be set in
    HTML POST data / compare ``<input>`` elements: :py:obj:`search_form_xpath`.
    """
    if startpage_categ == 'web':
        return _request_cat_web(query, params)
    logger.error("Startpages's category '%' is not yet implemented.", startpage_categ)
    return params
 def _request_cat_web(query, params):
    engine_region = traits.get_region(params['searxng_locale'], 'en-US')
    engine_language = traits.get_language(params['searxng_locale'], 'en')
    # build arguments
    args = {
        'query': query,
        'page': params['pageno'],
        'cat': 'web',
-        # 'pl': 'ext-ff',
+        't': 'device',
-        # 'extVersion': '1.3.0',
+        'sc': get_sc_code(params['searxng_locale'], params),  # hint: this func needs HTTP headers,
-        # 'abp': "-1",
+        'with_date': time_range_dict.get(params['time_range'], ''),
        'sc': get_sc_code(params['headers']),
    }
-    # set language if specified
+    if engine_language:
-    if params['language'] != 'all':
+        args['language'] = engine_language
-        lang_code = match_language(params['language'], supported_languages, fallback=None)
+        args['lui'] = engine_language
-        if lang_code:
+
-            language_name = supported_languages[lang_code]['alias']
+    args['abp'] = '1'
-            args['language'] = language_name
+    if params['pageno'] > 1:
-            args['lui'] = language_name
+        args['page'] = params['pageno']
    # build cookie
    lang_homepage = 'en'
    cookie = OrderedDict()
    cookie['date_time'] = 'world'
    cookie['disable_family_filter'] = safesearch_dict[params['safesearch']]
    cookie['disable_open_in_new_window'] = '0'
    cookie['enable_post_method'] = '1'  # hint: POST
    cookie['enable_proxy_safety_suggest'] = '1'
    cookie['enable_stay_control'] = '1'
    cookie['instant_answers'] = '1'
    cookie['lang_homepage'] = 's/device/%s/' % lang_homepage
    cookie['num_of_results'] = '10'
    cookie['suggestions'] = '1'
    cookie['wt_unit'] = 'celsius'
    if engine_language:
        cookie['language'] = engine_language
        cookie['language_ui'] = engine_language
    if engine_region:
        cookie['search_results_region'] = engine_region
    params['cookies']['preferences'] = 'N1N'.join(["%sEEE%s" % x for x in cookie.items()])
    logger.debug('cookie preferences: %s', params['cookies']['preferences'])
    # POST request
    logger.debug("data: %s", args)
    params['data'] = args
    params['method'] = 'POST'
    params['url'] = search_url
    params['headers']['Origin'] = base_url
    params['headers']['Referer'] = base_url + '/'
    # is the Accept header needed?
    # params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
    params['url'] = search_url + urlencode(args)
    return params
 # get response from search-request
 def response(resp):
-    results = []
+    dom = lxml.html.fromstring(resp.text)
-    dom = html.fromstring(resp.text)
+    if startpage_categ == 'web':
        return _response_cat_web(dom)
    logger.error("Startpages's category '%' is not yet implemented.", startpage_categ)
    return []
 def _response_cat_web(dom):
    results = []
    # parse results
    for result in eval_xpath(dom, results_xpath):
@ -173,7 +364,7 @@ def response(resp):
            content = content[date_pos:]
            try:
-                published_date = parser.parse(date_string, dayfirst=True)
+                published_date = dateutil.parser.parse(date_string, dayfirst=True)
            except ValueError:
                pass
@ -199,62 +390,103 @@ def response(resp):
    return results
-# get supported languages from their site
+def fetch_traits(engine_traits: EngineTraits):
-def _fetch_supported_languages(resp):
+    """Fetch :ref:`languages <startpage languages>` and :ref:`regions <startpage
-    # startpage's language selector is a mess each option has a displayed name
+    regions>` from Startpage."""
-    # and a value, either of which may represent the language name in the native
+    # pylint: disable=too-many-branches
    # script, the language name in English, an English transliteration of the
    # native name, the English name of the writing script used by the language,
    # or occasionally something else entirely.
-    # this cases are so special they need to be hardcoded, a couple of them are misspellings
+    headers = {
-    language_names = {
+        'User-Agent': gen_useragent(),
-        'english_uk': 'en-GB',
+        'Accept-Language': "en-US,en;q=0.5",  # Startpage needs the English language to return uniform names
        'fantizhengwen': ['zh-TW', 'zh-HK'],
        'hangul': 'ko',
        'malayam': 'ml',
        'norsk': 'nb',
        'sinhalese': 'si',
        'sudanese': 'su',
    }
    resp = network.get('https://www.startpage.com/do/settings', headers=headers)
-    # get the English name of every language known by babel
+    if not resp.ok:
-    language_names.update(
+        print("ERROR: response from Startpage is not OK.")
-        {
+
-            # fmt: off
+    dom = lxml.html.fromstring(resp.text)
-            name.lower(): lang_code
+
-            # pylint: disable=protected-access
+    # regions
-            for lang_code, name in Locale('en')._data['languages'].items()
+
-            # fmt: on
+    sp_region_names = []
-        }
+    for option in dom.xpath('//form[@name="settings"]//select[@name="search_results_region"]/option'):
-    )
+        sp_region_names.append(option.get('value'))
    for eng_tag in sp_region_names:
        if eng_tag == 'all':
            continue
        babel_region_tag = {'no_NO': 'nb_NO'}.get(eng_tag, eng_tag)  # norway
        if '-' in babel_region_tag:
            l, r = babel_region_tag.split('-')
            r = r.split('_')[-1]
            sxng_tag = region_tag(babel.Locale.parse(l + '_' + r, sep='_'))
        else:
            try:
                sxng_tag = region_tag(babel.Locale.parse(babel_region_tag, sep='_'))
            except babel.UnknownLocaleError:
                print("ERROR: can't determine babel locale of startpage's locale %s" % eng_tag)
                continue
        conflict = engine_traits.regions.get(sxng_tag)
        if conflict:
            if conflict != eng_tag:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
            continue
        engine_traits.regions[sxng_tag] = eng_tag
    # languages
    catalog_engine2code = {name.lower(): lang_code for lang_code, name in babel.Locale('en').languages.items()}
    # get the native name of every language known by babel
-    for lang_code in filter(lambda lang_code: lang_code.find('_') == -1, locale_identifiers()):
+
-        native_name = Locale(lang_code).get_language_name().lower()
+    for lang_code in filter(lambda lang_code: lang_code.find('_') == -1, babel.localedata.locale_identifiers()):
        native_name = babel.Locale(lang_code).get_language_name().lower()
        # add native name exactly as it is
-        language_names[native_name] = lang_code
+        catalog_engine2code[native_name] = lang_code
        # add "normalized" language name (i.e. français becomes francais and español becomes espanol)
        unaccented_name = ''.join(filter(lambda c: not combining(c), normalize('NFKD', native_name)))
        if len(unaccented_name) == len(unaccented_name.encode()):
            # add only if result is ascii (otherwise "normalization" didn't work)
-            language_names[unaccented_name] = lang_code
+            catalog_engine2code[unaccented_name] = lang_code
    # values that can't be determined by babel's languages names
    catalog_engine2code.update(
        {
            # traditional chinese used in ..
            'fantizhengwen': 'zh_Hant',
            # Korean alphabet
            'hangul': 'ko',
            # Malayalam is one of 22 scheduled languages of India.
            'malayam': 'ml',
            'norsk': 'nb',
            'sinhalese': 'si',
        }
    )
    skip_eng_tags = {
        'english_uk',  # SearXNG lang 'en' already maps to 'english'
    }
    dom = html.fromstring(resp.text)
    sp_lang_names = []
    for option in dom.xpath('//form[@name="settings"]//select[@name="language"]/option'):
        sp_lang_names.append((option.get('value'), extract_text(option).lower()))
-    supported_languages = {}
+        eng_tag = option.get('value')
-    for sp_option_value, sp_option_text in sp_lang_names:
+        if eng_tag in skip_eng_tags:
-        lang_code = language_names.get(sp_option_value) or language_names.get(sp_option_text)
+            continue
-        if isinstance(lang_code, str):
+        name = extract_text(option).lower()
            supported_languages[lang_code] = {'alias': sp_option_value}
        elif isinstance(lang_code, list):
            for _lc in lang_code:
                supported_languages[_lc] = {'alias': sp_option_value}
        else:
            print('Unknown language option in Startpage: {} ({})'.format(sp_option_value, sp_option_text))
-    return supported_languages
+        sxng_tag = catalog_engine2code.get(eng_tag)
        if sxng_tag is None:
            sxng_tag = catalog_engine2code[name]
        conflict = engine_traits.languages.get(sxng_tag)
        if conflict:
            if conflict != eng_tag:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
            continue
        engine_traits.languages[sxng_tag] = eng_tag
--- a/searx/engines/wikidata.py
+++ b/searx/engines/wikidata.py
@ -1,9 +1,12 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
-"""Wikidata
+"""This module implements the Wikidata engine.  Some implementations are shared
 from :ref:`wikipedia engine`.
 """
 # pylint: disable=missing-class-docstring
 from typing import TYPE_CHECKING
 from hashlib import md5
 from urllib.parse import urlencode, unquote
 from json import loads
@ -13,12 +16,17 @@ from babel.dates import format_datetime, format_date, format_time, get_datetime_
 from searx.data import WIKIDATA_UNITS
 from searx.network import post, get
-from searx.utils import match_language, searx_useragent, get_string_replaces_function
+from searx.utils import searx_useragent, get_string_replaces_function
 from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom
-from searx.engines.wikipedia import (  # pylint: disable=unused-import
+from searx.engines.wikipedia import fetch_traits as _fetch_traits
-    _fetch_supported_languages,
+from searx.enginelib.traits import EngineTraits
-    supported_languages_url,
+
-)
+if TYPE_CHECKING:
    import logging
    logger: logging.Logger
 traits: EngineTraits
 # about
 about = {
@ -154,33 +162,35 @@ def send_wikidata_query(query, method='GET'):
 def request(query, params):
-    language = params['language'].split('-')[0]
+
-    if language == 'all':
+    # wikidata does not support zh-classical (zh_Hans) / zh-TW, zh-HK and zh-CN
-        language = 'en'
+    # mapped to zh
-    else:
+    sxng_lang = params['searxng_locale'].split('-')[0]
-        language = match_language(params['language'], supported_languages, language_aliases).split('-')[0]
+    language = traits.get_language(sxng_lang, 'en')
    query, attributes = get_query(query, language)
    logger.debug("request --> language %s // len(attributes): %s", language, len(attributes))
    params['method'] = 'POST'
    params['url'] = SPARQL_ENDPOINT_URL
    params['data'] = {'query': query}
    params['headers'] = get_headers()
    params['language'] = language
    params['attributes'] = attributes
    return params
 def response(resp):
    results = []
    jsonresponse = loads(resp.content.decode())
-    language = resp.search_params['language'].lower()
+    language = resp.search_params['language']
    attributes = resp.search_params['attributes']
    logger.debug("request --> language %s // len(attributes): %s", language, len(attributes))
    seen_entities = set()
    for result in jsonresponse.get('results', {}).get('bindings', []):
        attribute_result = {key: value['value'] for key, value in result.items()}
        entity_url = attribute_result['item']
@ -756,3 +766,15 @@ def init(engine_settings=None):  # pylint: disable=unused-argument
        lang = result['name']['xml:lang']
        entity_id = result['item']['value'].replace('http://www.wikidata.org/entity/', '')
        WIKIDATA_PROPERTIES[(entity_id, lang)] = name.capitalize()
 def fetch_traits(engine_traits: EngineTraits):
    """Use the languages evaluated by :py:obj:`wikipedia.fetch_traits
    <searx.engines.wikipedia.fetch_traits>`, except zh-classical (zh_Hans)
    which is not supported by wikidata.

    :param engine_traits: traits object that is filled in place; the
      ``wiki_netloc`` custom field is reset to an empty mapping.
    """
    _fetch_traits(engine_traits)
    # wikidata does not support zh-classical (zh_Hans); pop with a default so a
    # missing key in the traits fetched from Wikipedia does not raise KeyError
    engine_traits.languages.pop('zh_Hans', None)
    # wikidata does not have net-locations for the languages
    engine_traits.custom['wiki_netloc'] = {}
--- a/searx/engines/wikipedia.py
+++ b/searx/engines/wikipedia.py
@ -1,13 +1,26 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
-"""
+# lint: pylint
- Wikipedia (Web)
+"""This module implements the Wikipedia engine.  Some of this implementations
 are shared by other engines:
 - :ref:`wikidata engine`
 The list of supported languages is fetched from the article linked by
 :py:obj:`wikipedia_article_depth`.  Unlike traditional search engines, wikipedia
 does not support one Wikipedia for all the languages, but there is one Wikipedia
 for every language (:py:obj:`fetch_traits`).
 """
-from urllib.parse import quote
+import urllib.parse
-from json import loads
+import babel
-from lxml.html import fromstring
+
-from searx.utils import match_language, searx_useragent
+from lxml import html
-from searx.network import raise_for_httperror
+
 from searx import network
 from searx.locales import language_tag
 from searx.enginelib.traits import EngineTraits
 traits: EngineTraits
 # about
 about = {
@ -19,32 +32,40 @@ about = {
    "results": 'JSON',
 }
 send_accept_language_header = True
-# search-url
+wikipedia_article_depth = 'https://meta.wikimedia.org/wiki/Wikipedia_article_depth'
-search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
+"""The *editing depth* of Wikipedia is one of several possible rough indicators
-supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
+of the encyclopedia's collaborative quality, showing how frequently its articles
-language_variants = {"zh": ("zh-cn", "zh-hk", "zh-mo", "zh-my", "zh-sg", "zh-tw")}
+are updated.  The measurement of depth was introduced after some limitations of
 the classic measurement of article count were realized.
 """
 # example: https://zh-classical.wikipedia.org/api/rest_v1/page/summary/日
 rest_v1_summary_url = 'https://{wiki_netloc}/api/rest_v1/page/summary/{title}'
 """`wikipedia rest_v1 summary API`_: The summary response includes an extract of
 the first paragraph of the page in plain text and HTML as well as the type of
 page. This is useful for page previews (fka. Hovercards, aka. Popups) on the web
 and link previews in the apps.
 .. _wikipedia rest_v1 summary API: https://en.wikipedia.org/api/rest_v1/#/Page%20content/get_page_summary__title_
 """
 # set language in base_url
 def url_lang(lang):
    """Return the language part used in Wikipedia's base URL for ``lang``.

    ``'en'`` is returned when the language prefix is ``all`` or is neither in
    ``supported_languages`` nor in ``language_aliases``; otherwise the part in
    front of the first ``-`` of the ``match_language`` result is returned.
    """
    prefix = lang.split('-')[0]
    if prefix == 'all':
        return 'en'
    if prefix not in supported_languages and prefix not in language_aliases:
        return 'en'
    matched = match_language(lang, supported_languages, language_aliases)
    return matched.split('-')[0]
 # do search-request
 def request(query, params):
    """Assemble a request (`wikipedia rest_v1 summary API`_)."""
    if query.islower():
        query = query.title()
-    language = url_lang(params['language'])
+    engine_language = traits.get_language(params['searxng_locale'], 'en')
-    params['url'] = search_url.format(title=quote(query), language=language)
+    wiki_netloc = traits.custom['wiki_netloc'].get(engine_language, 'https://en.wikipedia.org/wiki/')
    title = urllib.parse.quote(query)
    # '!wikipedia 日 :zh-TW' --> https://zh-classical.wikipedia.org/
    # '!wikipedia 日 :zh' --> https://zh.wikipedia.org/
    params['url'] = rest_v1_summary_url.format(wiki_netloc=wiki_netloc, title=title)
    params['headers']['User-Agent'] = searx_useragent()
    params['raise_for_httperror'] = False
    params['soft_max_redirects'] = 2
@ -53,13 +74,14 @@ def request(query, params):
 # get response from search-request
 def response(resp):
    results = []
    if resp.status_code == 404:
        return []
    if resp.status_code == 400:
        try:
-            api_result = loads(resp.text)
+            api_result = resp.json()
-        except:
+        except Exception:  # pylint: disable=broad-except
            pass
        else:
            if (
@ -68,20 +90,14 @@ def response(resp):
            ):
                return []
-    raise_for_httperror(resp)
+    network.raise_for_httperror(resp)
    results = []
    api_result = loads(resp.text)
    # skip disambiguation pages
    if api_result.get('type') != 'standard':
        return []
    api_result = resp.json()
    title = api_result['title']
    wikipedia_link = api_result['content_urls']['desktop']['page']
    results.append({'url': wikipedia_link, 'title': title, 'content': api_result.get('description', '')})
-    results.append({'url': wikipedia_link, 'title': title})
+    if api_result.get('type') == 'standard':
        results.append(
            {
                'infobox': title,
@ -95,22 +111,114 @@ def response(resp):
    return results
-# get supported languages from their site
+# Nonstandard language codes
-def _fetch_supported_languages(resp):
+#
-    supported_languages = {}
+# These Wikipedias use language codes that do not conform to the ISO 639
-    dom = fromstring(resp.text)
+# standard (which is how wiki subdomains are chosen nowadays).
    tables = dom.xpath('//table[contains(@class,"sortable")]')
    for table in tables:
        # exclude header row
        trs = table.xpath('.//tr')[1:]
        for tr in trs:
            td = tr.xpath('./td')
            code = td[3].xpath('./a')[0].text
            name = td[1].xpath('./a')[0].text
            english_name = td[1].xpath('./a')[0].text
            articles = int(td[4].xpath('./a')[0].text.replace(',', ''))
            # exclude languages with too few articles
            if articles >= 100:
                supported_languages[code] = {"name": name, "english_name": english_name}
-    return supported_languages
+lang_map = {
    'be-tarask': 'bel',
    'ak': 'aka',
    'als': 'gsw',
    'bat-smg': 'sgs',
    'cbk-zam': 'cbk',
    'fiu-vro': 'vro',
    'map-bms': 'map',
    'nrm': 'nrf',
    'roa-rup': 'rup',
    'nds-nl': 'nds',
    #'simple: – invented code used for the Simple English Wikipedia (not the official IETF code en-simple)
    'zh-min-nan': 'nan',
    'zh-yue': 'yue',
    'an': 'arg',
    'zh-classical': 'zh-Hant',  # babel maps classical to zh-Hans (for whatever reason)
 }
 # Wikipedia language codes that fetch_traits() skips (rows whose code is listed
 # here are not added to the engine traits).
 unknown_langs = [
    'an',  # Aragonese
    'ba',  # Bashkir
    'bar',  # Bavarian
    'bcl',  # Central Bicolano
    'be-tarask',  # Belarusian variant / Belarusian is already covered by 'be'
    'bpy',  # Bishnupriya Manipuri is unknown by babel
    'hif',  # Fiji Hindi
    'ilo',  # Ilokano
    'li',  # Limburgish
    'sco',  # Scots (sco) is not known by babel, Scottish Gaelic (gd) is known by babel
    'sh',  # Serbo-Croatian
    'simple',  # simple english is not known as a natural language different to english (babel)
    'vo',  # Volapük
    'wa',  # Walloon
 ]
 def fetch_traits(engine_traits: EngineTraits):
    """Fetch languages from Wikipedia.

    The rows of the table fetched from :py:obj:`wikipedia_article_depth` are
    evaluated.  A row is skipped when the Wikipedia has less than 10000
    articles, when its depth is below 20, when its code is listed in
    :py:obj:`unknown_langs` or when babel does not know the (mapped) code.

    The location of the Wikipedia address of a language is mapped in a
    :py:obj:`custom field <searx.enginelib.traits.EngineTraits.custom>`
    (``wiki_netloc``).  The keys are the language codes used by Wikipedia (the
    engine tags), not SearXNG's language tags.  Here is a reduced example:

    .. code:: python

       traits.custom['wiki_netloc'] = {
           "en": "en.wikipedia.org",
           ..
           "als": "als.wikipedia.org",
           ..
           "zh": "zh.wikipedia.org",
           "zh-classical": "zh-classical.wikipedia.org"
       }

    :param engine_traits: traits object that is filled in place
      (``languages`` and ``custom['wiki_netloc']``).
    """
    engine_traits.custom['wiki_netloc'] = {}
    # insert alias to map from a region like zh-CN to a language zh_Hans
    engine_traits.languages['zh_Hans'] = 'zh'
    resp = network.get(wikipedia_article_depth)
    if not resp.ok:
        # NOTE: only reported, the response body is parsed nevertheless
        print("ERROR: response from Wikipedia is not OK.")
    dom = html.fromstring(resp.text)
    for row in dom.xpath('//table[contains(@class,"sortable")]//tbody/tr'):
        cols = row.xpath('./td')
        if not cols:
            # rows without <td> cells carry no data
            continue
        cols = [c.text_content().strip() for c in cols]
        # a '-' in the depth column is read as '0', thousands separators are dropped
        depth = float(cols[3].replace('-', '0').replace(',', ''))
        articles = int(cols[4].replace(',', ''))
        if articles < 10000:
            # exclude languages with too few articles
            continue
        if int(depth) < 20:
            # Rough indicator of a Wikipedia's quality, showing how frequently
            # its articles are updated.
            continue
        eng_tag = cols[2]
        if eng_tag in unknown_langs:
            continue
        try:
            sxng_tag = language_tag(babel.Locale.parse(lang_map.get(eng_tag, eng_tag), sep='-'))
        except babel.UnknownLocaleError:
            print("ERROR: %s [%s] is unknown by babel" % (cols[0], eng_tag))
            continue
        conflict = engine_traits.languages.get(sxng_tag)
        if conflict:
            # first engine tag wins, a different second one is only reported
            if conflict != eng_tag:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
            continue
        # the link is only resolved for rows that are actually registered
        wiki_url = urllib.parse.urlparse(row.xpath('./td[3]/a/@href')[0])
        engine_traits.languages[sxng_tag] = eng_tag
        engine_traits.custom['wiki_netloc'][eng_tag] = wiki_url.netloc
--- a/searx/engines/yahoo.py
+++ b/searx/engines/yahoo.py
@ -17,8 +17,10 @@ from searx.utils import (
    eval_xpath_getindex,
    eval_xpath_list,
    extract_text,
    match_language,
 )
 from searx.enginelib.traits import EngineTraits
 traits: EngineTraits
 # about
 about = {
@ -34,8 +36,7 @@ about = {
 categories = ['general', 'web']
 paging = True
 time_range_support = True
-supported_languages_url = 'https://search.yahoo.com/preferences/languages'
+# send_accept_language_header = True
 """Supported languages are read from Yahoo preference page."""
 time_range_dict = {
    'day': ('1d', 'd'),
@ -43,15 +44,10 @@ time_range_dict = {
    'month': ('1m', 'm'),
 }
 language_aliases = {
    'zh-HK': 'zh_chs',
    'zh-CN': 'zh_chs',  # dead since 2015 / routed to hk.search.yahoo.com
    'zh-TW': 'zh_cht',
 }
 lang2domain = {
    'zh_chs': 'hk.search.yahoo.com',
    'zh_cht': 'tw.search.yahoo.com',
    'any': 'search.yahoo.com',
    'en': 'search.yahoo.com',
    'bg': 'search.yahoo.com',
    'cs': 'search.yahoo.com',
@ -67,21 +63,23 @@ lang2domain = {
 }
 """Map language to domain"""
-
+locale_aliases = {
-def _get_language(params):
+    'zh': 'zh_Hans',
-
+    'zh-HK': 'zh_Hans',
-    lang = language_aliases.get(params['language'])
+    'zh-CN': 'zh_Hans',  # dead since 2015 / routed to hk.search.yahoo.com
-    if lang is None:
+    'zh-TW': 'zh_Hant',
-        lang = match_language(params['language'], supported_languages, language_aliases)
+}
    lang = lang.split('-')[0]
    logger.debug("params['language']: %s --> %s", params['language'], lang)
    return lang
 def request(query, params):
    """build request"""
    lang = locale_aliases.get(params['language'], None)
    if not lang:
        lang = params['language'].split('-')[0]
    lang = traits.get_language(lang, traits.all_locale)
    offset = (params['pageno'] - 1) * 7 + 1
    lang = _get_language(params)
    age, btf = time_range_dict.get(params['time_range'], ('', ''))
    args = urlencode(
@ -154,13 +152,37 @@ def response(resp):
    return results
-# get supported languages from their site
+def fetch_traits(engine_traits: EngineTraits):
-def _fetch_supported_languages(resp):
+    """Fetch languages from yahoo"""
-    supported_languages = []
+
    # pylint: disable=import-outside-toplevel
    import babel
    from searx import network
    from searx.locales import language_tag
    engine_traits.all_locale = 'any'
    resp = network.get('https://search.yahoo.com/preferences/languages')
    if not resp.ok:
        print("ERROR: response from peertube is not OK.")
    dom = html.fromstring(resp.text)
    offset = len('lang_')
-    for val in eval_xpath_list(dom, '//div[contains(@class, "lang-item")]/input/@value'):
+    eng2sxng = {'zh_chs': 'zh_Hans', 'zh_cht': 'zh_Hant'}
        supported_languages.append(val[offset:])
-    return supported_languages
+    for val in eval_xpath_list(dom, '//div[contains(@class, "lang-item")]/input/@value'):
        eng_tag = val[offset:]
        try:
            sxng_tag = language_tag(babel.Locale.parse(eng2sxng.get(eng_tag, eng_tag)))
        except babel.UnknownLocaleError:
            print('ERROR: unknown language --> %s' % eng_tag)
            continue
        conflict = engine_traits.languages.get(sxng_tag)
        if conflict:
            if conflict != eng_tag:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
            continue
        engine_traits.languages[sxng_tag] = eng_tag
--- a/searx/locales.py
+++ b/searx/locales.py
@ -4,11 +4,11 @@
 """Initialize :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`.
 """
-from typing import Set
+from typing import Set, Optional, List
 import os
 import pathlib
-from babel import Locale
+import babel
 from babel.support import Translations
 import babel.languages
 import babel.core
@ -134,7 +134,7 @@ def locales_initialize(directory=None):
    flask_babel.get_translations = get_translations
    for tag, descr in ADDITIONAL_TRANSLATIONS.items():
-        locale = Locale.parse(LOCALE_BEST_MATCH[tag], sep='-')
+        locale = babel.Locale.parse(LOCALE_BEST_MATCH[tag], sep='-')
        LOCALE_NAMES[tag] = descr
        if locale.text_direction == 'rtl':
            RTL_LOCALES.add(tag)
@ -142,7 +142,7 @@ def locales_initialize(directory=None):
    for tag in LOCALE_BEST_MATCH:
        descr = LOCALE_NAMES.get(tag)
        if not descr:
-            locale = Locale.parse(tag, sep='-')
+            locale = babel.Locale.parse(tag, sep='-')
            LOCALE_NAMES[tag] = get_locale_descr(locale, tag.replace('-', '_'))
            if locale.text_direction == 'rtl':
                RTL_LOCALES.add(tag)
@ -154,12 +154,77 @@ def locales_initialize(directory=None):
        tag = dirname.replace('_', '-')
        descr = LOCALE_NAMES.get(tag)
        if not descr:
-            locale = Locale.parse(dirname)
+            locale = babel.Locale.parse(dirname)
            LOCALE_NAMES[tag] = get_locale_descr(locale, dirname)
            if locale.text_direction == 'rtl':
                RTL_LOCALES.add(tag)
 def region_tag(locale: babel.Locale) -> str:
    """Returns SearXNG's region tag from the locale (e.g. zh-TW , en-US).

    :param locale: locale to build the tag from (``language`` and ``territory``
      are read).
    :raises ValueError: if the locale has no territory.
    """
    if not locale.territory:
        # interpolate the locale, the message used to contain a literal '%s'
        raise ValueError('%s missed a territory' % (locale,))
    return locale.language + '-' + locale.territory
 def language_tag(locale: babel.Locale) -> str:
    """Build SearXNG's language tag for ``locale``: the language code, extended
    by ``_<script>`` if the locale has a script (e.g. en, zh_Hant).
    """
    if not locale.script:
        return locale.language
    return locale.language + '_' + locale.script
 def get_locale(locale_tag: str) -> Optional[babel.Locale]:
    """Returns a :py:obj:`babel.Locale` object parsed from argument
    ``locale_tag`` (``-`` is used as separator) or ``None`` if the tag can't be
    parsed.

    ``None`` is returned for tags babel has no locale data for as well as for
    tags that are not a valid locale identifier (babel raises ``ValueError``
    for those).
    """
    try:
        return babel.Locale.parse(locale_tag, sep='-')
    except (babel.core.UnknownLocaleError, ValueError):
        return None
 def get_offical_locales(
    territory: str, languages=None, regional: bool = False, de_facto: bool = True
 ) -> Set[babel.Locale]:
    """Returns a set of :py:obj:`babel.Locale` built from the languages of
    :py:obj:`babel.languages.get_official_languages` combined with the
    ``territory``.  Combinations unknown to babel are left out.

    :param territory: The territory (country or region) code.
    :param languages: A list of language codes the languages from
      :py:obj:`babel.languages.get_official_languages` should be in
      (intersection, compared case-insensitive).  If this argument is ``None``,
      all official languages in this territory are used.
    :param regional: If the regional flag is set, then languages which are
      regionally official are also returned.
    :param de_facto: If the de_facto flag is set to `False`, then languages
      which are “de facto” official are not returned.
    """
    official = babel.languages.get_official_languages(territory, regional=regional, de_facto=de_facto)
    if languages:
        wanted = [code.lower() for code in languages]
        official = {code for code in official if code.lower() in wanted}
    locales = set()
    for code in official:
        try:
            locales.add(babel.Locale.parse(code + '_' + territory))
        except babel.UnknownLocaleError:
            pass
    return locales
 def get_engine_locale(searxng_locale, engine_locales, default=None):
    """Return engine's language (aka locale) string that best fits to argument
    ``searxng_locale``.
@ -177,6 +242,10 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
          ...
          'pl-PL'          : 'pl_PL',
          'pt-PT'          : 'pt_PT'
          ..
          'zh'             : 'zh'
          'zh_Hans'        : 'zh'
          'zh_Hant'        : 'zh-classical'
      }
    .. hint::
@ -210,13 +279,13 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
      engine.
    """
-    # pylint: disable=too-many-branches
+    # pylint: disable=too-many-branches, too-many-return-statements
    engine_locale = engine_locales.get(searxng_locale)
    if engine_locale is not None:
-        # There was a 1:1 mapping (e.g. "fr-BE --> fr_BE" or "fr --> fr_FR"), no
+        # There was a 1:1 mapping (e.g. a region "fr-BE --> fr_BE" or a language
-        # need to narrow language nor territory.
+        # "zh --> zh"), no need to narrow language-script nor territory.
        return engine_locale
    try:
@ -227,6 +296,12 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
        except babel.core.UnknownLocaleError:
            return default
    searxng_lang = language_tag(locale)
    engine_locale = engine_locales.get(searxng_lang)
    if engine_locale is not None:
        # There was a 1:1 mapping (e.g. "zh-HK --> zh_Hant" or "zh-CN --> zh_Hans")
        return engine_locale
    # SearXNG's selected locale is not supported by the engine ..
    if locale.territory:
@ -247,10 +322,6 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
    if locale.language:
        searxng_lang = locale.language
        if locale.script:
            searxng_lang += '_' + locale.script
        terr_lang_dict = {}
        for territory, langs in babel.core.get_global("territory_languages").items():
            if not langs.get(searxng_lang, {}).get('official_status'):
@ -303,3 +374,98 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
        engine_locale = default
    return default
 def match_locale(searxng_locale: str, locale_tag_list: List[str], fallback: Optional[str] = None) -> Optional[str]:
    """Pick the tag from ``locale_tag_list`` that fits best to ``searxng_locale``.

    :param str searxng_locale: SearXNG's internal representation of locale (de,
        de-DE, fr-BE, zh, zh-CN, zh-TW ..).
    :param list locale_tag_list: The list of locale tags to select from
    :param str fallback: fallback locale tag (if unset --> ``None``)

    The matching itself is done by :py:obj:`get_engine_locale`, its
    ``engine_locales`` argument is built by :py:obj:`build_engine_locales`.
    The ``fallback`` is returned if ``searxng_locale`` is empty or can't be
    parsed.

    .. hint::

       The *SearXNG locale* string and the members of ``locale_tag_list`` have
       to be known by babel!  The tags ``all`` and ``auto`` and the
       :py:obj:`ADDITIONAL_TRANSLATIONS` are ignored.
    """
    # example: searxng_locale = 'es' / locale_tag_list = ['es-AR', 'es-ES', 'es-MX']
    parsed = get_locale(searxng_locale) if searxng_locale else None
    if parsed is None:
        return fallback
    # normalize to a SearXNG locale that can be passed to get_engine_locale
    normalized = region_tag(parsed) if parsed.territory else language_tag(parsed)
    # drop the tags that do not name a real locale
    usable_tags = [
        tag for tag in locale_tag_list if not (tag in ('all', 'auto') or tag in ADDITIONAL_TRANSLATIONS)
    ]
    # emulate fetch_traits
    return get_engine_locale(normalized, build_engine_locales(usable_tags), default=fallback)
 def build_engine_locales(tag_list: List[str]):
    """From a list of locale tags a dictionary is built that can be passed by
    argument ``engine_locales`` to :py:obj:`get_engine_locale`.  This function
    is mainly used by :py:obj:`match_locale` and is similar to what the
    ``fetch_traits(..)`` function of engines do.

    Tags unknown by babel are skipped (a warning is logged).  If there are
    territory codes in the ``tag_list`` that have a *script code* additional
    keys are added to the returned dictionary.

    .. code:: python

       >>> import locales
       >>> engine_locales = locales.build_engine_locales(['en', 'en-US', 'zh', 'zh-CN', 'zh-TW'])
       >>> engine_locales
       {
           'en': 'en', 'en-US': 'en-US',
           'zh': 'zh', 'zh-CN': 'zh-CN', 'zh_Hans': 'zh-CN',
           'zh-TW': 'zh-TW', 'zh_Hant': 'zh-TW'
       }
       >>> get_engine_locale('zh-Hans', engine_locales)
       'zh-CN'

    This function is a good example to understand the language/region model
    of SearXNG:

      SearXNG only distinguishes between **search languages** and **search
      regions**, by adding the *script-tags*, languages with *script-tags* can
      be assigned to the **regions** that SearXNG supports.

    :param tag_list: locale tags the mapping is built from.
    :return: dictionary mapping SearXNG's region / language tags to the tag of
      ``tag_list`` they were derived from.
    """
    engine_locales = {}
    for tag in tag_list:
        locale = get_locale(tag)
        if locale is None:
            # Logger.warn is a deprecated alias (removed in Python 3.13)
            logger.warning("build_engine_locales: skip locale tag %s / unknown by babel", tag)
            continue
        if locale.territory:
            engine_locales[region_tag(locale)] = tag
            if locale.script:
                # a region with a script also serves the language_script tag
                engine_locales[language_tag(locale)] = tag
        else:
            engine_locales[language_tag(locale)] = tag
    return engine_locales
--- a/searx/preferences.py
+++ b/searx/preferences.py
@ -13,7 +13,7 @@ from typing import Iterable, Dict, List
 import flask
 from searx import settings, autocomplete
-from searx.engines import Engine
+from searx.enginelib import Engine
 from searx.plugins import Plugin
 from searx.locales import LOCALE_NAMES
 from searx.webutils import VALID_LANGUAGE_CODE
--- a/searx/query.py
+++ b/searx/query.py
@ -4,7 +4,7 @@ from abc import abstractmethod, ABC
 import re
 from searx import settings
-from searx.languages import language_codes
+from searx.sxng_locales import sxng_locales
 from searx.engines import categories, engines, engine_shortcuts
 from searx.external_bang import get_bang_definition_and_autocomplete
 from searx.search import EngineRef
@ -84,7 +84,7 @@ class LanguageParser(QueryPartParser):
        found = False
        # check if any language-code is equal with
        # declared language-codes
-        for lc in language_codes:
+        for lc in sxng_locales:
            lang_id, lang_name, country, english_name, _flag = map(str.lower, lc)
            # if correct language-code is found
@ -125,7 +125,7 @@ class LanguageParser(QueryPartParser):
                    self.raw_text_query.autocomplete_list.append(lang)
            return
-        for lc in language_codes:
+        for lc in sxng_locales:
            if lc[0] not in settings['search']['languages']:
                continue
            lang_id, lang_name, country, english_name, _flag = map(str.lower, lc)
--- a/searx/search/processors/init.py
+++ b/searx/search/processors/init.py
@ -30,7 +30,10 @@ from .abstract import EngineProcessor
 logger = logger.getChild('search.processors')
 PROCESSORS: Dict[str, EngineProcessor] = {}
-"""Cache request processores, stored by *engine-name* (:py:func:`initialize`)"""
+"""Cache request processores, stored by *engine-name* (:py:func:`initialize`)
 :meta hide-value:
 """
 def get_processor_class(engine_type):
--- a/searx/search/processors/abstract.py
+++ b/searx/search/processors/abstract.py
@ -138,7 +138,8 @@ class EngineProcessor(ABC):
        return False
    def get_params(self, search_query, engine_category):
-        """Returns a set of *request params* or ``None`` if request is not supported.
+        """Returns a set of (see :ref:`request params <engine request arguments>`) or
        ``None`` if request is not supported.
        Not supported conditions (``None`` is returned):
@ -159,11 +160,20 @@ class EngineProcessor(ABC):
        params['safesearch'] = search_query.safesearch
        params['time_range'] = search_query.time_range
        params['engine_data'] = search_query.engine_data.get(self.engine_name, {})
        params['searxng_locale'] = search_query.lang
        # deprecated / vintage --> use params['searxng_locale']
        #
        # Conditions related to engine's traits are implemented in engine.traits
        # module. Don't do 'locale' decisions here in the abstract layer of the
        # search processor, just pass the value from user's choice unchanged to
        # the engine request.
        if hasattr(self.engine, 'language') and self.engine.language:
            params['language'] = self.engine.language
        else:
            params['language'] = search_query.lang
        return params
    @abstractmethod
--- a/searx/search/processors/online.py
+++ b/searx/search/processors/online.py
@ -51,6 +51,9 @@ class OnlineProcessor(EngineProcessor):
        super().initialize()
    def get_params(self, search_query, engine_category):
        """Returns a set of :ref:`request params <engine request online>` or ``None``
        if request is not supported.
        """
        params = super().get_params(search_query, engine_category)
        if params is None:
            return None
@ -184,11 +187,6 @@ class OnlineProcessor(EngineProcessor):
            self.handle_exception(result_container, e, suspend=True)
            self.logger.exception('CAPTCHA')
        except SearxEngineTooManyRequestsException as e:
            if "google" in self.engine_name:
                self.logger.warn(
                    "Set to 'true' the use_mobile_ui parameter in the 'engines:'"
                    " section of your settings.yml file if google is blocked for you."
                )
            self.handle_exception(result_container, e, suspend=True)
            self.logger.exception('Too many requests')
        except SearxEngineAccessDeniedException as e:
@ -223,7 +221,7 @@ class OnlineProcessor(EngineProcessor):
                'test': ['unique_results'],
            }
-        if getattr(self.engine, 'supported_languages', []):
+        if getattr(self.engine, 'traits', False):
            tests['lang_fr'] = {
                'matrix': {'query': 'paris', 'lang': 'fr'},
                'result_container': ['not_empty', ('has_language', 'fr')],
--- a/searx/search/processors/online_currency.py
+++ b/searx/search/processors/online_currency.py
@ -38,8 +38,8 @@ class OnlineCurrencyProcessor(OnlineProcessor):
    engine_type = 'online_currency'
    def get_params(self, search_query, engine_category):
-        """Returns a set of *request params* or ``None`` if search query does not match
+        """Returns a set of :ref:`request params <engine request online_currency>`
-        to :py:obj:`parser_re`."""
+        or ``None`` if search query does not match to :py:obj:`parser_re`."""
        params = super().get_params(search_query, engine_category)
        if params is None:
--- a/searx/search/processors/online_dictionary.py
+++ b/searx/search/processors/online_dictionary.py
@ -18,8 +18,9 @@ class OnlineDictionaryProcessor(OnlineProcessor):
    engine_type = 'online_dictionary'
    def get_params(self, search_query, engine_category):
-        """Returns a set of *request params* or ``None`` if search query does not match
+        """Returns a set of :ref:`request params <engine request online_dictionary>` or
-        to :py:obj:`parser_re`."""
+        ``None`` if search query does not match to :py:obj:`parser_re`.
        """
        params = super().get_params(search_query, engine_category)
        if params is None:
            return None
--- a/searx/search/processors/online_url_search.py
+++ b/searx/search/processors/online_url_search.py
@ -20,9 +20,10 @@ class OnlineUrlSearchProcessor(OnlineProcessor):
    engine_type = 'online_url_search'
    def get_params(self, search_query, engine_category):
-        """Returns a set of *request params* or ``None`` if search query does not match
+        """Returns a set of :ref:`request params <engine request online>` or ``None`` if
-        to at least one of :py:obj:`re_search_urls`.
+        search query does not match to :py:obj:`re_search_urls`.
        """
        params = super().get_params(search_query, engine_category)
        if params is None:
            return None
--- a/searx/settings.yml
+++ b/searx/settings.yml
@ -731,22 +731,9 @@ engines:
  - name: google
    engine: google
    shortcut: go
    # see https://docs.searxng.org/src/searx.engines.google.html#module-searx.engines.google
    use_mobile_ui: false
    # additional_tests:
    #   android: *test_android
  # - name: google italian
  #   engine: google
  #   shortcut: goit
  #   use_mobile_ui: false
  #   language: it
  # - name: google mobile ui
  #   engine: google
  #   shortcut: gomui
  #   use_mobile_ui: true
  - name: google images
    engine: google_images
    shortcut: goi
@ -1758,9 +1745,8 @@ engines:
    engine: peertube
    shortcut: ptb
    paging: true
-    # https://instances.joinpeertube.org/instances
+    # alternatives see: https://instances.joinpeertube.org/instances
-    base_url: https://peertube.biz/
+    # base_url: https://tube.4aem.com
    # base_url: https://tube.tardis.world/
    categories: videos
    disabled: true
    timeout: 6.0
--- a/searx/settings_defaults.py
+++ b/searx/settings_defaults.py
@ -12,13 +12,13 @@ import logging
 from base64 import b64decode
 from os.path import dirname, abspath
-from searx.languages import language_codes as languages
+from .sxng_locales import sxng_locales
 searx_dir = abspath(dirname(__file__))
 logger = logging.getLogger('searx')
 OUTPUT_FORMATS = ['html', 'csv', 'json', 'rss']
-LANGUAGE_CODES = ['all', 'auto'] + list(l[0] for l in languages)
+SXNG_LOCALE_TAGS = ['all', 'auto'] + list(l[0] for l in sxng_locales)
 SIMPLE_STYLE = ('auto', 'light', 'dark')
 CATEGORIES_AS_TABS = {
    'general': {},
@ -156,8 +156,8 @@ SCHEMA = {
        'safe_search': SettingsValue((0, 1, 2), 0),
        'autocomplete': SettingsValue(str, ''),
        'autocomplete_min': SettingsValue(int, 4),
-        'default_lang': SettingsValue(tuple(LANGUAGE_CODES + ['']), ''),
+        'default_lang': SettingsValue(tuple(SXNG_LOCALE_TAGS + ['']), ''),
-        'languages': SettingSublistValue(LANGUAGE_CODES, LANGUAGE_CODES),
+        'languages': SettingSublistValue(SXNG_LOCALE_TAGS, SXNG_LOCALE_TAGS),
        'ban_time_on_fail': SettingsValue(numbers.Real, 5),
        'max_ban_time_on_fail': SettingsValue(numbers.Real, 120),
        'suspended_times': {
--- a/searx/sxng_locales.py
+++ b/searx/sxng_locales.py
@ -1,73 +1,120 @@
 # -*- coding: utf-8 -*-
-# list of language codes
+'''List of SearXNG's locale codes.
-# this file is generated automatically by utils/fetch_languages.py
+
-language_codes = (
+This file is generated automatically by::
-    ('af-ZA', 'Afrikaans', 'Suid-Afrika', 'Afrikaans', '\U0001f1ff\U0001f1e6'),
+
-    ('ar-EG', 'العربية', 'مصر', 'Arabic', '\U0001f1ea\U0001f1ec'),
+   ./manage pyenv.cmd searxng_extra/update/update_engine_traits.py
-    ('be-BY', 'Беларуская', 'Беларусь', 'Belarusian', '\U0001f1e7\U0001f1fe'),
+'''
 sxng_locales = (
    ('ar', 'العربية', '', 'Arabic', '\U0001f310'),
    ('bg', 'Български', '', 'Bulgarian', '\U0001f310'),
    ('bg-BG', 'Български', 'България', 'Bulgarian', '\U0001f1e7\U0001f1ec'),
    ('ca', 'Català', '', 'Catalan', '\U0001f310'),
    ('ca-ES', 'Català', 'Espanya', 'Catalan', '\U0001f1ea\U0001f1f8'),
    ('cs', 'Čeština', '', 'Czech', '\U0001f310'),
    ('cs-CZ', 'Čeština', 'Česko', 'Czech', '\U0001f1e8\U0001f1ff'),
    ('da', 'Dansk', '', 'Danish', '\U0001f310'),
    ('da-DK', 'Dansk', 'Danmark', 'Danish', '\U0001f1e9\U0001f1f0'),
    ('de', 'Deutsch', '', 'German', '\U0001f310'),
    ('de-AT', 'Deutsch', 'Österreich', 'German', '\U0001f1e6\U0001f1f9'),
    ('de-CH', 'Deutsch', 'Schweiz', 'German', '\U0001f1e8\U0001f1ed'),
    ('de-DE', 'Deutsch', 'Deutschland', 'German', '\U0001f1e9\U0001f1ea'),
    ('el', 'Ελληνικά', '', 'Greek', '\U0001f310'),
    ('el-GR', 'Ελληνικά', 'Ελλάδα', 'Greek', '\U0001f1ec\U0001f1f7'),
    ('en', 'English', '', 'English', '\U0001f310'),
    ('en-AU', 'English', 'Australia', 'English', '\U0001f1e6\U0001f1fa'),
    ('en-CA', 'English', 'Canada', 'English', '\U0001f1e8\U0001f1e6'),
    ('en-GB', 'English', 'United Kingdom', 'English', '\U0001f1ec\U0001f1e7'),
    ('en-IE', 'English', 'Ireland', 'English', '\U0001f1ee\U0001f1ea'),
    ('en-IN', 'English', 'India', 'English', '\U0001f1ee\U0001f1f3'),
    ('en-MY', 'English', 'Malaysia', 'English', '\U0001f1f2\U0001f1fe'),
    ('en-NZ', 'English', 'New Zealand', 'English', '\U0001f1f3\U0001f1ff'),
    ('en-PH', 'English', 'Philippines', 'English', '\U0001f1f5\U0001f1ed'),
    ('en-US', 'English', 'United States', 'English', '\U0001f1fa\U0001f1f8'),
    ('en-ZA', 'English', 'South Africa', 'English', '\U0001f1ff\U0001f1e6'),
    ('es', 'Español', '', 'Spanish', '\U0001f310'),
    ('es-AR', 'Español', 'Argentina', 'Spanish', '\U0001f1e6\U0001f1f7'),
    ('es-CL', 'Español', 'Chile', 'Spanish', '\U0001f1e8\U0001f1f1'),
    ('es-ES', 'Español', 'España', 'Spanish', '\U0001f1ea\U0001f1f8'),
    ('es-MX', 'Español', 'México', 'Spanish', '\U0001f1f2\U0001f1fd'),
    ('es-US', 'Español', 'Estados Unidos', 'Spanish', '\U0001f1fa\U0001f1f8'),
    ('et', 'Eesti', '', 'Estonian', '\U0001f310'),
    ('et-EE', 'Eesti', 'Eesti', 'Estonian', '\U0001f1ea\U0001f1ea'),
-    ('fa-IR', 'فارسی', 'ایران', 'Persian', '\U0001f1ee\U0001f1f7'),
+    ('fi', 'Suomi', '', 'Finnish', '\U0001f310'),
    ('fi-FI', 'Suomi', 'Suomi', 'Finnish', '\U0001f1eb\U0001f1ee'),
    ('fil-PH', 'Filipino', 'Pilipinas', 'Filipino', '\U0001f1f5\U0001f1ed'),
    ('fr', 'Français', '', 'French', '\U0001f310'),
    ('fr-BE', 'Français', 'Belgique', 'French', '\U0001f1e7\U0001f1ea'),
    ('fr-CA', 'Français', 'Canada', 'French', '\U0001f1e8\U0001f1e6'),
    ('fr-CH', 'Français', 'Suisse', 'French', '\U0001f1e8\U0001f1ed'),
    ('fr-FR', 'Français', 'France', 'French', '\U0001f1eb\U0001f1f7'),
-    ('he-IL', 'עברית', 'ישראל', 'Hebrew', '\U0001f1ee\U0001f1f1'),
+    ('he', 'עברית', '', 'Hebrew', '\U0001f1ee\U0001f1f7'),
-    ('hi-IN', 'हिन्दी', 'भारत', 'Hindi', '\U0001f1ee\U0001f1f3'),
+    ('hi', 'हिन्दी', '', 'Hindi', '\U0001f310'),
-    ('hr-HR', 'Hrvatski', 'Hrvatska', 'Croatian', '\U0001f1ed\U0001f1f7'),
+    ('hr', 'Hrvatski', '', 'Croatian', '\U0001f310'),
    ('hu', 'Magyar', '', 'Hungarian', '\U0001f310'),
    ('hu-HU', 'Magyar', 'Magyarország', 'Hungarian', '\U0001f1ed\U0001f1fa'),
    ('id', 'Indonesia', '', 'Indonesian', '\U0001f310'),
    ('id-ID', 'Indonesia', 'Indonesia', 'Indonesian', '\U0001f1ee\U0001f1e9'),
-    ('is-IS', 'Íslenska', 'Ísland', 'Icelandic', '\U0001f1ee\U0001f1f8'),
+    ('is', 'Íslenska', '', 'Icelandic', '\U0001f310'),
    ('it', 'Italiano', '', 'Italian', '\U0001f310'),
    ('it-CH', 'Italiano', 'Svizzera', 'Italian', '\U0001f1e8\U0001f1ed'),
    ('it-IT', 'Italiano', 'Italia', 'Italian', '\U0001f1ee\U0001f1f9'),
    ('ja', '日本語', '', 'Japanese', '\U0001f310'),
    ('ja-JP', '日本語', '日本', 'Japanese', '\U0001f1ef\U0001f1f5'),
    ('ko', '한국어', '', 'Korean', '\U0001f310'),
    ('ko-KR', '한국어', '대한민국', 'Korean', '\U0001f1f0\U0001f1f7'),
-    ('lt-LT', 'Lietuvių', 'Lietuva', 'Lithuanian', '\U0001f1f1\U0001f1f9'),
+    ('lt', 'Lietuvių', '', 'Lithuanian', '\U0001f310'),
-    ('lv-LV', 'Latviešu', 'Latvija', 'Latvian', '\U0001f1f1\U0001f1fb'),
+    ('lv', 'Latviešu', '', 'Latvian', '\U0001f310'),
    ('nb', 'Norsk Bokmål', '', 'Norwegian Bokmål', '\U0001f310'),
    ('nb-NO', 'Norsk Bokmål', 'Norge', 'Norwegian Bokmål', '\U0001f1f3\U0001f1f4'),
    ('nl', 'Nederlands', '', 'Dutch', '\U0001f310'),
    ('nl-BE', 'Nederlands', 'België', 'Dutch', '\U0001f1e7\U0001f1ea'),
    ('nl-NL', 'Nederlands', 'Nederland', 'Dutch', '\U0001f1f3\U0001f1f1'),
-    ('no-NO', 'Norsk', '', 'Norwegian (Bokmål)', '\U0001f1f3\U0001f1f4'),
+    ('pl', 'Polski', '', 'Polish', '\U0001f310'),
    ('pl-PL', 'Polski', 'Polska', 'Polish', '\U0001f1f5\U0001f1f1'),
    ('pt', 'Português', '', 'Portuguese', '\U0001f310'),
    ('pt-BR', 'Português', 'Brasil', 'Portuguese', '\U0001f1e7\U0001f1f7'),
    ('pt-PT', 'Português', 'Portugal', 'Portuguese', '\U0001f1f5\U0001f1f9'),
    ('ro', 'Română', '', 'Romanian', '\U0001f310'),
    ('ro-RO', 'Română', 'România', 'Romanian', '\U0001f1f7\U0001f1f4'),
    ('ru', 'Русский', '', 'Russian', '\U0001f310'),
    ('ru-RU', 'Русский', 'Россия', 'Russian', '\U0001f1f7\U0001f1fa'),
-    ('sk-SK', 'Slovenčina', 'Slovensko', 'Slovak', '\U0001f1f8\U0001f1f0'),
+    ('sk', 'Slovenčina', '', 'Slovak', '\U0001f310'),
-    ('sl-SI', 'Slovenščina', 'Slovenija', 'Slovenian', '\U0001f1f8\U0001f1ee'),
+    ('sl', 'Slovenščina', '', 'Slovenian', '\U0001f310'),
-    ('sr-RS', 'Српски', 'Србија', 'Serbian', '\U0001f1f7\U0001f1f8'),
+    ('sr', 'Српски', '', 'Serbian', '\U0001f310'),
    ('sv', 'Svenska', '', 'Swedish', '\U0001f310'),
    ('sv-SE', 'Svenska', 'Sverige', 'Swedish', '\U0001f1f8\U0001f1ea'),
-    ('sw-TZ', 'Kiswahili', 'Tanzania', 'Swahili', '\U0001f1f9\U0001f1ff'),
+    ('th', 'ไทย', '', 'Thai', '\U0001f310'),
    ('th-TH', 'ไทย', 'ไทย', 'Thai', '\U0001f1f9\U0001f1ed'),
    ('tr', 'Türkçe', '', 'Turkish', '\U0001f310'),
    ('tr-TR', 'Türkçe', 'Türkiye', 'Turkish', '\U0001f1f9\U0001f1f7'),
-    ('uk-UA', 'Українська', 'Україна', 'Ukrainian', '\U0001f1fa\U0001f1e6'),
+    ('uk', 'Українська', '', 'Ukrainian', '\U0001f310'),
-    ('vi-VN', 'Tiếng Việt', 'Việt Nam', 'Vietnamese', '\U0001f1fb\U0001f1f3'),
+    ('vi', 'Tiếng Việt', '', 'Vietnamese', '\U0001f310'),
    ('zh', '中文', '', 'Chinese', '\U0001f310'),
    ('zh-CN', '中文', '中国', 'Chinese', '\U0001f1e8\U0001f1f3'),
-    ('zh-HK', '中文', '中國香港', 'Chinese', '\U0001f1ed\U0001f1f0'),
+    ('zh-HK', '中文', '中國香港特別行政區', 'Chinese', '\U0001f1ed\U0001f1f0'),
    ('zh-TW', '中文', '台灣', 'Chinese', '\U0001f1f9\U0001f1fc'),
 )
 '''
 A list of five-digit tuples:
 0. SearXNG's internal locale tag (a language or region tag)
 1. Name of the language (:py:obj:`babel.core.Locale.get_language_name`)
 2. For region tags the name of the region (:py:obj:`babel.core.Locale.get_territory_name`).
   Empty string for language tags.
 3. English language name (from :py:obj:`babel.core.Locale.english_name`)
 4. Unicode flag (emoji) that fits to SearXNG's internal region tag. Languages
   are represented by a globe (🌐)
 .. code:: python
   ('en',    'English', '',              'English', '🌐'),
   ('en-CA', 'English', 'Canada',        'English', '🇨🇦'),
   ('en-US', 'English', 'United States', 'English', '🇺🇸'),
   ..
   ('fr',    'Français', '',             'French',  '🌐'),
   ('fr-BE', 'Français', 'Belgique',     'French',  '🇧🇪'),
   ('fr-CA', 'Français', 'Canada',       'French',  '🇨🇦'),
 :meta hide-value:
 '''
--- a/searx/templates/simple/filters/languages.html
+++ b/searx/templates/simple/filters/languages.html
@ -1,12 +1,12 @@
 <select class="language" id="language" name="language" aria-label="{{ _('Search language') }}">{{- '' -}}
-	<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
+	<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }} [all]</option>
 	<option value="auto" {% if current_language == 'auto' %}selected="selected"{% endif %}>
 		{{- _('Auto-detect') -}}
 		{%- if current_language == 'auto' %} ({{ search_language }}){%- endif -%}
 	</option>
-	{%- for lang_id,lang_name,country_name,english_name,flag in language_codes | sort(attribute=1) -%}
+	{%- for sxng_tag,lang_name,country_name,english_name,flag in sxng_locales | sort(attribute=1) -%}
-	<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>
+	<option value="{{ sxng_tag }}" {% if sxng_tag == current_language %}selected="selected"{% endif %}>
-		{% if flag %}{{ flag }} {% endif%} {{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}
+		{% if flag %}{{ flag }} {% endif%} {{- lang_name }} {% if country_name %} - {{ country_name }} {% endif %} [{{sxng_tag}}]
 	</option>
 	{%- endfor -%}
 </select>
--- a/searx/templates/simple/preferences.html
+++ b/searx/templates/simple/preferences.html
@ -115,10 +115,10 @@
      <legend id="pref_language">{{ _('Search language') }}</legend>
      <p class="value">{{- '' -}}
        <select name='language' aria-labelledby="pref_language" aria-describedby="desc_language">{{- '' -}}
-          <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
+          <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }} [all]</option>
-          <option value="auto" {% if current_language == 'auto' %}selected="selected"{% endif %}>{{ _('Auto-detect') }}</option>
+          <option value="auto" {% if current_language == 'auto' %}selected="selected"{% endif %}>{{ _('Auto-detect') }} [auto]</option>
-          {%- for lang_id,lang_name,country_name,english_name,flag in language_codes | sort(attribute=1) -%}
+          {%- for sxng_tag,lang_name,country_name,english_name,flag in sxng_locales | sort(attribute=1) -%}
-          <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{% if flag %}{{ flag }} {% endif%} {{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}</option>
+          <option value="{{ sxng_tag }}" {% if sxng_tag == current_language %}selected="selected"{% endif %}>{% if flag %}{{ flag }} {% endif%} {{- lang_name }} {% if country_name %} - {{ country_name }} {% endif %} [{{sxng_tag}}]</option>
          {%- endfor -%}
        </select>{{- '' -}}
      </p>
--- a/searx/utils.py
+++ b/searx/utils.py
@ -18,13 +18,11 @@ from urllib.parse import urljoin, urlparse
 from lxml import html
 from lxml.etree import ElementBase, XPath, XPathError, XPathSyntaxError, _ElementStringResult, _ElementUnicodeResult
 from babel.core import get_global
 from searx import settings
 from searx.data import USER_AGENTS, data_dir
 from searx.version import VERSION_TAG
-from searx.languages import language_codes
+from searx.sxng_locales import sxng_locales
 from searx.exceptions import SearxXPathSyntaxException, SearxEngineXPathException
 from searx import logger
@ -53,8 +51,8 @@ _LANG_TO_LC_CACHE: Dict[str, Dict[str, str]] = {}
 _FASTTEXT_MODEL: Optional["fasttext.FastText._FastText"] = None
 """fasttext model to predict laguage of a search term"""
-SEARCH_LANGUAGE_CODES = frozenset([searxng_locale[0].split('-')[0] for searxng_locale in language_codes])
+SEARCH_LANGUAGE_CODES = frozenset([searxng_locale[0].split('-')[0] for searxng_locale in sxng_locales])
-"""Languages supported by most searxng engines (:py:obj:`searx.languages.language_codes`)."""
+"""Languages supported by most searxng engines (:py:obj:`searx.sxng_locales.sxng_locales`)."""
 class _NotSetClass:  # pylint: disable=too-few-public-methods
@ -355,102 +353,16 @@ def is_valid_lang(lang) -> Optional[Tuple[bool, str, str]]:
    is_abbr = len(lang) == 2
    lang = lang.lower()
    if is_abbr:
-        for l in language_codes:
+        for l in sxng_locales:
            if l[0][:2] == lang:
                return (True, l[0][:2], l[3].lower())
        return None
-    for l in language_codes:
+    for l in sxng_locales:
        if l[1].lower() == lang or l[3].lower() == lang:
            return (True, l[0][:2], l[3].lower())
    return None
 def _get_lang_to_lc_dict(lang_list: List[str]) -> Dict[str, str]:
    """Map each bare language code to the first tag of ``lang_list`` that starts with it.

    The language code is the part of a tag in front of the first ``-``.  The
    resulting dict is memoized in ``_LANG_TO_LC_CACHE`` under ``str(lang_list)``.
    """
    cache_key = str(lang_list)
    mapping = _LANG_TO_LC_CACHE.get(cache_key)
    if mapping is not None:
        return mapping
    mapping = {}
    for tag in lang_list:
        language = tag.split('-')[0]
        # first occurrence wins, later tags of the same language are ignored
        if language not in mapping:
            mapping[language] = tag
    _LANG_TO_LC_CACHE[cache_key] = mapping
    return mapping
 # babel's get_global() exposes all sorts of miscellaneous locale and territory
 # related data, see get_global in:
 # https://github.com/python-babel/babel/blob/master/babel/core.py
 def _get_from_babel(lang_code: str, key):
    """Look up ``lang_code`` in babel's global data set named ``key``.

    Hyphens in ``lang_code`` are turned into underscores for the lookup; a
    string result is returned with its underscores turned back into hyphens.
    Any other result (including ``None``) is returned unchanged.
    """
    babel_data = get_global(key)
    match = babel_data.get(lang_code.replace('-', '_'))
    if not isinstance(match, str):
        # for some keys, such as territory_aliases, match may be a list
        return match
    return match.replace('_', '-')
 def _match_language(lang_code: str, lang_list=[], custom_aliases={}) -> Optional[str]:  # pylint: disable=W0102
    """Auxiliary function: find the entry of ``lang_list`` that matches ``lang_code``.

    Returns the matching entry or ``None``.  ``lang_list`` and
    ``custom_aliases`` are only read, never modified.
    """
    # a custom alias, if there is one, replaces the language code
    lang_code = custom_aliases.get(lang_code, lang_code)
    if lang_code in lang_list:
        return lang_code
    # try to get the most likely country for this language
    subtags = _get_from_babel(lang_code, 'likely_subtags')
    if subtags:
        if subtags in lang_list:
            return subtags
        # keep only the first and the last subtag
        parts = subtags.split('-')
        new_code = '-'.join((parts[0], parts[-1]))
        new_code = custom_aliases.get(new_code, new_code)
        if new_code in lang_list:
            return new_code
    # last resort: any supported tag of this language
    return _get_lang_to_lc_dict(lang_list).get(lang_code)
 def match_language(  # pylint: disable=W0102
    locale_code, lang_list=[], custom_aliases={}, fallback: Optional[str] = 'en-US'
 ) -> Optional[str]:
    """Return the entry of ``lang_list`` that best matches ``locale_code``.

    Several candidates derived from ``locale_code`` are tried in order; the
    first one :py:obj:`_match_language` can resolve wins.  If none matches,
    ``fallback`` is returned.
    """

    def lookup(code):
        return _match_language(code, lang_list, custom_aliases)

    # 1. the locale code exactly as given
    found = lookup(locale_code)
    if found:
        return found
    parts = locale_code.split('-')
    lang_code = parts[0]
    last_part = parts[-1]
    # 2. the locale code has a script: retry with language and last subtag only
    if len(parts) > 2:
        found = lookup(lang_code + '-' + last_part)
        if found:
            return found
    # 3. an equivalent country code
    if len(parts) > 1:
        country_alias = _get_from_babel(last_part, 'territory_aliases')
        if country_alias:
            found = lookup(lang_code + '-' + country_alias[0])
            if found:
                return found
    # 4. an equivalent language code
    alias = _get_from_babel(lang_code, 'language_aliases')
    if alias:
        found = lookup(alias)
        if found:
            return found
    # 5. the language alone, without the country
    if lang_code != locale_code:
        found = lookup(lang_code)
    return found or fallback
 def load_module(filename: str, module_dir: str) -> types.ModuleType:
    modname = splitext(filename)[0]
    modpath = join(module_dir, filename)
--- a/searx/webapp.py
+++ b/searx/webapp.py
@ -89,7 +89,6 @@ from searx.utils import (
    html_to_text,
    gen_useragent,
    dict_subset,
    match_language,
 )
 from searx.version import VERSION_STRING, GIT_URL, GIT_BRANCH
 from searx.query import RawTextQuery
@ -117,12 +116,13 @@ from searx.locales import (
    RTL_LOCALES,
    localeselector,
    locales_initialize,
    match_locale,
 )
 # renaming names from searx imports ...
 from searx.autocomplete import search_autocomplete, backends as autocomplete_backends
 from searx.languages import language_codes as languages
 from searx.redisdb import initialize as redis_initialize
 from searx.sxng_locales import sxng_locales
 from searx.search import SearchWithPlugins, initialize as search_initialize
 from searx.network import stream as http_stream, set_context_network_name
 from searx.search.checker import get_result as checker_get_result
@ -227,7 +227,7 @@ def _get_browser_language(req, lang_list):
        if '-' in lang:
            lang_parts = lang.split('-')
            lang = "{}-{}".format(lang_parts[0], lang_parts[-1].upper())
-        locale = match_language(lang, lang_list, fallback=None)
+        locale = match_locale(lang, lang_list, fallback=None)
        if locale is not None:
            return locale
    return 'en'
@ -407,7 +407,7 @@ def get_client_settings():
 def render(template_name: str, **kwargs):
-
+    # pylint: disable=too-many-statements
    kwargs['client_settings'] = str(
        base64.b64encode(
            bytes(
@ -438,17 +438,20 @@ def render(template_name: str, **kwargs):
    kwargs['OTHER_CATEGORY'] = OTHER_CATEGORY
    # i18n
-    kwargs['language_codes'] = [l for l in languages if l[0] in settings['search']['languages']]
+    kwargs['sxng_locales'] = [l for l in sxng_locales if l[0] in settings['search']['languages']]
    locale = request.preferences.get_value('locale')
    kwargs['locale_rfc5646'] = _get_locale_rfc5646(locale)
    if locale in RTL_LOCALES and 'rtl' not in kwargs:
        kwargs['rtl'] = True
    if 'current_language' not in kwargs:
-        kwargs['current_language'] = match_language(
+        _locale = request.preferences.get_value('language')
-            request.preferences.get_value('language'), settings['search']['languages']
+        if _locale in ('auto', 'all'):
-        )
+            kwargs['current_language'] = _locale
        else:
            kwargs['current_language'] = match_locale(_locale, settings['search']['languages'])
    # values from settings
    kwargs['search_formats'] = [x for x in settings['search']['formats'] if x != 'html']
@ -810,6 +813,13 @@ def search():
        )
    )
    if search_query.lang in ('auto', 'all'):
        current_language = search_query.lang
    else:
        current_language = match_locale(
            search_query.lang, settings['search']['languages'], fallback=request.preferences.get_value("language")
        )
    # search_query.lang contains the user choice (all, auto, en, ...)
    # when the user choice is "auto", search.search_query.lang contains the detected language
    # otherwise it is equals to search_query.lang
@ -832,12 +842,8 @@ def search():
            result_container.unresponsive_engines
        ),
        current_locale = request.preferences.get_value("locale"),
-        current_language = match_language(
+        current_language = current_language,
-            search_query.lang,
+        search_language = match_locale(
            settings['search']['languages'],
            fallback=request.preferences.get_value("language")
        ),
        search_language = match_language(
            search.search_query.lang,
            settings['search']['languages'],
            fallback=request.preferences.get_value("language")
@ -907,16 +913,11 @@ def autocompleter():
    # and there is a query part
    if len(raw_text_query.autocomplete_list) == 0 and len(sug_prefix) > 0:
-        # get language from cookie
+        # get SearXNG's locale and autocomplete backend from cookie
-        language = request.preferences.get_value('language')
+        sxng_locale = request.preferences.get_value('language')
-        if not language or language == 'all':
+        backend_name = request.preferences.get_value('autocomplete')
            language = 'en'
        else:
            language = language.split('-')[0]
-        # run autocompletion
+        for result in search_autocomplete(backend_name, sug_prefix, sxng_locale):
        raw_results = search_autocomplete(request.preferences.get_value('autocomplete'), sug_prefix, language)
        for result in raw_results:
            # attention: this loop will change raw_text_query object and this is
            # the reason why the sug_prefix was stored before (see above)
            if result != sug_prefix:
@ -1001,7 +1002,9 @@ def preferences():
            'rate80': rate80,
            'rate95': rate95,
            'warn_timeout': e.timeout > settings['outgoing']['request_timeout'],
-            'supports_selected_language': _is_selected_language_supported(e, request.preferences),
+            'supports_selected_language': e.traits.is_locale_supported(
                str(request.preferences.get_value('language') or 'all')
            ),
            'result_count': result_count,
        }
    # end of stats
@ -1052,7 +1055,9 @@ def preferences():
    # supports
    supports = {}
    for _, e in filtered_engines.items():
-        supports_selected_language = _is_selected_language_supported(e, request.preferences)
+        supports_selected_language = e.traits.is_locale_supported(
            str(request.preferences.get_value('language') or 'all')
        )
        safesearch = e.safesearch
        time_range_support = e.time_range_support
        for checker_test_name in checker_results.get(e.name, {}).get('errors', {}):
@ -1099,16 +1104,6 @@ def preferences():
    )
 def _is_selected_language_supported(engine, preferences: Preferences):  # pylint: disable=redefined-outer-name
    """Tell whether ``engine`` supports the language selected in ``preferences``.

    The selection ``'all'`` is always supported.  Otherwise the selected
    language is matched against the engine's ``supported_languages`` and
    ``language_aliases`` attributes (empty when the engine lacks them).
    """
    language = preferences.get_value('language')
    if language == 'all':
        return True
    supported = getattr(engine, 'supported_languages', [])
    aliases = getattr(engine, 'language_aliases', {})
    return bool(match_language(language, supported, aliases, None))
@app.route('/image_proxy', methods=['GET'])
 def image_proxy():
    # pylint: disable=too-many-return-statements, too-many-branches
@ -1327,10 +1322,7 @@ def config():
        if not request.preferences.validate_token(engine):
            continue
-        supported_languages = engine.supported_languages
+        _languages = engine.traits.languages.keys()
        if isinstance(engine.supported_languages, dict):
            supported_languages = list(engine.supported_languages.keys())
        _engines.append(
            {
                'name': name,
@ -1339,7 +1331,8 @@ def config():
                'enabled': not engine.disabled,
                'paging': engine.paging,
                'language_support': engine.language_support,
-                'supported_languages': supported_languages,
+                'languages': list(_languages),
                'regions': list(engine.traits.regions.keys()),
                'safesearch': engine.safesearch,
                'time_range_support': engine.time_range_support,
                'timeout': engine.timeout,
--- a/searx/webutils.py
+++ b/searx/webutils.py
@ -1,4 +1,6 @@
 # -*- coding: utf-8 -*-
 from __future__ import annotations
 import os
 import pathlib
 import csv
@ -8,7 +10,7 @@ import re
 import inspect
 import itertools
 from datetime import datetime, timedelta
-from typing import Iterable, List, Tuple, Dict
+from typing import Iterable, List, Tuple, Dict, TYPE_CHECKING
 from io import StringIO
 from codecs import getincrementalencoder
@ -16,7 +18,10 @@ from codecs import getincrementalencoder
 from flask_babel import gettext, format_date
 from searx import logger, settings
-from searx.engines import Engine, OTHER_CATEGORY
+from searx.engines import OTHER_CATEGORY
 if TYPE_CHECKING:
    from searx.enginelib import Engine
 VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')
--- a/searxng_extra/update/update_engine_descriptions.py
+++ b/searxng_extra/update/update_engine_descriptions.py
@ -18,8 +18,8 @@ from os.path import join
 from lxml.html import fromstring
 from searx.engines import wikidata, set_loggers
-from searx.utils import extract_text, match_language
+from searx.utils import extract_text
-from searx.locales import LOCALE_NAMES, locales_initialize
+from searx.locales import LOCALE_NAMES, locales_initialize, match_locale
 from searx import searx_dir
 from searx.utils import gen_useragent, detect_language
 import searx.search
@ -225,9 +225,9 @@ def fetch_website_description(engine_name, website):
            fetched_lang, desc = get_website_description(website, lang, WIKIPEDIA_LANGUAGES[lang])
            if fetched_lang is None or desc is None:
                continue
-            matched_lang = match_language(fetched_lang, LANGUAGES, fallback=None)
+            matched_lang = match_locale(fetched_lang, LANGUAGES, fallback=None)
            if matched_lang is None:
-                fetched_wikipedia_lang = match_language(fetched_lang, WIKIPEDIA_LANGUAGES.values(), fallback=None)
+                fetched_wikipedia_lang = match_locale(fetched_lang, WIKIPEDIA_LANGUAGES.values(), fallback=None)
                matched_lang = wikipedia_languages_r.get(fetched_wikipedia_lang)
            if matched_lang is not None:
                update_description(engine_name, matched_lang, desc, website, replace=False)
--- a/searxng_extra/update/update_engine_traits.py
+++ b/searxng_extra/update/update_engine_traits.py
@ -0,0 +1,198 @@
 #!/usr/bin/env python
 # lint: pylint
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Update :py:obj:`searx.enginelib.traits.EngineTraitsMap` and :origin:`searx/languages.py`
 :py:obj:`searx.enginelib.traits.EngineTraitsMap.ENGINE_TRAITS_FILE`:
  Persistence of engines traits, fetched from the engines.
 :origin:`searx/languages.py`
  Is generated  from intersecting each engine's supported traits.
 The script :origin:`searxng_extra/update/update_engine_traits.py` is called in
 the :origin:`CI Update data ... <.github/workflows/data-update.yml>`
 """
 # pylint: disable=invalid-name
 from unicodedata import lookup
 from pathlib import Path
 from pprint import pformat
 import babel
 from searx import settings, searx_dir
 from searx import network
 from searx.engines import load_engines
 from searx.enginelib.traits import EngineTraitsMap
 # Output file: the python module with the tuple ``sxng_locales``, written by
 # write_languages_file().
 languages_file = Path(searx_dir) / 'sxng_locales.py'
 languages_file_header = """\
 # -*- coding: utf-8 -*-
 '''List of SearXNG's locale codes.
 This file is generated automatically by::
   ./manage pyenv.cmd searxng_extra/update/update_engine_traits.py
 '''
 sxng_locales = (
 """
 languages_file_footer = """,
 )
 '''
 A list of five-digit tuples:
 0. SearXNG's internal locale tag (a language or region tag)
 1. Name of the language (:py:obj:`babel.core.Locale.get_language_name`)
 2. For region tags the name of the region (:py:obj:`babel.core.Locale.get_territory_name`).
   Empty string for language tags.
 3. English language name (from :py:obj:`babel.core.Locale.english_name`)
 4. Unicode flag (emoji) that fits to SearXNG's internal region tag. Languages
   are represented by a globe (\U0001F310)
 .. code:: python
   ('en',    'English', '',              'English', '\U0001f310'),
   ('en-CA', 'English', 'Canada',        'English', '\U0001f1e8\U0001f1e6'),
   ('en-US', 'English', 'United States', 'English', '\U0001f1fa\U0001f1f8'),
   ..
   ('fr',    'Français', '',             'French',  '\U0001f310'),
   ('fr-BE', 'Français', 'Belgique',     'French',  '\U0001f1e7\U0001f1ea'),
   ('fr-CA', 'Français', 'Canada',       'French',  '\U0001f1e8\U0001f1e6'),
 :meta hide-value:
 '''
 """
 # Flags that get_unicode_flag() can not derive from a territory code.  The keys
 # are compared with the language code of a locale and with its lower-cased
 # territory code; the values are pairs of REGIONAL INDICATOR SYMBOL LETTERs.
 lang2emoji = {
    'ha': '\U0001F1F3\U0001F1EA',  # Hausa / Niger
    'bs': '\U0001F1E7\U0001F1E6',  # Bosnian / Bosnia & Herzegovina
    'jp': '\U0001F1EF\U0001F1F5',  # Japanese
    'ua': '\U0001F1FA\U0001F1E6',  # Ukrainian
    # Hebrew / Israel: letters I + L (the former value was I + R, the flag of Iran)
    'he': '\U0001F1EE\U0001F1F1',
 }
 def main():
    """Load the configured engines, fetch their traits and write the locales file."""
    load_engines(settings['engines'])
    # traits_map = EngineTraitsMap.from_data()
    fetched_traits = fetch_traits_map()
    write_languages_file(filter_locales(fetched_traits))
 def fetch_traits_map():
    """Fetch the traits of the engines, save them and return the traits map.

    The network timeout of the current thread is set to 10 seconds before
    fetching.  Progress messages are printed to stdout.
    """
    network.set_timeout_for_thread(10.0)

    # plain print() wrapper handed to fetch_traits() as its logger
    log = lambda msg: print(msg)  # pylint: disable=unnecessary-lambda-assignment, unnecessary-lambda

    fetched = EngineTraitsMap.fetch_traits(log=log)
    print("fetched properties from %s engines" % len(fetched))
    print("write json file: %s" % fetched.ENGINE_TRAITS_FILE)
    fetched.save_data()
    return fetched
 def filter_locales(traits_map: EngineTraitsMap, min_eng_per_region: int = 11, min_eng_per_lang: int = 13):
    """Filter language & region tags by a threshold.

    :param traits_map: mapping whose values offer the mappings ``regions`` and
        ``languages``, keyed by region tag / language tag.
    :param min_eng_per_region: a region tag is kept when at least this many
        engines list it.
    :param min_eng_per_lang: a language tag is kept when at least this many
        engines list it.
    :return: set with the kept region tags, the language part of each kept
        region tag and the kept language tags.
    """
    # number of engines per region tag
    region_count = {}
    for eng in traits_map.values():
        for reg in eng.regions:
            region_count[reg] = region_count.get(reg, 0) + 1
    regions = {tag for tag, num in region_count.items() if num >= min_eng_per_region}
    # a kept region always brings its language along, e.g. 'fr-BE' --> 'fr'
    lang_from_region = {tag.split('-')[0] for tag in regions}

    # number of engines per language tag
    lang_count = {}
    for eng in traits_map.values():
        for lang in eng.languages:
            # ignore script types like zh_Hant, zh_Hans or sr_Latin, pa_Arab (they
            # already counted by existence of 'zh' or 'sr', 'pa')
            if '_' in lang:
                continue
            lang_count[lang] = lang_count.get(lang, 0) + 1
    languages = {tag for tag, num in lang_count.items() if num >= min_eng_per_lang}

    return regions | lang_from_region | languages
 def write_languages_file(sxng_tag_list):
    """Write the python module ``languages_file`` with the tuple ``sxng_locales``.

    :param sxng_tag_list: iterable of locale tags using ``-`` as separator; the
        tags are written in sorted order.

    For each tag one tuple is written: the tag, the title-cased language name,
    the territory name (or an empty string), the english name up to the first
    `` (`` and the flag returned by :py:obj:`get_unicode_flag` (or an empty
    string).
    """
    language_codes = []
    for sxng_tag in sorted(sxng_tag_list):
        sxng_locale: babel.Locale = babel.Locale.parse(sxng_tag, sep='-')
        flag = get_unicode_flag(sxng_locale) or ''
        language_codes.append(
            (
                sxng_tag,
                sxng_locale.get_language_name().title(),
                sxng_locale.get_territory_name() or '',
                sxng_locale.english_name.split(' (')[0],
                # repr() of UnicodeEscape keeps the flag as escape sequence in the file
                UnicodeEscape(flag),
            )
        )

    # The header already opens the tuple and the footer closes it, so the outer
    # parentheses of the pformat() output are cut off by [1:-1].
    file_content = "{header} {language_codes}{footer}".format(
        header=languages_file_header,
        language_codes=pformat(tuple(language_codes), width=120, indent=4)[1:-1],
        footer=languages_file_footer,
    )

    # The content is complete before the file is opened (and thereby truncated);
    # the with-statement closes the file, no explicit close() is needed.
    with open(languages_file, 'w', encoding='utf-8') as new_file:
        new_file.write(file_content)
 class UnicodeEscape(str):
    """String whose ``repr()`` is its ``unicode-escape`` encoded form in single
    quotes; used to escape unicode strings in :py:obj:`pprint.pformat`"""

    def __repr__(self):
        # every byte of the escaped form becomes the character of the same code point
        escaped = self.encode('unicode-escape').decode('latin-1')
        return "'" + escaped + "'"
 def get_unicode_flag(locale: babel.Locale):
    """Determine a unicode flag (emoji) that fits to the ``locale``

    An entry of ``lang2emoji`` for the language (or for the lower-cased
    territory) takes precedence.  A locale without territory gets the globe
    (U+1F310).  Otherwise the flag is composed from the regional indicator
    symbols of the first two characters of the territory; ``None`` is returned
    (and an error printed) when such a symbol does not exist.
    """
    by_language = lang2emoji.get(locale.language)
    if by_language:
        return by_language

    territory = locale.territory
    if not territory:
        return '\U0001F310'

    by_territory = lang2emoji.get(territory.lower())
    if by_territory:
        return by_territory

    symbol_name = 'REGIONAL INDICATOR SYMBOL LETTER '
    try:
        flag = lookup(symbol_name + territory[0]) + lookup(symbol_name + territory[1])
        # print("OK   : %s --> %s" % (locale, flag))
    except KeyError as exc:
        print("ERROR: %s --> %s" % (locale, exc))
        return None
    return flag
 if __name__ == "__main__":
    main()
--- a/searxng_extra/update/update_languages.py
+++ b/searxng_extra/update/update_languages.py
@ -1,313 +0,0 @@
 #!/usr/bin/env python
 # lint: pylint
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """This script generates languages.py from intersecting each engine's supported
 languages.
 Output files: :origin:`searx/data/engines_languages.json` and
 :origin:`searx/languages.py` (:origin:`CI Update data ...
 <.github/workflows/data-update.yml>`).
 """
 # pylint: disable=invalid-name
 from unicodedata import lookup
 import json
 from pathlib import Path
 from pprint import pformat
 from babel import Locale, UnknownLocaleError
 from babel.languages import get_global
 from babel.core import parse_locale
 from searx import settings, searx_dir
 from searx.engines import load_engines, engines
 from searx.network import set_timeout_for_thread
 # Output files, both located below ``searx_dir``: the JSON file written by
 # fetch_supported_languages() and the python module written by
 # write_languages_file().
 engines_languages_file = Path(searx_dir) / 'data' / 'engines_languages.json'
 languages_file = Path(searx_dir) / 'languages.py'
 # Fetches supported languages for each engine and writes json file with those.
 def fetch_supported_languages():
    """Collect the supported languages of all engines that offer a
    ``fetch_supported_languages`` function, dump them to
    ``engines_languages_file`` (JSON) and return them as a dict that maps the
    engine name to the fetched languages.  Results of type ``list`` are stored
    sorted."""
    set_timeout_for_thread(10.0)
    engines_languages = {}
    for engine_name in sorted(engines):
        engine = engines[engine_name]
        if not hasattr(engine, 'fetch_supported_languages'):
            continue
        supported = engine.fetch_supported_languages()
        engines_languages[engine_name] = supported
        print("fetched %s languages from engine %s" % (len(supported), engine_name))
        # only exact lists are sorted, other types are stored as fetched
        if type(supported) is list:  # pylint: disable=unidiomatic-typecheck
            engines_languages[engine_name] = sorted(supported)
    print("fetched languages from %s engines" % len(engines_languages))
    # write json file
    with open(engines_languages_file, 'w', encoding='utf-8') as json_file:
        json.dump(engines_languages, json_file, indent=2, sort_keys=True)
    return engines_languages
 # Get babel Locale object from lang_code if possible.
 def get_locale(lang_code):
    """Parse ``lang_code`` (``-`` separated) into a babel ``Locale``.  Returns
    ``None`` when parsing raises ``UnknownLocaleError`` or ``ValueError``."""
    parsed = None
    try:
        parsed = Locale.parse(lang_code, sep='-')
    except (UnknownLocaleError, ValueError):
        pass
    return parsed
 # Flag overrides consulted by get_unicode_flag() before a flag is derived from
 # the letters of the territory.  The keys are looked up with the lower-case
 # language tag, the parsed language and the lower-case territory; the values
 # are pairs of regional indicator symbols.
 lang2emoji = {
    'ha': '\U0001F1F3\U0001F1EA',  # Hausa / Niger
    'bs': '\U0001F1E7\U0001F1E6',  # Bosnian / Bosnia & Herzegovina
    'jp': '\U0001F1EF\U0001F1F5',  # Japanese
    'ua': '\U0001F1FA\U0001F1E6',  # Ukrainian
    'he': '\U0001F1EE\U0001F1F7',  # Hebrew
 }
 def get_unicode_flag(lang_code):
    """Return a unicode flag (emoji) for the language tag ``lang_code``.
    Lookup order: ``lang2emoji`` by lower-case tag, the globe symbol for tags
    of two characters, ``lang2emoji`` by parsed language (tags without
    territory) or by lower-case territory, finally the regional indicator
    symbols of the first two territory characters.  The result is falsy when
    nothing fits; the reason is printed."""
    flag = lang2emoji.get(lang_code.lower())
    if flag:
        return flag
    if len(lang_code) == 2:
        return '\U0001F310'
    try:
        language, territory, script, variant = parse_locale(lang_code, '-')
    except ValueError as exc:
        print(exc)
        # continue with empty subtags, this ends in the "no territory" branch
        language = territory = script = variant = ''
    # https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
    if not territory:
        # https://www.unicode.org/emoji/charts/emoji-list.html#country-flag
        flag = lang2emoji.get(language)
        if flag:
            return flag
        print(
            "%s --> language: %s / territory: %s / script: %s / variant: %s"
            % (lang_code, language, territory, script, variant)
        )
        return flag
    flag = lang2emoji.get(territory.lower())
    if flag:
        return flag
    name_prefix = 'REGIONAL INDICATOR SYMBOL LETTER '
    try:
        first, second = (lookup(name_prefix + territory[idx]) for idx in (0, 1))
    except KeyError as exc:
        print("%s --> territory: %s --> %s" % (lang_code, territory, exc))
        return None
    return first + second
 def get_territory_name(lang_code):
    """Return the territory name of the locale parsed from ``lang_code``.
    ``None`` is returned when get_locale() yields no locale or when the lookup
    raises ``FileNotFoundError`` (the error is printed)."""
    locale = get_locale(lang_code)
    if locale is None:
        return None
    try:
        return locale.get_territory_name()
    except FileNotFoundError as exc:
        print("ERROR: %s --> %s" % (locale, exc))
    return None
 # Join all language lists.
 def join_language_lists(engines_languages):
    """Merge the language lists of all engines into one dict.
    The result maps a short language code (part before the first ``-``) to a
    dict with the keys ``name``, ``english_name``, ``counter`` (set of engine
    names supporting the language) and ``countries``.  ``countries`` maps a
    ``language-territory`` code to a dict with ``country_name`` and its own
    ``counter`` set.
    :param engines_languages: dict mapping an engine name to an iterable of
        language codes; the entry ``wikipedia`` is read as a mapping of short
        code to ``name`` / ``english_name`` (NOTE(review): a missing
        ``wikipedia`` entry raises ``KeyError`` once babel does not know a
        code -- confirm that this entry always exists).
    """
    language_list = {}
    for engine_name in engines_languages:
        for lang_code in engines_languages[engine_name]:
            # apply custom fixes if necessary: when the code is an engine
            # specific alias, replace it by the key the alias is registered for
            if lang_code in getattr(engines[engine_name], 'language_aliases', {}).values():
                lang_code = next(
                    lc for lc, alias in engines[engine_name].language_aliases.items() if lang_code == alias
                )
            locale = get_locale(lang_code)
            # ensure that lang_code uses standard language and country codes
            if locale and locale.territory:
                lang_code = "{lang}-{country}".format(lang=locale.language, country=locale.territory)
            short_code = lang_code.split('-')[0]
            # add language without country if not in list
            if short_code not in language_list:
                if locale:
                    # get language's data from babel's Locale object
                    language_name = locale.get_language_name().title()
                    english_name = locale.english_name.split(' (')[0]
                elif short_code in engines_languages['wikipedia']:
                    # get language's data from wikipedia if not known by babel
                    language_name = engines_languages['wikipedia'][short_code]['name']
                    english_name = engines_languages['wikipedia'][short_code]['english_name']
                else:
                    # unknown language: the names stay None
                    language_name = None
                    english_name = None
                # add language to list
                language_list[short_code] = {
                    'name': language_name,
                    'english_name': english_name,
                    'counter': set(),
                    'countries': {},
                }
            # add language with country if not in list
            if lang_code != short_code and lang_code not in language_list[short_code]['countries']:
                country_name = ''
                if locale:
                    # get country name from babel's Locale object
                    try:
                        country_name = locale.get_territory_name()
                    except FileNotFoundError as exc:
                        print("ERROR: %s --> %s" % (locale, exc))
                        # NOTE(review): locale is not read again in this
                        # iteration, this reset has no visible effect
                        locale = None
                language_list[short_code]['countries'][lang_code] = {
                    'country_name': country_name,
                    'counter': set(),
                }
            # count engine for both language_country combination and language alone
            language_list[short_code]['counter'].add(engine_name)
            if lang_code != short_code:
                language_list[short_code]['countries'][lang_code]['counter'].add(engine_name)
    return language_list
 # Filter language list so it only includes the most supported languages and countries
 def filter_language_list(all_languages):
    """Reduce ``all_languages`` to the most supported languages and countries.
    A language is kept when at least ``min_engines_per_lang`` engines support it
    or when all *main engines* (enabled engines of category ``general`` with
    ``supported_languages``) support it.  For a kept language the countries
    with at least ``min_engines_per_country`` engines are taken over.
    :param all_languages: dict in the structure built by join_language_lists()
    :return: flat dict mapping a language code (with or without country) to a
        dict with ``name``, ``english_name`` and optionally ``country_name``
    """
    min_engines_per_lang = 12
    min_engines_per_country = 7
    # pylint: disable=consider-using-dict-items, consider-iterating-dictionary
    main_engines = [
        engine_name
        for engine_name in engines.keys()
        if 'general' in engines[engine_name].categories
        and engines[engine_name].supported_languages
        and not engines[engine_name].disabled
    ]
    # filter list to include only languages supported by most engines or all default general engines
    filtered_languages = {
        code: lang
        for code, lang in all_languages.items()
        if (
            len(lang['counter']) >= min_engines_per_lang
            or all(main_engine in lang['counter'] for main_engine in main_engines)
        )
    }
    def _copy_lang_data(lang, country_name=None):
        # new dict with the names of the language, country_name is only added
        # when it is truthy
        new_dict = {}
        new_dict['name'] = all_languages[lang]['name']
        new_dict['english_name'] = all_languages[lang]['english_name']
        if country_name:
            new_dict['country_name'] = country_name
        return new_dict
    # for each language get country codes supported by most engines or at least one country code
    filtered_languages_with_countries = {}
    for lang, lang_data in filtered_languages.items():
        countries = lang_data['countries']
        filtered_countries = {}
        # get language's country codes with enough supported engines
        for lang_country, country_data in countries.items():
            if len(country_data['counter']) >= min_engines_per_country:
                filtered_countries[lang_country] = _copy_lang_data(lang, country_data['country_name'])
        # add language without countries too if there's more than one country to choose from
        if len(filtered_countries) > 1:
            filtered_countries[lang] = _copy_lang_data(lang, None)
        elif len(filtered_countries) == 1:
            # NOTE(review): lang_country is not read after this assignment (the
            # block below is skipped for a non-empty dict), looks like dead code
            lang_country = next(iter(filtered_countries))
        # if no country has enough engines try to get most likely country code from babel
        if not filtered_countries:
            lang_country = None
            subtags = get_global('likely_subtags').get(lang)
            if subtags:
                # last '_' separated part is used as country when it has two
                # characters -- presumably a value like 'en_Latn_US', confirm
                country_code = subtags.split('_')[-1]
                if len(country_code) == 2:
                    lang_country = "{lang}-{country}".format(lang=lang, country=country_code)
            if lang_country:
                filtered_countries[lang_country] = _copy_lang_data(lang, None)
            else:
                filtered_countries[lang] = _copy_lang_data(lang, None)
        filtered_languages_with_countries.update(filtered_countries)
    return filtered_languages_with_countries
 class UnicodeEscape(str):
    """String type whose ``repr`` is the ``unicode-escape`` encoded text in
    single quotes (used to escape unicode in :py:obj:`pprint.pformat`)"""
    def __repr__(self):
        # each byte of the escaped form maps 1:1 to a character (latin-1)
        escaped = self.encode('unicode-escape').decode('latin-1')
        return "'" + escaped + "'"
 # Write languages.py.
 def write_languages_file(languages):
    """Write the python module ``languages_file`` with the tuple
    ``language_codes``.
    For every code of ``languages`` (processed in sorted order) one tuple is
    generated: ``(code, name, territory name, english name, flag)``.  Codes
    whose ``name`` is ``None`` are reported and skipped.
    :param languages: dict mapping a language code to a dict with the key
        ``name`` and optionally ``english_name``
    """
    file_headers = (
        "# -*- coding: utf-8 -*-",
        "# list of language codes",
        "# this file is generated automatically by utils/fetch_languages.py",
        "language_codes = (\n",
    )
    language_codes = []
    for code in sorted(languages):
        name = languages[code]['name']
        if name is None:
            print("ERROR: languages['%s'] --> %s" % (code, languages[code]))
            continue
        # get_unicode_flag() may return None, the table always stores a string
        flag = get_unicode_flag(code) or ''
        item = (
            code,
            # drop a trailing "(...)" part from the name
            name.split(' (')[0],
            get_territory_name(code) or '',
            languages[code].get('english_name') or '',
            UnicodeEscape(flag),
        )
        language_codes.append(item)
    language_codes = tuple(language_codes)
    # Build the complete content before the file is opened: a failure while
    # formatting must not leave a truncated file behind.  The slice [1:-1]
    # strips the outer parentheses of the pretty printed tuple.
    file_content = "{file_headers} {language_codes},\n)\n".format(
        file_headers='\n'.join(file_headers),
        language_codes=pformat(language_codes, indent=4)[1:-1],
    )
    # the context manager closes the file, no explicit close() is needed
    with open(languages_file, 'w', encoding='utf-8') as new_file:
        new_file.write(file_content)
 # Script entry point (not executed on import): load the configured engines,
 # fetch their supported languages, join and filter the lists and finally
 # write the languages file.
 if __name__ == "__main__":
    load_engines(settings['engines'])
    _engines_languages = fetch_supported_languages()
    _all_languages = join_language_lists(_engines_languages)
    _filtered_languages = filter_language_list(_all_languages)
    write_languages_file(_filtered_languages)
--- a/searxng_extra/update/update_osm_keys_tags.py
+++ b/searxng_extra/update/update_osm_keys_tags.py
@ -50,7 +50,7 @@ from pathlib import Path
 from searx import searx_dir
 from searx.network import set_timeout_for_thread
 from searx.engines import wikidata, set_loggers
-from searx.languages import language_codes
+from searx.sxng_locales import sxng_locales
 from searx.engines.openstreetmap import get_key_rank, VALUE_TO_LINK
 set_loggers(wikidata, 'wikidata')
@ -76,7 +76,7 @@ GROUP BY ?key ?item ?itemLabel
 ORDER BY ?key ?item ?itemLabel
 """
-LANGUAGES = [l[0].lower() for l in language_codes]
+LANGUAGES = [l[0].lower() for l in sxng_locales]
 PRESET_KEYS = {
    ('wikidata',): {'en': 'Wikidata'},
--- a/tests/unit/test_locales.py
+++ b/tests/unit/test_locales.py
@ -0,0 +1,111 @@
 # -*- coding: utf-8 -*-
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
 """Test some code from module :py:obj:`searx.locales`"""
 from searx import locales
 from searx.sxng_locales import sxng_locales
 from tests import SearxTestCase
 class TestLocales(SearxTestCase):
    """Implemented tests:
    - :py:obj:`searx.locales.match_locale`
    """
    def test_match_locale(self):
        """Match language and region tags against the SearXNG locales and
        against individual locale lists."""
        locale_tag_list = [x[0] for x in sxng_locales]
        # Test SearXNG search languages
        self.assertEqual(locales.match_locale('de', locale_tag_list), 'de')
        self.assertEqual(locales.match_locale('fr', locale_tag_list), 'fr')
        self.assertEqual(locales.match_locale('zh', locale_tag_list), 'zh')
        # Test SearXNG search regions
        self.assertEqual(locales.match_locale('ca-es', locale_tag_list), 'ca-ES')
        self.assertEqual(locales.match_locale('de-at', locale_tag_list), 'de-AT')
        self.assertEqual(locales.match_locale('de-de', locale_tag_list), 'de-DE')
        self.assertEqual(locales.match_locale('en-UK', locale_tag_list), 'en-GB')
        self.assertEqual(locales.match_locale('fr-be', locale_tag_list), 'fr-BE')
        self.assertEqual(locales.match_locale('fr-ca', locale_tag_list), 'fr-CA')
        self.assertEqual(locales.match_locale('fr-ch', locale_tag_list), 'fr-CH')
        self.assertEqual(locales.match_locale('zh-cn', locale_tag_list), 'zh-CN')
        self.assertEqual(locales.match_locale('zh-tw', locale_tag_list), 'zh-TW')
        self.assertEqual(locales.match_locale('zh-hk', locale_tag_list), 'zh-HK')
        # Test language script code
        self.assertEqual(locales.match_locale('zh-hans', locale_tag_list), 'zh-CN')
        self.assertEqual(locales.match_locale('zh-hans-cn', locale_tag_list), 'zh-CN')
        self.assertEqual(locales.match_locale('zh-hant', locale_tag_list), 'zh-TW')
        self.assertEqual(locales.match_locale('zh-hant-tw', locale_tag_list), 'zh-TW')
        # Test individual locale lists
        self.assertEqual(locales.match_locale('es', [], fallback='fallback'), 'fallback')
        self.assertEqual(locales.match_locale('de', ['de-CH', 'de-DE']), 'de-DE')
        self.assertEqual(locales.match_locale('es', ['ES']), 'ES')
        self.assertEqual(locales.match_locale('es', ['es-AR', 'es-ES', 'es-MX']), 'es-ES')
        self.assertEqual(locales.match_locale('es-AR', ['es-AR', 'es-ES', 'es-MX']), 'es-AR')
        self.assertEqual(locales.match_locale('es-CO', ['es-AR', 'es-ES']), 'es-ES')
        self.assertEqual(locales.match_locale('es-CO', ['es-AR']), 'es-AR')
        # Tests from the commit message of 9ae409a05a
        # Assumption:
        #   A. When a user selects a language the results should be optimized according to
        #      the selected language.
        #
        #   B. When a user selects a language and a territory the results should be
        #      optimized with first priority on territory and second on language.
        # Assume we have an engine that supports the following locales:
        locale_tag_list = ['zh-CN', 'zh-HK', 'nl-BE', 'fr-CA']
        # Examples (Assumption A.)
        # ------------------------
        # A user selects region 'zh-TW' which should end in zh_HK.
        # hint: CN is 'Hans' and HK ('Hant') fits better to TW ('Hant')
        self.assertEqual(locales.match_locale('zh-TW', locale_tag_list), 'zh-HK')
        # A user selects only the language 'zh' which should end in CN
        self.assertEqual(locales.match_locale('zh', locale_tag_list), 'zh-CN')
        # A user selects only the language 'fr' which should end in fr_CA
        self.assertEqual(locales.match_locale('fr', locale_tag_list), 'fr-CA')
        # The difference in priority on the territory is best shown with an
        # engine that supports the following locales:
        locale_tag_list = ['fr-FR', 'fr-CA', 'en-GB', 'nl-BE']
        # A user selects only a language
        self.assertEqual(locales.match_locale('en', locale_tag_list), 'en-GB')
        # hint: the engine supports fr_FR and fr_CA; since no territory is
        # given, fr_FR takes priority ..
        self.assertEqual(locales.match_locale('fr', locale_tag_list), 'fr-FR')
        # Examples (Assumption B.)
        # ------------------------
        #  A user selects region 'fr-BE' which should end in nl-BE
        self.assertEqual(locales.match_locale('fr-BE', locale_tag_list), 'nl-BE')
        # If the user selects a language and there are two locales like the
        # following:
        locale_tag_list = ['fr-BE', 'fr-CH']
        # The locale is selected by looking at the "population percent" and
        # this percentage is higher in BE (68%) compared to CH (21%)
        self.assertEqual(locales.match_locale('fr', locale_tag_list), 'fr-BE')
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@ -87,39 +87,6 @@ class TestUtils(SearxTestCase):
        html = '<p><b>Lorem ipsum</i>dolor sit amet</p>'
        self.assertEqual(utils.html_to_text(html), "Lorem ipsum")
    def test_match_language(self):
        """Check ``utils.match_language``: exact match, fallback, script tags,
        guessing of the country and language aliases."""
        self.assertEqual(utils.match_language('es', ['es']), 'es')
        self.assertEqual(utils.match_language('es', [], fallback='fallback'), 'fallback')
        self.assertEqual(utils.match_language('ja', ['jp'], {'ja': 'jp'}), 'jp')
        # handle script tags
        self.assertEqual(utils.match_language('zh-CN', ['zh-Hans-CN', 'zh-Hant-TW']), 'zh-Hans-CN')
        self.assertEqual(utils.match_language('zh-TW', ['zh-Hans-CN', 'zh-Hant-TW']), 'zh-Hant-TW')
        self.assertEqual(utils.match_language('zh-Hans-CN', ['zh-CN', 'zh-TW']), 'zh-CN')
        self.assertEqual(utils.match_language('zh-Hant-TW', ['zh-CN', 'zh-TW']), 'zh-TW')
        self.assertEqual(utils.match_language('zh-Hans', ['zh-CN', 'zh-TW', 'zh-HK']), 'zh-CN')
        self.assertEqual(utils.match_language('zh-Hant', ['zh-CN', 'zh-TW', 'zh-HK']), 'zh-TW')
        # alias mapping passed as third argument in the tests below
        aliases = {'en-GB': 'en-UK', 'he': 'iw'}
        # guess country
        self.assertEqual(utils.match_language('de-DE', ['de']), 'de')
        self.assertEqual(utils.match_language('de', ['de-DE']), 'de-DE')
        self.assertEqual(utils.match_language('es-CO', ['es-AR', 'es-ES', 'es-MX']), 'es-ES')
        self.assertEqual(utils.match_language('es-CO', ['es-MX']), 'es-MX')
        self.assertEqual(utils.match_language('en-UK', ['en-AU', 'en-GB', 'en-US']), 'en-GB')
        self.assertEqual(utils.match_language('en-GB', ['en-AU', 'en-UK', 'en-US'], aliases), 'en-UK')
        # language aliases
        self.assertEqual(utils.match_language('iw', ['he']), 'he')
        self.assertEqual(utils.match_language('he', ['iw'], aliases), 'iw')
        self.assertEqual(utils.match_language('iw-IL', ['he']), 'he')
        self.assertEqual(utils.match_language('he-IL', ['iw'], aliases), 'iw')
        self.assertEqual(utils.match_language('iw', ['he-IL']), 'he-IL')
        self.assertEqual(utils.match_language('he', ['iw-IL'], aliases), 'iw-IL')
        self.assertEqual(utils.match_language('iw-IL', ['he-IL']), 'he-IL')
        self.assertEqual(utils.match_language('he-IL', ['iw-IL'], aliases), 'iw-IL')
    def test_ecma_unscape(self):
        self.assertEqual(utils.ecma_unescape('text%20with%20space'), 'text with space')
        self.assertEqual(utils.ecma_unescape('text using %xx: %F3'), 'text using %xx: ó')
--- a/utils/templates/etc/searxng/settings.yml
+++ b/utils/templates/etc/searxng/settings.yml
@ -52,9 +52,6 @@ enabled_plugins:
 engines:
  - name: google
    use_mobile_ui: true
 #   - name: fdroid
 #     disabled: false
 #