diff --git a/.github/workflows/data-update.yml b/.github/workflows/data-update.yml index d20cd6c63..0ffb1498e 100644 --- a/.github/workflows/data-update.yml +++ b/.github/workflows/data-update.yml @@ -17,7 +17,7 @@ jobs: - update_currencies.py - update_external_bangs.py - update_firefox_version.py - - update_languages.py + - update_engine_traits.py - update_wikidata_units.py - update_engine_descriptions.py steps: diff --git a/docs/admin/engines/configured_engines.rst b/docs/admin/engines/configured_engines.rst index c7b6a1f52..fa1e5a4b0 100644 --- a/docs/admin/engines/configured_engines.rst +++ b/docs/admin/engines/configured_engines.rst @@ -42,7 +42,7 @@ Explanation of the :ref:`general engine configuration` shown in the table - Timeout - Weight - Paging - - Language + - Language, Region - Safe search - Time range diff --git a/docs/admin/engines/settings.rst b/docs/admin/engines/settings.rst index 099e449e0..0d9e14e57 100644 --- a/docs/admin/engines/settings.rst +++ b/docs/admin/engines/settings.rst @@ -569,10 +569,13 @@ engine is shown. Most of the options have a default value or even are optional. To disable by default the engine, but not deleting it. It will allow the user to manually activate it in the settings. +``inactive``: optional + Remove the engine from the settings (*disabled & removed*). + ``language`` : optional If you want to use another language for a specific engine, you can define it - by using the full ISO code of language and country, like ``fr_FR``, ``en_US``, - ``de_DE``. + by using the ISO code of language (and region), like ``fr``, ``en-US``, + ``de-DE``. ``tokens`` : optional A list of secret tokens to make this engine *private*, more details see diff --git a/docs/conf.py b/docs/conf.py index 8e0c3ab1b..1d71b7f8a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -127,6 +127,10 @@ extensions = [ 'notfound.extension', # https://github.com/readthedocs/sphinx-notfound-page ] +autodoc_default_options = { + 'member-order': 'groupwise', +} + myst_enable_extensions = [ "replacements", "smartquotes" ] @@ -135,6 +139,7 @@ suppress_warnings = ['myst.domains'] intersphinx_mapping = { "python": ("https://docs.python.org/3/", None), + "babel" : ("https://babel.readthedocs.io/en/latest/", None), "flask": ("https://flask.palletsprojects.com/", None), "flask_babel": ("https://python-babel.github.io/flask-babel/", None), # "werkzeug": ("https://werkzeug.palletsprojects.com/", None), diff --git a/docs/dev/engine_overview.rst b/docs/dev/engine_overview.rst index 95ed267e2..930fd0813 100644 --- a/docs/dev/engine_overview.rst +++ b/docs/dev/engine_overview.rst @@ -54,6 +54,7 @@ Engine File - ``offline`` :ref:`[ref] ` - ``online_dictionary`` - ``online_currency`` + - ``online_url_search`` ======================= =========== ======================================================== .. _engine settings: @@ -131,8 +132,10 @@ Passed Arguments (request) These arguments can be used to construct the search query. Furthermore, parameters with default value can be redefined for special purposes. +.. _engine request online: -.. table:: If the ``engine_type`` is ``online`` +.. table:: If the ``engine_type`` is :py:obj:`online + ` :width: 100% ====================== ============== ======================================================================== @@ -149,12 +152,16 @@ parameters with default value can be redefined for special purposes. safesearch int ``0``, between ``0`` and ``2`` (normal, moderate, strict) time_range Optional[str] ``None``, can be ``day``, ``week``, ``month``, ``year`` pageno int current pagenumber - language str specific language code like ``'en_US'``, or ``'all'`` if unspecified + searxng_locale str SearXNG's locale selected by user. Specific language code like + ``'en'``, ``'en-US'``, or ``'all'`` if unspecified. ====================== ============== ======================================================================== -.. table:: If the ``engine_type`` is ``online_dictionary``, in addition to the - ``online`` arguments: +.. _engine request online_dictionary: + +.. table:: If the ``engine_type`` is :py:obj:`online_dictionary + `, + in addition to the :ref:`online ` arguments: :width: 100% ====================== ============== ======================================================================== @@ -165,8 +172,11 @@ parameters with default value can be redefined for special purposes. query str the text query without the languages ====================== ============== ======================================================================== -.. table:: If the ``engine_type`` is ``online_currency```, in addition to the - ``online`` arguments: +.. _engine request online_currency: + +.. table:: If the ``engine_type`` is :py:obj:`online_currency + `, + in addition to the :ref:`online ` arguments: :width: 100% ====================== ============== ======================================================================== @@ -179,6 +189,26 @@ parameters with default value can be redefined for special purposes. to_name str currency name ====================== ============== ======================================================================== +.. _engine request online_url_search: + +.. table:: If the ``engine_type`` is :py:obj:`online_url_search + `, + in addition to the :ref:`online ` arguments: + :width: 100% + + ====================== ============== ======================================================================== + argument type default-value, information + ====================== ============== ======================================================================== + search_url dict URLs from the search query: + + .. code:: python + + { + 'http': str, + 'ftp': str, + 'data:image': str + } + ====================== ============== ======================================================================== Specify Request --------------- diff --git a/docs/dev/searxng_extra/update.rst b/docs/dev/searxng_extra/update.rst index d05c81409..a125303e0 100644 --- a/docs/dev/searxng_extra/update.rst +++ b/docs/dev/searxng_extra/update.rst @@ -52,12 +52,12 @@ Scripts to update static data in :origin:`searx/data/` :members: -``update_languages.py`` -======================= +``update_engine_traits.py`` +=========================== -:origin:`[source] ` +:origin:`[source] ` -.. automodule:: searxng_extra.update.update_languages +.. automodule:: searxng_extra.update.update_engine_traits :members: diff --git a/docs/src/searx.engine.archlinux.rst b/docs/src/searx.engine.archlinux.rst new file mode 100644 index 000000000..be48b1859 --- /dev/null +++ b/docs/src/searx.engine.archlinux.rst @@ -0,0 +1,9 @@ +.. _archlinux engine: + +========== +Arch Linux +========== + +.. automodule:: searx.engines.archlinux + :members: + diff --git a/docs/src/searx.engine.dailymotion.rst b/docs/src/searx.engine.dailymotion.rst new file mode 100644 index 000000000..84348e2d0 --- /dev/null +++ b/docs/src/searx.engine.dailymotion.rst @@ -0,0 +1,8 @@ +.. _dailymotion engine: + +=========== +Dailymotion +=========== + +.. automodule:: searx.engines.dailymotion + :members: diff --git a/docs/src/searx.engine.duckduckgo.rst b/docs/src/searx.engine.duckduckgo.rst new file mode 100644 index 000000000..1646d4984 --- /dev/null +++ b/docs/src/searx.engine.duckduckgo.rst @@ -0,0 +1,22 @@ +.. _duckduckgo engines: + +================= +DukcDukGo engines +================= + +.. contents:: Contents + :depth: 2 + :local: + :backlinks: entry + +.. automodule:: searx.engines.duckduckgo + :members: + +.. automodule:: searx.engines.duckduckgo_images + :members: + +.. automodule:: searx.engines.duckduckgo_definitions + :members: + +.. automodule:: searx.engines.duckduckgo_weather + :members: diff --git a/docs/src/searx.enginelib.rst b/docs/src/searx.enginelib.rst new file mode 100644 index 000000000..651a04e68 --- /dev/null +++ b/docs/src/searx.enginelib.rst @@ -0,0 +1,17 @@ +.. _searx.enginelib: + +============ +Engine model +============ + +.. automodule:: searx.enginelib + :members: + +.. _searx.enginelib.traits: + +============= +Engine traits +============= + +.. automodule:: searx.enginelib.traits + :members: diff --git a/docs/src/searx.engines.bing.rst b/docs/src/searx.engines.bing.rst new file mode 100644 index 000000000..6b7bba8f2 --- /dev/null +++ b/docs/src/searx.engines.bing.rst @@ -0,0 +1,43 @@ +.. _bing engines: + +============ +Bing Engines +============ + +.. contents:: Contents + :depth: 2 + :local: + :backlinks: entry + + +.. _bing web engine: + +Bing WEB +======== + +.. automodule:: searx.engines.bing + :members: + +.. _bing images engine: + +Bing Images +=========== + +.. automodule:: searx.engines.bing_images + :members: + +.. _bing videos engine: + +Bing Videos +=========== + +.. automodule:: searx.engines.bing_videos + :members: + +.. _bing news engine: + +Bing News +========= + +.. automodule:: searx.engines.bing_news + :members: diff --git a/docs/src/searx.engines.google.rst b/docs/src/searx.engines.google.rst index 2d10b5eea..9c15325f8 100644 --- a/docs/src/searx.engines.google.rst +++ b/docs/src/searx.engines.google.rst @@ -12,15 +12,21 @@ Google Engines .. _google API: -google API +Google API ========== .. _Query Parameter Definitions: https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions +SearXNG's implementation of the Google API is mainly done in +:py:obj:`get_google_info `. + For detailed description of the *REST-full* API see: `Query Parameter -Definitions`_. Not all parameters can be appied and some engines are *special* -(e.g. :ref:`google news engine`). +Definitions`_. The linked API documentation can sometimes be helpful during +reverse engineering. However, we cannot use it in the freely accessible WEB +services; not all parameters can be applied and some engines are more *special* +than other (e.g. :ref:`google news engine`). + .. _google web engine: @@ -30,6 +36,13 @@ Google WEB .. automodule:: searx.engines.google :members: +.. _google autocomplete: + +Google Autocomplete +==================== + +.. autofunction:: searx.autocomplete.google_complete + .. _google images engine: Google Images @@ -53,3 +66,11 @@ Google News .. automodule:: searx.engines.google_news :members: + +.. _google scholar engine: + +Google Scholar +============== + +.. automodule:: searx.engines.google_scholar + :members: diff --git a/docs/src/searx.engines.peertube.rst b/docs/src/searx.engines.peertube.rst new file mode 100644 index 000000000..8e1576ea0 --- /dev/null +++ b/docs/src/searx.engines.peertube.rst @@ -0,0 +1,27 @@ +.. _peertube engines: + +================ +Peertube Engines +================ + +.. contents:: Contents + :depth: 2 + :local: + :backlinks: entry + + +.. _peertube video engine: + +Peertube Video +============== + +.. automodule:: searx.engines.peertube + :members: + +.. _sepiasearch engine: + +SepiaSearch +=========== + +.. automodule:: searx.engines.sepiasearch + :members: diff --git a/docs/src/searx.engines.rst b/docs/src/searx.engines.rst index 687fdb0b2..4ce96b27e 100644 --- a/docs/src/searx.engines.rst +++ b/docs/src/searx.engines.rst @@ -1,8 +1,8 @@ -.. _load_engines: +.. _searx.engines: -============ -Load Engines -============ +================= +SearXNG's engines +================= .. automodule:: searx.engines :members: diff --git a/docs/src/searx.engines.startpage.rst b/docs/src/searx.engines.startpage.rst new file mode 100644 index 000000000..c885d8f1b --- /dev/null +++ b/docs/src/searx.engines.startpage.rst @@ -0,0 +1,13 @@ +.. _startpage engines: + +================= +Startpage engines +================= + +.. contents:: Contents + :depth: 2 + :local: + :backlinks: entry + +.. automodule:: searx.engines.startpage + :members: diff --git a/docs/src/searx.engines.wikipedia.rst b/docs/src/searx.engines.wikipedia.rst new file mode 100644 index 000000000..e644cd645 --- /dev/null +++ b/docs/src/searx.engines.wikipedia.rst @@ -0,0 +1,27 @@ +.. _wikimedia engines: + +========= +Wikimedia +========= + +.. contents:: Contents + :depth: 2 + :local: + :backlinks: entry + + +.. _wikipedia engine: + +Wikipedia +========= + +.. automodule:: searx.engines.wikipedia + :members: + +.. _wikidata engine: + +Wikidata +========= + +.. automodule:: searx.engines.wikidata + :members: diff --git a/docs/src/searx.locales.rst b/docs/src/searx.locales.rst index 579247aff..2f13bfca1 100644 --- a/docs/src/searx.locales.rst +++ b/docs/src/searx.locales.rst @@ -4,5 +4,17 @@ Locales ======= +.. contents:: Contents + :depth: 2 + :local: + :backlinks: entry + .. automodule:: searx.locales :members: + + +SearXNG's locale codes +====================== + +.. automodule:: searx.sxng_locales + :members: diff --git a/docs/src/searx.search.processors.rst b/docs/src/searx.search.processors.rst new file mode 100644 index 000000000..390680657 --- /dev/null +++ b/docs/src/searx.search.processors.rst @@ -0,0 +1,47 @@ +.. _searx.search.processors: + +================= +Search processors +================= + +.. contents:: Contents + :depth: 2 + :local: + :backlinks: entry + + +Abstract processor class +======================== + +.. automodule:: searx.search.processors.abstract + :members: + +Offline processor +================= + +.. automodule:: searx.search.processors.offline + :members: + +Online processor +================ + +.. automodule:: searx.search.processors.online + :members: + +Online currency processor +========================= + +.. automodule:: searx.search.processors.online_currency + :members: + +Online Dictionary processor +=========================== + +.. automodule:: searx.search.processors.online_dictionary + :members: + +Online URL search processor +=========================== + +.. automodule:: searx.search.processors.online_url_search + :members: diff --git a/manage b/manage index c51d76ddb..8d61e1328 100755 --- a/manage +++ b/manage @@ -63,7 +63,7 @@ PYLINT_SEARXNG_DISABLE_OPTION="\ I,C,R,\ W0105,W0212,W0511,W0603,W0613,W0621,W0702,W0703,W1401,\ E1136" -PYLINT_ADDITIONAL_BUILTINS_FOR_ENGINES="supported_languages,language_aliases,logger,categories" +PYLINT_ADDITIONAL_BUILTINS_FOR_ENGINES="traits,supported_languages,language_aliases,logger,categories" PYLINT_OPTIONS="-m pylint -j 0 --rcfile .pylintrc" help() { diff --git a/requirements.txt b/requirements.txt index adccfa3f5..3ee417d7c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ certifi==2022.12.7 -babel==2.11.0 +babel==2.12.1 flask-babel==3.0.1 flask==2.2.3 jinja2==3.1.2 diff --git a/searx/autocomplete.py b/searx/autocomplete.py index aeb697a14..ad9903f36 100644 --- a/searx/autocomplete.py +++ b/searx/autocomplete.py @@ -5,20 +5,20 @@ """ # pylint: disable=use-dict-literal -from json import loads +import json from urllib.parse import urlencode -from lxml import etree +import lxml from httpx import HTTPError from searx import settings -from searx.data import ENGINES_LANGUAGES +from searx.engines import ( + engines, + google, +) from searx.network import get as http_get from searx.exceptions import SearxEngineResponseException -# a fetch_supported_languages() for XPath engines isn't available right now -# _brave = ENGINES_LANGUAGES['brave'].keys() - def get(*args, **kwargs): if 'timeout' not in kwargs: @@ -55,34 +55,58 @@ def dbpedia(query, _lang): results = [] if response.ok: - dom = etree.fromstring(response.content) + dom = lxml.etree.fromstring(response.content) results = dom.xpath('//Result/Label//text()') return results -def duckduckgo(query, _lang): - # duckduckgo autocompleter - url = 'https://ac.duckduckgo.com/ac/?{0}&type=list' +def duckduckgo(query, sxng_locale): + """Autocomplete from DuckDuckGo. Supports DuckDuckGo's languages""" - resp = loads(get(url.format(urlencode(dict(q=query)))).text) - if len(resp) > 1: - return resp[1] - return [] + traits = engines['duckduckgo'].traits + args = { + 'q': query, + 'kl': traits.get_region(sxng_locale, traits.all_locale), + } + + url = 'https://duckduckgo.com/ac/?type=list&' + urlencode(args) + resp = get(url) + + ret_val = [] + if resp.ok: + j = resp.json() + if len(j) > 1: + ret_val = j[1] + return ret_val -def google(query, lang): - # google autocompleter - autocomplete_url = 'https://suggestqueries.google.com/complete/search?client=toolbar&' +def google_complete(query, sxng_locale): + """Autocomplete from Google. Supports Google's languages and subdomains + (:py:obj:`searx.engines.google.get_google_info`) by using the async REST + API:: - response = get(autocomplete_url + urlencode(dict(hl=lang, q=query))) + https://{subdomain}/complete/search?{args} + """ + + google_info = google.get_google_info({'searxng_locale': sxng_locale}, engines['google'].traits) + + url = 'https://{subdomain}/complete/search?{args}' + args = urlencode( + { + 'q': query, + 'client': 'gws-wiz', + 'hl': google_info['params']['hl'], + } + ) results = [] - - if response.ok: - dom = etree.fromstring(response.text) - results = dom.xpath('//suggestion/@data') - + resp = get(url.format(subdomain=google_info['subdomain'], args=args)) + if resp.ok: + json_txt = resp.text[resp.text.find('[') : resp.text.find(']', -3) + 1] + data = json.loads(json_txt) + for item in data[0]: + results.append(lxml.html.fromstring(item[0]).text_content()) return results @@ -109,9 +133,9 @@ def seznam(query, _lang): ] -def startpage(query, lang): - # startpage autocompleter - lui = ENGINES_LANGUAGES['startpage'].get(lang, 'english') +def startpage(query, sxng_locale): + """Autocomplete from Startpage. Supports Startpage's languages""" + lui = engines['startpage'].traits.get_language(sxng_locale, 'english') url = 'https://startpage.com/suggestions?{query}' resp = get(url.format(query=urlencode({'q': query, 'segment': 'startpage.udog', 'lui': lui}))) data = resp.json() @@ -122,20 +146,20 @@ def swisscows(query, _lang): # swisscows autocompleter url = 'https://swisscows.ch/api/suggest?{query}&itemsCount=5' - resp = loads(get(url.format(query=urlencode({'query': query}))).text) + resp = json.loads(get(url.format(query=urlencode({'query': query}))).text) return resp -def qwant(query, lang): - # qwant autocompleter (additional parameter : lang=en_en&count=xxx ) - url = 'https://api.qwant.com/api/suggest?{query}' - - resp = get(url.format(query=urlencode({'q': query, 'lang': lang}))) - +def qwant(query, sxng_locale): + """Autocomplete from Qwant. Supports Qwant's regions.""" results = [] + locale = engines['qwant'].traits.get_region(sxng_locale, 'en_US') + url = 'https://api.qwant.com/v3/suggest?{query}' + resp = get(url.format(query=urlencode({'q': query, 'locale': locale, 'version': '2'}))) + if resp.ok: - data = loads(resp.text) + data = resp.json() if data['status'] == 'success': for item in data['data']['items']: results.append(item['value']) @@ -143,21 +167,38 @@ def qwant(query, lang): return results -def wikipedia(query, lang): - # wikipedia autocompleter - url = 'https://' + lang + '.wikipedia.org/w/api.php?action=opensearch&{0}&limit=10&namespace=0&format=json' +def wikipedia(query, sxng_locale): + """Autocomplete from Wikipedia. Supports Wikipedia's languages (aka netloc).""" + results = [] + eng_traits = engines['wikipedia'].traits + wiki_lang = eng_traits.get_language(sxng_locale, 'en') + wiki_netloc = eng_traits.custom['wiki_netloc'].get(wiki_lang, 'en.wikipedia.org') - resp = loads(get(url.format(urlencode(dict(search=query)))).text) - if len(resp) > 1: - return resp[1] - return [] + url = 'https://{wiki_netloc}/w/api.php?{args}' + args = urlencode( + { + 'action': 'opensearch', + 'format': 'json', + 'formatversion': '2', + 'search': query, + 'namespace': '0', + 'limit': '10', + } + ) + resp = get(url.format(args=args, wiki_netloc=wiki_netloc)) + if resp.ok: + data = resp.json() + if len(data) > 1: + results = data[1] + + return results def yandex(query, _lang): # yandex autocompleter url = "https://suggest.yandex.com/suggest-ff.cgi?{0}" - resp = loads(get(url.format(urlencode(dict(part=query)))).text) + resp = json.loads(get(url.format(urlencode(dict(part=query)))).text) if len(resp) > 1: return resp[1] return [] @@ -166,7 +207,7 @@ def yandex(query, _lang): backends = { 'dbpedia': dbpedia, 'duckduckgo': duckduckgo, - 'google': google, + 'google': google_complete, 'seznam': seznam, 'startpage': startpage, 'swisscows': swisscows, @@ -177,12 +218,11 @@ backends = { } -def search_autocomplete(backend_name, query, lang): +def search_autocomplete(backend_name, query, sxng_locale): backend = backends.get(backend_name) if backend is None: return [] - try: - return backend(query, lang) + return backend(query, sxng_locale) except (HTTPError, SearxEngineResponseException): return [] diff --git a/searx/data/__init__.py b/searx/data/__init__.py index 424440a71..0822f4ac8 100644 --- a/searx/data/__init__.py +++ b/searx/data/__init__.py @@ -7,7 +7,7 @@ """ __all__ = [ - 'ENGINES_LANGUAGES', + 'ENGINE_TRAITS', 'CURRENCIES', 'USER_AGENTS', 'EXTERNAL_URLS', @@ -42,7 +42,6 @@ def ahmia_blacklist_loader(): return f.read().split() -ENGINES_LANGUAGES = _load('engines_languages.json') CURRENCIES = _load('currencies.json') USER_AGENTS = _load('useragents.json') EXTERNAL_URLS = _load('external_urls.json') @@ -50,3 +49,4 @@ WIKIDATA_UNITS = _load('wikidata_units.json') EXTERNAL_BANGS = _load('external_bangs.json') OSM_KEYS_TAGS = _load('osm_keys_tags.json') ENGINE_DESCRIPTIONS = _load('engine_descriptions.json') +ENGINE_TRAITS = _load('engine_traits.json') diff --git a/searx/data/engine_traits.json b/searx/data/engine_traits.json new file mode 100644 index 000000000..ea3724c10 --- /dev/null +++ b/searx/data/engine_traits.json @@ -0,0 +1,3810 @@ +{ + "arch linux wiki": { + "all_locale": null, + "custom": { + "title": { + "de": "Spezial:Suche", + "fa": "\u0648\u06cc\u0698\u0647:\u062c\u0633\u062a\u062c\u0648", + "ja": "\u7279\u5225:\u691c\u7d22", + "zh": "Special:\u641c\u7d22" + }, + "wiki_netloc": { + "de": "wiki.archlinux.de", + "fa": "wiki.archusers.ir", + "ja": "wiki.archlinux.jp", + "zh": "wiki.archlinuxcn.org" + } + }, + "data_type": "traits_v1", + "languages": { + "ar": "\u0627\u0644\u0639\u0631\u0628\u064a\u0629", + "bg": "\u0411\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438", + "bs": "Bosanski", + "cs": "\u010ce\u0161tina", + "da": "Dansk", + "de": "Deutsch", + "el": "\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac", + "en": "English", + "es": "Espa\u00f1ol", + "fa": "\u0641\u0627\u0631\u0633\u06cc", + "fi": "Suomi", + "fr": "Fran\u00e7ais", + "he": "\u05e2\u05d1\u05e8\u05d9\u05ea", + "hr": "Hrvatski", + "hu": "Magyar", + "id": "Bahasa Indonesia", + "it": "Italiano", + "ja": "\u65e5\u672c\u8a9e", + "ko": "\ud55c\uad6d\uc5b4", + "lt": "Lietuvi\u0173", + "nl": "Nederlands", + "pl": "Polski", + "pt": "Portugu\u00eas", + "ru": "\u0420\u0443\u0441\u0441\u043a\u0438\u0439", + "sk": "Sloven\u010dina", + "sr": "\u0421\u0440\u043f\u0441\u043a\u0438 / srpski", + "sv": "Svenska", + "th": "\u0e44\u0e17\u0e22", + "tr": "T\u00fcrk\u00e7e", + "uk": "\u0423\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430", + "zh": "\u4e2d\u6587\uff08\u7e41\u9ad4\uff09" + }, + "regions": {} + }, + "bing": { + "all_locale": null, + "custom": {}, + "data_type": "traits_v1", + "languages": { + "ar": "ar", + "bg": "bg", + "bn": "bn", + "ca": "ca", + "cs": "cs", + "da": "da", + "de": "de", + "en": "en", + "es": "es", + "et": "et", + "eu": "eu", + "fi": "fi", + "fr": "fr", + "gl": "gl", + "gu": "gu", + "he": "he", + "hi": "hi", + "hr": "hr", + "hu": "hu", + "is": "is", + "it": "it", + "ja": "jp", + "kn": "kn", + "ko": "ko", + "lt": "lt", + "lv": "lv", + "ml": "ml", + "mr": "mr", + "ms": "ms", + "nb": "nb", + "nl": "nl", + "pa": "pa", + "pl": "pl", + "pt": "pt-pt", + "ro": "ro", + "ru": "ru", + "sk": "sk", + "sl": "sl", + "sr": "sr", + "sv": "sv", + "ta": "ta", + "te": "te", + "th": "th", + "tr": "tr", + "uk": "uk", + "vi": "vi", + "zh": "zh-hans", + "zh_Hans": "zh-hans", + "zh_Hant": "zh-hant" + }, + "regions": { + "da-DK": "da-DK", + "de-AT": "de-AT", + "de-CH": "de-CH", + "de-DE": "de-DE", + "en-AU": "en-AU", + "en-CA": "en-CA", + "en-GB": "en-GB", + "en-IN": "en-IN", + "en-MY": "en-MY", + "en-NZ": "en-NZ", + "en-PH": "en-PH", + "en-US": "en-US", + "en-ZA": "en-ZA", + "es-AR": "es-AR", + "es-CL": "es-CL", + "es-ES": "es-ES", + "es-MX": "es-MX", + "es-US": "es-US", + "fi-FI": "fi-FI", + "fr-BE": "fr-BE", + "fr-CA": "fr-CA", + "fr-CH": "fr-CH", + "fr-FR": "fr-FR", + "id-ID": "en-ID", + "it-IT": "it-IT", + "ja-JP": "ja-JP", + "ko-KR": "ko-KR", + "nb-NO": "no-NO", + "nl-BE": "nl-BE", + "nl-NL": "nl-NL", + "pl-PL": "pl-PL", + "pt-BR": "pt-BR", + "ru-RU": "ru-RU", + "sv-SE": "sv-SE", + "tr-TR": "tr-TR", + "zh-CN": "zh-CN", + "zh-HK": "zh-HK", + "zh-TW": "zh-TW" + } + }, + "bing images": { + "all_locale": null, + "custom": {}, + "data_type": "traits_v1", + "languages": { + "ar": "ar", + "bg": "bg", + "bn": "bn", + "ca": "ca", + "cs": "cs", + "da": "da", + "de": "de", + "en": "en", + "es": "es", + "et": "et", + "eu": "eu", + "fi": "fi", + "fr": "fr", + "gl": "gl", + "gu": "gu", + "he": "he", + "hi": "hi", + "hr": "hr", + "hu": "hu", + "is": "is", + "it": "it", + "ja": "jp", + "kn": "kn", + "ko": "ko", + "lt": "lt", + "lv": "lv", + "ml": "ml", + "mr": "mr", + "ms": "ms", + "nb": "nb", + "nl": "nl", + "pa": "pa", + "pl": "pl", + "pt": "pt-pt", + "ro": "ro", + "ru": "ru", + "sk": "sk", + "sl": "sl", + "sr": "sr", + "sv": "sv", + "ta": "ta", + "te": "te", + "th": "th", + "tr": "tr", + "uk": "uk", + "vi": "vi", + "zh": "zh-hans", + "zh_Hans": "zh-hans", + "zh_Hant": "zh-hant" + }, + "regions": { + "da-DK": "da-DK", + "de-AT": "de-AT", + "de-CH": "de-CH", + "de-DE": "de-DE", + "en-AU": "en-AU", + "en-CA": "en-CA", + "en-GB": "en-GB", + "en-IN": "en-IN", + "en-MY": "en-MY", + "en-NZ": "en-NZ", + "en-PH": "en-PH", + "en-US": "en-US", + "en-ZA": "en-ZA", + "es-AR": "es-AR", + "es-CL": "es-CL", + "es-ES": "es-ES", + "es-MX": "es-MX", + "es-US": "es-US", + "fi-FI": "fi-FI", + "fr-BE": "fr-BE", + "fr-CA": "fr-CA", + "fr-CH": "fr-CH", + "fr-FR": "fr-FR", + "id-ID": "en-ID", + "it-IT": "it-IT", + "ja-JP": "ja-JP", + "ko-KR": "ko-KR", + "nb-NO": "no-NO", + "nl-BE": "nl-BE", + "nl-NL": "nl-NL", + "pl-PL": "pl-PL", + "pt-BR": "pt-BR", + "ru-RU": "ru-RU", + "sv-SE": "sv-SE", + "tr-TR": "tr-TR", + "zh-CN": "zh-CN", + "zh-HK": "zh-HK", + "zh-TW": "zh-TW" + } + }, + "bing news": { + "all_locale": "en-WW", + "custom": {}, + "data_type": "traits_v1", + "languages": { + "ar": "ar", + "bg": "bg", + "bn": "bn", + "ca": "ca", + "cs": "cs", + "da": "da", + "de": "de", + "en": "en", + "es": "es", + "et": "et", + "eu": "eu", + "fi": "fi", + "fr": "fr", + "gl": "gl", + "gu": "gu", + "he": "he", + "hi": "hi", + "hr": "hr", + "hu": "hu", + "is": "is", + "it": "it", + "ja": "jp", + "kn": "kn", + "ko": "ko", + "lt": "lt", + "lv": "lv", + "ml": "ml", + "mr": "mr", + "ms": "ms", + "nb": "nb", + "nl": "nl", + "pa": "pa", + "pl": "pl", + "pt": "pt-pt", + "ro": "ro", + "ru": "ru", + "sk": "sk", + "sl": "sl", + "sr": "sr", + "sv": "sv", + "ta": "ta", + "te": "te", + "th": "th", + "tr": "tr", + "uk": "uk", + "vi": "vi", + "zh": "zh-hans", + "zh_Hans": "zh-hans", + "zh_Hant": "zh-hant" + }, + "regions": { + "da-DK": "da-DK", + "de-DE": "de-DE", + "en-AU": "en-AU", + "en-GB": "en-GB", + "en-US": "en-US", + "es-CL": "es-CL", + "es-MX": "es-MX", + "es-US": "es-US", + "fi-FI": "fi-FI", + "fr-CA": "fr-CA", + "fr-FR": "fr-FR", + "it-IT": "it-IT", + "pt-BR": "pt-BR", + "zh-CN": "zh-CN" + } + }, + "bing videos": { + "all_locale": null, + "custom": {}, + "data_type": "traits_v1", + "languages": { + "ar": "ar", + "bg": "bg", + "bn": "bn", + "ca": "ca", + "cs": "cs", + "da": "da", + "de": "de", + "en": "en", + "es": "es", + "et": "et", + "eu": "eu", + "fi": "fi", + "fr": "fr", + "gl": "gl", + "gu": "gu", + "he": "he", + "hi": "hi", + "hr": "hr", + "hu": "hu", + "is": "is", + "it": "it", + "ja": "jp", + "kn": "kn", + "ko": "ko", + "lt": "lt", + "lv": "lv", + "ml": "ml", + "mr": "mr", + "ms": "ms", + "nb": "nb", + "nl": "nl", + "pa": "pa", + "pl": "pl", + "pt": "pt-pt", + "ro": "ro", + "ru": "ru", + "sk": "sk", + "sl": "sl", + "sr": "sr", + "sv": "sv", + "ta": "ta", + "te": "te", + "th": "th", + "tr": "tr", + "uk": "uk", + "vi": "vi", + "zh": "zh-hans", + "zh_Hans": "zh-hans", + "zh_Hant": "zh-hant" + }, + "regions": { + "da-DK": "da-DK", + "de-AT": "de-AT", + "de-CH": "de-CH", + "de-DE": "de-DE", + "en-AU": "en-AU", + "en-CA": "en-CA", + "en-GB": "en-GB", + "en-IN": "en-IN", + "en-MY": "en-MY", + "en-NZ": "en-NZ", + "en-PH": "en-PH", + "en-US": "en-US", + "en-ZA": "en-ZA", + "es-AR": "es-AR", + "es-CL": "es-CL", + "es-ES": "es-ES", + "es-MX": "es-MX", + "es-US": "es-US", + "fi-FI": "fi-FI", + "fr-BE": "fr-BE", + "fr-CA": "fr-CA", + "fr-CH": "fr-CH", + "fr-FR": "fr-FR", + "id-ID": "en-ID", + "it-IT": "it-IT", + "ja-JP": "ja-JP", + "ko-KR": "ko-KR", + "nb-NO": "no-NO", + "nl-BE": "nl-BE", + "nl-NL": "nl-NL", + "pl-PL": "pl-PL", + "pt-BR": "pt-BR", + "ru-RU": "ru-RU", + "sv-SE": "sv-SE", + "tr-TR": "tr-TR", + "zh-CN": "zh-CN", + "zh-HK": "zh-HK", + "zh-TW": "zh-TW" + } + }, + "dailymotion": { + "all_locale": null, + "custom": {}, + "data_type": "traits_v1", + "languages": { + "ar": "ar", + "de": "de", + "el": "el", + "en": "en", + "es": "es", + "fr": "fr", + "id": "id", + "it": "it", + "ja": "ja", + "ko": "ko", + "ms": "ms", + "nl": "nl", + "pl": "pl", + "pt": "pt", + "ro": "ro", + "ru": "ru", + "th": "th", + "tr": "tr", + "vi": "vi", + "zh": "zh" + }, + "regions": { + "ar-AE": "ar_AE", + "ar-EG": "ar_EG", + "ar-SA": "ar_SA", + "de-AT": "de_AT", + "de-CH": "de_CH", + "de-DE": "de_DE", + "el-GR": "el_GR", + "en-AU": "en_AU", + "en-CA": "en_CA", + "en-GB": "en_GB", + "en-HK": "en_HK", + "en-IE": "en_IE", + "en-IN": "en_IN", + "en-NG": "en_NG", + "en-PH": "en_PH", + "en-PK": "en_PK", + "en-SG": "en_SG", + "en-US": "en_US", + "en-ZA": "en_ZA", + "es-AR": "es_AR", + "es-ES": "es_ES", + "es-MX": "es_MX", + "fr-BE": "fr_BE", + "fr-CA": "fr_CA", + "fr-CH": "fr_CH", + "fr-CI": "fr_CI", + "fr-FR": "fr_FR", + "fr-MA": "fr_MA", + "fr-SN": "fr_SN", + "fr-TN": "fr_TN", + "id-ID": "id_ID", + "it-CH": "it_CH", + "it-IT": "it_IT", + "ja-JP": "ja_JP", + "ko-KR": "ko_KR", + "ms-MY": "ms_MY", + "nl-BE": "nl_BE", + "nl-NL": "nl_NL", + "pl-PL": "pl_PL", + "pt-BR": "pt_BR", + "pt-PT": "pt_PT", + "ro-RO": "ro_RO", + "ru-RU": "ru_RU", + "th-TH": "th_TH", + "tr-TR": "tr_TR", + "vi-VN": "vi_VN", + "zh-CN": "zh_CN", + "zh-TW": "zh_TW" + } + }, + "duckduckgo": { + "all_locale": "wt-wt", + "custom": { + "lang_region": { + "ar-DZ": "ar_DZ", + "ar-JO": "ar_JO", + "ar-SA": "ar_SA", + "bn-IN": "bn_IN", + "de-CH": "de_CH", + "en-AU": "en_AU", + "en-CA": "en_CA", + "en-GB": "en_GB", + "es-AR": "es_AR", + "es-CL": "es_CL", + "es-CO": "es_CO", + "es-CR": "es_CR", + "es-EC": "es_EC", + "es-MX": "es_MX", + "es-PE": "es_PE", + "es-UY": "es_UY", + "es-VE": "es_VE", + "fr-BE": "fr_BE", + "fr-CA": "fr_CA", + "fr-CH": "fr_CH", + "nl-BE": "nl_BE", + "pt-BR": "pt_BR" + } + }, + "data_type": "traits_v1", + "languages": { + "af": "af_ZA", + "ar": "ar_EG", + "ast": "ast_ES", + "az_Latn": "az_AZ", + "be": "be_BY", + "bg": "bg_BG", + "bn": "bn_BD", + "br": "br_FR", + "bs_Latn": "bs_BA", + "ca": "ca_ES", + "cs": "cs_CZ", + "cy": "cy_GB", + "da": "da_DK", + "de": "de_DE", + "el": "el_GR", + "en": "en_US", + "eo": "eo_XX", + "es": "es_ES", + "et": "et_EE", + "eu": "eu_ES", + "fa": "fa_IR", + "fi": "fi_FI", + "fil": "tl_PH", + "fr": "fr_FR", + "ga": "ga_IE", + "gd": "gd_GB", + "gl": "gl_ES", + "he": "he_IL", + "hi": "hi_IN", + "hr": "hr_HR", + "hu": "hu_HU", + "hy": "hy_AM", + "id": "id_ID", + "is": "is_IS", + "it": "it_IT", + "ja": "ja_JP", + "kab": "kab_DZ", + "kn": "kn_IN", + "ko": "ko_KR", + "ku": "ku", + "kw": "kw_GB", + "lt": "lt_LT", + "lv": "lv_LV", + "ml": "ml_IN", + "mr": "mr_IN", + "ms": "ms_MY", + "nb": "nb_NO", + "nl": "nl_NL", + "nn": "nn_NO", + "pl": "pl_PL", + "pt": "pt_PT", + "ro": "ro_RO", + "ru": "ru_RU", + "sc": "sc_IT", + "si": "si_LK", + "sk": "sk_SK", + "sl": "sl_SI", + "sq": "sq_AL", + "sr_Cyrl": "sr_RS", + "sv": "sv_SE", + "ta": "ta_IN", + "te": "te_IN", + "th": "th_TH", + "tr": "tr_TR", + "uk": "uk_UA", + "ur": "ur_PK", + "vi": "vi_VN", + "zh_Hans": "zh_CN", + "zh_Hant": "zh_TW" + }, + "regions": { + "ar-SA": "xa-ar", + "bg-BG": "bg-bg", + "ca-ES": "es-ca", + "cs-CZ": "cz-cs", + "da-DK": "dk-da", + "de-AT": "at-de", + "de-CH": "ch-de", + "de-DE": "de-de", + "el-GR": "gr-el", + "en-AU": "au-en", + "en-CA": "ca-en", + "en-GB": "uk-en", + "en-IE": "ie-en", + "en-IL": "il-en", + "en-IN": "in-en", + "en-MY": "my-en", + "en-NZ": "nz-en", + "en-PH": "ph-en", + "en-PK": "pk-en", + "en-SG": "sg-en", + "en-US": "us-en", + "en-ZA": "za-en", + "es-AR": "ar-es", + "es-CL": "cl-es", + "es-CO": "co-es", + "es-ES": "es-es", + "es-MX": "mx-es", + "es-PE": "pe-es", + "es-US": "us-es", + "et-EE": "ee-et", + "fi-FI": "fi-fi", + "fr-BE": "be-fr", + "fr-CA": "ca-fr", + "fr-CH": "ch-fr", + "fr-FR": "fr-fr", + "hr-HR": "hr-hr", + "hu-HU": "hu-hu", + "id-ID": "id-en", + "it-IT": "it-it", + "ja-JP": "jp-jp", + "ko-KR": "kr-kr", + "lt-LT": "lt-lt", + "lv-LV": "lv-lv", + "nb-NO": "no-no", + "nl-BE": "be-nl", + "nl-NL": "nl-nl", + "pl-PL": "pl-pl", + "pt-BR": "br-pt", + "pt-PT": "pt-pt", + "ro-RO": "ro-ro", + "ru-RU": "ru-ru", + "sk-SK": "sk-sk", + "sl-SI": "sl-sl", + "sv-SE": "se-sv", + "th-TH": "th-en", + "tr-TR": "tr-tr", + "uk-UA": "ua-uk", + "vi-VN": "vn-en", + "zh-CN": "cn-zh", + "zh-HK": "hk-tzh", + "zh-TW": "tw-tzh" + } + }, + "duckduckgo images": { + "all_locale": "wt-wt", + "custom": { + "lang_region": { + "ar-DZ": "ar_DZ", + "ar-JO": "ar_JO", + "ar-SA": "ar_SA", + "bn-IN": "bn_IN", + "de-CH": "de_CH", + "en-AU": "en_AU", + "en-CA": "en_CA", + "en-GB": "en_GB", + "es-AR": "es_AR", + "es-CL": "es_CL", + "es-CO": "es_CO", + "es-CR": "es_CR", + "es-EC": "es_EC", + "es-MX": "es_MX", + "es-PE": "es_PE", + "es-UY": "es_UY", + "es-VE": "es_VE", + "fr-BE": "fr_BE", + "fr-CA": "fr_CA", + "fr-CH": "fr_CH", + "nl-BE": "nl_BE", + "pt-BR": "pt_BR" + } + }, + "data_type": "traits_v1", + "languages": { + "af": "af_ZA", + "ar": "ar_EG", + "ast": "ast_ES", + "az_Latn": "az_AZ", + "be": "be_BY", + "bg": "bg_BG", + "bn": "bn_BD", + "br": "br_FR", + "bs_Latn": "bs_BA", + "ca": "ca_ES", + "cs": "cs_CZ", + "cy": "cy_GB", + "da": "da_DK", + "de": "de_DE", + "el": "el_GR", + "en": "en_US", + "eo": "eo_XX", + "es": "es_ES", + "et": "et_EE", + "eu": "eu_ES", + "fa": "fa_IR", + "fi": "fi_FI", + "fil": "tl_PH", + "fr": "fr_FR", + "ga": "ga_IE", + "gd": "gd_GB", + "gl": "gl_ES", + "he": "he_IL", + "hi": "hi_IN", + "hr": "hr_HR", + "hu": "hu_HU", + "hy": "hy_AM", + "id": "id_ID", + "is": "is_IS", + "it": "it_IT", + "ja": "ja_JP", + "kab": "kab_DZ", + "kn": "kn_IN", + "ko": "ko_KR", + "ku": "ku", + "kw": "kw_GB", + "lt": "lt_LT", + "lv": "lv_LV", + "ml": "ml_IN", + "mr": "mr_IN", + "ms": "ms_MY", + "nb": "nb_NO", + "nl": "nl_NL", + "nn": "nn_NO", + "pl": "pl_PL", + "pt": "pt_PT", + "ro": "ro_RO", + "ru": "ru_RU", + "sc": "sc_IT", + "si": "si_LK", + "sk": "sk_SK", + "sl": "sl_SI", + "sq": "sq_AL", + "sr_Cyrl": "sr_RS", + "sv": "sv_SE", + "ta": "ta_IN", + "te": "te_IN", + "th": "th_TH", + "tr": "tr_TR", + "uk": "uk_UA", + "ur": "ur_PK", + "vi": "vi_VN", + "zh_Hans": "zh_CN", + "zh_Hant": "zh_TW" + }, + "regions": { + "ar-SA": "xa-ar", + "bg-BG": "bg-bg", + "ca-ES": "es-ca", + "cs-CZ": "cz-cs", + "da-DK": "dk-da", + "de-AT": "at-de", + "de-CH": "ch-de", + "de-DE": "de-de", + "el-GR": "gr-el", + "en-AU": "au-en", + "en-CA": "ca-en", + "en-GB": "uk-en", + "en-IE": "ie-en", + "en-IL": "il-en", + "en-IN": "in-en", + "en-MY": "my-en", + "en-NZ": "nz-en", + "en-PH": "ph-en", + "en-PK": "pk-en", + "en-SG": "sg-en", + "en-US": "us-en", + "en-ZA": "za-en", + "es-AR": "ar-es", + "es-CL": "cl-es", + "es-CO": "co-es", + "es-ES": "es-es", + "es-MX": "mx-es", + "es-PE": "pe-es", + "es-US": "us-es", + "et-EE": "ee-et", + "fi-FI": "fi-fi", + "fr-BE": "be-fr", + "fr-CA": "ca-fr", + "fr-CH": "ch-fr", + "fr-FR": "fr-fr", + "hr-HR": "hr-hr", + "hu-HU": "hu-hu", + "id-ID": "id-en", + "it-IT": "it-it", + "ja-JP": "jp-jp", + "ko-KR": "kr-kr", + "lt-LT": "lt-lt", + "lv-LV": "lv-lv", + "nb-NO": "no-no", + "nl-BE": "be-nl", + "nl-NL": "nl-nl", + "pl-PL": "pl-pl", + "pt-BR": "br-pt", + "pt-PT": "pt-pt", + "ro-RO": "ro-ro", + "ru-RU": "ru-ru", + "sk-SK": "sk-sk", + "sl-SI": "sl-sl", + "sv-SE": "se-sv", + "th-TH": "th-en", + "tr-TR": "tr-tr", + "uk-UA": "ua-uk", + "vi-VN": "vn-en", + "zh-CN": "cn-zh", + "zh-HK": "hk-tzh", + "zh-TW": "tw-tzh" + } + }, + "duckduckgo weather": { + "all_locale": "wt-wt", + "custom": { + "lang_region": { + "ar-DZ": "ar_DZ", + "ar-JO": "ar_JO", + "ar-SA": "ar_SA", + "bn-IN": "bn_IN", + "de-CH": "de_CH", + "en-AU": "en_AU", + "en-CA": "en_CA", + "en-GB": "en_GB", + "es-AR": "es_AR", + "es-CL": "es_CL", + "es-CO": "es_CO", + "es-CR": "es_CR", + "es-EC": "es_EC", + "es-MX": "es_MX", + "es-PE": "es_PE", + "es-UY": "es_UY", + "es-VE": "es_VE", + "fr-BE": "fr_BE", + "fr-CA": "fr_CA", + "fr-CH": "fr_CH", + "nl-BE": "nl_BE", + "pt-BR": "pt_BR" + } + }, + "data_type": "traits_v1", + "languages": { + "af": "af_ZA", + "ar": "ar_EG", + "ast": "ast_ES", + "az_Latn": "az_AZ", + "be": "be_BY", + "bg": "bg_BG", + "bn": "bn_BD", + "br": "br_FR", + "bs_Latn": "bs_BA", + "ca": "ca_ES", + "cs": "cs_CZ", + "cy": "cy_GB", + "da": "da_DK", + "de": "de_DE", + "el": "el_GR", + "en": "en_US", + "eo": "eo_XX", + "es": "es_ES", + "et": "et_EE", + "eu": "eu_ES", + "fa": "fa_IR", + "fi": "fi_FI", + "fil": "tl_PH", + "fr": "fr_FR", + "ga": "ga_IE", + "gd": "gd_GB", + "gl": "gl_ES", + "he": "he_IL", + "hi": "hi_IN", + "hr": "hr_HR", + "hu": "hu_HU", + "hy": "hy_AM", + "id": "id_ID", + "is": "is_IS", + "it": "it_IT", + "ja": "ja_JP", + "kab": "kab_DZ", + "kn": "kn_IN", + "ko": "ko_KR", + "ku": "ku", + "kw": "kw_GB", + "lt": "lt_LT", + "lv": "lv_LV", + "ml": "ml_IN", + "mr": "mr_IN", + "ms": "ms_MY", + "nb": "nb_NO", + "nl": "nl_NL", + "nn": "nn_NO", + "pl": "pl_PL", + "pt": "pt_PT", + "ro": "ro_RO", + "ru": "ru_RU", + "sc": "sc_IT", + "si": "si_LK", + "sk": "sk_SK", + "sl": "sl_SI", + "sq": "sq_AL", + "sr_Cyrl": "sr_RS", + "sv": "sv_SE", + "ta": "ta_IN", + "te": "te_IN", + "th": "th_TH", + "tr": "tr_TR", + "uk": "uk_UA", + "ur": "ur_PK", + "vi": "vi_VN", + "zh_Hans": "zh_CN", + "zh_Hant": "zh_TW" + }, + "regions": { + "ar-SA": "xa-ar", + "bg-BG": "bg-bg", + "ca-ES": "es-ca", + "cs-CZ": "cz-cs", + "da-DK": "dk-da", + "de-AT": "at-de", + "de-CH": "ch-de", + "de-DE": "de-de", + "el-GR": "gr-el", + "en-AU": "au-en", + "en-CA": "ca-en", + "en-GB": "uk-en", + "en-IE": "ie-en", + "en-IL": "il-en", + "en-IN": "in-en", + "en-MY": "my-en", + "en-NZ": "nz-en", + "en-PH": "ph-en", + "en-PK": "pk-en", + "en-SG": "sg-en", + "en-US": "us-en", + "en-ZA": "za-en", + "es-AR": "ar-es", + "es-CL": "cl-es", + "es-CO": "co-es", + "es-ES": "es-es", + "es-MX": "mx-es", + "es-PE": "pe-es", + "es-US": "us-es", + "et-EE": "ee-et", + "fi-FI": "fi-fi", + "fr-BE": "be-fr", + "fr-CA": "ca-fr", + "fr-CH": "ch-fr", + "fr-FR": "fr-fr", + "hr-HR": "hr-hr", + "hu-HU": "hu-hu", + "id-ID": "id-en", + "it-IT": "it-it", + "ja-JP": "jp-jp", + "ko-KR": "kr-kr", + "lt-LT": "lt-lt", + "lv-LV": "lv-lv", + "nb-NO": "no-no", + "nl-BE": "be-nl", + "nl-NL": "nl-nl", + "pl-PL": "pl-pl", + "pt-BR": "br-pt", + "pt-PT": "pt-pt", + "ro-RO": "ro-ro", + "ru-RU": "ru-ru", + "sk-SK": "sk-sk", + "sl-SI": "sl-sl", + "sv-SE": "se-sv", + "th-TH": "th-en", + "tr-TR": "tr-tr", + "uk-UA": "ua-uk", + "vi-VN": "vn-en", + "zh-CN": "cn-zh", + "zh-HK": "hk-tzh", + "zh-TW": "tw-tzh" + } + }, + "google": { + "all_locale": "ZZ", + "custom": { + "supported_domains": { + "AD": "www.google.ad", + "AE": "www.google.ae", + "AF": "www.google.com.af", + "AG": "www.google.com.ag", + "AI": "www.google.com.ai", + "AL": "www.google.al", + "AM": "www.google.am", + "AO": "www.google.co.ao", + "AR": "www.google.com.ar", + "AS": "www.google.as", + "AT": "www.google.at", + "AU": "www.google.com.au", + "AZ": "www.google.az", + "BA": "www.google.ba", + "BD": "www.google.com.bd", + "BE": "www.google.be", + "BF": "www.google.bf", + "BG": "www.google.bg", + "BH": "www.google.com.bh", + "BI": "www.google.bi", + "BJ": "www.google.bj", + "BN": "www.google.com.bn", + "BO": "www.google.com.bo", + "BR": "www.google.com.br", + "BS": "www.google.bs", + "BT": "www.google.bt", + "BW": "www.google.co.bw", + "BY": "www.google.by", + "BZ": "www.google.com.bz", + "CA": "www.google.ca", + "CAT": "www.google.cat", + "CD": "www.google.cd", + "CF": "www.google.cf", + "CG": "www.google.cg", + "CH": "www.google.ch", + "CI": "www.google.ci", + "CK": "www.google.co.ck", + "CL": "www.google.cl", + "CM": "www.google.cm", + "CN": "www.google.com.hk", + "CO": "www.google.com.co", + "CR": "www.google.co.cr", + "CU": "www.google.com.cu", + "CV": "www.google.cv", + "CY": "www.google.com.cy", + "CZ": "www.google.cz", + "DE": "www.google.de", + "DJ": "www.google.dj", + "DK": "www.google.dk", + "DM": "www.google.dm", + "DO": "www.google.com.do", + "DZ": "www.google.dz", + "EC": "www.google.com.ec", + "EE": "www.google.ee", + "EG": "www.google.com.eg", + "ES": "www.google.es", + "ET": "www.google.com.et", + "FI": "www.google.fi", + "FJ": "www.google.com.fj", + "FM": "www.google.fm", + "FR": "www.google.fr", + "GA": "www.google.ga", + "GE": "www.google.ge", + "GG": "www.google.gg", + "GH": "www.google.com.gh", + "GI": "www.google.com.gi", + "GL": "www.google.gl", + "GM": "www.google.gm", + "GR": "www.google.gr", + "GT": "www.google.com.gt", + "GY": "www.google.gy", + "HK": "www.google.com.hk", + "HN": "www.google.hn", + "HR": "www.google.hr", + "HT": "www.google.ht", + "HU": "www.google.hu", + "ID": "www.google.co.id", + "IE": "www.google.ie", + "IL": "www.google.co.il", + "IM": "www.google.im", + "IN": "www.google.co.in", + "IQ": "www.google.iq", + "IS": "www.google.is", + "IT": "www.google.it", + "JE": "www.google.je", + "JM": "www.google.com.jm", + "JO": "www.google.jo", + "JP": "www.google.co.jp", + "KE": "www.google.co.ke", + "KG": "www.google.kg", + "KH": "www.google.com.kh", + "KI": "www.google.ki", + "KR": "www.google.co.kr", + "KW": "www.google.com.kw", + "KZ": "www.google.kz", + "LA": "www.google.la", + "LB": "www.google.com.lb", + "LI": "www.google.li", + "LK": "www.google.lk", + "LS": "www.google.co.ls", + "LT": "www.google.lt", + "LU": "www.google.lu", + "LV": "www.google.lv", + "LY": "www.google.com.ly", + "MA": "www.google.co.ma", + "MD": "www.google.md", + "ME": "www.google.me", + "MG": "www.google.mg", + "MK": "www.google.mk", + "ML": "www.google.ml", + "MM": "www.google.com.mm", + "MN": "www.google.mn", + "MS": "www.google.ms", + "MT": "www.google.com.mt", + "MU": "www.google.mu", + "MV": "www.google.mv", + "MW": "www.google.mw", + "MX": "www.google.com.mx", + "MY": "www.google.com.my", + "MZ": "www.google.co.mz", + "NA": "www.google.com.na", + "NE": "www.google.ne", + "NG": "www.google.com.ng", + "NI": "www.google.com.ni", + "NL": "www.google.nl", + "NO": "www.google.no", + "NP": "www.google.com.np", + "NR": "www.google.nr", + "NU": "www.google.nu", + "NZ": "www.google.co.nz", + "OM": "www.google.com.om", + "PA": "www.google.com.pa", + "PE": "www.google.com.pe", + "PG": "www.google.com.pg", + "PH": "www.google.com.ph", + "PK": "www.google.com.pk", + "PL": "www.google.pl", + "PN": "www.google.pn", + "PR": "www.google.com.pr", + "PS": "www.google.ps", + "PT": "www.google.pt", + "PY": "www.google.com.py", + "QA": "www.google.com.qa", + "RO": "www.google.ro", + "RS": "www.google.rs", + "RU": "www.google.ru", + "RW": "www.google.rw", + "SA": "www.google.com.sa", + "SB": "www.google.com.sb", + "SC": "www.google.sc", + "SE": "www.google.se", + "SG": "www.google.com.sg", + "SH": "www.google.sh", + "SI": "www.google.si", + "SK": "www.google.sk", + "SL": "www.google.com.sl", + "SM": "www.google.sm", + "SN": "www.google.sn", + "SO": "www.google.so", + "SR": "www.google.sr", + "ST": "www.google.st", + "SV": "www.google.com.sv", + "TD": "www.google.td", + "TG": "www.google.tg", + "TH": "www.google.co.th", + "TJ": "www.google.com.tj", + "TL": "www.google.tl", + "TM": "www.google.tm", + "TN": "www.google.tn", + "TO": "www.google.to", + "TR": "www.google.com.tr", + "TT": "www.google.tt", + "TW": "www.google.com.tw", + "TZ": "www.google.co.tz", + "UA": "www.google.com.ua", + "UG": "www.google.co.ug", + "UK": "www.google.co.uk", + "UY": "www.google.com.uy", + "UZ": "www.google.co.uz", + "VC": "www.google.com.vc", + "VE": "www.google.co.ve", + "VG": "www.google.vg", + "VI": "www.google.co.vi", + "VN": "www.google.com.vn", + "VU": "www.google.vu", + "WS": "www.google.ws", + "ZA": "www.google.co.za", + "ZM": "www.google.co.zm", + "ZW": "www.google.co.zw" + } + }, + "data_type": "traits_v1", + "languages": { + "af": "lang_af", + "ar": "lang_ar", + "be": "lang_be", + "bg": "lang_bg", + "ca": "lang_ca", + "cs": "lang_cs", + "da": "lang_da", + "de": "lang_de", + "el": "lang_el", + "en": "lang_en", + "eo": "lang_eo", + "es": "lang_es", + "et": "lang_et", + "fa": "lang_fa", + "fi": "lang_fi", + "fil": "lang_tl", + "fr": "lang_fr", + "he": "lang_iw", + "hi": "lang_hi", + "hr": "lang_hr", + "hu": "lang_hu", + "hy": "lang_hy", + "id": "lang_id", + "is": "lang_is", + "it": "lang_it", + "ja": "lang_ja", + "ko": "lang_ko", + "lt": "lang_lt", + "lv": "lang_lv", + "nb": "lang_no", + "nl": "lang_nl", + "pl": "lang_pl", + "pt": "lang_pt", + "ro": "lang_ro", + "ru": "lang_ru", + "sk": "lang_sk", + "sl": "lang_sl", + "sr": "lang_sr", + "sv": "lang_sv", + "sw": "lang_sw", + "th": "lang_th", + "tr": "lang_tr", + "uk": "lang_uk", + "vi": "lang_vi", + "zh": "lang_zh-CN", + "zh_Hans": "lang_zh-CN", + "zh_Hant": "lang_zh-TW" + }, + "regions": { + "af-ZA": "ZA", + "ar-AE": "AE", + "ar-BH": "BH", + "ar-DJ": "DJ", + "ar-DZ": "DZ", + "ar-EG": "EG", + "ar-IL": "IL", + "ar-IQ": "IQ", + "ar-JO": "JO", + "ar-KW": "KW", + "ar-LB": "LB", + "ar-LY": "LY", + "ar-MA": "MA", + "ar-OM": "OM", + "ar-PS": "PS", + "ar-QA": "QA", + "ar-SA": "SA", + "ar-SO": "SO", + "ar-TD": "TD", + "ar-TN": "TN", + "be-BY": "BY", + "bg-BG": "BG", + "ca-AD": "AD", + "ca-ES": "ES", + "cs-CZ": "CZ", + "da-DK": "DK", + "de-AT": "AT", + "de-BE": "BE", + "de-CH": "CH", + "de-DE": "DE", + "de-LI": "LI", + "de-LU": "LU", + "el-CY": "CY", + "el-GR": "GR", + "en-AG": "AG", + "en-AI": "AI", + "en-AS": "AS", + "en-AU": "AU", + "en-BI": "BI", + "en-BS": "BS", + "en-BW": "BW", + "en-BZ": "BZ", + "en-CA": "CA", + "en-CK": "CK", + "en-CM": "CM", + "en-DM": "DM", + "en-FJ": "FJ", + "en-FM": "FM", + "en-GB": "GB", + "en-GG": "GG", + "en-GH": "GH", + "en-GI": "GI", + "en-GM": "GM", + "en-GY": "GY", + "en-HK": "HK", + "en-IE": "IE", + "en-IM": "IM", + "en-IN": "IN", + "en-JE": "JE", + "en-JM": "JM", + "en-KE": "KE", + "en-KI": "KI", + "en-LS": "LS", + "en-MG": "MG", + "en-MS": "MS", + "en-MT": "MT", + "en-MU": "MU", + "en-MW": "MW", + "en-NA": "NA", + "en-NG": "NG", + "en-NR": "NR", + "en-NU": "NU", + "en-NZ": "NZ", + "en-PG": "PG", + "en-PH": "PH", + "en-PK": "PK", + "en-PN": "PN", + "en-PR": "PR", + "en-RW": "RW", + "en-SB": "SB", + "en-SC": "SC", + "en-SG": "SG", + "en-SH": "SH", + "en-SL": "SL", + "en-TO": "TO", + "en-TT": "TT", + "en-TZ": "TZ", + "en-UG": "UG", + "en-US": "US", + "en-VC": "VC", + "en-VG": "VG", + "en-VI": "VI", + "en-VU": "VU", + "en-WS": "WS", + "en-ZA": "ZA", + "en-ZM": "ZM", + "en-ZW": "ZW", + "es-AR": "AR", + "es-BO": "BO", + "es-CL": "CL", + "es-CO": "CO", + "es-CR": "CR", + "es-CU": "CU", + "es-DO": "DO", + "es-EC": "EC", + "es-ES": "ES", + "es-GT": "GT", + "es-HN": "HN", + "es-MX": "MX", + "es-NI": "NI", + "es-PA": "PA", + "es-PE": "PE", + "es-PR": "PR", + "es-PY": "PY", + "es-SV": "SV", + "es-US": "US", + "es-UY": "UY", + "es-VE": "VE", + "et-EE": "EE", + "fa-AF": "AF", + "fi-FI": "FI", + "fil-PH": "PH", + "fr-BE": "BE", + "fr-BF": "BF", + "fr-BI": "BI", + "fr-BJ": "BJ", + "fr-CA": "CA", + "fr-CD": "CD", + "fr-CF": "CF", + "fr-CG": "CG", + "fr-CH": "CH", + "fr-CI": "CI", + "fr-CM": "CM", + "fr-DJ": "DJ", + "fr-DZ": "DZ", + "fr-FR": "FR", + "fr-GA": "GA", + "fr-HT": "HT", + "fr-LU": "LU", + "fr-MA": "MA", + "fr-MG": "MG", + "fr-ML": "ML", + "fr-MU": "MU", + "fr-NE": "NE", + "fr-RW": "RW", + "fr-SC": "SC", + "fr-SN": "SN", + "fr-TD": "TD", + "fr-TG": "TG", + "fr-TN": "TN", + "fr-VU": "VU", + "he-IL": "IL", + "hi-IN": "IN", + "hr-BA": "BA", + "hr-HR": "HR", + "hu-HU": "HU", + "hy-AM": "AM", + "id-ID": "ID", + "is-IS": "IS", + "it-CH": "CH", + "it-IT": "IT", + "it-SM": "SM", + "ja-JP": "JP", + "ko-KR": "KR", + "lt-LT": "LT", + "lv-LV": "LV", + "nb-NO": "NO", + "nl-BE": "BE", + "nl-NL": "NL", + "nl-SR": "SR", + "pl-PL": "PL", + "pt-AO": "AO", + "pt-BR": "BR", + "pt-CV": "CV", + "pt-MZ": "MZ", + "pt-PT": "PT", + "pt-ST": "ST", + "pt-TL": "TL", + "ro-MD": "MD", + "ro-RO": "RO", + "ru-BY": "BY", + "ru-KG": "KG", + "ru-KZ": "KZ", + "ru-RU": "RU", + "ru-UA": "UA", + "sk-SK": "SK", + "sl-SI": "SI", + "sr-BA": "BA", + "sr-RS": "RS", + "sv-FI": "FI", + "sv-SE": "SE", + "sw-CD": "CD", + "sw-KE": "KE", + "sw-TZ": "TZ", + "sw-UG": "UG", + "th-TH": "TH", + "tr-CY": "CY", + "tr-TR": "TR", + "uk-UA": "UA", + "vi-VN": "VN", + "zh-CN": "HK", + "zh-HK": "HK", + "zh-SG": "SG", + "zh-TW": "TW" + } + }, + "google images": { + "all_locale": "ZZ", + "custom": { + "supported_domains": { + "AD": "www.google.ad", + "AE": "www.google.ae", + "AF": "www.google.com.af", + "AG": "www.google.com.ag", + "AI": "www.google.com.ai", + "AL": "www.google.al", + "AM": "www.google.am", + "AO": "www.google.co.ao", + "AR": "www.google.com.ar", + "AS": "www.google.as", + "AT": "www.google.at", + "AU": "www.google.com.au", + "AZ": "www.google.az", + "BA": "www.google.ba", + "BD": "www.google.com.bd", + "BE": "www.google.be", + "BF": "www.google.bf", + "BG": "www.google.bg", + "BH": "www.google.com.bh", + "BI": "www.google.bi", + "BJ": "www.google.bj", + "BN": "www.google.com.bn", + "BO": "www.google.com.bo", + "BR": "www.google.com.br", + "BS": "www.google.bs", + "BT": "www.google.bt", + "BW": "www.google.co.bw", + "BY": "www.google.by", + "BZ": "www.google.com.bz", + "CA": "www.google.ca", + "CAT": "www.google.cat", + "CD": "www.google.cd", + "CF": "www.google.cf", + "CG": "www.google.cg", + "CH": "www.google.ch", + "CI": "www.google.ci", + "CK": "www.google.co.ck", + "CL": "www.google.cl", + "CM": "www.google.cm", + "CN": "www.google.com.hk", + "CO": "www.google.com.co", + "CR": "www.google.co.cr", + "CU": "www.google.com.cu", + "CV": "www.google.cv", + "CY": "www.google.com.cy", + "CZ": "www.google.cz", + "DE": "www.google.de", + "DJ": "www.google.dj", + "DK": "www.google.dk", + "DM": "www.google.dm", + "DO": "www.google.com.do", + "DZ": "www.google.dz", + "EC": "www.google.com.ec", + "EE": "www.google.ee", + "EG": "www.google.com.eg", + "ES": "www.google.es", + "ET": "www.google.com.et", + "FI": "www.google.fi", + "FJ": "www.google.com.fj", + "FM": "www.google.fm", + "FR": "www.google.fr", + "GA": "www.google.ga", + "GE": "www.google.ge", + "GG": "www.google.gg", + "GH": "www.google.com.gh", + "GI": "www.google.com.gi", + "GL": "www.google.gl", + "GM": "www.google.gm", + "GR": "www.google.gr", + "GT": "www.google.com.gt", + "GY": "www.google.gy", + "HK": "www.google.com.hk", + "HN": "www.google.hn", + "HR": "www.google.hr", + "HT": "www.google.ht", + "HU": "www.google.hu", + "ID": "www.google.co.id", + "IE": "www.google.ie", + "IL": "www.google.co.il", + "IM": "www.google.im", + "IN": "www.google.co.in", + "IQ": "www.google.iq", + "IS": "www.google.is", + "IT": "www.google.it", + "JE": "www.google.je", + "JM": "www.google.com.jm", + "JO": "www.google.jo", + "JP": "www.google.co.jp", + "KE": "www.google.co.ke", + "KG": "www.google.kg", + "KH": "www.google.com.kh", + "KI": "www.google.ki", + "KR": "www.google.co.kr", + "KW": "www.google.com.kw", + "KZ": "www.google.kz", + "LA": "www.google.la", + "LB": "www.google.com.lb", + "LI": "www.google.li", + "LK": "www.google.lk", + "LS": "www.google.co.ls", + "LT": "www.google.lt", + "LU": "www.google.lu", + "LV": "www.google.lv", + "LY": "www.google.com.ly", + "MA": "www.google.co.ma", + "MD": "www.google.md", + "ME": "www.google.me", + "MG": "www.google.mg", + "MK": "www.google.mk", + "ML": "www.google.ml", + "MM": "www.google.com.mm", + "MN": "www.google.mn", + "MS": "www.google.ms", + "MT": "www.google.com.mt", + "MU": "www.google.mu", + "MV": "www.google.mv", + "MW": "www.google.mw", + "MX": "www.google.com.mx", + "MY": "www.google.com.my", + "MZ": "www.google.co.mz", + "NA": "www.google.com.na", + "NE": "www.google.ne", + "NG": "www.google.com.ng", + "NI": "www.google.com.ni", + "NL": "www.google.nl", + "NO": "www.google.no", + "NP": "www.google.com.np", + "NR": "www.google.nr", + "NU": "www.google.nu", + "NZ": "www.google.co.nz", + "OM": "www.google.com.om", + "PA": "www.google.com.pa", + "PE": "www.google.com.pe", + "PG": "www.google.com.pg", + "PH": "www.google.com.ph", + "PK": "www.google.com.pk", + "PL": "www.google.pl", + "PN": "www.google.pn", + "PR": "www.google.com.pr", + "PS": "www.google.ps", + "PT": "www.google.pt", + "PY": "www.google.com.py", + "QA": "www.google.com.qa", + "RO": "www.google.ro", + "RS": "www.google.rs", + "RU": "www.google.ru", + "RW": "www.google.rw", + "SA": "www.google.com.sa", + "SB": "www.google.com.sb", + "SC": "www.google.sc", + "SE": "www.google.se", + "SG": "www.google.com.sg", + "SH": "www.google.sh", + "SI": "www.google.si", + "SK": "www.google.sk", + "SL": "www.google.com.sl", + "SM": "www.google.sm", + "SN": "www.google.sn", + "SO": "www.google.so", + "SR": "www.google.sr", + "ST": "www.google.st", + "SV": "www.google.com.sv", + "TD": "www.google.td", + "TG": "www.google.tg", + "TH": "www.google.co.th", + "TJ": "www.google.com.tj", + "TL": "www.google.tl", + "TM": "www.google.tm", + "TN": "www.google.tn", + "TO": "www.google.to", + "TR": "www.google.com.tr", + "TT": "www.google.tt", + "TW": "www.google.com.tw", + "TZ": "www.google.co.tz", + "UA": "www.google.com.ua", + "UG": "www.google.co.ug", + "UK": "www.google.co.uk", + "UY": "www.google.com.uy", + "UZ": "www.google.co.uz", + "VC": "www.google.com.vc", + "VE": "www.google.co.ve", + "VG": "www.google.vg", + "VI": "www.google.co.vi", + "VN": "www.google.com.vn", + "VU": "www.google.vu", + "WS": "www.google.ws", + "ZA": "www.google.co.za", + "ZM": "www.google.co.zm", + "ZW": "www.google.co.zw" + } + }, + "data_type": "traits_v1", + "languages": { + "af": "lang_af", + "ar": "lang_ar", + "be": "lang_be", + "bg": "lang_bg", + "ca": "lang_ca", + "cs": "lang_cs", + "da": "lang_da", + "de": "lang_de", + "el": "lang_el", + "en": "lang_en", + "eo": "lang_eo", + "es": "lang_es", + "et": "lang_et", + "fa": "lang_fa", + "fi": "lang_fi", + "fil": "lang_tl", + "fr": "lang_fr", + "he": "lang_iw", + "hi": "lang_hi", + "hr": "lang_hr", + "hu": "lang_hu", + "hy": "lang_hy", + "id": "lang_id", + "is": "lang_is", + "it": "lang_it", + "ja": "lang_ja", + "ko": "lang_ko", + "lt": "lang_lt", + "lv": "lang_lv", + "nb": "lang_no", + "nl": "lang_nl", + "pl": "lang_pl", + "pt": "lang_pt", + "ro": "lang_ro", + "ru": "lang_ru", + "sk": "lang_sk", + "sl": "lang_sl", + "sr": "lang_sr", + "sv": "lang_sv", + "sw": "lang_sw", + "th": "lang_th", + "tr": "lang_tr", + "uk": "lang_uk", + "vi": "lang_vi", + "zh": "lang_zh-CN", + "zh_Hans": "lang_zh-CN", + "zh_Hant": "lang_zh-TW" + }, + "regions": { + "af-ZA": "ZA", + "ar-AE": "AE", + "ar-BH": "BH", + "ar-DJ": "DJ", + "ar-DZ": "DZ", + "ar-EG": "EG", + "ar-IL": "IL", + "ar-IQ": "IQ", + "ar-JO": "JO", + "ar-KW": "KW", + "ar-LB": "LB", + "ar-LY": "LY", + "ar-MA": "MA", + "ar-OM": "OM", + "ar-PS": "PS", + "ar-QA": "QA", + "ar-SA": "SA", + "ar-SO": "SO", + "ar-TD": "TD", + "ar-TN": "TN", + "be-BY": "BY", + "bg-BG": "BG", + "ca-AD": "AD", + "ca-ES": "ES", + "cs-CZ": "CZ", + "da-DK": "DK", + "de-AT": "AT", + "de-BE": "BE", + "de-CH": "CH", + "de-DE": "DE", + "de-LI": "LI", + "de-LU": "LU", + "el-CY": "CY", + "el-GR": "GR", + "en-AG": "AG", + "en-AI": "AI", + "en-AS": "AS", + "en-AU": "AU", + "en-BI": "BI", + "en-BS": "BS", + "en-BW": "BW", + "en-BZ": "BZ", + "en-CA": "CA", + "en-CK": "CK", + "en-CM": "CM", + "en-DM": "DM", + "en-FJ": "FJ", + "en-FM": "FM", + "en-GB": "GB", + "en-GG": "GG", + "en-GH": "GH", + "en-GI": "GI", + "en-GM": "GM", + "en-GY": "GY", + "en-HK": "HK", + "en-IE": "IE", + "en-IM": "IM", + "en-IN": "IN", + "en-JE": "JE", + "en-JM": "JM", + "en-KE": "KE", + "en-KI": "KI", + "en-LS": "LS", + "en-MG": "MG", + "en-MS": "MS", + "en-MT": "MT", + "en-MU": "MU", + "en-MW": "MW", + "en-NA": "NA", + "en-NG": "NG", + "en-NR": "NR", + "en-NU": "NU", + "en-NZ": "NZ", + "en-PG": "PG", + "en-PH": "PH", + "en-PK": "PK", + "en-PN": "PN", + "en-PR": "PR", + "en-RW": "RW", + "en-SB": "SB", + "en-SC": "SC", + "en-SG": "SG", + "en-SH": "SH", + "en-SL": "SL", + "en-TO": "TO", + "en-TT": "TT", + "en-TZ": "TZ", + "en-UG": "UG", + "en-US": "US", + "en-VC": "VC", + "en-VG": "VG", + "en-VI": "VI", + "en-VU": "VU", + "en-WS": "WS", + "en-ZA": "ZA", + "en-ZM": "ZM", + "en-ZW": "ZW", + "es-AR": "AR", + "es-BO": "BO", + "es-CL": "CL", + "es-CO": "CO", + "es-CR": "CR", + "es-CU": "CU", + "es-DO": "DO", + "es-EC": "EC", + "es-ES": "ES", + "es-GT": "GT", + "es-HN": "HN", + "es-MX": "MX", + "es-NI": "NI", + "es-PA": "PA", + "es-PE": "PE", + "es-PR": "PR", + "es-PY": "PY", + "es-SV": "SV", + "es-US": "US", + "es-UY": "UY", + "es-VE": "VE", + "et-EE": "EE", + "fa-AF": "AF", + "fi-FI": "FI", + "fil-PH": "PH", + "fr-BE": "BE", + "fr-BF": "BF", + "fr-BI": "BI", + "fr-BJ": "BJ", + "fr-CA": "CA", + "fr-CD": "CD", + "fr-CF": "CF", + "fr-CG": "CG", + "fr-CH": "CH", + "fr-CI": "CI", + "fr-CM": "CM", + "fr-DJ": "DJ", + "fr-DZ": "DZ", + "fr-FR": "FR", + "fr-GA": "GA", + "fr-HT": "HT", + "fr-LU": "LU", + "fr-MA": "MA", + "fr-MG": "MG", + "fr-ML": "ML", + "fr-MU": "MU", + "fr-NE": "NE", + "fr-RW": "RW", + "fr-SC": "SC", + "fr-SN": "SN", + "fr-TD": "TD", + "fr-TG": "TG", + "fr-TN": "TN", + "fr-VU": "VU", + "he-IL": "IL", + "hi-IN": "IN", + "hr-BA": "BA", + "hr-HR": "HR", + "hu-HU": "HU", + "hy-AM": "AM", + "id-ID": "ID", + "is-IS": "IS", + "it-CH": "CH", + "it-IT": "IT", + "it-SM": "SM", + "ja-JP": "JP", + "ko-KR": "KR", + "lt-LT": "LT", + "lv-LV": "LV", + "nb-NO": "NO", + "nl-BE": "BE", + "nl-NL": "NL", + "nl-SR": "SR", + "pl-PL": "PL", + "pt-AO": "AO", + "pt-BR": "BR", + "pt-CV": "CV", + "pt-MZ": "MZ", + "pt-PT": "PT", + "pt-ST": "ST", + "pt-TL": "TL", + "ro-MD": "MD", + "ro-RO": "RO", + "ru-BY": "BY", + "ru-KG": "KG", + "ru-KZ": "KZ", + "ru-RU": "RU", + "ru-UA": "UA", + "sk-SK": "SK", + "sl-SI": "SI", + "sr-BA": "BA", + "sr-RS": "RS", + "sv-FI": "FI", + "sv-SE": "SE", + "sw-CD": "CD", + "sw-KE": "KE", + "sw-TZ": "TZ", + "sw-UG": "UG", + "th-TH": "TH", + "tr-CY": "CY", + "tr-TR": "TR", + "uk-UA": "UA", + "vi-VN": "VN", + "zh-CN": "HK", + "zh-HK": "HK", + "zh-SG": "SG", + "zh-TW": "TW" + } + }, + "google news": { + "all_locale": "ZZ", + "custom": { + "ceid": { + "ar-AE": "AE:ar", + "ar-EG": "EG:ar", + "ar-LB": "LB:ar", + "ar-SA": "SA:ar", + "bg-BG": "BG:bg", + "bn-BD": "BD:bn", + "bn-IN": "IN:bn", + "cs-CZ": "CZ:cs", + "de-AT": "AT:de", + "de-CH": "CH:de", + "de-DE": "DE:de", + "el-GR": "GR:el", + "en-AU": "AU:en", + "en-BW": "BW:en", + "en-CA": "CA:en", + "en-GB": "GB:en", + "en-GH": "GH:en", + "en-IE": "IE:en", + "en-IL": "IL:en", + "en-IN": "IN:en", + "en-KE": "KE:en", + "en-MY": "MY:en", + "en-NA": "NA:en", + "en-NG": "NG:en", + "en-NZ": "NZ:en", + "en-PH": "PH:en", + "en-PK": "PK:en", + "en-SG": "SG:en", + "en-TZ": "TZ:en", + "en-UG": "UG:en", + "en-US": "US:en", + "en-ZA": "ZA:en", + "en-ZW": "ZW:en", + "es-AR": "AR:es-419", + "es-CL": "CL:es-419", + "es-CO": "CO:es-419", + "es-CU": "CU:es-419", + "es-ES": "ES:es", + "es-MX": "MX:es-419", + "es-PE": "PE:es-419", + "es-US": "US:es-419", + "es-VE": "VE:es-419", + "fr-BE": "BE:fr", + "fr-CA": "CA:fr", + "fr-CH": "CH:fr", + "fr-FR": "FR:fr", + "fr-MA": "MA:fr", + "fr-SN": "SN:fr", + "he-IL": "IL:he", + "hi-IN": "IN:hi", + "hu-HU": "HU:hu", + "id-ID": "ID:id", + "it-IT": "IT:it", + "ja-JP": "JP:ja", + "ko-KR": "KR:ko", + "lt-LT": "LT:lt", + "lv-LV": "LV:lv", + "ml-IN": "IN:ml", + "mr-IN": "IN:mr", + "nb-NO": "NO:no", + "nl-BE": "BE:nl", + "nl-NL": "NL:nl", + "pl-PL": "PL:pl", + "pt-BR": "BR:pt-419", + "pt-PT": "PT:pt-150", + "ro-RO": "RO:ro", + "ru-RU": "RU:ru", + "ru-UA": "UA:ru", + "sk-SK": "SK:sk", + "sl-SI": "SI:sl", + "sr-RS": "RS:sr", + "sv-SE": "SE:sv", + "ta-IN": "IN:ta", + "te-IN": "IN:te", + "th-TH": "TH:th", + "tr-TR": "TR:tr", + "uk-UA": "UA:uk", + "vi-VN": "VN:vi", + "zh-CN": "CN:zh-Hans", + "zh-HK": "HK:zh-Hant", + "zh-TW": "TW:zh-Hant" + }, + "supported_domains": {} + }, + "data_type": "traits_v1", + "languages": { + "af": "lang_af", + "ar": "lang_ar", + "be": "lang_be", + "bg": "lang_bg", + "ca": "lang_ca", + "cs": "lang_cs", + "da": "lang_da", + "de": "lang_de", + "el": "lang_el", + "en": "lang_en", + "eo": "lang_eo", + "es": "lang_es", + "et": "lang_et", + "fa": "lang_fa", + "fi": "lang_fi", + "fil": "lang_tl", + "fr": "lang_fr", + "he": "lang_iw", + "hi": "lang_hi", + "hr": "lang_hr", + "hu": "lang_hu", + "hy": "lang_hy", + "id": "lang_id", + "is": "lang_is", + "it": "lang_it", + "ja": "lang_ja", + "ko": "lang_ko", + "lt": "lang_lt", + "lv": "lang_lv", + "nb": "lang_no", + "nl": "lang_nl", + "pl": "lang_pl", + "pt": "lang_pt", + "ro": "lang_ro", + "ru": "lang_ru", + "sk": "lang_sk", + "sl": "lang_sl", + "sr": "lang_sr", + "sv": "lang_sv", + "sw": "lang_sw", + "th": "lang_th", + "tr": "lang_tr", + "uk": "lang_uk", + "vi": "lang_vi", + "zh": "lang_zh-CN", + "zh_Hans": "lang_zh-CN", + "zh_Hant": "lang_zh-TW" + }, + "regions": { + "af-ZA": "ZA", + "ar-AE": "AE", + "ar-BH": "BH", + "ar-DJ": "DJ", + "ar-DZ": "DZ", + "ar-EG": "EG", + "ar-IL": "IL", + "ar-IQ": "IQ", + "ar-JO": "JO", + "ar-KW": "KW", + "ar-LB": "LB", + "ar-LY": "LY", + "ar-MA": "MA", + "ar-OM": "OM", + "ar-PS": "PS", + "ar-QA": "QA", + "ar-SA": "SA", + "ar-SO": "SO", + "ar-TD": "TD", + "ar-TN": "TN", + "be-BY": "BY", + "bg-BG": "BG", + "ca-AD": "AD", + "ca-ES": "ES", + "cs-CZ": "CZ", + "da-DK": "DK", + "de-AT": "AT", + "de-BE": "BE", + "de-CH": "CH", + "de-DE": "DE", + "de-LI": "LI", + "de-LU": "LU", + "el-CY": "CY", + "el-GR": "GR", + "en-AG": "AG", + "en-AI": "AI", + "en-AS": "AS", + "en-AU": "AU", + "en-BI": "BI", + "en-BS": "BS", + "en-BW": "BW", + "en-BZ": "BZ", + "en-CA": "CA", + "en-CK": "CK", + "en-CM": "CM", + "en-DM": "DM", + "en-FJ": "FJ", + "en-FM": "FM", + "en-GB": "GB", + "en-GG": "GG", + "en-GH": "GH", + "en-GI": "GI", + "en-GM": "GM", + "en-GY": "GY", + "en-HK": "HK", + "en-IE": "IE", + "en-IM": "IM", + "en-IN": "IN", + "en-JE": "JE", + "en-JM": "JM", + "en-KE": "KE", + "en-KI": "KI", + "en-LS": "LS", + "en-MG": "MG", + "en-MS": "MS", + "en-MT": "MT", + "en-MU": "MU", + "en-MW": "MW", + "en-NA": "NA", + "en-NG": "NG", + "en-NR": "NR", + "en-NU": "NU", + "en-NZ": "NZ", + "en-PG": "PG", + "en-PH": "PH", + "en-PK": "PK", + "en-PN": "PN", + "en-PR": "PR", + "en-RW": "RW", + "en-SB": "SB", + "en-SC": "SC", + "en-SG": "SG", + "en-SH": "SH", + "en-SL": "SL", + "en-TO": "TO", + "en-TT": "TT", + "en-TZ": "TZ", + "en-UG": "UG", + "en-US": "US", + "en-VC": "VC", + "en-VG": "VG", + "en-VI": "VI", + "en-VU": "VU", + "en-WS": "WS", + "en-ZA": "ZA", + "en-ZM": "ZM", + "en-ZW": "ZW", + "es-AR": "AR", + "es-BO": "BO", + "es-CL": "CL", + "es-CO": "CO", + "es-CR": "CR", + "es-CU": "CU", + "es-DO": "DO", + "es-EC": "EC", + "es-ES": "ES", + "es-GT": "GT", + "es-HN": "HN", + "es-MX": "MX", + "es-NI": "NI", + "es-PA": "PA", + "es-PE": "PE", + "es-PR": "PR", + "es-PY": "PY", + "es-SV": "SV", + "es-US": "US", + "es-UY": "UY", + "es-VE": "VE", + "et-EE": "EE", + "fa-AF": "AF", + "fi-FI": "FI", + "fil-PH": "PH", + "fr-BE": "BE", + "fr-BF": "BF", + "fr-BI": "BI", + "fr-BJ": "BJ", + "fr-CA": "CA", + "fr-CD": "CD", + "fr-CF": "CF", + "fr-CG": "CG", + "fr-CH": "CH", + "fr-CI": "CI", + "fr-CM": "CM", + "fr-DJ": "DJ", + "fr-DZ": "DZ", + "fr-FR": "FR", + "fr-GA": "GA", + "fr-HT": "HT", + "fr-LU": "LU", + "fr-MA": "MA", + "fr-MG": "MG", + "fr-ML": "ML", + "fr-MU": "MU", + "fr-NE": "NE", + "fr-RW": "RW", + "fr-SC": "SC", + "fr-SN": "SN", + "fr-TD": "TD", + "fr-TG": "TG", + "fr-TN": "TN", + "fr-VU": "VU", + "he-IL": "IL", + "hi-IN": "IN", + "hr-BA": "BA", + "hr-HR": "HR", + "hu-HU": "HU", + "hy-AM": "AM", + "id-ID": "ID", + "is-IS": "IS", + "it-CH": "CH", + "it-IT": "IT", + "it-SM": "SM", + "ja-JP": "JP", + "ko-KR": "KR", + "lt-LT": "LT", + "lv-LV": "LV", + "nb-NO": "NO", + "nl-BE": "BE", + "nl-NL": "NL", + "nl-SR": "SR", + "pl-PL": "PL", + "pt-AO": "AO", + "pt-BR": "BR", + "pt-CV": "CV", + "pt-MZ": "MZ", + "pt-PT": "PT", + "pt-ST": "ST", + "pt-TL": "TL", + "ro-MD": "MD", + "ro-RO": "RO", + "ru-BY": "BY", + "ru-KG": "KG", + "ru-KZ": "KZ", + "ru-RU": "RU", + "ru-UA": "UA", + "sk-SK": "SK", + "sl-SI": "SI", + "sr-BA": "BA", + "sr-RS": "RS", + "sv-FI": "FI", + "sv-SE": "SE", + "sw-CD": "CD", + "sw-KE": "KE", + "sw-TZ": "TZ", + "sw-UG": "UG", + "th-TH": "TH", + "tr-CY": "CY", + "tr-TR": "TR", + "uk-UA": "UA", + "vi-VN": "VN", + "zh-CN": "HK", + "zh-HK": "HK", + "zh-SG": "SG", + "zh-TW": "TW" + } + }, + "google scholar": { + "all_locale": "ZZ", + "custom": { + "supported_domains": { + "AD": "www.google.ad", + "AE": "www.google.ae", + "AF": "www.google.com.af", + "AG": "www.google.com.ag", + "AI": "www.google.com.ai", + "AL": "www.google.al", + "AM": "www.google.am", + "AO": "www.google.co.ao", + "AR": "www.google.com.ar", + "AS": "www.google.as", + "AT": "www.google.at", + "AU": "www.google.com.au", + "AZ": "www.google.az", + "BA": "www.google.ba", + "BD": "www.google.com.bd", + "BE": "www.google.be", + "BF": "www.google.bf", + "BG": "www.google.bg", + "BH": "www.google.com.bh", + "BI": "www.google.bi", + "BJ": "www.google.bj", + "BN": "www.google.com.bn", + "BO": "www.google.com.bo", + "BR": "www.google.com.br", + "BS": "www.google.bs", + "BT": "www.google.bt", + "BW": "www.google.co.bw", + "BY": "www.google.by", + "BZ": "www.google.com.bz", + "CA": "www.google.ca", + "CAT": "www.google.cat", + "CD": "www.google.cd", + "CF": "www.google.cf", + "CG": "www.google.cg", + "CH": "www.google.ch", + "CI": "www.google.ci", + "CK": "www.google.co.ck", + "CL": "www.google.cl", + "CM": "www.google.cm", + "CN": "www.google.com.hk", + "CO": "www.google.com.co", + "CR": "www.google.co.cr", + "CU": "www.google.com.cu", + "CV": "www.google.cv", + "CY": "www.google.com.cy", + "CZ": "www.google.cz", + "DE": "www.google.de", + "DJ": "www.google.dj", + "DK": "www.google.dk", + "DM": "www.google.dm", + "DO": "www.google.com.do", + "DZ": "www.google.dz", + "EC": "www.google.com.ec", + "EE": "www.google.ee", + "EG": "www.google.com.eg", + "ES": "www.google.es", + "ET": "www.google.com.et", + "FI": "www.google.fi", + "FJ": "www.google.com.fj", + "FM": "www.google.fm", + "FR": "www.google.fr", + "GA": "www.google.ga", + "GE": "www.google.ge", + "GG": "www.google.gg", + "GH": "www.google.com.gh", + "GI": "www.google.com.gi", + "GL": "www.google.gl", + "GM": "www.google.gm", + "GR": "www.google.gr", + "GT": "www.google.com.gt", + "GY": "www.google.gy", + "HK": "www.google.com.hk", + "HN": "www.google.hn", + "HR": "www.google.hr", + "HT": "www.google.ht", + "HU": "www.google.hu", + "ID": "www.google.co.id", + "IE": "www.google.ie", + "IL": "www.google.co.il", + "IM": "www.google.im", + "IN": "www.google.co.in", + "IQ": "www.google.iq", + "IS": "www.google.is", + "IT": "www.google.it", + "JE": "www.google.je", + "JM": "www.google.com.jm", + "JO": "www.google.jo", + "JP": "www.google.co.jp", + "KE": "www.google.co.ke", + "KG": "www.google.kg", + "KH": "www.google.com.kh", + "KI": "www.google.ki", + "KR": "www.google.co.kr", + "KW": "www.google.com.kw", + "KZ": "www.google.kz", + "LA": "www.google.la", + "LB": "www.google.com.lb", + "LI": "www.google.li", + "LK": "www.google.lk", + "LS": "www.google.co.ls", + "LT": "www.google.lt", + "LU": "www.google.lu", + "LV": "www.google.lv", + "LY": "www.google.com.ly", + "MA": "www.google.co.ma", + "MD": "www.google.md", + "ME": "www.google.me", + "MG": "www.google.mg", + "MK": "www.google.mk", + "ML": "www.google.ml", + "MM": "www.google.com.mm", + "MN": "www.google.mn", + "MS": "www.google.ms", + "MT": "www.google.com.mt", + "MU": "www.google.mu", + "MV": "www.google.mv", + "MW": "www.google.mw", + "MX": "www.google.com.mx", + "MY": "www.google.com.my", + "MZ": "www.google.co.mz", + "NA": "www.google.com.na", + "NE": "www.google.ne", + "NG": "www.google.com.ng", + "NI": "www.google.com.ni", + "NL": "www.google.nl", + "NO": "www.google.no", + "NP": "www.google.com.np", + "NR": "www.google.nr", + "NU": "www.google.nu", + "NZ": "www.google.co.nz", + "OM": "www.google.com.om", + "PA": "www.google.com.pa", + "PE": "www.google.com.pe", + "PG": "www.google.com.pg", + "PH": "www.google.com.ph", + "PK": "www.google.com.pk", + "PL": "www.google.pl", + "PN": "www.google.pn", + "PR": "www.google.com.pr", + "PS": "www.google.ps", + "PT": "www.google.pt", + "PY": "www.google.com.py", + "QA": "www.google.com.qa", + "RO": "www.google.ro", + "RS": "www.google.rs", + "RU": "www.google.ru", + "RW": "www.google.rw", + "SA": "www.google.com.sa", + "SB": "www.google.com.sb", + "SC": "www.google.sc", + "SE": "www.google.se", + "SG": "www.google.com.sg", + "SH": "www.google.sh", + "SI": "www.google.si", + "SK": "www.google.sk", + "SL": "www.google.com.sl", + "SM": "www.google.sm", + "SN": "www.google.sn", + "SO": "www.google.so", + "SR": "www.google.sr", + "ST": "www.google.st", + "SV": "www.google.com.sv", + "TD": "www.google.td", + "TG": "www.google.tg", + "TH": "www.google.co.th", + "TJ": "www.google.com.tj", + "TL": "www.google.tl", + "TM": "www.google.tm", + "TN": "www.google.tn", + "TO": "www.google.to", + "TR": "www.google.com.tr", + "TT": "www.google.tt", + "TW": "www.google.com.tw", + "TZ": "www.google.co.tz", + "UA": "www.google.com.ua", + "UG": "www.google.co.ug", + "UK": "www.google.co.uk", + "UY": "www.google.com.uy", + "UZ": "www.google.co.uz", + "VC": "www.google.com.vc", + "VE": "www.google.co.ve", + "VG": "www.google.vg", + "VI": "www.google.co.vi", + "VN": "www.google.com.vn", + "VU": "www.google.vu", + "WS": "www.google.ws", + "ZA": "www.google.co.za", + "ZM": "www.google.co.zm", + "ZW": "www.google.co.zw" + } + }, + "data_type": "traits_v1", + "languages": { + "af": "lang_af", + "ar": "lang_ar", + "be": "lang_be", + "bg": "lang_bg", + "ca": "lang_ca", + "cs": "lang_cs", + "da": "lang_da", + "de": "lang_de", + "el": "lang_el", + "en": "lang_en", + "eo": "lang_eo", + "es": "lang_es", + "et": "lang_et", + "fa": "lang_fa", + "fi": "lang_fi", + "fil": "lang_tl", + "fr": "lang_fr", + "he": "lang_iw", + "hi": "lang_hi", + "hr": "lang_hr", + "hu": "lang_hu", + "hy": "lang_hy", + "id": "lang_id", + "is": "lang_is", + "it": "lang_it", + "ja": "lang_ja", + "ko": "lang_ko", + "lt": "lang_lt", + "lv": "lang_lv", + "nb": "lang_no", + "nl": "lang_nl", + "pl": "lang_pl", + "pt": "lang_pt", + "ro": "lang_ro", + "ru": "lang_ru", + "sk": "lang_sk", + "sl": "lang_sl", + "sr": "lang_sr", + "sv": "lang_sv", + "sw": "lang_sw", + "th": "lang_th", + "tr": "lang_tr", + "uk": "lang_uk", + "vi": "lang_vi", + "zh": "lang_zh-CN", + "zh_Hans": "lang_zh-CN", + "zh_Hant": "lang_zh-TW" + }, + "regions": { + "af-ZA": "ZA", + "ar-AE": "AE", + "ar-BH": "BH", + "ar-DJ": "DJ", + "ar-DZ": "DZ", + "ar-EG": "EG", + "ar-IL": "IL", + "ar-IQ": "IQ", + "ar-JO": "JO", + "ar-KW": "KW", + "ar-LB": "LB", + "ar-LY": "LY", + "ar-MA": "MA", + "ar-OM": "OM", + "ar-PS": "PS", + "ar-QA": "QA", + "ar-SA": "SA", + "ar-SO": "SO", + "ar-TD": "TD", + "ar-TN": "TN", + "be-BY": "BY", + "bg-BG": "BG", + "ca-AD": "AD", + "ca-ES": "ES", + "cs-CZ": "CZ", + "da-DK": "DK", + "de-AT": "AT", + "de-BE": "BE", + "de-CH": "CH", + "de-DE": "DE", + "de-LI": "LI", + "de-LU": "LU", + "el-CY": "CY", + "el-GR": "GR", + "en-AG": "AG", + "en-AI": "AI", + "en-AS": "AS", + "en-AU": "AU", + "en-BI": "BI", + "en-BS": "BS", + "en-BW": "BW", + "en-BZ": "BZ", + "en-CA": "CA", + "en-CK": "CK", + "en-CM": "CM", + "en-DM": "DM", + "en-FJ": "FJ", + "en-FM": "FM", + "en-GB": "GB", + "en-GG": "GG", + "en-GH": "GH", + "en-GI": "GI", + "en-GM": "GM", + "en-GY": "GY", + "en-HK": "HK", + "en-IE": "IE", + "en-IM": "IM", + "en-IN": "IN", + "en-JE": "JE", + "en-JM": "JM", + "en-KE": "KE", + "en-KI": "KI", + "en-LS": "LS", + "en-MG": "MG", + "en-MS": "MS", + "en-MT": "MT", + "en-MU": "MU", + "en-MW": "MW", + "en-NA": "NA", + "en-NG": "NG", + "en-NR": "NR", + "en-NU": "NU", + "en-NZ": "NZ", + "en-PG": "PG", + "en-PH": "PH", + "en-PK": "PK", + "en-PN": "PN", + "en-PR": "PR", + "en-RW": "RW", + "en-SB": "SB", + "en-SC": "SC", + "en-SG": "SG", + "en-SH": "SH", + "en-SL": "SL", + "en-TO": "TO", + "en-TT": "TT", + "en-TZ": "TZ", + "en-UG": "UG", + "en-US": "US", + "en-VC": "VC", + "en-VG": "VG", + "en-VI": "VI", + "en-VU": "VU", + "en-WS": "WS", + "en-ZA": "ZA", + "en-ZM": "ZM", + "en-ZW": "ZW", + "es-AR": "AR", + "es-BO": "BO", + "es-CL": "CL", + "es-CO": "CO", + "es-CR": "CR", + "es-CU": "CU", + "es-DO": "DO", + "es-EC": "EC", + "es-ES": "ES", + "es-GT": "GT", + "es-HN": "HN", + "es-MX": "MX", + "es-NI": "NI", + "es-PA": "PA", + "es-PE": "PE", + "es-PR": "PR", + "es-PY": "PY", + "es-SV": "SV", + "es-US": "US", + "es-UY": "UY", + "es-VE": "VE", + "et-EE": "EE", + "fa-AF": "AF", + "fi-FI": "FI", + "fil-PH": "PH", + "fr-BE": "BE", + "fr-BF": "BF", + "fr-BI": "BI", + "fr-BJ": "BJ", + "fr-CA": "CA", + "fr-CD": "CD", + "fr-CF": "CF", + "fr-CG": "CG", + "fr-CH": "CH", + "fr-CI": "CI", + "fr-CM": "CM", + "fr-DJ": "DJ", + "fr-DZ": "DZ", + "fr-FR": "FR", + "fr-GA": "GA", + "fr-HT": "HT", + "fr-LU": "LU", + "fr-MA": "MA", + "fr-MG": "MG", + "fr-ML": "ML", + "fr-MU": "MU", + "fr-NE": "NE", + "fr-RW": "RW", + "fr-SC": "SC", + "fr-SN": "SN", + "fr-TD": "TD", + "fr-TG": "TG", + "fr-TN": "TN", + "fr-VU": "VU", + "he-IL": "IL", + "hi-IN": "IN", + "hr-BA": "BA", + "hr-HR": "HR", + "hu-HU": "HU", + "hy-AM": "AM", + "id-ID": "ID", + "is-IS": "IS", + "it-CH": "CH", + "it-IT": "IT", + "it-SM": "SM", + "ja-JP": "JP", + "ko-KR": "KR", + "lt-LT": "LT", + "lv-LV": "LV", + "nb-NO": "NO", + "nl-BE": "BE", + "nl-NL": "NL", + "nl-SR": "SR", + "pl-PL": "PL", + "pt-AO": "AO", + "pt-BR": "BR", + "pt-CV": "CV", + "pt-MZ": "MZ", + "pt-PT": "PT", + "pt-ST": "ST", + "pt-TL": "TL", + "ro-MD": "MD", + "ro-RO": "RO", + "ru-BY": "BY", + "ru-KG": "KG", + "ru-KZ": "KZ", + "ru-RU": "RU", + "ru-UA": "UA", + "sk-SK": "SK", + "sl-SI": "SI", + "sr-BA": "BA", + "sr-RS": "RS", + "sv-FI": "FI", + "sv-SE": "SE", + "sw-CD": "CD", + "sw-KE": "KE", + "sw-TZ": "TZ", + "sw-UG": "UG", + "th-TH": "TH", + "tr-CY": "CY", + "tr-TR": "TR", + "uk-UA": "UA", + "vi-VN": "VN", + "zh-CN": "HK", + "zh-HK": "HK", + "zh-SG": "SG", + "zh-TW": "TW" + } + }, + "google videos": { + "all_locale": "ZZ", + "custom": { + "supported_domains": { + "AD": "www.google.ad", + "AE": "www.google.ae", + "AF": "www.google.com.af", + "AG": "www.google.com.ag", + "AI": "www.google.com.ai", + "AL": "www.google.al", + "AM": "www.google.am", + "AO": "www.google.co.ao", + "AR": "www.google.com.ar", + "AS": "www.google.as", + "AT": "www.google.at", + "AU": "www.google.com.au", + "AZ": "www.google.az", + "BA": "www.google.ba", + "BD": "www.google.com.bd", + "BE": "www.google.be", + "BF": "www.google.bf", + "BG": "www.google.bg", + "BH": "www.google.com.bh", + "BI": "www.google.bi", + "BJ": "www.google.bj", + "BN": "www.google.com.bn", + "BO": "www.google.com.bo", + "BR": "www.google.com.br", + "BS": "www.google.bs", + "BT": "www.google.bt", + "BW": "www.google.co.bw", + "BY": "www.google.by", + "BZ": "www.google.com.bz", + "CA": "www.google.ca", + "CAT": "www.google.cat", + "CD": "www.google.cd", + "CF": "www.google.cf", + "CG": "www.google.cg", + "CH": "www.google.ch", + "CI": "www.google.ci", + "CK": "www.google.co.ck", + "CL": "www.google.cl", + "CM": "www.google.cm", + "CN": "www.google.com.hk", + "CO": "www.google.com.co", + "CR": "www.google.co.cr", + "CU": "www.google.com.cu", + "CV": "www.google.cv", + "CY": "www.google.com.cy", + "CZ": "www.google.cz", + "DE": "www.google.de", + "DJ": "www.google.dj", + "DK": "www.google.dk", + "DM": "www.google.dm", + "DO": "www.google.com.do", + "DZ": "www.google.dz", + "EC": "www.google.com.ec", + "EE": "www.google.ee", + "EG": "www.google.com.eg", + "ES": "www.google.es", + "ET": "www.google.com.et", + "FI": "www.google.fi", + "FJ": "www.google.com.fj", + "FM": "www.google.fm", + "FR": "www.google.fr", + "GA": "www.google.ga", + "GE": "www.google.ge", + "GG": "www.google.gg", + "GH": "www.google.com.gh", + "GI": "www.google.com.gi", + "GL": "www.google.gl", + "GM": "www.google.gm", + "GR": "www.google.gr", + "GT": "www.google.com.gt", + "GY": "www.google.gy", + "HK": "www.google.com.hk", + "HN": "www.google.hn", + "HR": "www.google.hr", + "HT": "www.google.ht", + "HU": "www.google.hu", + "ID": "www.google.co.id", + "IE": "www.google.ie", + "IL": "www.google.co.il", + "IM": "www.google.im", + "IN": "www.google.co.in", + "IQ": "www.google.iq", + "IS": "www.google.is", + "IT": "www.google.it", + "JE": "www.google.je", + "JM": "www.google.com.jm", + "JO": "www.google.jo", + "JP": "www.google.co.jp", + "KE": "www.google.co.ke", + "KG": "www.google.kg", + "KH": "www.google.com.kh", + "KI": "www.google.ki", + "KR": "www.google.co.kr", + "KW": "www.google.com.kw", + "KZ": "www.google.kz", + "LA": "www.google.la", + "LB": "www.google.com.lb", + "LI": "www.google.li", + "LK": "www.google.lk", + "LS": "www.google.co.ls", + "LT": "www.google.lt", + "LU": "www.google.lu", + "LV": "www.google.lv", + "LY": "www.google.com.ly", + "MA": "www.google.co.ma", + "MD": "www.google.md", + "ME": "www.google.me", + "MG": "www.google.mg", + "MK": "www.google.mk", + "ML": "www.google.ml", + "MM": "www.google.com.mm", + "MN": "www.google.mn", + "MS": "www.google.ms", + "MT": "www.google.com.mt", + "MU": "www.google.mu", + "MV": "www.google.mv", + "MW": "www.google.mw", + "MX": "www.google.com.mx", + "MY": "www.google.com.my", + "MZ": "www.google.co.mz", + "NA": "www.google.com.na", + "NE": "www.google.ne", + "NG": "www.google.com.ng", + "NI": "www.google.com.ni", + "NL": "www.google.nl", + "NO": "www.google.no", + "NP": "www.google.com.np", + "NR": "www.google.nr", + "NU": "www.google.nu", + "NZ": "www.google.co.nz", + "OM": "www.google.com.om", + "PA": "www.google.com.pa", + "PE": "www.google.com.pe", + "PG": "www.google.com.pg", + "PH": "www.google.com.ph", + "PK": "www.google.com.pk", + "PL": "www.google.pl", + "PN": "www.google.pn", + "PR": "www.google.com.pr", + "PS": "www.google.ps", + "PT": "www.google.pt", + "PY": "www.google.com.py", + "QA": "www.google.com.qa", + "RO": "www.google.ro", + "RS": "www.google.rs", + "RU": "www.google.ru", + "RW": "www.google.rw", + "SA": "www.google.com.sa", + "SB": "www.google.com.sb", + "SC": "www.google.sc", + "SE": "www.google.se", + "SG": "www.google.com.sg", + "SH": "www.google.sh", + "SI": "www.google.si", + "SK": "www.google.sk", + "SL": "www.google.com.sl", + "SM": "www.google.sm", + "SN": "www.google.sn", + "SO": "www.google.so", + "SR": "www.google.sr", + "ST": "www.google.st", + "SV": "www.google.com.sv", + "TD": "www.google.td", + "TG": "www.google.tg", + "TH": "www.google.co.th", + "TJ": "www.google.com.tj", + "TL": "www.google.tl", + "TM": "www.google.tm", + "TN": "www.google.tn", + "TO": "www.google.to", + "TR": "www.google.com.tr", + "TT": "www.google.tt", + "TW": "www.google.com.tw", + "TZ": "www.google.co.tz", + "UA": "www.google.com.ua", + "UG": "www.google.co.ug", + "UK": "www.google.co.uk", + "UY": "www.google.com.uy", + "UZ": "www.google.co.uz", + "VC": "www.google.com.vc", + "VE": "www.google.co.ve", + "VG": "www.google.vg", + "VI": "www.google.co.vi", + "VN": "www.google.com.vn", + "VU": "www.google.vu", + "WS": "www.google.ws", + "ZA": "www.google.co.za", + "ZM": "www.google.co.zm", + "ZW": "www.google.co.zw" + } + }, + "data_type": "traits_v1", + "languages": { + "af": "lang_af", + "ar": "lang_ar", + "be": "lang_be", + "bg": "lang_bg", + "ca": "lang_ca", + "cs": "lang_cs", + "da": "lang_da", + "de": "lang_de", + "el": "lang_el", + "en": "lang_en", + "eo": "lang_eo", + "es": "lang_es", + "et": "lang_et", + "fa": "lang_fa", + "fi": "lang_fi", + "fil": "lang_tl", + "fr": "lang_fr", + "he": "lang_iw", + "hi": "lang_hi", + "hr": "lang_hr", + "hu": "lang_hu", + "hy": "lang_hy", + "id": "lang_id", + "is": "lang_is", + "it": "lang_it", + "ja": "lang_ja", + "ko": "lang_ko", + "lt": "lang_lt", + "lv": "lang_lv", + "nb": "lang_no", + "nl": "lang_nl", + "pl": "lang_pl", + "pt": "lang_pt", + "ro": "lang_ro", + "ru": "lang_ru", + "sk": "lang_sk", + "sl": "lang_sl", + "sr": "lang_sr", + "sv": "lang_sv", + "sw": "lang_sw", + "th": "lang_th", + "tr": "lang_tr", + "uk": "lang_uk", + "vi": "lang_vi", + "zh": "lang_zh-CN", + "zh_Hans": "lang_zh-CN", + "zh_Hant": "lang_zh-TW" + }, + "regions": { + "af-ZA": "ZA", + "ar-AE": "AE", + "ar-BH": "BH", + "ar-DJ": "DJ", + "ar-DZ": "DZ", + "ar-EG": "EG", + "ar-IL": "IL", + "ar-IQ": "IQ", + "ar-JO": "JO", + "ar-KW": "KW", + "ar-LB": "LB", + "ar-LY": "LY", + "ar-MA": "MA", + "ar-OM": "OM", + "ar-PS": "PS", + "ar-QA": "QA", + "ar-SA": "SA", + "ar-SO": "SO", + "ar-TD": "TD", + "ar-TN": "TN", + "be-BY": "BY", + "bg-BG": "BG", + "ca-AD": "AD", + "ca-ES": "ES", + "cs-CZ": "CZ", + "da-DK": "DK", + "de-AT": "AT", + "de-BE": "BE", + "de-CH": "CH", + "de-DE": "DE", + "de-LI": "LI", + "de-LU": "LU", + "el-CY": "CY", + "el-GR": "GR", + "en-AG": "AG", + "en-AI": "AI", + "en-AS": "AS", + "en-AU": "AU", + "en-BI": "BI", + "en-BS": "BS", + "en-BW": "BW", + "en-BZ": "BZ", + "en-CA": "CA", + "en-CK": "CK", + "en-CM": "CM", + "en-DM": "DM", + "en-FJ": "FJ", + "en-FM": "FM", + "en-GB": "GB", + "en-GG": "GG", + "en-GH": "GH", + "en-GI": "GI", + "en-GM": "GM", + "en-GY": "GY", + "en-HK": "HK", + "en-IE": "IE", + "en-IM": "IM", + "en-IN": "IN", + "en-JE": "JE", + "en-JM": "JM", + "en-KE": "KE", + "en-KI": "KI", + "en-LS": "LS", + "en-MG": "MG", + "en-MS": "MS", + "en-MT": "MT", + "en-MU": "MU", + "en-MW": "MW", + "en-NA": "NA", + "en-NG": "NG", + "en-NR": "NR", + "en-NU": "NU", + "en-NZ": "NZ", + "en-PG": "PG", + "en-PH": "PH", + "en-PK": "PK", + "en-PN": "PN", + "en-PR": "PR", + "en-RW": "RW", + "en-SB": "SB", + "en-SC": "SC", + "en-SG": "SG", + "en-SH": "SH", + "en-SL": "SL", + "en-TO": "TO", + "en-TT": "TT", + "en-TZ": "TZ", + "en-UG": "UG", + "en-US": "US", + "en-VC": "VC", + "en-VG": "VG", + "en-VI": "VI", + "en-VU": "VU", + "en-WS": "WS", + "en-ZA": "ZA", + "en-ZM": "ZM", + "en-ZW": "ZW", + "es-AR": "AR", + "es-BO": "BO", + "es-CL": "CL", + "es-CO": "CO", + "es-CR": "CR", + "es-CU": "CU", + "es-DO": "DO", + "es-EC": "EC", + "es-ES": "ES", + "es-GT": "GT", + "es-HN": "HN", + "es-MX": "MX", + "es-NI": "NI", + "es-PA": "PA", + "es-PE": "PE", + "es-PR": "PR", + "es-PY": "PY", + "es-SV": "SV", + "es-US": "US", + "es-UY": "UY", + "es-VE": "VE", + "et-EE": "EE", + "fa-AF": "AF", + "fi-FI": "FI", + "fil-PH": "PH", + "fr-BE": "BE", + "fr-BF": "BF", + "fr-BI": "BI", + "fr-BJ": "BJ", + "fr-CA": "CA", + "fr-CD": "CD", + "fr-CF": "CF", + "fr-CG": "CG", + "fr-CH": "CH", + "fr-CI": "CI", + "fr-CM": "CM", + "fr-DJ": "DJ", + "fr-DZ": "DZ", + "fr-FR": "FR", + "fr-GA": "GA", + "fr-HT": "HT", + "fr-LU": "LU", + "fr-MA": "MA", + "fr-MG": "MG", + "fr-ML": "ML", + "fr-MU": "MU", + "fr-NE": "NE", + "fr-RW": "RW", + "fr-SC": "SC", + "fr-SN": "SN", + "fr-TD": "TD", + "fr-TG": "TG", + "fr-TN": "TN", + "fr-VU": "VU", + "he-IL": "IL", + "hi-IN": "IN", + "hr-BA": "BA", + "hr-HR": "HR", + "hu-HU": "HU", + "hy-AM": "AM", + "id-ID": "ID", + "is-IS": "IS", + "it-CH": "CH", + "it-IT": "IT", + "it-SM": "SM", + "ja-JP": "JP", + "ko-KR": "KR", + "lt-LT": "LT", + "lv-LV": "LV", + "nb-NO": "NO", + "nl-BE": "BE", + "nl-NL": "NL", + "nl-SR": "SR", + "pl-PL": "PL", + "pt-AO": "AO", + "pt-BR": "BR", + "pt-CV": "CV", + "pt-MZ": "MZ", + "pt-PT": "PT", + "pt-ST": "ST", + "pt-TL": "TL", + "ro-MD": "MD", + "ro-RO": "RO", + "ru-BY": "BY", + "ru-KG": "KG", + "ru-KZ": "KZ", + "ru-RU": "RU", + "ru-UA": "UA", + "sk-SK": "SK", + "sl-SI": "SI", + "sr-BA": "BA", + "sr-RS": "RS", + "sv-FI": "FI", + "sv-SE": "SE", + "sw-CD": "CD", + "sw-KE": "KE", + "sw-TZ": "TZ", + "sw-UG": "UG", + "th-TH": "TH", + "tr-CY": "CY", + "tr-TR": "TR", + "uk-UA": "UA", + "vi-VN": "VN", + "zh-CN": "HK", + "zh-HK": "HK", + "zh-SG": "SG", + "zh-TW": "TW" + } + }, + "peertube": { + "all_locale": null, + "custom": {}, + "data_type": "traits_v1", + "languages": { + "ca": "ca", + "cs": "cs", + "de": "de", + "el": "el", + "en": "en", + "eo": "eo", + "es": "es", + "eu": "eu", + "fi": "fi", + "fr": "fr", + "gd": "gd", + "it": "it", + "ja": "ja", + "nl": "nl", + "pl": "pl", + "pt": "pt", + "ru": "ru", + "sv": "sv", + "zh": "zh", + "zh_Hans": "zh", + "zh_Hant": "zh" + }, + "regions": {} + }, + "qwant": { + "all_locale": null, + "custom": {}, + "data_type": "traits_v1", + "languages": {}, + "regions": { + "bg-BG": "bg_BG", + "ca-ES": "ca_ES", + "cs-CZ": "cs_CZ", + "da-DK": "da_DK", + "de-AT": "de_AT", + "de-CH": "de_CH", + "de-DE": "de_DE", + "el-GR": "el_GR", + "en-AU": "en_AU", + "en-CA": "en_CA", + "en-GB": "en_GB", + "en-IE": "en_IE", + "en-MY": "en_MY", + "en-NZ": "en_NZ", + "en-US": "en_US", + "es-AR": "es_AR", + "es-CL": "es_CL", + "es-ES": "es_ES", + "es-MX": "es_MX", + "et-EE": "et_EE", + "fi-FI": "fi_FI", + "fr-BE": "fr_BE", + "fr-CA": "fr_CA", + "fr-CH": "fr_CH", + "fr-FR": "fr_FR", + "hu-HU": "hu_HU", + "it-CH": "it_CH", + "it-IT": "it_IT", + "ko-KR": "ko_KR", + "nb-NO": "nb_NO", + "nl-BE": "nl_BE", + "nl-NL": "nl_NL", + "pl-PL": "pl_PL", + "pt-PT": "pt_PT", + "ro-RO": "ro_RO", + "sv-SE": "sv_SE", + "th-TH": "th_TH", + "zh-CN": "zh_CN", + "zh-HK": "zh_HK" + } + }, + "qwant images": { + "all_locale": null, + "custom": {}, + "data_type": "traits_v1", + "languages": {}, + "regions": { + "bg-BG": "bg_BG", + "ca-ES": "ca_ES", + "cs-CZ": "cs_CZ", + "da-DK": "da_DK", + "de-AT": "de_AT", + "de-CH": "de_CH", + "de-DE": "de_DE", + "el-GR": "el_GR", + "en-AU": "en_AU", + "en-CA": "en_CA", + "en-GB": "en_GB", + "en-IE": "en_IE", + "en-MY": "en_MY", + "en-NZ": "en_NZ", + "en-US": "en_US", + "es-AR": "es_AR", + "es-CL": "es_CL", + "es-ES": "es_ES", + "es-MX": "es_MX", + "et-EE": "et_EE", + "fi-FI": "fi_FI", + "fr-BE": "fr_BE", + "fr-CA": "fr_CA", + "fr-CH": "fr_CH", + "fr-FR": "fr_FR", + "hu-HU": "hu_HU", + "it-CH": "it_CH", + "it-IT": "it_IT", + "ko-KR": "ko_KR", + "nb-NO": "nb_NO", + "nl-BE": "nl_BE", + "nl-NL": "nl_NL", + "pl-PL": "pl_PL", + "pt-PT": "pt_PT", + "ro-RO": "ro_RO", + "sv-SE": "sv_SE", + "th-TH": "th_TH", + "zh-CN": "zh_CN", + "zh-HK": "zh_HK" + } + }, + "qwant news": { + "all_locale": null, + "custom": {}, + "data_type": "traits_v1", + "languages": {}, + "regions": { + "ca-ES": "ca_ES", + "de-AT": "de_AT", + "de-CH": "de_CH", + "de-DE": "de_DE", + "en-AU": "en_AU", + "en-CA": "en_CA", + "en-GB": "en_GB", + "en-IE": "en_IE", + "en-MY": "en_MY", + "en-NZ": "en_NZ", + "en-US": "en_US", + "es-AR": "es_AR", + "es-CL": "es_CL", + "es-ES": "es_ES", + "es-MX": "es_MX", + "fr-BE": "fr_BE", + "fr-CA": "fr_CA", + "fr-CH": "fr_CH", + "fr-FR": "fr_FR", + "it-CH": "it_CH", + "it-IT": "it_IT", + "nl-BE": "nl_BE", + "nl-NL": "nl_NL", + "pt-PT": "pt_PT" + } + }, + "qwant videos": { + "all_locale": null, + "custom": {}, + "data_type": "traits_v1", + "languages": {}, + "regions": { + "bg-BG": "bg_BG", + "ca-ES": "ca_ES", + "cs-CZ": "cs_CZ", + "da-DK": "da_DK", + "de-AT": "de_AT", + "de-CH": "de_CH", + "de-DE": "de_DE", + "el-GR": "el_GR", + "en-AU": "en_AU", + "en-CA": "en_CA", + "en-GB": "en_GB", + "en-IE": "en_IE", + "en-MY": "en_MY", + "en-NZ": "en_NZ", + "en-US": "en_US", + "es-AR": "es_AR", + "es-CL": "es_CL", + "es-ES": "es_ES", + "es-MX": "es_MX", + "et-EE": "et_EE", + "fi-FI": "fi_FI", + "fr-BE": "fr_BE", + "fr-CA": "fr_CA", + "fr-CH": "fr_CH", + "fr-FR": "fr_FR", + "hu-HU": "hu_HU", + "it-CH": "it_CH", + "it-IT": "it_IT", + "ko-KR": "ko_KR", + "nb-NO": "nb_NO", + "nl-BE": "nl_BE", + "nl-NL": "nl_NL", + "pl-PL": "pl_PL", + "pt-PT": "pt_PT", + "ro-RO": "ro_RO", + "sv-SE": "sv_SE", + "th-TH": "th_TH", + "zh-CN": "zh_CN", + "zh-HK": "zh_HK" + } + }, + "sepiasearch": { + "all_locale": null, + "custom": {}, + "data_type": "traits_v1", + "languages": { + "ca": "ca", + "cs": "cs", + "de": "de", + "el": "el", + "en": "en", + "eo": "eo", + "es": "es", + "eu": "eu", + "fi": "fi", + "fr": "fr", + "gd": "gd", + "it": "it", + "ja": "ja", + "nl": "nl", + "pl": "pl", + "pt": "pt", + "ru": "ru", + "sv": "sv", + "zh": "zh", + "zh_Hans": "zh", + "zh_Hant": "zh" + }, + "regions": {} + }, + "startpage": { + "all_locale": null, + "custom": {}, + "data_type": "traits_v1", + "languages": { + "af": "afrikaans", + "am": "amharic", + "ar": "arabic", + "az": "azerbaijani", + "be": "belarusian", + "bg": "bulgarian", + "bn": "bengali", + "bs": "bosnian", + "ca": "catalan", + "cs": "czech", + "cy": "welsh", + "da": "dansk", + "de": "deutsch", + "el": "greek", + "en": "english", + "eo": "esperanto", + "es": "espanol", + "et": "estonian", + "eu": "basque", + "fa": "persian", + "fi": "suomi", + "fo": "faroese", + "fr": "francais", + "fy": "frisian", + "ga": "irish", + "gd": "gaelic", + "gl": "galician", + "gu": "gujarati", + "he": "hebrew", + "hi": "hindi", + "hr": "croatian", + "hu": "hungarian", + "ia": "interlingua", + "id": "indonesian", + "is": "icelandic", + "it": "italiano", + "ja": "nihongo", + "jv": "javanese", + "ka": "georgian", + "kn": "kannada", + "ko": "hangul", + "la": "latin", + "lt": "lithuanian", + "lv": "latvian", + "mai": "bihari", + "mk": "macedonian", + "ml": "malayalam", + "mr": "marathi", + "ms": "malay", + "mt": "maltese", + "nb": "norsk", + "ne": "nepali", + "nl": "nederlands", + "oc": "occitan", + "pa": "punjabi", + "pl": "polski", + "pt": "portugues", + "ro": "romanian", + "ru": "russian", + "si": "sinhalese", + "sk": "slovak", + "sl": "slovenian", + "sq": "albanian", + "sr": "serbian", + "su": "sudanese", + "sv": "svenska", + "sw": "swahili", + "ta": "tamil", + "te": "telugu", + "th": "thai", + "ti": "tigrinya", + "tl": "tagalog", + "tr": "turkce", + "uk": "ukrainian", + "ur": "urdu", + "uz": "uzbek", + "vi": "vietnamese", + "xh": "xhosa", + "zh": "jiantizhongwen", + "zh_Hant": "fantizhengwen", + "zu": "zulu" + }, + "regions": { + "ar-EG": "ar_EG", + "bg-BG": "bg_BG", + "ca-ES": "ca_ES", + "cs-CZ": "cs_CZ", + "da-DK": "da_DK", + "de-AT": "de_AT", + "de-CH": "de_CH", + "de-DE": "de_DE", + "el-GR": "el_GR", + "en-AU": "en_AU", + "en-CA": "en_CA", + "en-GB": "en-GB_GB", + "en-IE": "en_IE", + "en-IN": "en_IN", + "en-MY": "en_MY", + "en-NZ": "en_NZ", + "en-PH": "en_PH", + "en-US": "en_US", + "en-ZA": "en_ZA", + "es-AR": "es_AR", + "es-CL": "es_CL", + "es-CO": "es_CO", + "es-ES": "es_ES", + "es-MX": "es_MX", + "es-PE": "es_PE", + "es-US": "es_US", + "es-UY": "es_UY", + "es-VE": "es_VE", + "fi-FI": "fi_FI", + "fil-PH": "fil_PH", + "fr-BE": "fr_BE", + "fr-CA": "fr_CA", + "fr-CH": "fr_CH", + "fr-FR": "fr_FR", + "hi-IN": "hi_IN", + "id-ID": "id_ID", + "it-CH": "it_CH", + "it-IT": "it_IT", + "ja-JP": "ja_JP", + "ko-KR": "ko_KR", + "ms-MY": "ms_MY", + "nb-NO": "no_NO", + "nl-BE": "nl_BE", + "nl-NL": "nl_NL", + "pl-PL": "pl_PL", + "pt-BR": "pt-BR_BR", + "pt-PT": "pt_PT", + "ro-RO": "ro_RO", + "ru-BY": "ru_BY", + "ru-RU": "ru_RU", + "sv-SE": "sv_SE", + "tr-TR": "tr_TR", + "uk-UA": "uk_UA", + "vi-VN": "vi_VN", + "zh-CN": "zh-CN_CN", + "zh-HK": "zh-TW_HK", + "zh-TW": "zh-TW_TW" + } + }, + "wikidata": { + "all_locale": null, + "custom": { + "wiki_netloc": {} + }, + "data_type": "traits_v1", + "languages": { + "af": "af", + "am": "am", + "ar": "ar", + "as": "as", + "az": "az", + "be": "be", + "bg": "bg", + "bn": "bn", + "bs": "bs", + "ca": "ca", + "ckb": "ckb", + "cs": "cs", + "da": "da", + "de": "de", + "el": "el", + "en": "en", + "es": "es", + "et": "et", + "fa": "fa", + "fi": "fi", + "fil": "tl", + "fo": "fo", + "fr": "fr", + "fy": "fy", + "gl": "gl", + "gsw": "als", + "gu": "gu", + "he": "he", + "hi": "hi", + "hsb": "hsb", + "hu": "hu", + "hy": "hy", + "id": "id", + "is": "is", + "it": "it", + "ja": "ja", + "jv": "jv", + "ka": "ka", + "kn": "kn", + "ko": "ko", + "lb": "lb", + "lt": "lt", + "lv": "lv", + "mai": "mai", + "mk": "mk", + "ml": "ml", + "mn": "mn", + "mr": "mr", + "ne": "ne", + "no": "no", + "or": "or", + "os": "os", + "pa": "pa", + "pl": "pl", + "ps": "ps", + "pt": "pt", + "qu": "qu", + "ro": "ro", + "ru": "ru", + "sa": "sa", + "sah": "sah", + "sd": "sd", + "si": "si", + "sk": "sk", + "sl": "sl", + "sq": "sq", + "sr": "sr", + "ta": "ta", + "te": "te", + "th": "th", + "tr": "tr", + "uk": "uk", + "ur": "ur", + "uz": "uz", + "vi": "vi", + "yi": "yi", + "zh": "zh", + "zh_Hant": "zh-classical" + }, + "regions": {} + }, + "wikipedia": { + "all_locale": null, + "custom": { + "wiki_netloc": { + "af": "af.wikipedia.org", + "als": "als.wikipedia.org", + "am": "am.wikipedia.org", + "ar": "ar.wikipedia.org", + "as": "as.wikipedia.org", + "az": "az.wikipedia.org", + "be": "be.wikipedia.org", + "bg": "bg.wikipedia.org", + "bn": "bn.wikipedia.org", + "bs": "bs.wikipedia.org", + "ca": "ca.wikipedia.org", + "ckb": "ckb.wikipedia.org", + "cs": "cs.wikipedia.org", + "da": "da.wikipedia.org", + "de": "de.wikipedia.org", + "el": "el.wikipedia.org", + "en": "en.wikipedia.org", + "es": "es.wikipedia.org", + "et": "et.wikipedia.org", + "fa": "fa.wikipedia.org", + "fi": "fi.wikipedia.org", + "fo": "fo.wikipedia.org", + "fr": "fr.wikipedia.org", + "fy": "fy.wikipedia.org", + "gl": "gl.wikipedia.org", + "gu": "gu.wikipedia.org", + "he": "he.wikipedia.org", + "hi": "hi.wikipedia.org", + "hsb": "hsb.wikipedia.org", + "hu": "hu.wikipedia.org", + "hy": "hy.wikipedia.org", + "id": "id.wikipedia.org", + "is": "is.wikipedia.org", + "it": "it.wikipedia.org", + "ja": "ja.wikipedia.org", + "jv": "jv.wikipedia.org", + "ka": "ka.wikipedia.org", + "kn": "kn.wikipedia.org", + "ko": "ko.wikipedia.org", + "lb": "lb.wikipedia.org", + "lt": "lt.wikipedia.org", + "lv": "lv.wikipedia.org", + "mai": "mai.wikipedia.org", + "mk": "mk.wikipedia.org", + "ml": "ml.wikipedia.org", + "mn": "mn.wikipedia.org", + "mr": "mr.wikipedia.org", + "ne": "ne.wikipedia.org", + "no": "no.wikipedia.org", + "or": "or.wikipedia.org", + "os": "os.wikipedia.org", + "pa": "pa.wikipedia.org", + "pl": "pl.wikipedia.org", + "ps": "ps.wikipedia.org", + "pt": "pt.wikipedia.org", + "qu": "qu.wikipedia.org", + "ro": "ro.wikipedia.org", + "ru": "ru.wikipedia.org", + "sa": "sa.wikipedia.org", + "sah": "sah.wikipedia.org", + "sd": "sd.wikipedia.org", + "si": "si.wikipedia.org", + "sk": "sk.wikipedia.org", + "sl": "sl.wikipedia.org", + "sq": "sq.wikipedia.org", + "sr": "sr.wikipedia.org", + "ta": "ta.wikipedia.org", + "te": "te.wikipedia.org", + "th": "th.wikipedia.org", + "tl": "tl.wikipedia.org", + "tr": "tr.wikipedia.org", + "uk": "uk.wikipedia.org", + "ur": "ur.wikipedia.org", + "uz": "uz.wikipedia.org", + "vi": "vi.wikipedia.org", + "yi": "yi.wikipedia.org", + "zh": "zh.wikipedia.org", + "zh-classical": "zh-classical.wikipedia.org" + } + }, + "data_type": "traits_v1", + "languages": { + "af": "af", + "am": "am", + "ar": "ar", + "as": "as", + "az": "az", + "be": "be", + "bg": "bg", + "bn": "bn", + "bs": "bs", + "ca": "ca", + "ckb": "ckb", + "cs": "cs", + "da": "da", + "de": "de", + "el": "el", + "en": "en", + "es": "es", + "et": "et", + "fa": "fa", + "fi": "fi", + "fil": "tl", + "fo": "fo", + "fr": "fr", + "fy": "fy", + "gl": "gl", + "gsw": "als", + "gu": "gu", + "he": "he", + "hi": "hi", + "hsb": "hsb", + "hu": "hu", + "hy": "hy", + "id": "id", + "is": "is", + "it": "it", + "ja": "ja", + "jv": "jv", + "ka": "ka", + "kn": "kn", + "ko": "ko", + "lb": "lb", + "lt": "lt", + "lv": "lv", + "mai": "mai", + "mk": "mk", + "ml": "ml", + "mn": "mn", + "mr": "mr", + "ne": "ne", + "no": "no", + "or": "or", + "os": "os", + "pa": "pa", + "pl": "pl", + "ps": "ps", + "pt": "pt", + "qu": "qu", + "ro": "ro", + "ru": "ru", + "sa": "sa", + "sah": "sah", + "sd": "sd", + "si": "si", + "sk": "sk", + "sl": "sl", + "sq": "sq", + "sr": "sr", + "ta": "ta", + "te": "te", + "th": "th", + "tr": "tr", + "uk": "uk", + "ur": "ur", + "uz": "uz", + "vi": "vi", + "yi": "yi", + "zh": "zh", + "zh_Hans": "zh", + "zh_Hant": "zh-classical" + }, + "regions": {} + }, + "yahoo": { + "all_locale": "any", + "custom": {}, + "data_type": "traits_v1", + "languages": { + "ar": "ar", + "bg": "bg", + "cs": "cs", + "da": "da", + "de": "de", + "el": "el", + "en": "en", + "es": "es", + "et": "et", + "fi": "fi", + "fr": "fr", + "he": "he", + "hr": "hr", + "hu": "hu", + "it": "it", + "ja": "ja", + "ko": "ko", + "lt": "lt", + "lv": "lv", + "nl": "nl", + "no": "no", + "pl": "pl", + "pt": "pt", + "ro": "ro", + "ru": "ru", + "sk": "sk", + "sl": "sl", + "sv": "sv", + "th": "th", + "tr": "tr", + "zh_Hans": "zh_chs", + "zh_Hant": "zh_cht" + }, + "regions": {} + } +} \ No newline at end of file diff --git a/searx/data/engines_languages.json b/searx/data/engines_languages.json deleted file mode 100644 index acd36439c..000000000 --- a/searx/data/engines_languages.json +++ /dev/null @@ -1,4381 +0,0 @@ -{ - "bing": [ - "af", - "am", - "ar", - "as", - "az-latn", - "be", - "bg", - "bn", - "bs-latn", - "ca", - "ca-es-valencia", - "chr-cher", - "cs", - "cy", - "da", - "de", - "el", - "en", - "es", - "et", - "eu", - "fa", - "fi", - "fil", - "fr", - "ga", - "gd", - "gl", - "gu", - "ha-latn", - "he", - "hi", - "hr", - "hu", - "hy", - "id", - "ig", - "is", - "it", - "ja", - "ka", - "kk", - "km", - "kn", - "ko", - "kok", - "ku-arab", - "ky", - "lb", - "lo", - "lt", - "lv", - "mi", - "mk", - "ml", - "mn-Cyrl-MN", - "mr", - "ms", - "mt", - "nb", - "ne", - "nl", - "nn", - "nso", - "or", - "pa-arab", - "pa-guru", - "pl", - "prs", - "pt-BR", - "pt-PT", - "quc", - "quz", - "ro", - "ru", - "rw", - "sd-arab", - "si", - "sk", - "sl", - "sq", - "sr-cyrl", - "sr-latn", - "sv", - "sw", - "ta", - "te", - "tg-cyrl", - "th", - "ti", - "tk", - "tn", - "tr", - "tt", - "ug", - "uk", - "ur", - "uz-latn", - "vi", - "wo", - "xh", - "yo", - "zh-Hans", - "zh-Hant", - "zu" - ], - "bing images": [ - "af", - "am", - "ar", - "as", - "az-latn", - "be", - "bg", - "bn", - "bs-latn", - "ca", - "ca-es-valencia", - "chr-cher", - "cs", - "cy", - "da", - "de", - "el", - "en", - "es", - "et", - "eu", - "fa", - "fi", - "fil", - "fr", - "ga", - "gd", - "gl", - "gu", - "ha-latn", - "he", - "hi", - "hr", - "hu", - "hy", - "id", - "ig", - "is", - "it", - "ja", - "ka", - "kk", - "km", - "kn", - "ko", - "kok", - "ku-arab", - "ky", - "lb", - "lo", - "lt", - "lv", - "mi", - "mk", - "ml", - "mn-Cyrl-MN", - "mr", - "ms", - "mt", - "nb", - "ne", - "nl", - "nn", - "nso", - "or", - "pa-arab", - "pa-guru", - "pl", - "prs", - "pt-BR", - "pt-PT", - "quc", - "quz", - "ro", - "ru", - "rw", - "sd-arab", - "si", - "sk", - "sl", - "sq", - "sr-cyrl", - "sr-latn", - "sv", - "sw", - "ta", - "te", - "tg-cyrl", - "th", - "ti", - "tk", - "tn", - "tr", - "tt", - "ug", - "uk", - "ur", - "uz-latn", - "vi", - "wo", - "xh", - "yo", - "zh-Hans", - "zh-Hant", - "zu" - ], - "bing news": [ - "af", - "am", - "ar", - "as", - "az-latn", - "be", - "bg", - "bn", - "bs-latn", - "ca", - "ca-es-valencia", - "chr-cher", - "cs", - "cy", - "da", - "de", - "el", - "en", - "es", - "et", - "eu", - "fa", - "fi", - "fil", - "fr", - "ga", - "gd", - "gl", - "gu", - "ha-latn", - "he", - "hi", - "hr", - "hu", - "hy", - "id", - "ig", - "is", - "it", - "ja", - "ka", - "kk", - "km", - "kn", - "ko", - "kok", - "ku-arab", - "ky", - "lb", - "lo", - "lt", - "lv", - "mi", - "mk", - "ml", - "mn-Cyrl-MN", - "mr", - "ms", - "mt", - "nb", - "ne", - "nl", - "nn", - "nso", - "or", - "pa-arab", - "pa-guru", - "pl", - "prs", - "pt-BR", - "pt-PT", - "quc", - "quz", - "ro", - "ru", - "rw", - "sd-arab", - "si", - "sk", - "sl", - "sq", - "sr-cyrl", - "sr-latn", - "sv", - "sw", - "ta", - "te", - "tg-cyrl", - "th", - "ti", - "tk", - "tn", - "tr", - "tt", - "ug", - "uk", - "ur", - "uz-latn", - "vi", - "wo", - "xh", - "yo", - "zh-Hans", - "zh-Hant", - "zu" - ], - "bing videos": [ - "af", - "am", - "ar", - "as", - "az-latn", - "be", - "bg", - "bn", - "bs-latn", - "ca", - "ca-es-valencia", - "chr-cher", - "cs", - "cy", - "da", - "de", - "el", - "en", - "es", - "et", - "eu", - "fa", - "fi", - "fil", - "fr", - "ga", - "gd", - "gl", - "gu", - "ha-latn", - "he", - "hi", - "hr", - "hu", - "hy", - "id", - "ig", - "is", - "it", - "ja", - "ka", - "kk", - "km", - "kn", - "ko", - "kok", - "ku-arab", - "ky", - "lb", - "lo", - "lt", - "lv", - "mi", - "mk", - "ml", - "mn-Cyrl-MN", - "mr", - "ms", - "mt", - "nb", - "ne", - "nl", - "nn", - "nso", - "or", - "pa-arab", - "pa-guru", - "pl", - "prs", - "pt-BR", - "pt-PT", - "quc", - "quz", - "ro", - "ru", - "rw", - "sd-arab", - "si", - "sk", - "sl", - "sq", - "sr-cyrl", - "sr-latn", - "sv", - "sw", - "ta", - "te", - "tg-cyrl", - "th", - "ti", - "tk", - "tn", - "tr", - "tt", - "ug", - "uk", - "ur", - "uz-latn", - "vi", - "wo", - "xh", - "yo", - "zh-Hans", - "zh-Hant", - "zu" - ], - "dailymotion": [ - "ar_AA", - "ar_AE", - "ar_EG", - "ar_SA", - "de_AT", - "de_CH", - "de_DE", - "el_GR", - "en_AU", - "en_CA", - "en_EN", - "en_GB", - "en_HK", - "en_IE", - "en_IN", - "en_NG", - "en_PH", - "en_PK", - "en_SG", - "en_US", - "en_ZA", - "es_AR", - "es_ES", - "es_MX", - "fr_BE", - "fr_CA", - "fr_CH", - "fr_CI", - "fr_FR", - "fr_MA", - "fr_SN", - "fr_TN", - "id_ID", - "it_CH", - "it_IT", - "ja_JP", - "ko_KR", - "ms_MY", - "nl_BE", - "nl_NL", - "pl_PL", - "pt_BR", - "pt_PT", - "ro_RO", - "ru_RU", - "th_TH", - "tr_TR", - "vi_VN", - "zh_CN", - "zh_TW" - ], - "ddg definitions": [ - "ar-XA", - "bg-BG", - "ca-CT", - "ca-ES", - "cs-CZ", - "da-DK", - "de-AT", - "de-CH", - "de-DE", - "el-GR", - "en-AU", - "en-CA", - "en-ID", - "en-IE", - "en-IL", - "en-IN", - "en-MY", - "en-NZ", - "en-PH", - "en-PK", - "en-SG", - "en-TH", - "en-UK", - "en-US", - "en-VN", - "en-ZA", - "es-AR", - "es-CL", - "es-CO", - "es-ES", - "es-MX", - "es-PE", - "es-US", - "et-EE", - "fi-FI", - "fr-BE", - "fr-CA", - "fr-CH", - "fr-FR", - "hr-HR", - "hu-HU", - "it-IT", - "jp-JP", - "kr-KR", - "lt-LT", - "lv-LV", - "nl-BE", - "nl-NL", - "no-NO", - "pl-PL", - "pt-BR", - "pt-PT", - "ro-RO", - "ru-RU", - "sk-SK", - "sl-SL", - "sv-SE", - "tr-TR", - "tzh-HK", - "tzh-TW", - "uk-UA", - "wt-WT", - "zh-CN" - ], - "duckduckgo": [ - "ar-XA", - "bg-BG", - "ca-CT", - "ca-ES", - "cs-CZ", - "da-DK", - "de-AT", - "de-CH", - "de-DE", - "el-GR", - "en-AU", - "en-CA", - "en-ID", - "en-IE", - "en-IL", - "en-IN", - "en-MY", - "en-NZ", - "en-PH", - "en-PK", - "en-SG", - "en-TH", - "en-UK", - "en-US", - "en-VN", - "en-ZA", - "es-AR", - "es-CL", - "es-CO", - "es-ES", - "es-MX", - "es-PE", - "es-US", - "et-EE", - "fi-FI", - "fr-BE", - "fr-CA", - "fr-CH", - "fr-FR", - "hr-HR", - "hu-HU", - "it-IT", - "jp-JP", - "kr-KR", - "lt-LT", - "lv-LV", - "nl-BE", - "nl-NL", - "no-NO", - "pl-PL", - "pt-BR", - "pt-PT", - "ro-RO", - "ru-RU", - "sk-SK", - "sl-SL", - "sv-SE", - "tr-TR", - "tzh-HK", - "tzh-TW", - "uk-UA", - "wt-WT", - "zh-CN" - ], - "duckduckgo images": [ - "ar-XA", - "bg-BG", - "ca-CT", - "ca-ES", - "cs-CZ", - "da-DK", - "de-AT", - "de-CH", - "de-DE", - "el-GR", - "en-AU", - "en-CA", - "en-ID", - "en-IE", - "en-IL", - "en-IN", - "en-MY", - "en-NZ", - "en-PH", - "en-PK", - "en-SG", - "en-TH", - "en-UK", - "en-US", - "en-VN", - "en-ZA", - "es-AR", - "es-CL", - "es-CO", - "es-ES", - "es-MX", - "es-PE", - "es-US", - "et-EE", - "fi-FI", - "fr-BE", - "fr-CA", - "fr-CH", - "fr-FR", - "hr-HR", - "hu-HU", - "it-IT", - "jp-JP", - "kr-KR", - "lt-LT", - "lv-LV", - "nl-BE", - "nl-NL", - "no-NO", - "pl-PL", - "pt-BR", - "pt-PT", - "ro-RO", - "ru-RU", - "sk-SK", - "sl-SL", - "sv-SE", - "tr-TR", - "tzh-HK", - "tzh-TW", - "uk-UA", - "wt-WT", - "zh-CN" - ], - "google": { - "af": { - "name": "Afrikaans" - }, - "ar": { - "name": "\u0627\u0644\u0639\u0631\u0628\u064a\u0629" - }, - "be": { - "name": "\u0431\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f" - }, - "bg": { - "name": "\u0431\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438" - }, - "ca": { - "name": "catal\u00e0" - }, - "cs": { - "name": "\u010de\u0161tina" - }, - "da": { - "name": "dansk" - }, - "de": { - "name": "Deutsch" - }, - "el": { - "name": "\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac" - }, - "en": { - "name": "English" - }, - "eo": { - "name": "esperanto" - }, - "es": { - "name": "espa\u00f1ol" - }, - "et": { - "name": "eesti" - }, - "fa": { - "name": "\u0641\u0627\u0631\u0633\u06cc" - }, - "fi": { - "name": "suomi" - }, - "fr": { - "name": "fran\u00e7ais" - }, - "hi": { - "name": "\u0939\u093f\u0928\u094d\u0926\u0940" - }, - "hr": { - "name": "hrvatski" - }, - "hu": { - "name": "magyar" - }, - "hy": { - "name": "\u0570\u0561\u0575\u0565\u0580\u0565\u0576" - }, - "id": { - "name": "Indonesia" - }, - "is": { - "name": "\u00edslenska" - }, - "it": { - "name": "italiano" - }, - "iw": { - "name": "\u05e2\u05d1\u05e8\u05d9\u05ea" - }, - "ja": { - "name": "\u65e5\u672c\u8a9e" - }, - "ko": { - "name": "\ud55c\uad6d\uc5b4" - }, - "lt": { - "name": "lietuvi\u0173" - }, - "lv": { - "name": "latvie\u0161u" - }, - "nl": { - "name": "Nederlands" - }, - "no": { - "name": "norsk" - }, - "pl": { - "name": "polski" - }, - "pt": { - "name": "portugu\u00eas" - }, - "ro": { - "name": "rom\u00e2n\u0103" - }, - "ru": { - "name": "\u0440\u0443\u0441\u0441\u043a\u0438\u0439" - }, - "sk": { - "name": "sloven\u010dina" - }, - "sl": { - "name": "sloven\u0161\u010dina" - }, - "sr": { - "name": "\u0441\u0440\u043f\u0441\u043a\u0438" - }, - "sv": { - "name": "svenska" - }, - "sw": { - "name": "Kiswahili" - }, - "th": { - "name": "\u0e44\u0e17\u0e22" - }, - "tl": { - "name": "Filipino" - }, - "tr": { - "name": "T\u00fcrk\u00e7e" - }, - "uk": { - "name": "\u0443\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430" - }, - "vi": { - "name": "Ti\u1ebfng Vi\u1ec7t" - }, - "zh-CN": { - "name": "\u4e2d\u6587 (\u7b80\u4f53)" - }, - "zh-TW": { - "name": "\u4e2d\u6587 (\u7e41\u9ad4)" - } - }, - "google images": { - "af": { - "name": "Afrikaans" - }, - "ar": { - "name": "\u0627\u0644\u0639\u0631\u0628\u064a\u0629" - }, - "be": { - "name": "\u0431\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f" - }, - "bg": { - "name": "\u0431\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438" - }, - "ca": { - "name": "catal\u00e0" - }, - "cs": { - "name": "\u010de\u0161tina" - }, - "da": { - "name": "dansk" - }, - "de": { - "name": "Deutsch" - }, - "el": { - "name": "\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac" - }, - "en": { - "name": "English" - }, - "eo": { - "name": "esperanto" - }, - "es": { - "name": "espa\u00f1ol" - }, - "et": { - "name": "eesti" - }, - "fa": { - "name": "\u0641\u0627\u0631\u0633\u06cc" - }, - "fi": { - "name": "suomi" - }, - "fr": { - "name": "fran\u00e7ais" - }, - "hi": { - "name": "\u0939\u093f\u0928\u094d\u0926\u0940" - }, - "hr": { - "name": "hrvatski" - }, - "hu": { - "name": "magyar" - }, - "hy": { - "name": "\u0570\u0561\u0575\u0565\u0580\u0565\u0576" - }, - "id": { - "name": "Indonesia" - }, - "is": { - "name": "\u00edslenska" - }, - "it": { - "name": "italiano" - }, - "iw": { - "name": "\u05e2\u05d1\u05e8\u05d9\u05ea" - }, - "ja": { - "name": "\u65e5\u672c\u8a9e" - }, - "ko": { - "name": "\ud55c\uad6d\uc5b4" - }, - "lt": { - "name": "lietuvi\u0173" - }, - "lv": { - "name": "latvie\u0161u" - }, - "nl": { - "name": "Nederlands" - }, - "no": { - "name": "norsk" - }, - "pl": { - "name": "polski" - }, - "pt": { - "name": "portugu\u00eas" - }, - "ro": { - "name": "rom\u00e2n\u0103" - }, - "ru": { - "name": "\u0440\u0443\u0441\u0441\u043a\u0438\u0439" - }, - "sk": { - "name": "sloven\u010dina" - }, - "sl": { - "name": "sloven\u0161\u010dina" - }, - "sr": { - "name": "\u0441\u0440\u043f\u0441\u043a\u0438" - }, - "sv": { - "name": "svenska" - }, - "sw": { - "name": "Kiswahili" - }, - "th": { - "name": "\u0e44\u0e17\u0e22" - }, - "tl": { - "name": "Filipino" - }, - "tr": { - "name": "T\u00fcrk\u00e7e" - }, - "uk": { - "name": "\u0443\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430" - }, - "vi": { - "name": "Ti\u1ebfng Vi\u1ec7t" - }, - "zh-CN": { - "name": "\u4e2d\u6587 (\u7b80\u4f53)" - }, - "zh-TW": { - "name": "\u4e2d\u6587 (\u7e41\u9ad4)" - } - }, - "google news": { - "af": { - "name": "Afrikaans" - }, - "ar": { - "name": "\u0627\u0644\u0639\u0631\u0628\u064a\u0629" - }, - "be": { - "name": "\u0431\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f" - }, - "bg": { - "name": "\u0431\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438" - }, - "ca": { - "name": "catal\u00e0" - }, - "cs": { - "name": "\u010de\u0161tina" - }, - "da": { - "name": "dansk" - }, - "de": { - "name": "Deutsch" - }, - "el": { - "name": "\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac" - }, - "en": { - "name": "English" - }, - "eo": { - "name": "esperanto" - }, - "es": { - "name": "espa\u00f1ol" - }, - "et": { - "name": "eesti" - }, - "fa": { - "name": "\u0641\u0627\u0631\u0633\u06cc" - }, - "fi": { - "name": "suomi" - }, - "fr": { - "name": "fran\u00e7ais" - }, - "hi": { - "name": "\u0939\u093f\u0928\u094d\u0926\u0940" - }, - "hr": { - "name": "hrvatski" - }, - "hu": { - "name": "magyar" - }, - "hy": { - "name": "\u0570\u0561\u0575\u0565\u0580\u0565\u0576" - }, - "id": { - "name": "Indonesia" - }, - "is": { - "name": "\u00edslenska" - }, - "it": { - "name": "italiano" - }, - "iw": { - "name": "\u05e2\u05d1\u05e8\u05d9\u05ea" - }, - "ja": { - "name": "\u65e5\u672c\u8a9e" - }, - "ko": { - "name": "\ud55c\uad6d\uc5b4" - }, - "lt": { - "name": "lietuvi\u0173" - }, - "lv": { - "name": "latvie\u0161u" - }, - "nl": { - "name": "Nederlands" - }, - "no": { - "name": "norsk" - }, - "pl": { - "name": "polski" - }, - "pt": { - "name": "portugu\u00eas" - }, - "ro": { - "name": "rom\u00e2n\u0103" - }, - "ru": { - "name": "\u0440\u0443\u0441\u0441\u043a\u0438\u0439" - }, - "sk": { - "name": "sloven\u010dina" - }, - "sl": { - "name": "sloven\u0161\u010dina" - }, - "sr": { - "name": "\u0441\u0440\u043f\u0441\u043a\u0438" - }, - "sv": { - "name": "svenska" - }, - "sw": { - "name": "Kiswahili" - }, - "th": { - "name": "\u0e44\u0e17\u0e22" - }, - "tl": { - "name": "Filipino" - }, - "tr": { - "name": "T\u00fcrk\u00e7e" - }, - "uk": { - "name": "\u0443\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430" - }, - "vi": { - "name": "Ti\u1ebfng Vi\u1ec7t" - }, - "zh-CN": { - "name": "\u4e2d\u6587 (\u7b80\u4f53)" - }, - "zh-TW": { - "name": "\u4e2d\u6587 (\u7e41\u9ad4)" - } - }, - "google scholar": { - "af": { - "name": "Afrikaans" - }, - "ar": { - "name": "\u0627\u0644\u0639\u0631\u0628\u064a\u0629" - }, - "be": { - "name": "\u0431\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f" - }, - "bg": { - "name": "\u0431\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438" - }, - "ca": { - "name": "catal\u00e0" - }, - "cs": { - "name": "\u010de\u0161tina" - }, - "da": { - "name": "dansk" - }, - "de": { - "name": "Deutsch" - }, - "el": { - "name": "\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac" - }, - "en": { - "name": "English" - }, - "eo": { - "name": "esperanto" - }, - "es": { - "name": "espa\u00f1ol" - }, - "et": { - "name": "eesti" - }, - "fa": { - "name": "\u0641\u0627\u0631\u0633\u06cc" - }, - "fi": { - "name": "suomi" - }, - "fr": { - "name": "fran\u00e7ais" - }, - "hi": { - "name": "\u0939\u093f\u0928\u094d\u0926\u0940" - }, - "hr": { - "name": "hrvatski" - }, - "hu": { - "name": "magyar" - }, - "hy": { - "name": "\u0570\u0561\u0575\u0565\u0580\u0565\u0576" - }, - "id": { - "name": "Indonesia" - }, - "is": { - "name": "\u00edslenska" - }, - "it": { - "name": "italiano" - }, - "iw": { - "name": "\u05e2\u05d1\u05e8\u05d9\u05ea" - }, - "ja": { - "name": "\u65e5\u672c\u8a9e" - }, - "ko": { - "name": "\ud55c\uad6d\uc5b4" - }, - "lt": { - "name": "lietuvi\u0173" - }, - "lv": { - "name": "latvie\u0161u" - }, - "nl": { - "name": "Nederlands" - }, - "no": { - "name": "norsk" - }, - "pl": { - "name": "polski" - }, - "pt": { - "name": "portugu\u00eas" - }, - "ro": { - "name": "rom\u00e2n\u0103" - }, - "ru": { - "name": "\u0440\u0443\u0441\u0441\u043a\u0438\u0439" - }, - "sk": { - "name": "sloven\u010dina" - }, - "sl": { - "name": "sloven\u0161\u010dina" - }, - "sr": { - "name": "\u0441\u0440\u043f\u0441\u043a\u0438" - }, - "sv": { - "name": "svenska" - }, - "sw": { - "name": "Kiswahili" - }, - "th": { - "name": "\u0e44\u0e17\u0e22" - }, - "tl": { - "name": "Filipino" - }, - "tr": { - "name": "T\u00fcrk\u00e7e" - }, - "uk": { - "name": "\u0443\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430" - }, - "vi": { - "name": "Ti\u1ebfng Vi\u1ec7t" - }, - "zh-CN": { - "name": "\u4e2d\u6587 (\u7b80\u4f53)" - }, - "zh-TW": { - "name": "\u4e2d\u6587 (\u7e41\u9ad4)" - } - }, - "google videos": { - "af": { - "name": "Afrikaans" - }, - "ar": { - "name": "\u0627\u0644\u0639\u0631\u0628\u064a\u0629" - }, - "be": { - "name": "\u0431\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f" - }, - "bg": { - "name": "\u0431\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438" - }, - "ca": { - "name": "catal\u00e0" - }, - "cs": { - "name": "\u010de\u0161tina" - }, - "da": { - "name": "dansk" - }, - "de": { - "name": "Deutsch" - }, - "el": { - "name": "\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac" - }, - "en": { - "name": "English" - }, - "eo": { - "name": "esperanto" - }, - "es": { - "name": "espa\u00f1ol" - }, - "et": { - "name": "eesti" - }, - "fa": { - "name": "\u0641\u0627\u0631\u0633\u06cc" - }, - "fi": { - "name": "suomi" - }, - "fr": { - "name": "fran\u00e7ais" - }, - "hi": { - "name": "\u0939\u093f\u0928\u094d\u0926\u0940" - }, - "hr": { - "name": "hrvatski" - }, - "hu": { - "name": "magyar" - }, - "hy": { - "name": "\u0570\u0561\u0575\u0565\u0580\u0565\u0576" - }, - "id": { - "name": "Indonesia" - }, - "is": { - "name": "\u00edslenska" - }, - "it": { - "name": "italiano" - }, - "iw": { - "name": "\u05e2\u05d1\u05e8\u05d9\u05ea" - }, - "ja": { - "name": "\u65e5\u672c\u8a9e" - }, - "ko": { - "name": "\ud55c\uad6d\uc5b4" - }, - "lt": { - "name": "lietuvi\u0173" - }, - "lv": { - "name": "latvie\u0161u" - }, - "nl": { - "name": "Nederlands" - }, - "no": { - "name": "norsk" - }, - "pl": { - "name": "polski" - }, - "pt": { - "name": "portugu\u00eas" - }, - "ro": { - "name": "rom\u00e2n\u0103" - }, - "ru": { - "name": "\u0440\u0443\u0441\u0441\u043a\u0438\u0439" - }, - "sk": { - "name": "sloven\u010dina" - }, - "sl": { - "name": "sloven\u0161\u010dina" - }, - "sr": { - "name": "\u0441\u0440\u043f\u0441\u043a\u0438" - }, - "sv": { - "name": "svenska" - }, - "sw": { - "name": "Kiswahili" - }, - "th": { - "name": "\u0e44\u0e17\u0e22" - }, - "tl": { - "name": "Filipino" - }, - "tr": { - "name": "T\u00fcrk\u00e7e" - }, - "uk": { - "name": "\u0443\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430" - }, - "vi": { - "name": "Ti\u1ebfng Vi\u1ec7t" - }, - "zh-CN": { - "name": "\u4e2d\u6587 (\u7b80\u4f53)" - }, - "zh-TW": { - "name": "\u4e2d\u6587 (\u7e41\u9ad4)" - } - }, - "peertube": [ - "ca", - "cs", - "de", - "el", - "en", - "eo", - "es", - "eu", - "fi", - "fr", - "gd", - "it", - "ja", - "nl", - "oc", - "pl", - "pt", - "ru", - "sv", - "zh" - ], - "qwant": { - "bg-BG": "bg_BG", - "ca-ES": "ca_ES", - "cs-CZ": "cs_CZ", - "da-DK": "da_DK", - "de-AT": "de_AT", - "de-CH": "de_CH", - "de-DE": "de_DE", - "el-GR": "el_GR", - "en-AU": "en_AU", - "en-CA": "en_CA", - "en-GB": "en_GB", - "en-IE": "en_IE", - "en-MY": "en_MY", - "en-NZ": "en_NZ", - "en-US": "en_US", - "es-AR": "es_AR", - "es-CL": "es_CL", - "es-ES": "es_ES", - "es-MX": "es_MX", - "et-EE": "et_EE", - "fi-FI": "fi_FI", - "fr-BE": "fr_BE", - "fr-CA": "fr_CA", - "fr-CH": "fr_CH", - "fr-FR": "fr_FR", - "hu-HU": "hu_HU", - "it-CH": "it_CH", - "it-IT": "it_IT", - "ko-KR": "ko_KR", - "nb-NO": "nb_NO", - "nl-BE": "nl_BE", - "nl-NL": "nl_NL", - "pl-PL": "pl_PL", - "pt-PT": "pt_PT", - "ro-RO": "ro_RO", - "sv-SE": "sv_SE", - "th-TH": "th_TH", - "zh-CN": "zh_CN", - "zh-HK": "zh_HK" - }, - "qwant images": { - "bg-BG": "bg_BG", - "ca-ES": "ca_ES", - "cs-CZ": "cs_CZ", - "da-DK": "da_DK", - "de-AT": "de_AT", - "de-CH": "de_CH", - "de-DE": "de_DE", - "el-GR": "el_GR", - "en-AU": "en_AU", - "en-CA": "en_CA", - "en-GB": "en_GB", - "en-IE": "en_IE", - "en-MY": "en_MY", - "en-NZ": "en_NZ", - "en-US": "en_US", - "es-AR": "es_AR", - "es-CL": "es_CL", - "es-ES": "es_ES", - "es-MX": "es_MX", - "et-EE": "et_EE", - "fi-FI": "fi_FI", - "fr-BE": "fr_BE", - "fr-CA": "fr_CA", - "fr-CH": "fr_CH", - "fr-FR": "fr_FR", - "hu-HU": "hu_HU", - "it-CH": "it_CH", - "it-IT": "it_IT", - "ko-KR": "ko_KR", - "nb-NO": "nb_NO", - "nl-BE": "nl_BE", - "nl-NL": "nl_NL", - "pl-PL": "pl_PL", - "pt-PT": "pt_PT", - "ro-RO": "ro_RO", - "sv-SE": "sv_SE", - "th-TH": "th_TH", - "zh-CN": "zh_CN", - "zh-HK": "zh_HK" - }, - "qwant news": { - "ca-ES": "ca_ES", - "de-AT": "de_AT", - "de-CH": "de_CH", - "de-DE": "de_DE", - "en-AU": "en_AU", - "en-CA": "en_CA", - "en-GB": "en_GB", - "en-IE": "en_IE", - "en-MY": "en_MY", - "en-NZ": "en_NZ", - "en-US": "en_US", - "es-AR": "es_AR", - "es-CL": "es_CL", - "es-ES": "es_ES", - "es-MX": "es_MX", - "fr-BE": "fr_BE", - "fr-CA": "fr_CA", - "fr-CH": "fr_CH", - "fr-FR": "fr_FR", - "it-CH": "it_CH", - "it-IT": "it_IT", - "nl-BE": "nl_BE", - "nl-NL": "nl_NL", - "pt-PT": "pt_PT" - }, - "qwant videos": { - "bg-BG": "bg_BG", - "ca-ES": "ca_ES", - "cs-CZ": "cs_CZ", - "da-DK": "da_DK", - "de-AT": "de_AT", - "de-CH": "de_CH", - "de-DE": "de_DE", - "el-GR": "el_GR", - "en-AU": "en_AU", - "en-CA": "en_CA", - "en-GB": "en_GB", - "en-IE": "en_IE", - "en-MY": "en_MY", - "en-NZ": "en_NZ", - "en-US": "en_US", - "es-AR": "es_AR", - "es-CL": "es_CL", - "es-ES": "es_ES", - "es-MX": "es_MX", - "et-EE": "et_EE", - "fi-FI": "fi_FI", - "fr-BE": "fr_BE", - "fr-CA": "fr_CA", - "fr-CH": "fr_CH", - "fr-FR": "fr_FR", - "hu-HU": "hu_HU", - "it-CH": "it_CH", - "it-IT": "it_IT", - "ko-KR": "ko_KR", - "nb-NO": "nb_NO", - "nl-BE": "nl_BE", - "nl-NL": "nl_NL", - "pl-PL": "pl_PL", - "pt-PT": "pt_PT", - "ro-RO": "ro_RO", - "sv-SE": "sv_SE", - "th-TH": "th_TH", - "zh-CN": "zh_CN", - "zh-HK": "zh_HK" - }, - "startpage": { - "af": { - "alias": "afrikaans" - }, - "am": { - "alias": "amharic" - }, - "ar": { - "alias": "arabic" - }, - "az": { - "alias": "azerbaijani" - }, - "be": { - "alias": "belarusian" - }, - "bg": { - "alias": "bulgarian" - }, - "bn": { - "alias": "bengali" - }, - "bs": { - "alias": "bosnian" - }, - "ca": { - "alias": "catalan" - }, - "cs": { - "alias": "czech" - }, - "cy": { - "alias": "welsh" - }, - "da": { - "alias": "dansk" - }, - "de": { - "alias": "deutsch" - }, - "el": { - "alias": "greek" - }, - "en": { - "alias": "english" - }, - "en-GB": { - "alias": "english_uk" - }, - "eo": { - "alias": "esperanto" - }, - "es": { - "alias": "espanol" - }, - "et": { - "alias": "estonian" - }, - "eu": { - "alias": "basque" - }, - "fa": { - "alias": "persian" - }, - "fi": { - "alias": "suomi" - }, - "fo": { - "alias": "faroese" - }, - "fr": { - "alias": "francais" - }, - "fy": { - "alias": "frisian" - }, - "ga": { - "alias": "irish" - }, - "gd": { - "alias": "gaelic" - }, - "gl": { - "alias": "galician" - }, - "gu": { - "alias": "gujarati" - }, - "he": { - "alias": "hebrew" - }, - "hi": { - "alias": "hindi" - }, - "hr": { - "alias": "croatian" - }, - "hu": { - "alias": "hungarian" - }, - "ia": { - "alias": "interlingua" - }, - "id": { - "alias": "indonesian" - }, - "is": { - "alias": "icelandic" - }, - "it": { - "alias": "italiano" - }, - "ja": { - "alias": "nihongo" - }, - "jv": { - "alias": "javanese" - }, - "ka": { - "alias": "georgian" - }, - "kn": { - "alias": "kannada" - }, - "ko": { - "alias": "hangul" - }, - "la": { - "alias": "latin" - }, - "lt": { - "alias": "lithuanian" - }, - "lv": { - "alias": "latvian" - }, - "mai": { - "alias": "bihari" - }, - "mk": { - "alias": "macedonian" - }, - "ml": { - "alias": "malayalam" - }, - "mr": { - "alias": "marathi" - }, - "ms": { - "alias": "malay" - }, - "mt": { - "alias": "maltese" - }, - "ne": { - "alias": "nepali" - }, - "nl": { - "alias": "nederlands" - }, - "no": { - "alias": "norsk" - }, - "oc": { - "alias": "occitan" - }, - "pa": { - "alias": "punjabi" - }, - "pl": { - "alias": "polski" - }, - "pt": { - "alias": "portugues" - }, - "ro": { - "alias": "romanian" - }, - "ru": { - "alias": "russian" - }, - "si": { - "alias": "sinhalese" - }, - "sk": { - "alias": "slovak" - }, - "sl": { - "alias": "slovenian" - }, - "sq": { - "alias": "albanian" - }, - "sr": { - "alias": "serbian" - }, - "su": { - "alias": "sudanese" - }, - "sv": { - "alias": "svenska" - }, - "sw": { - "alias": "swahili" - }, - "ta": { - "alias": "tamil" - }, - "te": { - "alias": "telugu" - }, - "th": { - "alias": "thai" - }, - "ti": { - "alias": "tigrinya" - }, - "tl": { - "alias": "tagalog" - }, - "tr": { - "alias": "turkce" - }, - "uk": { - "alias": "ukrainian" - }, - "ur": { - "alias": "urdu" - }, - "uz": { - "alias": "uzbek" - }, - "vi": { - "alias": "vietnamese" - }, - "xh": { - "alias": "xhosa" - }, - "zh": { - "alias": "jiantizhongwen" - }, - "zh-HK": { - "alias": "fantizhengwen" - }, - "zh-TW": { - "alias": "fantizhengwen" - }, - "zu": { - "alias": "zulu" - } - }, - "wikidata": { - "ab": { - "english_name": "Abkhazian", - "name": "\u0410\u0525\u0441\u0443\u0430" - }, - "ace": { - "english_name": "Acehnese", - "name": "Basa Ac\u00e8h" - }, - "ady": { - "english_name": "Adyghe", - "name": "\u0410\u0434\u044b\u0433\u044d\u0431\u0437\u044d" - }, - "af": { - "english_name": "Afrikaans", - "name": "Afrikaans" - }, - "ak": { - "english_name": "Akan", - "name": "Akana" - }, - "als": { - "english_name": "Alemannic", - "name": "Alemannisch" - }, - "alt": { - "english_name": "Southern Altai", - "name": "\u0410\u043b\u0442\u0430\u0439" - }, - "am": { - "english_name": "Amharic", - "name": "\u12a0\u121b\u122d\u129b" - }, - "ami": { - "english_name": "Amis", - "name": "Pangcah" - }, - "an": { - "english_name": "Aragonese", - "name": "Aragon\u00e9s" - }, - "ang": { - "english_name": "Anglo-Saxon", - "name": "\u00c6nglisc" - }, - "ar": { - "english_name": "Arabic", - "name": "\u0627\u0644\u0639\u0631\u0628\u064a\u0629" - }, - "arc": { - "english_name": "Aramaic", - "name": "\u0710\u072a\u0721\u071d\u0710" - }, - "ary": { - "english_name": "Moroccan Arabic", - "name": "\u062f\u0627\u0631\u064a\u062c\u0629" - }, - "arz": { - "english_name": "Egyptian Arabic", - "name": "\u0645\u0635\u0631\u0649 (Ma\u1e63ri)" - }, - "as": { - "english_name": "Assamese", - "name": "\u0985\u09b8\u09ae\u09c0\u09af\u09bc\u09be" - }, - "ast": { - "english_name": "Asturian", - "name": "Asturianu" - }, - "atj": { - "english_name": "Atikamekw", - "name": "Atikamekw" - }, - "av": { - "english_name": "Avar", - "name": "\u0410\u0432\u0430\u0440" - }, - "avk": { - "english_name": "Kotava", - "name": "Kotava" - }, - "awa": { - "english_name": "Awadhi", - "name": "\u0905\u0935\u0927\u0940" - }, - "ay": { - "english_name": "Aymara", - "name": "Aymar" - }, - "az": { - "english_name": "Azerbaijani", - "name": "Az\u0259rbaycanca" - }, - "azb": { - "english_name": "South Azerbaijani", - "name": "\u062a\u06c6\u0631\u06a9\u062c\u0647" - }, - "ba": { - "english_name": "Bashkir", - "name": "\u0411\u0430\u0448\u04a1\u043e\u0440\u0442" - }, - "ban": { - "english_name": "Balinese", - "name": "Bali" - }, - "bar": { - "english_name": "Bavarian", - "name": "Boarisch" - }, - "bat-smg": { - "english_name": "Samogitian", - "name": "\u017demait\u0117\u0161ka" - }, - "bcl": { - "english_name": "Central Bicolano", - "name": "Bikol" - }, - "be": { - "english_name": "Belarusian", - "name": "\u0411\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f" - }, - "be-tarask": { - "english_name": "Belarusian (Tara\u0161kievica)", - "name": "\u0411\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f (\u0442\u0430\u0440\u0430\u0448\u043a\u0435\u0432\u0456\u0446\u0430)" - }, - "bg": { - "english_name": "Bulgarian", - "name": "\u0411\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438" - }, - "bh": { - "english_name": "Bhojpuri", - "name": "\u092d\u094b\u091c\u092a\u0941\u0930\u0940" - }, - "bi": { - "english_name": "Bislama", - "name": "Bislama" - }, - "bjn": { - "english_name": "Banjar", - "name": "Bahasa Banjar" - }, - "blk": { - "english_name": "Pa'O", - "name": "\u1015\u1021\u102d\u102f\u101d\u103a\u108f\u1018\u102c\u108f\u101e\u102c\u108f" - }, - "bm": { - "english_name": "Bambara", - "name": "Bamanankan" - }, - "bn": { - "english_name": "Bengali", - "name": "\u09ac\u09be\u0982\u09b2\u09be" - }, - "bo": { - "english_name": "Tibetan", - "name": "\u0f56\u0f7c\u0f51\u0f0b\u0f66\u0f90\u0f51" - }, - "bpy": { - "english_name": "Bishnupriya Manipuri", - "name": "\u0987\u09ae\u09be\u09b0 \u09a0\u09be\u09b0/\u09ac\u09bf\u09b7\u09cd\u09a3\u09c1\u09aa\u09cd\u09b0\u09bf\u09af\u09bc\u09be \u09ae\u09a3\u09bf\u09aa\u09c1\u09b0\u09c0" - }, - "br": { - "english_name": "Breton", - "name": "Brezhoneg" - }, - "bs": { - "english_name": "Bosnian", - "name": "Bosanski" - }, - "bug": { - "english_name": "Buginese", - "name": "Basa Ugi" - }, - "bxr": { - "english_name": "Buryat", - "name": "\u0411\u0443\u0440\u044f\u0430\u0434" - }, - "ca": { - "english_name": "Catalan", - "name": "Catal\u00e0" - }, - "cbk-zam": { - "english_name": "Zamboanga Chavacano", - "name": "Chavacano de Zamboanga" - }, - "cdo": { - "english_name": "Min Dong", - "name": "M\u00ecng-d\u0115\u0324ng-ng\u1e73\u0304" - }, - "ce": { - "english_name": "Chechen", - "name": "\u041d\u043e\u0445\u0447\u0438\u0439\u043d" - }, - "ceb": { - "english_name": "Cebuano", - "name": "Sinugboanong Binisaya" - }, - "ch": { - "english_name": "Chamorro", - "name": "Chamoru" - }, - "chr": { - "english_name": "Cherokee", - "name": "\u13e3\u13b3\u13a9" - }, - "chy": { - "english_name": "Cheyenne", - "name": "Tsets\u00eahest\u00e2hese" - }, - "ckb": { - "english_name": "Sorani", - "name": "Soran\u00ee / \u06a9\u0648\u0631\u062f\u06cc" - }, - "co": { - "english_name": "Corsican", - "name": "Corsu" - }, - "cr": { - "english_name": "Cree", - "name": "Nehiyaw" - }, - "crh": { - "english_name": "Crimean Tatar", - "name": "Q\u0131r\u0131mtatarca" - }, - "cs": { - "english_name": "Czech", - "name": "\u010ce\u0161tina" - }, - "csb": { - "english_name": "Kashubian", - "name": "Kasz\u00ebbsczi" - }, - "cu": { - "english_name": "Old Church Slavonic", - "name": "\u0421\u043b\u043e\u0432\u0463\u043d\u044c\u0441\u043a\u044a" - }, - "cv": { - "english_name": "Chuvash", - "name": "\u0427\u0103\u0432\u0430\u0448" - }, - "cy": { - "english_name": "Welsh", - "name": "Cymraeg" - }, - "da": { - "english_name": "Danish", - "name": "Dansk" - }, - "dag": { - "english_name": "Dagbani", - "name": "Dagbanli" - }, - "de": { - "english_name": "German", - "name": "Deutsch" - }, - "din": { - "english_name": "Dinka", - "name": "Thu\u0254\u014bj\u00e4\u014b" - }, - "diq": { - "english_name": "Zazaki", - "name": "Zazaki" - }, - "dsb": { - "english_name": "Lower Sorbian", - "name": "Dolnoserbski" - }, - "dty": { - "english_name": "Doteli", - "name": "\u0921\u094b\u091f\u0947\u0932\u0940" - }, - "dv": { - "english_name": "Divehi", - "name": "\u078b\u07a8\u0788\u07ac\u0780\u07a8\u0784\u07a6\u0790\u07b0" - }, - "dz": { - "english_name": "Dzongkha", - "name": "\u0f47\u0f7c\u0f44\u0f0b\u0f41" - }, - "ee": { - "english_name": "Ewe", - "name": "E\u028begbe" - }, - "el": { - "english_name": "Greek", - "name": "\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac" - }, - "eml": { - "english_name": "Emilian-Romagnol", - "name": "Emili\u00e0n e rumagn\u00f2l" - }, - "en": { - "english_name": "English", - "name": "English" - }, - "eo": { - "english_name": "Esperanto", - "name": "Esperanto" - }, - "es": { - "english_name": "Spanish", - "name": "Espa\u00f1ol" - }, - "et": { - "english_name": "Estonian", - "name": "Eesti" - }, - "eu": { - "english_name": "Basque", - "name": "Euskara" - }, - "ext": { - "english_name": "Extremaduran", - "name": "Estreme\u00f1u" - }, - "fa": { - "english_name": "Persian", - "name": "\u0641\u0627\u0631\u0633\u06cc" - }, - "ff": { - "english_name": "Fula", - "name": "Fulfulde" - }, - "fi": { - "english_name": "Finnish", - "name": "Suomi" - }, - "fiu-vro": { - "english_name": "V\u00f5ro", - "name": "V\u00f5ro" - }, - "fj": { - "english_name": "Fijian", - "name": "Na Vosa Vakaviti" - }, - "fo": { - "english_name": "Faroese", - "name": "F\u00f8royskt" - }, - "fr": { - "english_name": "French", - "name": "Fran\u00e7ais" - }, - "frp": { - "english_name": "Franco-Proven\u00e7al", - "name": "Arpetan" - }, - "frr": { - "english_name": "North Frisian", - "name": "Nordfrasch" - }, - "fur": { - "english_name": "Friulian", - "name": "Furlan" - }, - "fy": { - "english_name": "West Frisian", - "name": "Frysk" - }, - "ga": { - "english_name": "Irish", - "name": "Gaeilge" - }, - "gag": { - "english_name": "Gagauz", - "name": "Gagauz" - }, - "gan": { - "english_name": "Gan", - "name": "\u8d1b\u8a9e" - }, - "gcr": { - "english_name": "Guianan Creole", - "name": "Kriy\u00f2l Gwiyannen" - }, - "gd": { - "english_name": "Scottish Gaelic", - "name": "G\u00e0idhlig" - }, - "gl": { - "english_name": "Galician", - "name": "Galego" - }, - "glk": { - "english_name": "Gilaki", - "name": "\u06af\u06cc\u0644\u06a9\u06cc" - }, - "gn": { - "english_name": "Guarani", - "name": "Ava\u00f1e'\u1ebd" - }, - "gom": { - "english_name": "Goan Konkani", - "name": "\u0917\u094b\u0902\u092f\u091a\u0940 \u0915\u094b\u0902\u0915\u0923\u0940 / G\u00f5ychi Konknni" - }, - "gor": { - "english_name": "Gorontalo", - "name": "Hulontalo" - }, - "got": { - "english_name": "Gothic", - "name": "\ud800\udf32\ud800\udf3f\ud800\udf44\ud800\udf39\ud800\udf43\ud800\udf3a" - }, - "gu": { - "english_name": "Gujarati", - "name": "\u0a97\u0ac1\u0a9c\u0ab0\u0abe\u0aa4\u0ac0" - }, - "guw": { - "english_name": "Gun", - "name": "Gungbe" - }, - "gv": { - "english_name": "Manx", - "name": "Gaelg" - }, - "ha": { - "english_name": "Hausa", - "name": "Hausa / \u0647\u064e\u0648\u064f\u0633\u064e" - }, - "hak": { - "english_name": "Hakka", - "name": "Hak-k\u00e2-fa / \u5ba2\u5bb6\u8a71" - }, - "haw": { - "english_name": "Hawaiian", - "name": "Hawai\u02bbi" - }, - "he": { - "english_name": "Hebrew", - "name": "\u05e2\u05d1\u05e8\u05d9\u05ea" - }, - "hi": { - "english_name": "Hindi", - "name": "\u0939\u093f\u0928\u094d\u0926\u0940" - }, - "hif": { - "english_name": "Fiji Hindi", - "name": "Fiji Hindi" - }, - "hr": { - "english_name": "Croatian", - "name": "Hrvatski" - }, - "hsb": { - "english_name": "Upper Sorbian", - "name": "Hornjoserbsce" - }, - "ht": { - "english_name": "Haitian", - "name": "Kr\u00e8yol ayisyen" - }, - "hu": { - "english_name": "Hungarian", - "name": "Magyar" - }, - "hy": { - "english_name": "Armenian", - "name": "\u0540\u0561\u0575\u0565\u0580\u0565\u0576" - }, - "hyw": { - "english_name": "Western Armenian", - "name": "\u0531\u0580\u0565\u0582\u0574\u057f\u0561\u0570\u0561\u0575\u0565\u0580\u0567\u0576" - }, - "ia": { - "english_name": "Interlingua", - "name": "Interlingua" - }, - "id": { - "english_name": "Indonesian", - "name": "Bahasa Indonesia" - }, - "ie": { - "english_name": "Interlingue", - "name": "Interlingue" - }, - "ig": { - "english_name": "Igbo", - "name": "\u00ccgb\u00f2" - }, - "ik": { - "english_name": "Inupiak", - "name": "I\u00f1upiatun" - }, - "ilo": { - "english_name": "Ilokano", - "name": "Ilokano" - }, - "inh": { - "english_name": "Ingush", - "name": "\u0413\u04c0\u0430\u043b\u0433\u04c0\u0430\u0439" - }, - "io": { - "english_name": "Ido", - "name": "Ido" - }, - "is": { - "english_name": "Icelandic", - "name": "\u00cdslenska" - }, - "it": { - "english_name": "Italian", - "name": "Italiano" - }, - "iu": { - "english_name": "Inuktitut", - "name": "\u1403\u14c4\u1483\u144e\u1450\u1466" - }, - "ja": { - "english_name": "Japanese", - "name": "\u65e5\u672c\u8a9e" - }, - "jam": { - "english_name": "Jamaican Patois", - "name": "Jumiekan Kryuol" - }, - "jbo": { - "english_name": "Lojban", - "name": "Lojban" - }, - "jv": { - "english_name": "Javanese", - "name": "Basa Jawa" - }, - "ka": { - "english_name": "Georgian", - "name": "\u10e5\u10d0\u10e0\u10d7\u10e3\u10da\u10d8" - }, - "kaa": { - "english_name": "Karakalpak", - "name": "Qaraqalpaqsha" - }, - "kab": { - "english_name": "Kabyle", - "name": "Taqbaylit" - }, - "kbd": { - "english_name": "Kabardian Circassian", - "name": "\u0410\u0434\u044b\u0433\u044d\u0431\u0437\u044d (Adighabze)" - }, - "kbp": { - "english_name": "Kabiye", - "name": "Kab\u0269y\u025b" - }, - "kcg": { - "english_name": "Tyap", - "name": "Tyap" - }, - "kg": { - "english_name": "Kongo", - "name": "Kik\u00f4ngo" - }, - "ki": { - "english_name": "Kikuyu", - "name": "G\u0129k\u0169y\u0169" - }, - "kk": { - "english_name": "Kazakh", - "name": "\u049a\u0430\u0437\u0430\u049b\u0448\u0430" - }, - "kl": { - "english_name": "Greenlandic", - "name": "Kalaallisut" - }, - "km": { - "english_name": "Khmer", - "name": "\u1797\u17b6\u179f\u17b6\u1781\u17d2\u1798\u17c2\u179a" - }, - "kn": { - "english_name": "Kannada", - "name": "\u0c95\u0ca8\u0ccd\u0ca8\u0ca1" - }, - "ko": { - "english_name": "Korean", - "name": "\ud55c\uad6d\uc5b4" - }, - "koi": { - "english_name": "Komi-Permyak", - "name": "\u041f\u0435\u0440\u0435\u043c \u041a\u043e\u043c\u0438 (Perem Komi)" - }, - "krc": { - "english_name": "Karachay-Balkar", - "name": "\u041a\u044a\u0430\u0440\u0430\u0447\u0430\u0439-\u041c\u0430\u043b\u043a\u044a\u0430\u0440 (Qarachay-Malqar)" - }, - "ks": { - "english_name": "Kashmiri", - "name": "\u0915\u0936\u094d\u092e\u0940\u0930\u0940 / \u0643\u0634\u0645\u064a\u0631\u064a" - }, - "ksh": { - "english_name": "Ripuarian", - "name": "Ripoarisch" - }, - "ku": { - "english_name": "Kurdish", - "name": "Kurd\u00ee / \u0643\u0648\u0631\u062f\u06cc" - }, - "kv": { - "english_name": "Komi", - "name": "\u041a\u043e\u043c\u0438" - }, - "kw": { - "english_name": "Cornish", - "name": "Kernowek/Karnuack" - }, - "ky": { - "english_name": "Kyrgyz", - "name": "\u041a\u044b\u0440\u0433\u044b\u0437\u0447\u0430" - }, - "la": { - "english_name": "Latin", - "name": "Latina" - }, - "lad": { - "english_name": "Ladino", - "name": "Dzhudezmo" - }, - "lb": { - "english_name": "Luxembourgish", - "name": "L\u00ebtzebuergesch" - }, - "lbe": { - "english_name": "Lak", - "name": "\u041b\u0430\u043a\u043a\u0443" - }, - "lez": { - "english_name": "Lezgian", - "name": "\u041b\u0435\u0437\u0433\u0438 \u0447\u0406\u0430\u043b (Lezgi \u010d\u2019al)" - }, - "lfn": { - "english_name": "Lingua Franca Nova", - "name": "Lingua franca nova" - }, - "lg": { - "english_name": "Luganda", - "name": "Luganda" - }, - "li": { - "english_name": "Limburgish", - "name": "Limburgs" - }, - "lij": { - "english_name": "Ligurian", - "name": "L\u00ecgure" - }, - "lld": { - "english_name": "Ladin", - "name": "Lingaz" - }, - "lmo": { - "english_name": "Lombard", - "name": "Lumbaart" - }, - "ln": { - "english_name": "Lingala", - "name": "Lingala" - }, - "lo": { - "english_name": "Lao", - "name": "\u0ea5\u0eb2\u0ea7" - }, - "lt": { - "english_name": "Lithuanian", - "name": "Lietuvi\u0173" - }, - "ltg": { - "english_name": "Latgalian", - "name": "Latga\u013cu" - }, - "lv": { - "english_name": "Latvian", - "name": "Latvie\u0161u" - }, - "mad": { - "english_name": "Madurese", - "name": "Madhur\u00e2" - }, - "mai": { - "english_name": "Maithili", - "name": "\u092e\u0948\u0925\u093f\u0932\u0940" - }, - "map-bms": { - "english_name": "Banyumasan", - "name": "Basa Banyumasan" - }, - "mdf": { - "english_name": "Moksha", - "name": "\u041c\u043e\u043a\u0448\u0435\u043d\u044c (Mokshanj K\u00e4lj)" - }, - "mg": { - "english_name": "Malagasy", - "name": "Malagasy" - }, - "mhr": { - "english_name": "Meadow Mari", - "name": "\u041e\u043b\u044b\u043a \u041c\u0430\u0440\u0438\u0439 (Olyk Marij)" - }, - "mi": { - "english_name": "Maori", - "name": "M\u0101ori" - }, - "min": { - "english_name": "Minangkabau", - "name": "Minangkabau" - }, - "mk": { - "english_name": "Macedonian", - "name": "\u041c\u0430\u043a\u0435\u0434\u043e\u043d\u0441\u043a\u0438" - }, - "ml": { - "english_name": "Malayalam", - "name": "\u0d2e\u0d32\u0d2f\u0d3e\u0d33\u0d02" - }, - "mn": { - "english_name": "Mongolian", - "name": "\u041c\u043e\u043d\u0433\u043e\u043b" - }, - "mni": { - "english_name": "Meitei", - "name": "\uabc3\uabe4\uabc7\uabe9\uabc2\uabe3\uabdf" - }, - "mnw": { - "english_name": "Mon", - "name": "\u1019\u1014\u103a" - }, - "mr": { - "english_name": "Marathi", - "name": "\u092e\u0930\u093e\u0920\u0940" - }, - "mrj": { - "english_name": "Hill Mari", - "name": "\u041a\u044b\u0440\u044b\u043a \u041c\u0430\u0440\u044b (Kyryk Mary)" - }, - "ms": { - "english_name": "Malay", - "name": "Bahasa Melayu" - }, - "mt": { - "english_name": "Maltese", - "name": "Malti" - }, - "mwl": { - "english_name": "Mirandese", - "name": "Mirand\u00e9s" - }, - "my": { - "english_name": "Burmese", - "name": "\u1019\u103c\u1014\u103a\u1019\u102c\u1018\u102c\u101e\u102c" - }, - "myv": { - "english_name": "Erzya", - "name": "\u042d\u0440\u0437\u044f\u043d\u044c (Erzjanj Kelj)" - }, - "mzn": { - "english_name": "Mazandarani", - "name": "\u0645\u064e\u0632\u0650\u0631\u0648\u0646\u064a" - }, - "na": { - "english_name": "Nauruan", - "name": "dorerin Naoero" - }, - "nah": { - "english_name": "Nahuatl", - "name": "N\u0101huatl" - }, - "nap": { - "english_name": "Neapolitan", - "name": "Nnapulitano" - }, - "nds": { - "english_name": "Low Saxon", - "name": "Plattd\u00fc\u00fctsch" - }, - "nds-nl": { - "english_name": "Dutch Low Saxon", - "name": "Nedersaksisch" - }, - "ne": { - "english_name": "Nepali", - "name": "\u0928\u0947\u092a\u093e\u0932\u0940" - }, - "new": { - "english_name": "Newar", - "name": "\u0928\u0947\u092a\u093e\u0932 \u092d\u093e\u0937\u093e" - }, - "nia": { - "english_name": "Nias", - "name": "Li Niha" - }, - "nl": { - "english_name": "Dutch", - "name": "Nederlands" - }, - "nn": { - "english_name": "Norwegian (Nynorsk)", - "name": "Nynorsk" - }, - "no": { - "english_name": "Norwegian (Bokm\u00e5l)", - "name": "Norsk (Bokm\u00e5l)" - }, - "nov": { - "english_name": "Novial", - "name": "Novial" - }, - "nqo": { - "english_name": "N\u2019Ko", - "name": "\u07d2\u07de\u07cf" - }, - "nrm": { - "english_name": "Norman", - "name": "Nouormand/Normaund" - }, - "nso": { - "english_name": "Northern Sotho", - "name": "Sepedi" - }, - "nv": { - "english_name": "Navajo", - "name": "Din\u00e9 bizaad" - }, - "ny": { - "english_name": "Chichewa", - "name": "Chichewa" - }, - "oc": { - "english_name": "Occitan", - "name": "Occitan" - }, - "olo": { - "english_name": "Livvi-Karelian", - "name": "Karjalan" - }, - "om": { - "english_name": "Oromo", - "name": "Oromoo" - }, - "or": { - "english_name": "Oriya", - "name": "\u0b13\u0b21\u0b3c\u0b3f\u0b06" - }, - "os": { - "english_name": "Ossetian", - "name": "\u0418\u0440\u043e\u043d\u0430\u0443" - }, - "pa": { - "english_name": "Punjabi", - "name": "\u0a2a\u0a70\u0a1c\u0a3e\u0a2c\u0a40" - }, - "pag": { - "english_name": "Pangasinan", - "name": "Pangasinan" - }, - "pam": { - "english_name": "Kapampangan", - "name": "Kapampangan" - }, - "pap": { - "english_name": "Papiamentu", - "name": "Papiamentu" - }, - "pcd": { - "english_name": "Picard", - "name": "Picard" - }, - "pcm": { - "english_name": "Nigerian Pidgin", - "name": "Naij\u00e1" - }, - "pdc": { - "english_name": "Pennsylvania German", - "name": "Deitsch" - }, - "pfl": { - "english_name": "Palatinate German", - "name": "P\u00e4lzisch" - }, - "pi": { - "english_name": "Pali", - "name": "\u092a\u093e\u0934\u093f" - }, - "pih": { - "english_name": "Norfolk", - "name": "Norfuk" - }, - "pl": { - "english_name": "Polish", - "name": "Polski" - }, - "pms": { - "english_name": "Piedmontese", - "name": "Piemont\u00e8is" - }, - "pnb": { - "english_name": "Western Punjabi", - "name": "\u0634\u0627\u06c1 \u0645\u06a9\u06be\u06cc \u067e\u0646\u062c\u0627\u0628\u06cc (Sh\u0101hmukh\u012b Pa\u00f1j\u0101b\u012b)" - }, - "pnt": { - "english_name": "Pontic", - "name": "\u03a0\u03bf\u03bd\u03c4\u03b9\u03b1\u03ba\u03ac" - }, - "ps": { - "english_name": "Pashto", - "name": "\u067e\u069a\u062a\u0648" - }, - "pt": { - "english_name": "Portuguese", - "name": "Portugu\u00eas" - }, - "pwn": { - "english_name": "Paiwan", - "name": "Paiwan" - }, - "qu": { - "english_name": "Quechua", - "name": "Qichwa simi" - }, - "rm": { - "english_name": "Romansh", - "name": "Rumantsch" - }, - "rmy": { - "english_name": "Romani", - "name": "romani - \u0930\u094b\u092e\u093e\u0928\u0940" - }, - "rn": { - "english_name": "Kirundi", - "name": "Ikirundi" - }, - "ro": { - "english_name": "Romanian", - "name": "Rom\u00e2n\u0103" - }, - "roa-rup": { - "english_name": "Aromanian", - "name": "Arm\u00e3neashce" - }, - "roa-tara": { - "english_name": "Tarantino", - "name": "Tarand\u00edne" - }, - "ru": { - "english_name": "Russian", - "name": "\u0420\u0443\u0441\u0441\u043a\u0438\u0439" - }, - "rue": { - "english_name": "Rusyn", - "name": "\u0420\u0443\u0441\u0438\u043d\u044c\u0441\u043a\u044b\u0439" - }, - "rw": { - "english_name": "Kinyarwanda", - "name": "Ikinyarwanda" - }, - "sa": { - "english_name": "Sanskrit", - "name": "\u0938\u0902\u0938\u094d\u0915\u0943\u0924\u092e\u094d" - }, - "sah": { - "english_name": "Sakha", - "name": "\u0421\u0430\u0445\u0430 \u0442\u044b\u043b\u0430 (Saxa Tyla)" - }, - "sat": { - "english_name": "Santali", - "name": "\u1c65\u1c5f\u1c71\u1c5b\u1c5f\u1c72\u1c64" - }, - "sc": { - "english_name": "Sardinian", - "name": "Sardu" - }, - "scn": { - "english_name": "Sicilian", - "name": "Sicilianu" - }, - "sco": { - "english_name": "Scots", - "name": "Scots" - }, - "sd": { - "english_name": "Sindhi", - "name": "\u0633\u0646\u068c\u064a\u060c \u0633\u0646\u062f\u06be\u06cc \u060c \u0938\u093f\u0928\u094d\u0927" - }, - "se": { - "english_name": "Northern Sami", - "name": "S\u00e1megiella" - }, - "sg": { - "english_name": "Sango", - "name": "S\u00e4ng\u00f6" - }, - "sh": { - "english_name": "Serbo-Croatian", - "name": "Srpskohrvatski / \u0421\u0440\u043f\u0441\u043a\u043e\u0445\u0440\u0432\u0430\u0442\u0441\u043a\u0438" - }, - "shi": { - "english_name": "Tachelhit", - "name": "Tacl\u1e25it" - }, - "shn": { - "english_name": "Shan", - "name": "\u101c\u102d\u1075\u103a\u1088\u1010\u1086\u1038" - }, - "si": { - "english_name": "Sinhalese", - "name": "\u0dc3\u0dd2\u0d82\u0dc4\u0dbd" - }, - "simple": { - "english_name": "Simple English", - "name": "Simple English" - }, - "sk": { - "english_name": "Slovak", - "name": "Sloven\u010dina" - }, - "skr": { - "english_name": "Saraiki", - "name": "\u0633\u0631\u0627\u0626\u06cc\u06a9\u06cc" - }, - "sl": { - "english_name": "Slovenian", - "name": "Sloven\u0161\u010dina" - }, - "sm": { - "english_name": "Samoan", - "name": "Gagana Samoa" - }, - "smn": { - "english_name": "Inari Sami", - "name": "Anar\u00e2\u0161kiel\u00e2" - }, - "sn": { - "english_name": "Shona", - "name": "chiShona" - }, - "so": { - "english_name": "Somali", - "name": "Soomaali" - }, - "sq": { - "english_name": "Albanian", - "name": "Shqip" - }, - "sr": { - "english_name": "Serbian", - "name": "\u0421\u0440\u043f\u0441\u043a\u0438 / Srpski" - }, - "srn": { - "english_name": "Sranan", - "name": "Sranantongo" - }, - "ss": { - "english_name": "Swati", - "name": "SiSwati" - }, - "st": { - "english_name": "Sesotho", - "name": "Sesotho" - }, - "stq": { - "english_name": "Saterland Frisian", - "name": "Seeltersk" - }, - "su": { - "english_name": "Sundanese", - "name": "Basa Sunda" - }, - "sv": { - "english_name": "Swedish", - "name": "Svenska" - }, - "sw": { - "english_name": "Swahili", - "name": "Kiswahili" - }, - "szl": { - "english_name": "Silesian", - "name": "\u015al\u016fnski" - }, - "szy": { - "english_name": "Sakizaya", - "name": "Sakizaya" - }, - "ta": { - "english_name": "Tamil", - "name": "\u0ba4\u0bae\u0bbf\u0bb4\u0bcd" - }, - "tay": { - "english_name": "Atayal", - "name": "Tayal" - }, - "tcy": { - "english_name": "Tulu", - "name": "\u0ca4\u0cc1\u0cb3\u0cc1" - }, - "te": { - "english_name": "Telugu", - "name": "\u0c24\u0c46\u0c32\u0c41\u0c17\u0c41" - }, - "tet": { - "english_name": "Tetum", - "name": "Tetun" - }, - "tg": { - "english_name": "Tajik", - "name": "\u0422\u043e\u04b7\u0438\u043a\u04e3" - }, - "th": { - "english_name": "Thai", - "name": "\u0e44\u0e17\u0e22" - }, - "ti": { - "english_name": "Tigrinya", - "name": "\u1275\u130d\u122d\u129b" - }, - "tk": { - "english_name": "Turkmen", - "name": "T\u00fcrkmen" - }, - "tl": { - "english_name": "Tagalog", - "name": "Tagalog" - }, - "tn": { - "english_name": "Tswana", - "name": "Setswana" - }, - "to": { - "english_name": "Tongan", - "name": "faka Tonga" - }, - "tpi": { - "english_name": "Tok Pisin", - "name": "Tok Pisin" - }, - "tr": { - "english_name": "Turkish", - "name": "T\u00fcrk\u00e7e" - }, - "trv": { - "english_name": "Seediq", - "name": "Taroko" - }, - "ts": { - "english_name": "Tsonga", - "name": "Xitsonga" - }, - "tt": { - "english_name": "Tatar", - "name": "Tatar\u00e7a / \u0422\u0430\u0442\u0430\u0440\u0447\u0430" - }, - "tum": { - "english_name": "Tumbuka", - "name": "chiTumbuka" - }, - "tw": { - "english_name": "Twi", - "name": "Twi" - }, - "ty": { - "english_name": "Tahitian", - "name": "Reo M\u0101`ohi" - }, - "tyv": { - "english_name": "Tuvan", - "name": "\u0422\u044b\u0432\u0430" - }, - "udm": { - "english_name": "Udmurt", - "name": "\u0423\u0434\u043c\u0443\u0440\u0442 \u043a\u044b\u043b" - }, - "ug": { - "english_name": "Uyghur", - "name": "\u0626\u06c7\u064a\u063a\u06c7\u0631 \u062a\u0649\u0644\u0649" - }, - "uk": { - "english_name": "Ukrainian", - "name": "\u0423\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430" - }, - "ur": { - "english_name": "Urdu", - "name": "\u0627\u0631\u062f\u0648" - }, - "uz": { - "english_name": "Uzbek", - "name": "O\u2018zbek" - }, - "ve": { - "english_name": "Venda", - "name": "Tshivenda" - }, - "vec": { - "english_name": "Venetian", - "name": "V\u00e8neto" - }, - "vep": { - "english_name": "Vepsian", - "name": "Veps\u00e4n" - }, - "vi": { - "english_name": "Vietnamese", - "name": "Ti\u1ebfng Vi\u1ec7t" - }, - "vls": { - "english_name": "West Flemish", - "name": "West-Vlams" - }, - "vo": { - "english_name": "Volap\u00fck", - "name": "Volap\u00fck" - }, - "wa": { - "english_name": "Walloon", - "name": "Walon" - }, - "war": { - "english_name": "Waray-Waray", - "name": "Winaray" - }, - "wo": { - "english_name": "Wolof", - "name": "Wolof" - }, - "wuu": { - "english_name": "Wu", - "name": "\u5434\u8bed" - }, - "xal": { - "english_name": "Kalmyk", - "name": "\u0425\u0430\u043b\u044c\u043c\u0433" - }, - "xh": { - "english_name": "Xhosa", - "name": "isiXhosa" - }, - "xmf": { - "english_name": "Mingrelian", - "name": "\u10db\u10d0\u10e0\u10d2\u10d0\u10da\u10e3\u10e0\u10d8 (Margaluri)" - }, - "yi": { - "english_name": "Yiddish", - "name": "\u05d9\u05d9\u05b4\u05d3\u05d9\u05e9" - }, - "yo": { - "english_name": "Yoruba", - "name": "Yor\u00f9b\u00e1" - }, - "za": { - "english_name": "Zhuang", - "name": "Cuengh" - }, - "zea": { - "english_name": "Zeelandic", - "name": "Ze\u00eauws" - }, - "zh": { - "english_name": "Chinese", - "name": "\u4e2d\u6587" - }, - "zh-classical": { - "english_name": "Classical Chinese", - "name": "\u53e4\u6587 / \u6587\u8a00\u6587" - }, - "zh-min-nan": { - "english_name": "Min Nan", - "name": "B\u00e2n-l\u00e2m-g\u00fa" - }, - "zh-yue": { - "english_name": "Cantonese", - "name": "\u7cb5\u8a9e" - }, - "zu": { - "english_name": "Zulu", - "name": "isiZulu" - } - }, - "wikipedia": { - "ab": { - "english_name": "Abkhazian", - "name": "\u0410\u0525\u0441\u0443\u0430" - }, - "ace": { - "english_name": "Acehnese", - "name": "Basa Ac\u00e8h" - }, - "ady": { - "english_name": "Adyghe", - "name": "\u0410\u0434\u044b\u0433\u044d\u0431\u0437\u044d" - }, - "af": { - "english_name": "Afrikaans", - "name": "Afrikaans" - }, - "ak": { - "english_name": "Akan", - "name": "Akana" - }, - "als": { - "english_name": "Alemannic", - "name": "Alemannisch" - }, - "alt": { - "english_name": "Southern Altai", - "name": "\u0410\u043b\u0442\u0430\u0439" - }, - "am": { - "english_name": "Amharic", - "name": "\u12a0\u121b\u122d\u129b" - }, - "ami": { - "english_name": "Amis", - "name": "Pangcah" - }, - "an": { - "english_name": "Aragonese", - "name": "Aragon\u00e9s" - }, - "ang": { - "english_name": "Anglo-Saxon", - "name": "\u00c6nglisc" - }, - "ar": { - "english_name": "Arabic", - "name": "\u0627\u0644\u0639\u0631\u0628\u064a\u0629" - }, - "arc": { - "english_name": "Aramaic", - "name": "\u0710\u072a\u0721\u071d\u0710" - }, - "ary": { - "english_name": "Moroccan Arabic", - "name": "\u062f\u0627\u0631\u064a\u062c\u0629" - }, - "arz": { - "english_name": "Egyptian Arabic", - "name": "\u0645\u0635\u0631\u0649 (Ma\u1e63ri)" - }, - "as": { - "english_name": "Assamese", - "name": "\u0985\u09b8\u09ae\u09c0\u09af\u09bc\u09be" - }, - "ast": { - "english_name": "Asturian", - "name": "Asturianu" - }, - "atj": { - "english_name": "Atikamekw", - "name": "Atikamekw" - }, - "av": { - "english_name": "Avar", - "name": "\u0410\u0432\u0430\u0440" - }, - "avk": { - "english_name": "Kotava", - "name": "Kotava" - }, - "awa": { - "english_name": "Awadhi", - "name": "\u0905\u0935\u0927\u0940" - }, - "ay": { - "english_name": "Aymara", - "name": "Aymar" - }, - "az": { - "english_name": "Azerbaijani", - "name": "Az\u0259rbaycanca" - }, - "azb": { - "english_name": "South Azerbaijani", - "name": "\u062a\u06c6\u0631\u06a9\u062c\u0647" - }, - "ba": { - "english_name": "Bashkir", - "name": "\u0411\u0430\u0448\u04a1\u043e\u0440\u0442" - }, - "ban": { - "english_name": "Balinese", - "name": "Bali" - }, - "bar": { - "english_name": "Bavarian", - "name": "Boarisch" - }, - "bat-smg": { - "english_name": "Samogitian", - "name": "\u017demait\u0117\u0161ka" - }, - "bcl": { - "english_name": "Central Bicolano", - "name": "Bikol" - }, - "be": { - "english_name": "Belarusian", - "name": "\u0411\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f" - }, - "be-tarask": { - "english_name": "Belarusian (Tara\u0161kievica)", - "name": "\u0411\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f (\u0442\u0430\u0440\u0430\u0448\u043a\u0435\u0432\u0456\u0446\u0430)" - }, - "bg": { - "english_name": "Bulgarian", - "name": "\u0411\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438" - }, - "bh": { - "english_name": "Bhojpuri", - "name": "\u092d\u094b\u091c\u092a\u0941\u0930\u0940" - }, - "bi": { - "english_name": "Bislama", - "name": "Bislama" - }, - "bjn": { - "english_name": "Banjar", - "name": "Bahasa Banjar" - }, - "blk": { - "english_name": "Pa'O", - "name": "\u1015\u1021\u102d\u102f\u101d\u103a\u108f\u1018\u102c\u108f\u101e\u102c\u108f" - }, - "bm": { - "english_name": "Bambara", - "name": "Bamanankan" - }, - "bn": { - "english_name": "Bengali", - "name": "\u09ac\u09be\u0982\u09b2\u09be" - }, - "bo": { - "english_name": "Tibetan", - "name": "\u0f56\u0f7c\u0f51\u0f0b\u0f66\u0f90\u0f51" - }, - "bpy": { - "english_name": "Bishnupriya Manipuri", - "name": "\u0987\u09ae\u09be\u09b0 \u09a0\u09be\u09b0/\u09ac\u09bf\u09b7\u09cd\u09a3\u09c1\u09aa\u09cd\u09b0\u09bf\u09af\u09bc\u09be \u09ae\u09a3\u09bf\u09aa\u09c1\u09b0\u09c0" - }, - "br": { - "english_name": "Breton", - "name": "Brezhoneg" - }, - "bs": { - "english_name": "Bosnian", - "name": "Bosanski" - }, - "bug": { - "english_name": "Buginese", - "name": "Basa Ugi" - }, - "bxr": { - "english_name": "Buryat", - "name": "\u0411\u0443\u0440\u044f\u0430\u0434" - }, - "ca": { - "english_name": "Catalan", - "name": "Catal\u00e0" - }, - "cbk-zam": { - "english_name": "Zamboanga Chavacano", - "name": "Chavacano de Zamboanga" - }, - "cdo": { - "english_name": "Min Dong", - "name": "M\u00ecng-d\u0115\u0324ng-ng\u1e73\u0304" - }, - "ce": { - "english_name": "Chechen", - "name": "\u041d\u043e\u0445\u0447\u0438\u0439\u043d" - }, - "ceb": { - "english_name": "Cebuano", - "name": "Sinugboanong Binisaya" - }, - "ch": { - "english_name": "Chamorro", - "name": "Chamoru" - }, - "chr": { - "english_name": "Cherokee", - "name": "\u13e3\u13b3\u13a9" - }, - "chy": { - "english_name": "Cheyenne", - "name": "Tsets\u00eahest\u00e2hese" - }, - "ckb": { - "english_name": "Sorani", - "name": "Soran\u00ee / \u06a9\u0648\u0631\u062f\u06cc" - }, - "co": { - "english_name": "Corsican", - "name": "Corsu" - }, - "cr": { - "english_name": "Cree", - "name": "Nehiyaw" - }, - "crh": { - "english_name": "Crimean Tatar", - "name": "Q\u0131r\u0131mtatarca" - }, - "cs": { - "english_name": "Czech", - "name": "\u010ce\u0161tina" - }, - "csb": { - "english_name": "Kashubian", - "name": "Kasz\u00ebbsczi" - }, - "cu": { - "english_name": "Old Church Slavonic", - "name": "\u0421\u043b\u043e\u0432\u0463\u043d\u044c\u0441\u043a\u044a" - }, - "cv": { - "english_name": "Chuvash", - "name": "\u0427\u0103\u0432\u0430\u0448" - }, - "cy": { - "english_name": "Welsh", - "name": "Cymraeg" - }, - "da": { - "english_name": "Danish", - "name": "Dansk" - }, - "dag": { - "english_name": "Dagbani", - "name": "Dagbanli" - }, - "de": { - "english_name": "German", - "name": "Deutsch" - }, - "din": { - "english_name": "Dinka", - "name": "Thu\u0254\u014bj\u00e4\u014b" - }, - "diq": { - "english_name": "Zazaki", - "name": "Zazaki" - }, - "dsb": { - "english_name": "Lower Sorbian", - "name": "Dolnoserbski" - }, - "dty": { - "english_name": "Doteli", - "name": "\u0921\u094b\u091f\u0947\u0932\u0940" - }, - "dv": { - "english_name": "Divehi", - "name": "\u078b\u07a8\u0788\u07ac\u0780\u07a8\u0784\u07a6\u0790\u07b0" - }, - "dz": { - "english_name": "Dzongkha", - "name": "\u0f47\u0f7c\u0f44\u0f0b\u0f41" - }, - "ee": { - "english_name": "Ewe", - "name": "E\u028begbe" - }, - "el": { - "english_name": "Greek", - "name": "\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac" - }, - "eml": { - "english_name": "Emilian-Romagnol", - "name": "Emili\u00e0n e rumagn\u00f2l" - }, - "en": { - "english_name": "English", - "name": "English" - }, - "eo": { - "english_name": "Esperanto", - "name": "Esperanto" - }, - "es": { - "english_name": "Spanish", - "name": "Espa\u00f1ol" - }, - "et": { - "english_name": "Estonian", - "name": "Eesti" - }, - "eu": { - "english_name": "Basque", - "name": "Euskara" - }, - "ext": { - "english_name": "Extremaduran", - "name": "Estreme\u00f1u" - }, - "fa": { - "english_name": "Persian", - "name": "\u0641\u0627\u0631\u0633\u06cc" - }, - "ff": { - "english_name": "Fula", - "name": "Fulfulde" - }, - "fi": { - "english_name": "Finnish", - "name": "Suomi" - }, - "fiu-vro": { - "english_name": "V\u00f5ro", - "name": "V\u00f5ro" - }, - "fj": { - "english_name": "Fijian", - "name": "Na Vosa Vakaviti" - }, - "fo": { - "english_name": "Faroese", - "name": "F\u00f8royskt" - }, - "fr": { - "english_name": "French", - "name": "Fran\u00e7ais" - }, - "frp": { - "english_name": "Franco-Proven\u00e7al", - "name": "Arpetan" - }, - "frr": { - "english_name": "North Frisian", - "name": "Nordfrasch" - }, - "fur": { - "english_name": "Friulian", - "name": "Furlan" - }, - "fy": { - "english_name": "West Frisian", - "name": "Frysk" - }, - "ga": { - "english_name": "Irish", - "name": "Gaeilge" - }, - "gag": { - "english_name": "Gagauz", - "name": "Gagauz" - }, - "gan": { - "english_name": "Gan", - "name": "\u8d1b\u8a9e" - }, - "gcr": { - "english_name": "Guianan Creole", - "name": "Kriy\u00f2l Gwiyannen" - }, - "gd": { - "english_name": "Scottish Gaelic", - "name": "G\u00e0idhlig" - }, - "gl": { - "english_name": "Galician", - "name": "Galego" - }, - "glk": { - "english_name": "Gilaki", - "name": "\u06af\u06cc\u0644\u06a9\u06cc" - }, - "gn": { - "english_name": "Guarani", - "name": "Ava\u00f1e'\u1ebd" - }, - "gom": { - "english_name": "Goan Konkani", - "name": "\u0917\u094b\u0902\u092f\u091a\u0940 \u0915\u094b\u0902\u0915\u0923\u0940 / G\u00f5ychi Konknni" - }, - "gor": { - "english_name": "Gorontalo", - "name": "Hulontalo" - }, - "got": { - "english_name": "Gothic", - "name": "\ud800\udf32\ud800\udf3f\ud800\udf44\ud800\udf39\ud800\udf43\ud800\udf3a" - }, - "gu": { - "english_name": "Gujarati", - "name": "\u0a97\u0ac1\u0a9c\u0ab0\u0abe\u0aa4\u0ac0" - }, - "guw": { - "english_name": "Gun", - "name": "Gungbe" - }, - "gv": { - "english_name": "Manx", - "name": "Gaelg" - }, - "ha": { - "english_name": "Hausa", - "name": "Hausa / \u0647\u064e\u0648\u064f\u0633\u064e" - }, - "hak": { - "english_name": "Hakka", - "name": "Hak-k\u00e2-fa / \u5ba2\u5bb6\u8a71" - }, - "haw": { - "english_name": "Hawaiian", - "name": "Hawai\u02bbi" - }, - "he": { - "english_name": "Hebrew", - "name": "\u05e2\u05d1\u05e8\u05d9\u05ea" - }, - "hi": { - "english_name": "Hindi", - "name": "\u0939\u093f\u0928\u094d\u0926\u0940" - }, - "hif": { - "english_name": "Fiji Hindi", - "name": "Fiji Hindi" - }, - "hr": { - "english_name": "Croatian", - "name": "Hrvatski" - }, - "hsb": { - "english_name": "Upper Sorbian", - "name": "Hornjoserbsce" - }, - "ht": { - "english_name": "Haitian", - "name": "Kr\u00e8yol ayisyen" - }, - "hu": { - "english_name": "Hungarian", - "name": "Magyar" - }, - "hy": { - "english_name": "Armenian", - "name": "\u0540\u0561\u0575\u0565\u0580\u0565\u0576" - }, - "hyw": { - "english_name": "Western Armenian", - "name": "\u0531\u0580\u0565\u0582\u0574\u057f\u0561\u0570\u0561\u0575\u0565\u0580\u0567\u0576" - }, - "ia": { - "english_name": "Interlingua", - "name": "Interlingua" - }, - "id": { - "english_name": "Indonesian", - "name": "Bahasa Indonesia" - }, - "ie": { - "english_name": "Interlingue", - "name": "Interlingue" - }, - "ig": { - "english_name": "Igbo", - "name": "\u00ccgb\u00f2" - }, - "ik": { - "english_name": "Inupiak", - "name": "I\u00f1upiatun" - }, - "ilo": { - "english_name": "Ilokano", - "name": "Ilokano" - }, - "inh": { - "english_name": "Ingush", - "name": "\u0413\u04c0\u0430\u043b\u0433\u04c0\u0430\u0439" - }, - "io": { - "english_name": "Ido", - "name": "Ido" - }, - "is": { - "english_name": "Icelandic", - "name": "\u00cdslenska" - }, - "it": { - "english_name": "Italian", - "name": "Italiano" - }, - "iu": { - "english_name": "Inuktitut", - "name": "\u1403\u14c4\u1483\u144e\u1450\u1466" - }, - "ja": { - "english_name": "Japanese", - "name": "\u65e5\u672c\u8a9e" - }, - "jam": { - "english_name": "Jamaican Patois", - "name": "Jumiekan Kryuol" - }, - "jbo": { - "english_name": "Lojban", - "name": "Lojban" - }, - "jv": { - "english_name": "Javanese", - "name": "Basa Jawa" - }, - "ka": { - "english_name": "Georgian", - "name": "\u10e5\u10d0\u10e0\u10d7\u10e3\u10da\u10d8" - }, - "kaa": { - "english_name": "Karakalpak", - "name": "Qaraqalpaqsha" - }, - "kab": { - "english_name": "Kabyle", - "name": "Taqbaylit" - }, - "kbd": { - "english_name": "Kabardian Circassian", - "name": "\u0410\u0434\u044b\u0433\u044d\u0431\u0437\u044d (Adighabze)" - }, - "kbp": { - "english_name": "Kabiye", - "name": "Kab\u0269y\u025b" - }, - "kcg": { - "english_name": "Tyap", - "name": "Tyap" - }, - "kg": { - "english_name": "Kongo", - "name": "Kik\u00f4ngo" - }, - "ki": { - "english_name": "Kikuyu", - "name": "G\u0129k\u0169y\u0169" - }, - "kk": { - "english_name": "Kazakh", - "name": "\u049a\u0430\u0437\u0430\u049b\u0448\u0430" - }, - "kl": { - "english_name": "Greenlandic", - "name": "Kalaallisut" - }, - "km": { - "english_name": "Khmer", - "name": "\u1797\u17b6\u179f\u17b6\u1781\u17d2\u1798\u17c2\u179a" - }, - "kn": { - "english_name": "Kannada", - "name": "\u0c95\u0ca8\u0ccd\u0ca8\u0ca1" - }, - "ko": { - "english_name": "Korean", - "name": "\ud55c\uad6d\uc5b4" - }, - "koi": { - "english_name": "Komi-Permyak", - "name": "\u041f\u0435\u0440\u0435\u043c \u041a\u043e\u043c\u0438 (Perem Komi)" - }, - "krc": { - "english_name": "Karachay-Balkar", - "name": "\u041a\u044a\u0430\u0440\u0430\u0447\u0430\u0439-\u041c\u0430\u043b\u043a\u044a\u0430\u0440 (Qarachay-Malqar)" - }, - "ks": { - "english_name": "Kashmiri", - "name": "\u0915\u0936\u094d\u092e\u0940\u0930\u0940 / \u0643\u0634\u0645\u064a\u0631\u064a" - }, - "ksh": { - "english_name": "Ripuarian", - "name": "Ripoarisch" - }, - "ku": { - "english_name": "Kurdish", - "name": "Kurd\u00ee / \u0643\u0648\u0631\u062f\u06cc" - }, - "kv": { - "english_name": "Komi", - "name": "\u041a\u043e\u043c\u0438" - }, - "kw": { - "english_name": "Cornish", - "name": "Kernowek/Karnuack" - }, - "ky": { - "english_name": "Kyrgyz", - "name": "\u041a\u044b\u0440\u0433\u044b\u0437\u0447\u0430" - }, - "la": { - "english_name": "Latin", - "name": "Latina" - }, - "lad": { - "english_name": "Ladino", - "name": "Dzhudezmo" - }, - "lb": { - "english_name": "Luxembourgish", - "name": "L\u00ebtzebuergesch" - }, - "lbe": { - "english_name": "Lak", - "name": "\u041b\u0430\u043a\u043a\u0443" - }, - "lez": { - "english_name": "Lezgian", - "name": "\u041b\u0435\u0437\u0433\u0438 \u0447\u0406\u0430\u043b (Lezgi \u010d\u2019al)" - }, - "lfn": { - "english_name": "Lingua Franca Nova", - "name": "Lingua franca nova" - }, - "lg": { - "english_name": "Luganda", - "name": "Luganda" - }, - "li": { - "english_name": "Limburgish", - "name": "Limburgs" - }, - "lij": { - "english_name": "Ligurian", - "name": "L\u00ecgure" - }, - "lld": { - "english_name": "Ladin", - "name": "Lingaz" - }, - "lmo": { - "english_name": "Lombard", - "name": "Lumbaart" - }, - "ln": { - "english_name": "Lingala", - "name": "Lingala" - }, - "lo": { - "english_name": "Lao", - "name": "\u0ea5\u0eb2\u0ea7" - }, - "lt": { - "english_name": "Lithuanian", - "name": "Lietuvi\u0173" - }, - "ltg": { - "english_name": "Latgalian", - "name": "Latga\u013cu" - }, - "lv": { - "english_name": "Latvian", - "name": "Latvie\u0161u" - }, - "mad": { - "english_name": "Madurese", - "name": "Madhur\u00e2" - }, - "mai": { - "english_name": "Maithili", - "name": "\u092e\u0948\u0925\u093f\u0932\u0940" - }, - "map-bms": { - "english_name": "Banyumasan", - "name": "Basa Banyumasan" - }, - "mdf": { - "english_name": "Moksha", - "name": "\u041c\u043e\u043a\u0448\u0435\u043d\u044c (Mokshanj K\u00e4lj)" - }, - "mg": { - "english_name": "Malagasy", - "name": "Malagasy" - }, - "mhr": { - "english_name": "Meadow Mari", - "name": "\u041e\u043b\u044b\u043a \u041c\u0430\u0440\u0438\u0439 (Olyk Marij)" - }, - "mi": { - "english_name": "Maori", - "name": "M\u0101ori" - }, - "min": { - "english_name": "Minangkabau", - "name": "Minangkabau" - }, - "mk": { - "english_name": "Macedonian", - "name": "\u041c\u0430\u043a\u0435\u0434\u043e\u043d\u0441\u043a\u0438" - }, - "ml": { - "english_name": "Malayalam", - "name": "\u0d2e\u0d32\u0d2f\u0d3e\u0d33\u0d02" - }, - "mn": { - "english_name": "Mongolian", - "name": "\u041c\u043e\u043d\u0433\u043e\u043b" - }, - "mni": { - "english_name": "Meitei", - "name": "\uabc3\uabe4\uabc7\uabe9\uabc2\uabe3\uabdf" - }, - "mnw": { - "english_name": "Mon", - "name": "\u1019\u1014\u103a" - }, - "mr": { - "english_name": "Marathi", - "name": "\u092e\u0930\u093e\u0920\u0940" - }, - "mrj": { - "english_name": "Hill Mari", - "name": "\u041a\u044b\u0440\u044b\u043a \u041c\u0430\u0440\u044b (Kyryk Mary)" - }, - "ms": { - "english_name": "Malay", - "name": "Bahasa Melayu" - }, - "mt": { - "english_name": "Maltese", - "name": "Malti" - }, - "mwl": { - "english_name": "Mirandese", - "name": "Mirand\u00e9s" - }, - "my": { - "english_name": "Burmese", - "name": "\u1019\u103c\u1014\u103a\u1019\u102c\u1018\u102c\u101e\u102c" - }, - "myv": { - "english_name": "Erzya", - "name": "\u042d\u0440\u0437\u044f\u043d\u044c (Erzjanj Kelj)" - }, - "mzn": { - "english_name": "Mazandarani", - "name": "\u0645\u064e\u0632\u0650\u0631\u0648\u0646\u064a" - }, - "na": { - "english_name": "Nauruan", - "name": "dorerin Naoero" - }, - "nah": { - "english_name": "Nahuatl", - "name": "N\u0101huatl" - }, - "nap": { - "english_name": "Neapolitan", - "name": "Nnapulitano" - }, - "nds": { - "english_name": "Low Saxon", - "name": "Plattd\u00fc\u00fctsch" - }, - "nds-nl": { - "english_name": "Dutch Low Saxon", - "name": "Nedersaksisch" - }, - "ne": { - "english_name": "Nepali", - "name": "\u0928\u0947\u092a\u093e\u0932\u0940" - }, - "new": { - "english_name": "Newar", - "name": "\u0928\u0947\u092a\u093e\u0932 \u092d\u093e\u0937\u093e" - }, - "nia": { - "english_name": "Nias", - "name": "Li Niha" - }, - "nl": { - "english_name": "Dutch", - "name": "Nederlands" - }, - "nn": { - "english_name": "Norwegian (Nynorsk)", - "name": "Nynorsk" - }, - "no": { - "english_name": "Norwegian (Bokm\u00e5l)", - "name": "Norsk (Bokm\u00e5l)" - }, - "nov": { - "english_name": "Novial", - "name": "Novial" - }, - "nqo": { - "english_name": "N\u2019Ko", - "name": "\u07d2\u07de\u07cf" - }, - "nrm": { - "english_name": "Norman", - "name": "Nouormand/Normaund" - }, - "nso": { - "english_name": "Northern Sotho", - "name": "Sepedi" - }, - "nv": { - "english_name": "Navajo", - "name": "Din\u00e9 bizaad" - }, - "ny": { - "english_name": "Chichewa", - "name": "Chichewa" - }, - "oc": { - "english_name": "Occitan", - "name": "Occitan" - }, - "olo": { - "english_name": "Livvi-Karelian", - "name": "Karjalan" - }, - "om": { - "english_name": "Oromo", - "name": "Oromoo" - }, - "or": { - "english_name": "Oriya", - "name": "\u0b13\u0b21\u0b3c\u0b3f\u0b06" - }, - "os": { - "english_name": "Ossetian", - "name": "\u0418\u0440\u043e\u043d\u0430\u0443" - }, - "pa": { - "english_name": "Punjabi", - "name": "\u0a2a\u0a70\u0a1c\u0a3e\u0a2c\u0a40" - }, - "pag": { - "english_name": "Pangasinan", - "name": "Pangasinan" - }, - "pam": { - "english_name": "Kapampangan", - "name": "Kapampangan" - }, - "pap": { - "english_name": "Papiamentu", - "name": "Papiamentu" - }, - "pcd": { - "english_name": "Picard", - "name": "Picard" - }, - "pcm": { - "english_name": "Nigerian Pidgin", - "name": "Naij\u00e1" - }, - "pdc": { - "english_name": "Pennsylvania German", - "name": "Deitsch" - }, - "pfl": { - "english_name": "Palatinate German", - "name": "P\u00e4lzisch" - }, - "pi": { - "english_name": "Pali", - "name": "\u092a\u093e\u0934\u093f" - }, - "pih": { - "english_name": "Norfolk", - "name": "Norfuk" - }, - "pl": { - "english_name": "Polish", - "name": "Polski" - }, - "pms": { - "english_name": "Piedmontese", - "name": "Piemont\u00e8is" - }, - "pnb": { - "english_name": "Western Punjabi", - "name": "\u0634\u0627\u06c1 \u0645\u06a9\u06be\u06cc \u067e\u0646\u062c\u0627\u0628\u06cc (Sh\u0101hmukh\u012b Pa\u00f1j\u0101b\u012b)" - }, - "pnt": { - "english_name": "Pontic", - "name": "\u03a0\u03bf\u03bd\u03c4\u03b9\u03b1\u03ba\u03ac" - }, - "ps": { - "english_name": "Pashto", - "name": "\u067e\u069a\u062a\u0648" - }, - "pt": { - "english_name": "Portuguese", - "name": "Portugu\u00eas" - }, - "pwn": { - "english_name": "Paiwan", - "name": "Paiwan" - }, - "qu": { - "english_name": "Quechua", - "name": "Qichwa simi" - }, - "rm": { - "english_name": "Romansh", - "name": "Rumantsch" - }, - "rmy": { - "english_name": "Romani", - "name": "romani - \u0930\u094b\u092e\u093e\u0928\u0940" - }, - "rn": { - "english_name": "Kirundi", - "name": "Ikirundi" - }, - "ro": { - "english_name": "Romanian", - "name": "Rom\u00e2n\u0103" - }, - "roa-rup": { - "english_name": "Aromanian", - "name": "Arm\u00e3neashce" - }, - "roa-tara": { - "english_name": "Tarantino", - "name": "Tarand\u00edne" - }, - "ru": { - "english_name": "Russian", - "name": "\u0420\u0443\u0441\u0441\u043a\u0438\u0439" - }, - "rue": { - "english_name": "Rusyn", - "name": "\u0420\u0443\u0441\u0438\u043d\u044c\u0441\u043a\u044b\u0439" - }, - "rw": { - "english_name": "Kinyarwanda", - "name": "Ikinyarwanda" - }, - "sa": { - "english_name": "Sanskrit", - "name": "\u0938\u0902\u0938\u094d\u0915\u0943\u0924\u092e\u094d" - }, - "sah": { - "english_name": "Sakha", - "name": "\u0421\u0430\u0445\u0430 \u0442\u044b\u043b\u0430 (Saxa Tyla)" - }, - "sat": { - "english_name": "Santali", - "name": "\u1c65\u1c5f\u1c71\u1c5b\u1c5f\u1c72\u1c64" - }, - "sc": { - "english_name": "Sardinian", - "name": "Sardu" - }, - "scn": { - "english_name": "Sicilian", - "name": "Sicilianu" - }, - "sco": { - "english_name": "Scots", - "name": "Scots" - }, - "sd": { - "english_name": "Sindhi", - "name": "\u0633\u0646\u068c\u064a\u060c \u0633\u0646\u062f\u06be\u06cc \u060c \u0938\u093f\u0928\u094d\u0927" - }, - "se": { - "english_name": "Northern Sami", - "name": "S\u00e1megiella" - }, - "sg": { - "english_name": "Sango", - "name": "S\u00e4ng\u00f6" - }, - "sh": { - "english_name": "Serbo-Croatian", - "name": "Srpskohrvatski / \u0421\u0440\u043f\u0441\u043a\u043e\u0445\u0440\u0432\u0430\u0442\u0441\u043a\u0438" - }, - "shi": { - "english_name": "Tachelhit", - "name": "Tacl\u1e25it" - }, - "shn": { - "english_name": "Shan", - "name": "\u101c\u102d\u1075\u103a\u1088\u1010\u1086\u1038" - }, - "si": { - "english_name": "Sinhalese", - "name": "\u0dc3\u0dd2\u0d82\u0dc4\u0dbd" - }, - "simple": { - "english_name": "Simple English", - "name": "Simple English" - }, - "sk": { - "english_name": "Slovak", - "name": "Sloven\u010dina" - }, - "skr": { - "english_name": "Saraiki", - "name": "\u0633\u0631\u0627\u0626\u06cc\u06a9\u06cc" - }, - "sl": { - "english_name": "Slovenian", - "name": "Sloven\u0161\u010dina" - }, - "sm": { - "english_name": "Samoan", - "name": "Gagana Samoa" - }, - "smn": { - "english_name": "Inari Sami", - "name": "Anar\u00e2\u0161kiel\u00e2" - }, - "sn": { - "english_name": "Shona", - "name": "chiShona" - }, - "so": { - "english_name": "Somali", - "name": "Soomaali" - }, - "sq": { - "english_name": "Albanian", - "name": "Shqip" - }, - "sr": { - "english_name": "Serbian", - "name": "\u0421\u0440\u043f\u0441\u043a\u0438 / Srpski" - }, - "srn": { - "english_name": "Sranan", - "name": "Sranantongo" - }, - "ss": { - "english_name": "Swati", - "name": "SiSwati" - }, - "st": { - "english_name": "Sesotho", - "name": "Sesotho" - }, - "stq": { - "english_name": "Saterland Frisian", - "name": "Seeltersk" - }, - "su": { - "english_name": "Sundanese", - "name": "Basa Sunda" - }, - "sv": { - "english_name": "Swedish", - "name": "Svenska" - }, - "sw": { - "english_name": "Swahili", - "name": "Kiswahili" - }, - "szl": { - "english_name": "Silesian", - "name": "\u015al\u016fnski" - }, - "szy": { - "english_name": "Sakizaya", - "name": "Sakizaya" - }, - "ta": { - "english_name": "Tamil", - "name": "\u0ba4\u0bae\u0bbf\u0bb4\u0bcd" - }, - "tay": { - "english_name": "Atayal", - "name": "Tayal" - }, - "tcy": { - "english_name": "Tulu", - "name": "\u0ca4\u0cc1\u0cb3\u0cc1" - }, - "te": { - "english_name": "Telugu", - "name": "\u0c24\u0c46\u0c32\u0c41\u0c17\u0c41" - }, - "tet": { - "english_name": "Tetum", - "name": "Tetun" - }, - "tg": { - "english_name": "Tajik", - "name": "\u0422\u043e\u04b7\u0438\u043a\u04e3" - }, - "th": { - "english_name": "Thai", - "name": "\u0e44\u0e17\u0e22" - }, - "ti": { - "english_name": "Tigrinya", - "name": "\u1275\u130d\u122d\u129b" - }, - "tk": { - "english_name": "Turkmen", - "name": "T\u00fcrkmen" - }, - "tl": { - "english_name": "Tagalog", - "name": "Tagalog" - }, - "tn": { - "english_name": "Tswana", - "name": "Setswana" - }, - "to": { - "english_name": "Tongan", - "name": "faka Tonga" - }, - "tpi": { - "english_name": "Tok Pisin", - "name": "Tok Pisin" - }, - "tr": { - "english_name": "Turkish", - "name": "T\u00fcrk\u00e7e" - }, - "trv": { - "english_name": "Seediq", - "name": "Taroko" - }, - "ts": { - "english_name": "Tsonga", - "name": "Xitsonga" - }, - "tt": { - "english_name": "Tatar", - "name": "Tatar\u00e7a / \u0422\u0430\u0442\u0430\u0440\u0447\u0430" - }, - "tum": { - "english_name": "Tumbuka", - "name": "chiTumbuka" - }, - "tw": { - "english_name": "Twi", - "name": "Twi" - }, - "ty": { - "english_name": "Tahitian", - "name": "Reo M\u0101`ohi" - }, - "tyv": { - "english_name": "Tuvan", - "name": "\u0422\u044b\u0432\u0430" - }, - "udm": { - "english_name": "Udmurt", - "name": "\u0423\u0434\u043c\u0443\u0440\u0442 \u043a\u044b\u043b" - }, - "ug": { - "english_name": "Uyghur", - "name": "\u0626\u06c7\u064a\u063a\u06c7\u0631 \u062a\u0649\u0644\u0649" - }, - "uk": { - "english_name": "Ukrainian", - "name": "\u0423\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430" - }, - "ur": { - "english_name": "Urdu", - "name": "\u0627\u0631\u062f\u0648" - }, - "uz": { - "english_name": "Uzbek", - "name": "O\u2018zbek" - }, - "ve": { - "english_name": "Venda", - "name": "Tshivenda" - }, - "vec": { - "english_name": "Venetian", - "name": "V\u00e8neto" - }, - "vep": { - "english_name": "Vepsian", - "name": "Veps\u00e4n" - }, - "vi": { - "english_name": "Vietnamese", - "name": "Ti\u1ebfng Vi\u1ec7t" - }, - "vls": { - "english_name": "West Flemish", - "name": "West-Vlams" - }, - "vo": { - "english_name": "Volap\u00fck", - "name": "Volap\u00fck" - }, - "wa": { - "english_name": "Walloon", - "name": "Walon" - }, - "war": { - "english_name": "Waray-Waray", - "name": "Winaray" - }, - "wo": { - "english_name": "Wolof", - "name": "Wolof" - }, - "wuu": { - "english_name": "Wu", - "name": "\u5434\u8bed" - }, - "xal": { - "english_name": "Kalmyk", - "name": "\u0425\u0430\u043b\u044c\u043c\u0433" - }, - "xh": { - "english_name": "Xhosa", - "name": "isiXhosa" - }, - "xmf": { - "english_name": "Mingrelian", - "name": "\u10db\u10d0\u10e0\u10d2\u10d0\u10da\u10e3\u10e0\u10d8 (Margaluri)" - }, - "yi": { - "english_name": "Yiddish", - "name": "\u05d9\u05d9\u05b4\u05d3\u05d9\u05e9" - }, - "yo": { - "english_name": "Yoruba", - "name": "Yor\u00f9b\u00e1" - }, - "za": { - "english_name": "Zhuang", - "name": "Cuengh" - }, - "zea": { - "english_name": "Zeelandic", - "name": "Ze\u00eauws" - }, - "zh": { - "english_name": "Chinese", - "name": "\u4e2d\u6587" - }, - "zh-classical": { - "english_name": "Classical Chinese", - "name": "\u53e4\u6587 / \u6587\u8a00\u6587" - }, - "zh-min-nan": { - "english_name": "Min Nan", - "name": "B\u00e2n-l\u00e2m-g\u00fa" - }, - "zh-yue": { - "english_name": "Cantonese", - "name": "\u7cb5\u8a9e" - }, - "zu": { - "english_name": "Zulu", - "name": "isiZulu" - } - }, - "yahoo": [ - "ar", - "bg", - "cs", - "da", - "de", - "el", - "en", - "es", - "et", - "fi", - "fr", - "he", - "hr", - "hu", - "it", - "ja", - "ko", - "lt", - "lv", - "nl", - "no", - "pl", - "pt", - "ro", - "ru", - "sk", - "sl", - "sv", - "th", - "tr", - "zh_chs", - "zh_cht" - ] -} \ No newline at end of file diff --git a/searx/enginelib/__init__.py b/searx/enginelib/__init__.py new file mode 100644 index 000000000..461791b18 --- /dev/null +++ b/searx/enginelib/__init__.py @@ -0,0 +1,136 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +"""Engine related implementations + +.. note:: + + The long term goal is to modularize all relevant implementations to the + engines here in this Python package. In addition to improved modularization, + this will also be necessary in part because the probability of circular + imports will increase due to the increased typification of implementations in + the future. + + ToDo: + + - move :py:obj:`searx.engines.load_engine` to a new module `searx.enginelib`. +""" + + +from __future__ import annotations +from typing import Union, Dict, List, Callable, TYPE_CHECKING + +if TYPE_CHECKING: + from searx.enginelib import traits + + +class Engine: # pylint: disable=too-few-public-methods + """Class of engine instances build from YAML settings. + + Further documentation see :ref:`general engine configuration`. + + .. hint:: + + This class is currently never initialized and only used for type hinting. + """ + + # Common options in the engine module + + engine_type: str + """Type of the engine (:origin:`searx/search/processors`)""" + + paging: bool + """Engine supports multiple pages.""" + + time_range_support: bool + """Engine supports search time range.""" + + safesearch: bool + """Engine supports SafeSearch""" + + language_support: bool + """Engine supports languages (locales) search.""" + + language: str + """For an engine, when there is ``language: ...`` in the YAML settings the engine + does support only this one language: + + .. code:: yaml + + - name: google french + engine: google + language: fr + """ + + region: str + """For an engine, when there is ``region: ...`` in the YAML settings the engine + does support only this one region:: + + .. code:: yaml + + - name: google belgium + engine: google + region: fr-BE + """ + + fetch_traits: Callable + """Function to to fetch engine's traits from origin.""" + + traits: traits.EngineTraits + """Traits of the engine.""" + + # settings.yml + + categories: List[str] + """Tabs, in which the engine is working.""" + + name: str + """Name that will be used across SearXNG to define this engine. In settings, on + the result page ..""" + + engine: str + """Name of the python file used to handle requests and responses to and from + this search engine (file name from :origin:`searx/engines` without + ``.py``).""" + + enable_http: bool + """Enable HTTP (by default only HTTPS is enabled).""" + + shortcut: str + """Code used to execute bang requests (``!foo``)""" + + timeout: float + """Specific timeout for search-engine.""" + + display_error_messages: bool + """Display error messages on the web UI.""" + + proxies: dict + """Set proxies for a specific engine (YAML): + + .. code:: yaml + + proxies : + http: socks5://proxy:port + https: socks5://proxy:port + """ + + disabled: bool + """To disable by default the engine, but not deleting it. It will allow the + user to manually activate it in the settings.""" + + inactive: bool + """Remove the engine from the settings (*disabled & removed*).""" + + about: dict + """Additional fileds describing the engine. + + .. code:: yaml + + about: + website: https://example.com + wikidata_id: Q306656 + official_api_documentation: https://example.com/api-doc + use_official_api: true + require_api_key: true + results: HTML + """ diff --git a/searx/enginelib/traits.py b/searx/enginelib/traits.py new file mode 100644 index 000000000..df7851594 --- /dev/null +++ b/searx/enginelib/traits.py @@ -0,0 +1,250 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +"""Engine's traits are fetched from the origin engines and stored in a JSON file +in the *data folder*. Most often traits are languages and region codes and +their mapping from SearXNG's representation to the representation in the origin +search engine. For new traits new properties can be added to the class +:py:class:`EngineTraits`. + +To load traits from the persistence :py:obj:`EngineTraitsMap.from_data` can be +used. +""" + +from __future__ import annotations +import json +import dataclasses +from typing import Dict, Union, Callable, Optional, TYPE_CHECKING +from typing_extensions import Literal, Self + +from searx import locales +from searx.data import data_dir, ENGINE_TRAITS + +if TYPE_CHECKING: + from . import Engine + + +class EngineTraitsEncoder(json.JSONEncoder): + """Encodes :class:`EngineTraits` to a serializable object, see + :class:`json.JSONEncoder`.""" + + def default(self, o): + """Return dictionary of a :class:`EngineTraits` object.""" + if isinstance(o, EngineTraits): + return o.__dict__ + return super().default(o) + + +@dataclasses.dataclass +class EngineTraits: + """The class is intended to be instantiated for each engine.""" + + regions: Dict[str, str] = dataclasses.field(default_factory=dict) + """Maps SearXNG's internal representation of a region to the one of the engine. + + SearXNG's internal representation can be parsed by babel and the value is + send to the engine: + + .. code:: python + + regions ={ + 'fr-BE' : , + } + + for key, egnine_region regions.items(): + searxng_region = babel.Locale.parse(key, sep='-') + ... + """ + + languages: Dict[str, str] = dataclasses.field(default_factory=dict) + """Maps SearXNG's internal representation of a language to the one of the engine. + + SearXNG's internal representation can be parsed by babel and the value is + send to the engine: + + .. code:: python + + languages = { + 'ca' : , + } + + for key, egnine_lang in languages.items(): + searxng_lang = babel.Locale.parse(key) + ... + """ + + all_locale: Optional[str] = None + """To which locale value SearXNG's ``all`` language is mapped (shown a "Default + language"). + """ + + data_type: Literal['traits_v1'] = 'traits_v1' + """Data type, default is 'traits_v1'. + """ + + custom: Dict[str, Dict] = dataclasses.field(default_factory=dict) + """A place to store engine's custom traits, not related to the SearXNG core + + """ + + def get_language(self, searxng_locale: str, default=None): + """Return engine's language string that *best fits* to SearXNG's locale. + + :param searxng_locale: SearXNG's internal representation of locale + selected by the user. + + :param default: engine's default language + + The *best fits* rules are implemented in + :py:obj:`locales.get_engine_locale`. Except for the special value ``all`` + which is determined from :py:obj`EngineTraits.all_language`. + """ + if searxng_locale == 'all' and self.all_locale is not None: + return self.all_locale + return locales.get_engine_locale(searxng_locale, self.languages, default=default) + + def get_region(self, searxng_locale: str, default=None): + """Return engine's region string that best fits to SearXNG's locale. + + :param searxng_locale: SearXNG's internal representation of locale + selected by the user. + + :param default: engine's default region + + The *best fits* rules are implemented in + :py:obj:`locales.get_engine_locale`. Except for the special value ``all`` + which is determined from :py:obj`EngineTraits.all_language`. + """ + if searxng_locale == 'all' and self.all_locale is not None: + return self.all_locale + return locales.get_engine_locale(searxng_locale, self.regions, default=default) + + def is_locale_supported(self, searxng_locale: str) -> bool: + """A *locale* (SearXNG's internal representation) is considered to be supported + by the engine if the *region* or the *language* is supported by the + engine. For verification the functions :py:func:`self.get_region` and + :py:func:`self.get_region` are used. + """ + if self.data_type == 'traits_v1': + return bool(self.get_region(searxng_locale) or self.get_language(searxng_locale)) + + raise TypeError('engine traits of type %s is unknown' % self.data_type) + + def copy(self): + """Create a copy of the dataclass object.""" + return EngineTraits(**dataclasses.asdict(self)) + + @classmethod + def fetch_traits(cls, engine: Engine) -> Union[Self, None]: + """Call a function ``fetch_traits(engine_traits)`` from engines namespace to fetch + and set properties from the origin engine in the object ``engine_traits``. If + function does not exists, ``None`` is returned. + """ + + fetch_traits = getattr(engine, 'fetch_traits', None) + engine_traits = None + + if fetch_traits: + engine_traits = cls() + fetch_traits(engine_traits) + return engine_traits + + def set_traits(self, engine: Engine): + """Set traits from self object in a :py:obj:`.Engine` namespace. + + :param engine: engine instance build by :py:func:`searx.engines.load_engine` + """ + + if self.data_type == 'traits_v1': + self._set_traits_v1(engine) + else: + raise TypeError('engine traits of type %s is unknown' % self.data_type) + + def _set_traits_v1(self, engine: Engine): + # For an engine, when there is `language: ...` in the YAML settings the engine + # does support only this one language (region):: + # + # - name: google italian + # engine: google + # language: it + # region: it-IT + + traits = self.copy() + + _msg = "settings.yml - engine: '%s' / %s: '%s' not supported" + + languages = traits.languages + if hasattr(engine, 'language'): + if engine.language not in languages: + raise ValueError(_msg % (engine.name, 'language', engine.language)) + traits.languages = {engine.language: languages[engine.language]} + + regions = traits.regions + if hasattr(engine, 'region'): + if engine.region not in regions: + raise ValueError(_msg % (engine.name, 'region', engine.region)) + traits.regions = {engine.region: regions[engine.region]} + + engine.language_support = bool(traits.languages or traits.regions) + + # set the copied & modified traits in engine's namespace + engine.traits = traits + + +class EngineTraitsMap(Dict[str, EngineTraits]): + """A python dictionary to map :class:`EngineTraits` by engine name.""" + + ENGINE_TRAITS_FILE = (data_dir / 'engine_traits.json').resolve() + """File with persistence of the :py:obj:`EngineTraitsMap`.""" + + def save_data(self): + """Store EngineTraitsMap in in file :py:obj:`self.ENGINE_TRAITS_FILE`""" + with open(self.ENGINE_TRAITS_FILE, 'w', encoding='utf-8') as f: + json.dump(self, f, indent=2, sort_keys=True, cls=EngineTraitsEncoder) + + @classmethod + def from_data(cls) -> Self: + """Instantiate :class:`EngineTraitsMap` object from :py:obj:`ENGINE_TRAITS`""" + obj = cls() + for k, v in ENGINE_TRAITS.items(): + obj[k] = EngineTraits(**v) + return obj + + @classmethod + def fetch_traits(cls, log: Callable) -> Self: + from searx import engines # pylint: disable=cyclic-import, import-outside-toplevel + + names = list(engines.engines) + names.sort() + obj = cls() + + for engine_name in names: + engine = engines.engines[engine_name] + + traits = EngineTraits.fetch_traits(engine) + if traits is not None: + log("%-20s: SearXNG languages --> %s " % (engine_name, len(traits.languages))) + log("%-20s: SearXNG regions --> %s" % (engine_name, len(traits.regions))) + obj[engine_name] = traits + + return obj + + def set_traits(self, engine: Engine): + """Set traits in a :py:obj:`Engine` namespace. + + :param engine: engine instance build by :py:func:`searx.engines.load_engine` + """ + + engine_traits = EngineTraits(data_type='traits_v1') + if engine.name in self.keys(): + engine_traits = self[engine.name] + + elif engine.engine in self.keys(): + # The key of the dictionary traits_map is the *engine name* + # configured in settings.xml. When multiple engines are configured + # in settings.yml to use the same origin engine (python module) + # these additional engines can use the languages from the origin + # engine. For this use the configured ``engine: ...`` from + # settings.yml + engine_traits = self[engine.engine] + + engine_traits.set_traits(engine) diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 52bb5f20d..c8e8e7241 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -11,24 +11,22 @@ usage:: """ +from __future__ import annotations + import sys import copy -from typing import Dict, List, Optional - from os.path import realpath, dirname -from babel.localedata import locale_identifiers -from searx import logger, settings -from searx.data import ENGINES_LANGUAGES -from searx.network import get -from searx.utils import load_module, match_language, gen_useragent +from typing import TYPE_CHECKING, Dict, Optional + +from searx import logger, settings +from searx.utils import load_module + +if TYPE_CHECKING: + from searx.enginelib import Engine logger = logger.getChild('engines') ENGINE_DIR = dirname(realpath(__file__)) -BABEL_LANGS = [ - lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0] - for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers()) -] ENGINE_DEFAULT_ARGS = { "engine_type": "online", "inactive": False, @@ -36,8 +34,6 @@ ENGINE_DEFAULT_ARGS = { "timeout": settings["outgoing"]["request_timeout"], "shortcut": "-", "categories": ["general"], - "supported_languages": [], - "language_aliases": {}, "paging": False, "safesearch": False, "time_range_support": False, @@ -52,24 +48,6 @@ ENGINE_DEFAULT_ARGS = { OTHER_CATEGORY = 'other' -class Engine: # pylint: disable=too-few-public-methods - """This class is currently never initialized and only used for type hinting.""" - - name: str - engine: str - shortcut: str - categories: List[str] - supported_languages: List[str] - about: dict - inactive: bool - disabled: bool - language_support: bool - paging: bool - safesearch: bool - time_range_support: bool - timeout: float - - # Defaults for the namespace of an engine module, see :py:func:`load_engine` categories = {'general': []} @@ -136,9 +114,15 @@ def load_engine(engine_data: dict) -> Optional[Engine]: return None update_engine_attributes(engine, engine_data) - set_language_attributes(engine) update_attributes_for_tor(engine) + # avoid cyclic imports + # pylint: disable=import-outside-toplevel + from searx.enginelib.traits import EngineTraitsMap + + trait_map = EngineTraitsMap.from_data() + trait_map.set_traits(engine) + if not is_engine_active(engine): return None @@ -190,60 +174,6 @@ def update_engine_attributes(engine: Engine, engine_data): setattr(engine, arg_name, copy.deepcopy(arg_value)) -def set_language_attributes(engine: Engine): - # assign supported languages from json file - if engine.name in ENGINES_LANGUAGES: - engine.supported_languages = ENGINES_LANGUAGES[engine.name] - - elif engine.engine in ENGINES_LANGUAGES: - # The key of the dictionary ENGINES_LANGUAGES is the *engine name* - # configured in settings.xml. When multiple engines are configured in - # settings.yml to use the same origin engine (python module) these - # additional engines can use the languages from the origin engine. - # For this use the configured ``engine: ...`` from settings.yml - engine.supported_languages = ENGINES_LANGUAGES[engine.engine] - - if hasattr(engine, 'language'): - # For an engine, when there is `language: ...` in the YAML settings, the - # engine supports only one language, in this case - # engine.supported_languages should contains this value defined in - # settings.yml - if engine.language not in engine.supported_languages: - raise ValueError( - "settings.yml - engine: '%s' / language: '%s' not supported" % (engine.name, engine.language) - ) - - if isinstance(engine.supported_languages, dict): - engine.supported_languages = {engine.language: engine.supported_languages[engine.language]} - else: - engine.supported_languages = [engine.language] - - # find custom aliases for non standard language codes - for engine_lang in engine.supported_languages: - iso_lang = match_language(engine_lang, BABEL_LANGS, fallback=None) - if ( - iso_lang - and iso_lang != engine_lang - and not engine_lang.startswith(iso_lang) - and iso_lang not in engine.supported_languages - ): - engine.language_aliases[iso_lang] = engine_lang - - # language_support - engine.language_support = len(engine.supported_languages) > 0 - - # assign language fetching method if auxiliary method exists - if hasattr(engine, '_fetch_supported_languages'): - headers = { - 'User-Agent': gen_useragent(), - 'Accept-Language': "en-US,en;q=0.5", # bing needs to set the English language - } - engine.fetch_supported_languages = ( - # pylint: disable=protected-access - lambda: engine._fetch_supported_languages(get(engine.supported_languages_url, headers=headers)) - ) - - def update_attributes_for_tor(engine: Engine) -> bool: if using_tor_proxy(engine) and hasattr(engine, 'onion_url'): engine.search_url = engine.onion_url + getattr(engine, 'search_path', '') diff --git a/searx/engines/archlinux.py b/searx/engines/archlinux.py index b5e426107..56c3b447f 100644 --- a/searx/engines/archlinux.py +++ b/searx/engines/archlinux.py @@ -1,15 +1,32 @@ # SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint """ - Arch Linux Wiki +Arch Linux Wiki +~~~~~~~~~~~~~~~ + +This implementation does not use a official API: Mediawiki provides API, but +Arch Wiki blocks access to it. - API: Mediawiki provides API, but Arch Wiki blocks access to it """ -from urllib.parse import urlencode, urljoin -from lxml import html +from typing import TYPE_CHECKING +from urllib.parse import urlencode, urljoin, urlparse +import lxml +import babel + +from searx import network from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex +from searx.enginelib.traits import EngineTraits +from searx.locales import language_tag + +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits + -# about about = { "website": 'https://wiki.archlinux.org/', "wikidata_id": 'Q101445877', @@ -22,125 +39,113 @@ about = { # engine dependent config categories = ['it', 'software wikis'] paging = True -base_url = 'https://wiki.archlinux.org' - -# xpath queries -xpath_results = '//ul[@class="mw-search-results"]/li' -xpath_link = './/div[@class="mw-search-result-heading"]/a' +main_wiki = 'wiki.archlinux.org' -# cut 'en' from 'en-US', 'de' from 'de-CH', and so on -def locale_to_lang_code(locale): - if locale.find('-') >= 0: - locale = locale.split('-')[0] - return locale - - -# wikis for some languages were moved off from the main site, we need to make -# requests to correct URLs to be able to get results in those languages -lang_urls = { - # fmt: off - 'all': { - 'base': 'https://wiki.archlinux.org', - 'search': '/index.php?title=Special:Search&offset={offset}&{query}' - }, - 'de': { - 'base': 'https://wiki.archlinux.de', - 'search': '/index.php?title=Spezial:Suche&offset={offset}&{query}' - }, - 'fr': { - 'base': 'https://wiki.archlinux.fr', - 'search': '/index.php?title=Spécial:Recherche&offset={offset}&{query}' - }, - 'ja': { - 'base': 'https://wiki.archlinuxjp.org', - 'search': '/index.php?title=特別:検索&offset={offset}&{query}' - }, - 'ro': { - 'base': 'http://wiki.archlinux.ro', - 'search': '/index.php?title=Special:Căutare&offset={offset}&{query}' - }, - 'tr': { - 'base': 'http://archtr.org/wiki', - 'search': '/index.php?title=Özel:Ara&offset={offset}&{query}' - } - # fmt: on -} - - -# get base & search URLs for selected language -def get_lang_urls(language): - if language in lang_urls: - return lang_urls[language] - return lang_urls['all'] - - -# Language names to build search requests for -# those languages which are hosted on the main site. -main_langs = { - 'ar': 'العربية', - 'bg': 'Български', - 'cs': 'Česky', - 'da': 'Dansk', - 'el': 'Ελληνικά', - 'es': 'Español', - 'he': 'עברית', - 'hr': 'Hrvatski', - 'hu': 'Magyar', - 'it': 'Italiano', - 'ko': '한국어', - 'lt': 'Lietuviškai', - 'nl': 'Nederlands', - 'pl': 'Polski', - 'pt': 'Português', - 'ru': 'Русский', - 'sl': 'Slovenský', - 'th': 'ไทย', - 'uk': 'Українська', - 'zh': '简体中文', -} -supported_languages = dict(lang_urls, **main_langs) - - -# do search-request def request(query, params): - # translate the locale (e.g. 'en-US') to language code ('en') - language = locale_to_lang_code(params['language']) - # if our language is hosted on the main site, we need to add its name - # to the query in order to narrow the results to that language - if language in main_langs: - query += ' (' + main_langs[language] + ')' - - # prepare the request parameters - query = urlencode({'search': query}) + sxng_lang = params['searxng_locale'].split('-')[0] + netloc = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki) + title = traits.custom['title'].get(sxng_lang, 'Special:Search') + base_url = 'https://' + netloc + '/index.php?' offset = (params['pageno'] - 1) * 20 - # get request URLs for our language of choice - urls = get_lang_urls(language) - search_url = urls['base'] + urls['search'] + if netloc == main_wiki: + eng_lang: str = traits.get_language(sxng_lang, 'English') + query += ' (' + eng_lang + ')' + elif netloc == 'wiki.archlinuxcn.org': + base_url = 'https://' + netloc + '/wzh/index.php?' - params['url'] = search_url.format(query=query, offset=offset) + args = { + 'search': query, + 'title': title, + 'limit': 20, + 'offset': offset, + 'profile': 'default', + } + params['url'] = base_url + urlencode(args) return params -# get response from search-request def response(resp): - # get the base URL for the language in which request was made - language = locale_to_lang_code(resp.search_params['language']) - base_url = get_lang_urls(language)['base'] results = [] + dom = lxml.html.fromstring(resp.text) - dom = html.fromstring(resp.text) + # get the base URL for the language in which request was made + sxng_lang = resp.search_params['searxng_locale'].split('-')[0] + netloc = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki) + base_url = 'https://' + netloc + '/index.php?' - # parse results - for result in eval_xpath_list(dom, xpath_results): - link = eval_xpath_getindex(result, xpath_link, 0) - href = urljoin(base_url, link.attrib.get('href')) - title = extract_text(link) - - results.append({'url': href, 'title': title}) + for result in eval_xpath_list(dom, '//ul[@class="mw-search-results"]/li'): + link = eval_xpath_getindex(result, './/div[@class="mw-search-result-heading"]/a', 0) + content = extract_text(result.xpath('.//div[@class="searchresult"]')) + results.append( + { + 'url': urljoin(base_url, link.get('href')), + 'title': extract_text(link), + 'content': content, + } + ) return results + + +def fetch_traits(engine_traits: EngineTraits): + """Fetch languages from Archlinix-Wiki. The location of the Wiki address of a + language is mapped in a :py:obj:`custom field + ` (``wiki_netloc``). Depending + on the location, the ``title`` argument in the request is translated. + + .. code:: python + + "custom": { + "wiki_netloc": { + "de": "wiki.archlinux.de", + # ... + "zh": "wiki.archlinuxcn.org" + } + "title": { + "de": "Spezial:Suche", + # ... + "zh": "Special:\u641c\u7d22" + }, + }, + + """ + + engine_traits.custom['wiki_netloc'] = {} + engine_traits.custom['title'] = {} + + title_map = { + 'de': 'Spezial:Suche', + 'fa': 'ویژه:جستجو', + 'ja': '特別:検索', + 'zh': 'Special:搜索', + } + + resp = network.get('https://wiki.archlinux.org/') + if not resp.ok: + print("ERROR: response from wiki.archlinix.org is not OK.") + + dom = lxml.html.fromstring(resp.text) + for a in eval_xpath_list(dom, "//a[@class='interlanguage-link-target']"): + + sxng_tag = language_tag(babel.Locale.parse(a.get('lang'), sep='-')) + # zh_Hans --> zh + sxng_tag = sxng_tag.split('_')[0] + + netloc = urlparse(a.get('href')).netloc + if netloc != 'wiki.archlinux.org': + title = title_map.get(sxng_tag) + if not title: + print("ERROR: title tag from %s (%s) is unknown" % (netloc, sxng_tag)) + continue + engine_traits.custom['wiki_netloc'][sxng_tag] = netloc + engine_traits.custom['title'][sxng_tag] = title + + eng_tag = extract_text(eval_xpath_list(a, ".//span")) + engine_traits.languages[sxng_tag] = eng_tag + + engine_traits.languages['en'] = 'English' diff --git a/searx/engines/bing.py b/searx/engines/bing.py index 783c0056a..0f85c7036 100644 --- a/searx/engines/bing.py +++ b/searx/engines/bing.py @@ -1,16 +1,53 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""Bing (Web) +"""This is the implementation of the Bing-WEB engine. Some of this +implementations are shared by other engines: + +- :ref:`bing images engine` +- :ref:`bing news engine` +- :ref:`bing videos engine` + +On the `preference page`_ Bing offers a lot of languages an regions (see section +'Search results languages' and 'Country/region'). However, the abundant choice +does not correspond to reality, where Bing has a full-text indexer only for a +limited number of languages. By example: you can select a language like Māori +but you never get a result in this language. + +What comes a bit closer to the truth are the `search-APIs`_ but they don`t seem +to be completely correct either (if you take a closer look you will find some +inaccuracies there too): + +- :py:obj:`searx.engines.bing.bing_traits_url` +- :py:obj:`searx.engines.bing_videos.bing_traits_url` +- :py:obj:`searx.engines.bing_images.bing_traits_url` +- :py:obj:`searx.engines.bing_news.bing_traits_url` + +.. _preference page: https://www.bing.com/account/general +.. _search-APIs: https://learn.microsoft.com/en-us/bing/search-apis/ -- https://github.com/searx/searx/issues/2019#issuecomment-648227442 """ -# pylint: disable=too-many-branches +# pylint: disable=too-many-branches, invalid-name +from typing import TYPE_CHECKING +import datetime import re -from urllib.parse import urlencode, urlparse, parse_qs +import uuid +from urllib.parse import urlencode from lxml import html -from searx.utils import eval_xpath, extract_text, eval_xpath_list, match_language, eval_xpath_getindex -from searx.network import multi_requests, Request +import babel +import babel.languages + +from searx.utils import eval_xpath, extract_text, eval_xpath_list, eval_xpath_getindex +from searx import network +from searx.locales import language_tag, region_tag +from searx.enginelib.traits import EngineTraits + +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits about = { "website": 'https://www.bing.com', @@ -21,56 +58,124 @@ about = { "results": 'HTML', } +send_accept_language_header = True +"""Bing tries to guess user's language and territory from the HTTP +Accept-Language. Optional the user can select a search-language (can be +different to the UI language) and a region (market code).""" + # engine dependent config categories = ['general', 'web'] paging = True -time_range_support = False -safesearch = False -send_accept_language_header = True -supported_languages_url = 'https://www.bing.com/account/general' -language_aliases = {} +time_range_support = True +safesearch = True +safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'} # cookie: ADLT=STRICT -# search-url -base_url = 'https://www.bing.com/' +base_url = 'https://www.bing.com/search' +"""Bing (Web) search URL""" -# initial query: https://www.bing.com/search?q=foo&search=&form=QBLH -inital_query = 'search?{query}&search=&form=QBLH' - -# following queries: https://www.bing.com/search?q=foo&search=&first=11&FORM=PERE -page_query = 'search?{query}&search=&first={offset}&FORM=PERE' +bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/reference/market-codes' +"""Bing (Web) search API description""" def _get_offset_from_pageno(pageno): return (pageno - 1) * 10 + 1 +def set_bing_cookies(params, engine_language, engine_region, SID): + + # set cookies + # ----------- + + params['cookies']['_EDGE_V'] = '1' + + # _EDGE_S: F=1&SID=3A5253BD6BCA609509B741876AF961CA&mkt=zh-tw + _EDGE_S = [ + 'F=1', + 'SID=%s' % SID, + 'mkt=%s' % engine_region.lower(), + 'ui=%s' % engine_language.lower(), + ] + params['cookies']['_EDGE_S'] = '&'.join(_EDGE_S) + logger.debug("cookie _EDGE_S=%s", params['cookies']['_EDGE_S']) + + # "_EDGE_CD": "m=zh-tw", + + _EDGE_CD = [ # pylint: disable=invalid-name + 'm=%s' % engine_region.lower(), # search region: zh-cn + 'u=%s' % engine_language.lower(), # UI: en-us + ] + + params['cookies']['_EDGE_CD'] = '&'.join(_EDGE_CD) + ';' + logger.debug("cookie _EDGE_CD=%s", params['cookies']['_EDGE_CD']) + + SRCHHPGUSR = [ # pylint: disable=invalid-name + 'SRCHLANG=%s' % engine_language, + # Trying to set ADLT cookie here seems not to have any effect, I assume + # there is some age verification by a cookie (and/or session ID) needed, + # to disable the SafeSearch. + 'ADLT=%s' % safesearch_types.get(params['safesearch'], 'DEMOTE'), + ] + params['cookies']['SRCHHPGUSR'] = '&'.join(SRCHHPGUSR) + logger.debug("cookie SRCHHPGUSR=%s", params['cookies']['SRCHHPGUSR']) + + def request(query, params): + """Assemble a Bing-Web request.""" - offset = _get_offset_from_pageno(params.get('pageno', 1)) + engine_region = traits.get_region(params['searxng_locale'], 'en-US') + engine_language = traits.get_language(params['searxng_locale'], 'en') - # logger.debug("params['pageno'] --> %s", params.get('pageno')) - # logger.debug(" offset --> %s", offset) + SID = uuid.uuid1().hex.upper() + CVID = uuid.uuid1().hex.upper() - search_string = page_query - if offset == 1: - search_string = inital_query + set_bing_cookies(params, engine_language, engine_region, SID) - if params['language'] == 'all': - lang = 'EN' - else: - lang = match_language(params['language'], supported_languages, language_aliases) + # build URL query + # --------------- - query = 'language:{} {}'.format(lang.split('-')[0].upper(), query) + # query term + page = int(params.get('pageno', 1)) + query_params = { + # fmt: off + 'q': query, + 'pq': query, + 'cvid': CVID, + 'qs': 'n', + 'sp': '-1' + # fmt: on + } - search_path = search_string.format(query=urlencode({'q': query}), offset=offset) - - if offset > 1: - referer = base_url + inital_query.format(query=urlencode({'q': query})) + # page + if page > 1: + referer = base_url + '?' + urlencode(query_params) params['headers']['Referer'] = referer logger.debug("headers.Referer --> %s", referer) - params['url'] = base_url + search_path - params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' + query_params['first'] = _get_offset_from_pageno(page) + + if page == 2: + query_params['FORM'] = 'PERE' + elif page > 2: + query_params['FORM'] = 'PERE%s' % (page - 2) + + filters = '' + if params['time_range']: + query_params['filt'] = 'custom' + + if params['time_range'] == 'day': + filters = 'ex1:"ez1"' + elif params['time_range'] == 'week': + filters = 'ex1:"ez2"' + elif params['time_range'] == 'month': + filters = 'ex1:"ez3"' + elif params['time_range'] == 'year': + epoch_1970 = datetime.date(1970, 1, 1) + today_no = (datetime.date.today() - epoch_1970).days + filters = 'ex1:"ez5_%s_%s"' % (today_no - 365, today_no) + + params['url'] = base_url + '?' + urlencode(query_params) + if filters: + params['url'] = params['url'] + '&filters=' + filters return params @@ -107,7 +212,8 @@ def response(resp): url_cite = extract_text(eval_xpath(result, './/div[@class="b_attribution"]/cite')) # Bing can shorten the URL either at the end or in the middle of the string if ( - url_cite.startswith('https://') + url_cite + and url_cite.startswith('https://') and '…' not in url_cite and '...' not in url_cite and '›' not in url_cite @@ -127,9 +233,9 @@ def response(resp): # resolve all Bing redirections in parallel request_list = [ - Request.get(u, allow_redirects=False, headers=resp.search_params['headers']) for u in url_to_resolve + network.Request.get(u, allow_redirects=False, headers=resp.search_params['headers']) for u in url_to_resolve ] - response_list = multi_requests(request_list) + response_list = network.multi_requests(request_list) for i, redirect_response in enumerate(response_list): if not isinstance(redirect_response, Exception): results[url_to_resolve_index[i]]['url'] = redirect_response.headers['location'] @@ -157,27 +263,71 @@ def response(resp): return results -# get supported languages from their site -def _fetch_supported_languages(resp): +def fetch_traits(engine_traits: EngineTraits): + """Fetch languages and regions from Bing-Web.""" - lang_tags = set() + xpath_market_codes = '//table[1]/tbody/tr/td[3]' + # xpath_country_codes = '//table[2]/tbody/tr/td[2]' + xpath_language_codes = '//table[3]/tbody/tr/td[2]' + + _fetch_traits(engine_traits, bing_traits_url, xpath_language_codes, xpath_market_codes) + + +def _fetch_traits(engine_traits: EngineTraits, url: str, xpath_language_codes: str, xpath_market_codes: str): + + # insert alias to map from a language (zh) to a language + script (zh_Hans) + engine_traits.languages['zh'] = 'zh-hans' + + resp = network.get(url) + + if not resp.ok: + print("ERROR: response from peertube is not OK.") dom = html.fromstring(resp.text) - lang_links = eval_xpath(dom, '//div[@id="language-section"]//li') - for _li in lang_links: + map_lang = {'jp': 'ja'} + for td in eval_xpath(dom, xpath_language_codes): + eng_lang = td.text - href = eval_xpath(_li, './/@href')[0] - (_scheme, _netloc, _path, _params, query, _fragment) = urlparse(href) - query = parse_qs(query, keep_blank_values=True) + if eng_lang in ('en-gb', 'pt-br'): + # language 'en' is already in the list and a language 'en-gb' can't + # be handled in SearXNG, same with pt-br which is covered by pt-pt. + continue - # fmt: off - setlang = query.get('setlang', [None, ])[0] - # example: 'mn-Cyrl-MN' --> '['mn', 'Cyrl-MN'] - lang, nation = (setlang.split('-', maxsplit=1) + [None,])[:2] # fmt: skip - # fmt: on + babel_lang = map_lang.get(eng_lang, eng_lang).replace('-', '_') + try: + sxng_tag = language_tag(babel.Locale.parse(babel_lang)) + except babel.UnknownLocaleError: + print("ERROR: language (%s) is unknown by babel" % (eng_lang)) + continue + conflict = engine_traits.languages.get(sxng_tag) + if conflict: + if conflict != eng_lang: + print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_lang)) + continue + engine_traits.languages[sxng_tag] = eng_lang - tag = lang + '-' + nation if nation else lang - lang_tags.add(tag) + map_region = { + 'en-ID': 'id_ID', + 'no-NO': 'nb_NO', + } - return list(lang_tags) + for td in eval_xpath(dom, xpath_market_codes): + eng_region = td.text + babel_region = map_region.get(eng_region, eng_region).replace('-', '_') + + if eng_region == 'en-WW': + engine_traits.all_locale = eng_region + continue + + try: + sxng_tag = region_tag(babel.Locale.parse(babel_region)) + except babel.UnknownLocaleError: + print("ERROR: region (%s) is unknown by babel" % (eng_region)) + continue + conflict = engine_traits.regions.get(sxng_tag) + if conflict: + if conflict != eng_region: + print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_region)) + continue + engine_traits.regions[sxng_tag] = eng_region diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py index 107ce3cff..bd3a34aa5 100644 --- a/searx/engines/bing_images.py +++ b/searx/engines/bing_images.py @@ -1,20 +1,30 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""Bing (Images) - +"""Bing-Images: description see :py:obj:`searx.engines.bing`. """ +# pylint: disable=invalid-name -from json import loads + +from typing import TYPE_CHECKING +import uuid +import json from urllib.parse import urlencode from lxml import html -from searx.utils import match_language -from searx.engines.bing import language_aliases -from searx.engines.bing import ( # pylint: disable=unused-import - _fetch_supported_languages, - supported_languages_url, +from searx.enginelib.traits import EngineTraits +from searx.engines.bing import ( + set_bing_cookies, + _fetch_traits, ) +from searx.engines.bing import send_accept_language_header # pylint: disable=unused-import + +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits # about about = { @@ -31,77 +41,92 @@ categories = ['images', 'web'] paging = True safesearch = True time_range_support = True -send_accept_language_header = True -supported_languages_url = 'https://www.bing.com/account/general' -number_of_results = 28 -# search-url -base_url = 'https://www.bing.com/' -search_string = ( +base_url = 'https://www.bing.com/images/async' +"""Bing (Images) search URL""" + +bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-image-search/reference/market-codes' +"""Bing (Images) search API description""" + +time_map = { # fmt: off - 'images/search' - '?{query}' - '&count={count}' - '&first={first}' - '&tsc=ImageHoverTitle' + 'day': 60 * 24, + 'week': 60 * 24 * 7, + 'month': 60 * 24 * 31, + 'year': 60 * 24 * 365, # fmt: on -) -time_range_string = '&qft=+filterui:age-lt{interval}' -time_range_dict = {'day': '1440', 'week': '10080', 'month': '43200', 'year': '525600'} - -# safesearch definitions -safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'} +} -# do search-request def request(query, params): - offset = ((params['pageno'] - 1) * number_of_results) + 1 + """Assemble a Bing-Image request.""" - search_path = search_string.format(query=urlencode({'q': query}), count=number_of_results, first=offset) + engine_region = traits.get_region(params['searxng_locale'], 'en-US') + engine_language = traits.get_language(params['searxng_locale'], 'en') - language = match_language(params['language'], supported_languages, language_aliases).lower() + SID = uuid.uuid1().hex.upper() + set_bing_cookies(params, engine_language, engine_region, SID) - params['cookies']['SRCHHPGUSR'] = 'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') + # build URL query + # - example: https://www.bing.com/images/async?q=foo&first=155&count=35 - params['cookies']['_EDGE_S'] = 'mkt=' + language + '&ui=' + language + '&F=1' + query_params = { + # fmt: off + 'q': query, + 'async' : 'content', + # to simplify the page count lets use the default of 35 images per page + 'first' : (int(params.get('pageno', 1)) - 1) * 35 + 1, + 'count' : 35, + # fmt: on + } - params['url'] = base_url + search_path - if params['time_range'] in time_range_dict: - params['url'] += time_range_string.format(interval=time_range_dict[params['time_range']]) + # time range + # - example: one year (525600 minutes) 'qft=+filterui:age-lt525600' + + if params['time_range']: + query_params['qft'] = 'filterui:age-lt%s' % time_map[params['time_range']] + + params['url'] = base_url + '?' + urlencode(query_params) return params -# get response from search-request def response(resp): - results = [] + """Get response from Bing-Images""" + results = [] dom = html.fromstring(resp.text) - # parse results - for result in dom.xpath('//div[@class="imgpt"]'): - img_format = result.xpath('./div[contains(@class, "img_info")]/span/text()')[0] - # Microsoft seems to experiment with this code so don't make the path too specific, - # just catch the text section for the first anchor in img_info assuming this to be - # the originating site. - source = result.xpath('./div[contains(@class, "img_info")]//a/text()')[0] + for result in dom.xpath('//ul[contains(@class, "dgControl_list")]/li'): - m = loads(result.xpath('./a/@m')[0]) + metadata = result.xpath('.//a[@class="iusc"]/@m') + if not metadata: + continue - # strip 'Unicode private use area' highlighting, they render to Tux - # the Linux penguin and a standing diamond on my machine... - title = m.get('t', '').replace('\ue000', '').replace('\ue001', '') + metadata = json.loads(result.xpath('.//a[@class="iusc"]/@m')[0]) + title = ' '.join(result.xpath('.//div[@class="infnmpt"]//a/text()')).strip() + img_format = ' '.join(result.xpath('.//div[@class="imgpt"]/div/span/text()')).strip() + source = ' '.join(result.xpath('.//div[@class="imgpt"]//div[@class="lnkw"]//a/text()')).strip() results.append( { 'template': 'images.html', - 'url': m['purl'], - 'thumbnail_src': m['turl'], - 'img_src': m['murl'], - 'content': '', + 'url': metadata['purl'], + 'thumbnail_src': metadata['turl'], + 'img_src': metadata['murl'], + 'content': metadata['desc'], 'title': title, 'source': source, 'img_format': img_format, } ) - return results + + +def fetch_traits(engine_traits: EngineTraits): + """Fetch languages and regions from Bing-News.""" + + xpath_market_codes = '//table[1]/tbody/tr/td[3]' + # xpath_country_codes = '//table[2]/tbody/tr/td[2]' + xpath_language_codes = '//table[3]/tbody/tr/td[2]' + + _fetch_traits(engine_traits, bing_traits_url, xpath_language_codes, xpath_market_codes) diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py index 7eea17bb4..d8c63857a 100644 --- a/searx/engines/bing_news.py +++ b/searx/engines/bing_news.py @@ -1,24 +1,30 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""Bing (News) +"""Bing-News: description see :py:obj:`searx.engines.bing`. """ -from urllib.parse import ( - urlencode, - urlparse, - parse_qsl, - quote, -) -from datetime import datetime -from dateutil import parser -from lxml import etree -from lxml.etree import XPath -from searx.utils import match_language, eval_xpath_getindex -from searx.engines.bing import ( # pylint: disable=unused-import - language_aliases, - _fetch_supported_languages, - supported_languages_url, +# pylint: disable=invalid-name + +from typing import TYPE_CHECKING +import uuid +from urllib.parse import urlencode + +from lxml import html + +from searx.enginelib.traits import EngineTraits +from searx.engines.bing import ( + set_bing_cookies, + _fetch_traits, ) +from searx.engines.bing import send_accept_language_header # pylint: disable=unused-import + +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits + # about about = { @@ -34,108 +40,111 @@ about = { categories = ['news'] paging = True time_range_support = True -send_accept_language_header = True +time_map = { + 'day': '4', + 'week': '8', + 'month': '9', +} +"""A string '4' means *last hour*. We use *last hour* for ``day`` here since the +difference of *last day* and *last week* in the result list is just marginally. +""" -# search-url -base_url = 'https://www.bing.com/' -search_string = 'news/search?{query}&first={offset}&format=RSS' -search_string_with_time = 'news/search?{query}&first={offset}&qft=interval%3d"{interval}"&format=RSS' -time_range_dict = {'day': '7', 'week': '8', 'month': '9'} +base_url = 'https://www.bing.com/news/infinitescrollajax' +"""Bing (News) search URL""" +bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-news-search/reference/market-codes' +"""Bing (News) search API description""" -def url_cleanup(url_string): - """remove click""" - - parsed_url = urlparse(url_string) - if parsed_url.netloc == 'www.bing.com' and parsed_url.path == '/news/apiclick.aspx': - query = dict(parse_qsl(parsed_url.query)) - url_string = query.get('url', None) - return url_string - - -def image_url_cleanup(url_string): - """replace the http://*bing.com/th?id=... by https://www.bing.com/th?id=...""" - - parsed_url = urlparse(url_string) - if parsed_url.netloc.endswith('bing.com') and parsed_url.path == '/th': - query = dict(parse_qsl(parsed_url.query)) - url_string = "https://www.bing.com/th?id=" + quote(query.get('id')) - return url_string - - -def _get_url(query, language, offset, time_range): - if time_range in time_range_dict: - search_path = search_string_with_time.format( - # fmt: off - query = urlencode({ - 'q': query, - 'setmkt': language - }), - offset = offset, - interval = time_range_dict[time_range] - # fmt: on - ) - else: - # e.g. setmkt=de-de&setlang=de - search_path = search_string.format( - # fmt: off - query = urlencode({ - 'q': query, - 'setmkt': language - }), - offset = offset - # fmt: on - ) - return base_url + search_path +mkt_alias = { + 'zh': 'en-WW', + 'zh-CN': 'en-WW', +} +"""Bing News has an official market code 'zh-CN' but we won't get a result with +this market code. For 'zh' and 'zh-CN' we better use the *Worldwide aggregate* +market code (en-WW). +""" def request(query, params): + """Assemble a Bing-News request.""" - if params['time_range'] and params['time_range'] not in time_range_dict: - return params + sxng_locale = params['searxng_locale'] + engine_region = traits.get_region(mkt_alias.get(sxng_locale, sxng_locale), traits.all_locale) + engine_language = traits.get_language(sxng_locale, 'en') - offset = (params['pageno'] - 1) * 10 + 1 - if params['language'] == 'all': - language = 'en-US' - else: - language = match_language(params['language'], supported_languages, language_aliases) - params['url'] = _get_url(query, language, offset, params['time_range']) + SID = uuid.uuid1().hex.upper() + set_bing_cookies(params, engine_language, engine_region, SID) + + # build URL query + # + # example: https://www.bing.com/news/infinitescrollajax?q=london&first=1 + + query_params = { + # fmt: off + 'q': query, + 'InfiniteScroll': 1, + # to simplify the page count lets use the default of 10 images per page + 'first' : (int(params.get('pageno', 1)) - 1) * 10 + 1, + # fmt: on + } + + if params['time_range']: + # qft=interval:"7" + query_params['qft'] = 'qft=interval="%s"' % time_map.get(params['time_range'], '9') + + params['url'] = base_url + '?' + urlencode(query_params) return params def response(resp): - + """Get response from Bing-Video""" results = [] - rss = etree.fromstring(resp.content) - namespaces = rss.nsmap - for item in rss.xpath('./channel/item'): - # url / title / content - url = url_cleanup(eval_xpath_getindex(item, './link/text()', 0, default=None)) - title = eval_xpath_getindex(item, './title/text()', 0, default=url) - content = eval_xpath_getindex(item, './description/text()', 0, default='') + if not resp.ok or not resp.text: + return results - # publishedDate - publishedDate = eval_xpath_getindex(item, './pubDate/text()', 0, default=None) - try: - publishedDate = parser.parse(publishedDate, dayfirst=False) - except TypeError: - publishedDate = datetime.now() - except ValueError: - publishedDate = datetime.now() + dom = html.fromstring(resp.text) - # thumbnail - thumbnail = eval_xpath_getindex(item, XPath('./News:Image/text()', namespaces=namespaces), 0, default=None) - if thumbnail is not None: - thumbnail = image_url_cleanup(thumbnail) + for newsitem in dom.xpath('//div[contains(@class, "newsitem")]'): - # append result - if thumbnail is not None: - results.append( - {'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content, 'img_src': thumbnail} - ) - else: - results.append({'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content}) + url = newsitem.xpath('./@url')[0] + title = ' '.join(newsitem.xpath('.//div[@class="caption"]//a[@class="title"]/text()')).strip() + content = ' '.join(newsitem.xpath('.//div[@class="snippet"]/text()')).strip() + thumbnail = None + author = newsitem.xpath('./@data-author')[0] + metadata = ' '.join(newsitem.xpath('.//div[@class="source"]/span/text()')).strip() + + img_src = newsitem.xpath('.//a[@class="imagelink"]//img/@src') + if img_src: + thumbnail = 'https://www.bing.com/' + img_src[0] + + results.append( + { + 'url': url, + 'title': title, + 'content': content, + 'img_src': thumbnail, + 'author': author, + 'metadata': metadata, + } + ) return results + + +def fetch_traits(engine_traits: EngineTraits): + """Fetch languages and regions from Bing-News. + + The :py:obj:`description ` of the + first table says *"query parameter when calling the Video Search API."* + .. thats why I use the 4. table "News Category API markets" for the + ``xpath_market_codes``. + + """ + + xpath_market_codes = '//table[4]/tbody/tr/td[3]' + # xpath_country_codes = '//table[2]/tbody/tr/td[2]' + xpath_language_codes = '//table[3]/tbody/tr/td[2]' + + _fetch_traits(engine_traits, bing_traits_url, xpath_language_codes, xpath_market_codes) diff --git a/searx/engines/bing_videos.py b/searx/engines/bing_videos.py index 85071de21..8ee0bb66e 100644 --- a/searx/engines/bing_videos.py +++ b/searx/engines/bing_videos.py @@ -1,21 +1,30 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""Bing (Videos) - +"""Bing-Videos: description see :py:obj:`searx.engines.bing`. """ +# pylint: disable=invalid-name -from json import loads +from typing import TYPE_CHECKING +import uuid +import json from urllib.parse import urlencode from lxml import html -from searx.utils import match_language -from searx.engines.bing import language_aliases - -from searx.engines.bing import ( # pylint: disable=unused-import - _fetch_supported_languages, - supported_languages_url, +from searx.enginelib.traits import EngineTraits +from searx.engines.bing import ( + set_bing_cookies, + _fetch_traits, ) +from searx.engines.bing import send_accept_language_header # pylint: disable=unused-import + +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits + about = { "website": 'https://www.bing.com/videos', @@ -26,65 +35,76 @@ about = { "results": 'HTML', } +# engine dependent config categories = ['videos', 'web'] paging = True safesearch = True time_range_support = True -send_accept_language_header = True -number_of_results = 28 -base_url = 'https://www.bing.com/' -search_string = ( +base_url = 'https://www.bing.com/videos/asyncv2' +"""Bing (Videos) async search URL.""" + +bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-video-search/reference/market-codes' +"""Bing (Video) search API description""" + +time_map = { # fmt: off - 'videos/search' - '?{query}' - '&count={count}' - '&first={first}' - '&scope=video' - '&FORM=QBLH' + 'day': 60 * 24, + 'week': 60 * 24 * 7, + 'month': 60 * 24 * 31, + 'year': 60 * 24 * 365, # fmt: on -) -time_range_string = '&qft=+filterui:videoage-lt{interval}' -time_range_dict = {'day': '1440', 'week': '10080', 'month': '43200', 'year': '525600'} - -# safesearch definitions -safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'} +} -# do search-request def request(query, params): - offset = ((params['pageno'] - 1) * number_of_results) + 1 + """Assemble a Bing-Video request.""" - search_path = search_string.format(query=urlencode({'q': query}), count=number_of_results, first=offset) + engine_region = traits.get_region(params['searxng_locale'], 'en-US') + engine_language = traits.get_language(params['searxng_locale'], 'en') - # safesearch cookie - params['cookies']['SRCHHPGUSR'] = 'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') + SID = uuid.uuid1().hex.upper() + set_bing_cookies(params, engine_language, engine_region, SID) - # language cookie - language = match_language(params['language'], supported_languages, language_aliases).lower() - params['cookies']['_EDGE_S'] = 'mkt=' + language + '&F=1' + # build URL query + # + # example: https://www.bing.com/videos/asyncv2?q=foo&async=content&first=1&count=35 - # query and paging - params['url'] = base_url + search_path + query_params = { + # fmt: off + 'q': query, + 'async' : 'content', + # to simplify the page count lets use the default of 35 images per page + 'first' : (int(params.get('pageno', 1)) - 1) * 35 + 1, + 'count' : 35, + # fmt: on + } # time range - if params['time_range'] in time_range_dict: - params['url'] += time_range_string.format(interval=time_range_dict[params['time_range']]) + # + # example: one week (10080 minutes) '&qft= filterui:videoage-lt10080' '&form=VRFLTR' + + if params['time_range']: + query_params['form'] = 'VRFLTR' + query_params['qft'] = ' filterui:videoage-lt%s' % time_map[params['time_range']] + + params['url'] = base_url + '?' + urlencode(query_params) return params -# get response from search-request def response(resp): + """Get response from Bing-Video""" results = [] dom = html.fromstring(resp.text) - for result in dom.xpath('//div[@class="dg_u"]/div[contains(@class, "mc_vtvc")]'): - metadata = loads(result.xpath('.//div[@class="vrhdata"]/@vrhm')[0]) + for result in dom.xpath('//div[@class="dg_u"]//div[contains(@id, "mc_vtvc_video")]'): + metadata = json.loads(result.xpath('.//div[@class="vrhdata"]/@vrhm')[0]) info = ' - '.join(result.xpath('.//div[@class="mc_vtvc_meta_block"]//span/text()')).strip() content = '{0} - {1}'.format(metadata['du'], info) - thumbnail = '{0}th?id={1}'.format(base_url, metadata['thid']) + thumbnail = result.xpath('.//div[contains(@class, "mc_vtvc_th")]//img/@src')[0] + results.append( { 'url': metadata['murl'], @@ -96,3 +116,13 @@ def response(resp): ) return results + + +def fetch_traits(engine_traits: EngineTraits): + """Fetch languages and regions from Bing-Videos.""" + + xpath_market_codes = '//table[1]/tbody/tr/td[3]' + # xpath_country_codes = '//table[2]/tbody/tr/td[2]' + xpath_language_codes = '//table[3]/tbody/tr/td[2]' + + _fetch_traits(engine_traits, bing_traits_url, xpath_language_codes, xpath_market_codes) diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py index 7dd84dd27..d734ec3c8 100644 --- a/searx/engines/dailymotion.py +++ b/searx/engines/dailymotion.py @@ -1,17 +1,35 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -"""Dailymotion (Videos) +# lint: pylint +""" +Dailymotion (Videos) +~~~~~~~~~~~~~~~~~~~~ + +.. _REST GET: https://developers.dailymotion.com/tools/ +.. _Global API Parameters: https://developers.dailymotion.com/api/#global-parameters +.. _Video filters API: https://developers.dailymotion.com/api/#video-filters +.. _Fields selection: https://developers.dailymotion.com/api/#fields-selection """ -from typing import Set +from typing import TYPE_CHECKING + from datetime import datetime, timedelta from urllib.parse import urlencode import time import babel from searx.exceptions import SearxEngineAPIException -from searx.network import raise_for_httperror +from searx import network from searx.utils import html_to_text +from searx.locales import region_tag, language_tag +from searx.enginelib.traits import EngineTraits + +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits # about about = { @@ -37,11 +55,24 @@ time_delta_dict = { } safesearch = True -safesearch_params = {2: '&is_created_for_kids=true', 1: '&is_created_for_kids=true', 0: ''} +safesearch_params = { + 2: {'is_created_for_kids': 'true'}, + 1: {'is_created_for_kids': 'true'}, + 0: {}, +} +"""True if this video is "Created for Kids" / intends to target an audience +under the age of 16 (``is_created_for_kids`` in `Video filters API`_ ) +""" -# search-url -# - https://developers.dailymotion.com/tools/ -# - https://www.dailymotion.com/doc/api/obj-video.html +family_filter_map = { + 2: 'true', + 1: 'true', + 0: 'false', +} +"""By default, the family filter is turned on. Setting this parameter to +``false`` will stop filtering-out explicit content from searches and global +contexts (``family_filter`` in `Global API Parameters`_ ). +""" result_fields = [ 'allow_embed', @@ -53,27 +84,21 @@ result_fields = [ 'thumbnail_360_url', 'id', ] -search_url = ( - 'https://api.dailymotion.com/videos?' - 'fields={fields}&password_protected={password_protected}&private={private}&sort={sort}&limit={limit}' -).format( - fields=','.join(result_fields), - password_protected='false', - private='false', - sort='relevance', - limit=number_of_results, -) +"""`Fields selection`_, by default, a few fields are returned. To request more +specific fields, the ``fields`` parameter is used with the list of fields +SearXNG needs in the response to build a video result list. +""" + +search_url = 'https://api.dailymotion.com/videos?' +"""URL to retrieve a list of videos. + +- `REST GET`_ +- `Global API Parameters`_ +- `Video filters API`_ +""" + iframe_src = "https://www.dailymotion.com/embed/video/{video_id}" - -# The request query filters by 'languages' & 'country', therefore instead of -# fetching only languages we need to fetch locales. -supported_languages_url = 'https://api.dailymotion.com/locales' -supported_languages_iso639: Set[str] = set() - - -def init(_engine_settings): - global supported_languages_iso639 - supported_languages_iso639 = set([language.split('_')[0] for language in supported_languages]) +"""URL template to embed video in SearXNG's result list.""" def request(query, params): @@ -81,34 +106,42 @@ def request(query, params): if not query: return False - language = params['language'] - if language == 'all': - language = 'en-US' - locale = babel.Locale.parse(language, sep='-') + eng_region = traits.get_region(params['searxng_locale'], 'en_US') + eng_lang = traits.get_language(params['searxng_locale'], 'en') - language_iso639 = locale.language - if locale.language not in supported_languages_iso639: - language_iso639 = 'en' - - query_args = { + args = { 'search': query, - 'languages': language_iso639, + 'family_filter': family_filter_map.get(params['safesearch'], 'false'), + 'thumbnail_ratio': 'original', # original|widescreen|square + # https://developers.dailymotion.com/api/#video-filters + 'languages': eng_lang, 'page': params['pageno'], + 'password_protected': 'false', + 'private': 'false', + 'sort': 'relevance', + 'limit': number_of_results, + 'fields': ','.join(result_fields), } - if locale.territory: - localization = locale.language + '_' + locale.territory - if localization in supported_languages: - query_args['country'] = locale.territory + args.update(safesearch_params.get(params['safesearch'], {})) + + # Don't add localization and country arguments if the user does select a + # language (:de, :en, ..) + + if len(params['searxng_locale'].split('-')) > 1: + # https://developers.dailymotion.com/api/#global-parameters + args['localization'] = eng_region + args['country'] = eng_region.split('_')[1] + # Insufficient rights for the `ams_country' parameter of route `GET /videos' + # 'ams_country': eng_region.split('_')[1], time_delta = time_delta_dict.get(params["time_range"]) if time_delta: created_after = datetime.now() - time_delta - query_args['created_after'] = datetime.timestamp(created_after) + args['created_after'] = datetime.timestamp(created_after) - query_str = urlencode(query_args) - params['url'] = search_url + '&' + query_str + safesearch_params.get(params['safesearch'], '') - params['raise_for_httperror'] = False + query_str = urlencode(args) + params['url'] = search_url + query_str return params @@ -123,7 +156,7 @@ def response(resp): if 'error' in search_res: raise SearxEngineAPIException(search_res['error'].get('message')) - raise_for_httperror(resp) + network.raise_for_httperror(resp) # parse results for res in search_res.get('list', []): @@ -167,7 +200,53 @@ def response(resp): return results -# get supported languages from their site -def _fetch_supported_languages(resp): - response_json = resp.json() - return [item['locale'] for item in response_json['list']] +def fetch_traits(engine_traits: EngineTraits): + """Fetch locales & languages from dailymotion. + + Locales fetched from `api/locales `_. + There are duplications in the locale codes returned from Dailymotion which + can be ignored:: + + en_EN --> en_GB, en_US + ar_AA --> ar_EG, ar_AE, ar_SA + + The language list `api/languages `_ + contains over 7000 *languages* codes (see PR1071_). We use only those + language codes that are used in the locales. + + .. _PR1071: https://github.com/searxng/searxng/pull/1071 + + """ + + resp = network.get('https://api.dailymotion.com/locales') + if not resp.ok: + print("ERROR: response from dailymotion/locales is not OK.") + + for item in resp.json()['list']: + eng_tag = item['locale'] + if eng_tag in ('en_EN', 'ar_AA'): + continue + try: + sxng_tag = region_tag(babel.Locale.parse(eng_tag)) + except babel.UnknownLocaleError: + print("ERROR: item unknown --> %s" % item) + continue + + conflict = engine_traits.regions.get(sxng_tag) + if conflict: + if conflict != eng_tag: + print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag)) + continue + engine_traits.regions[sxng_tag] = eng_tag + + locale_lang_list = [x.split('_')[0] for x in engine_traits.regions.values()] + + resp = network.get('https://api.dailymotion.com/languages') + if not resp.ok: + print("ERROR: response from dailymotion/languages is not OK.") + + for item in resp.json()['list']: + eng_tag = item['code'] + if eng_tag in locale_lang_list: + sxng_tag = language_tag(babel.Locale.parse(eng_tag)) + engine_traits.languages[sxng_tag] = eng_tag diff --git a/searx/engines/demo_offline.py b/searx/engines/demo_offline.py index aeb74f443..9d6e3b52d 100644 --- a/searx/engines/demo_offline.py +++ b/searx/engines/demo_offline.py @@ -63,7 +63,7 @@ def search(query, request_params): for row in result_list: entry = { 'query': query, - 'language': request_params['language'], + 'language': request_params['searxng_locale'], 'value': row.get("value"), # choose a result template or comment out to use the *default* 'template': 'key-value.html', diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index 2a7956ca8..85e977bdb 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -1,71 +1,207 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""DuckDuckGo Lite +""" +DuckDuckGo Lite +~~~~~~~~~~~~~~~ """ -from json import loads - -from lxml.html import fromstring +from typing import TYPE_CHECKING +from urllib.parse import urlencode +import json +import babel +import lxml.html +from searx import ( + network, + locales, + redislib, +) +from searx import redisdb from searx.utils import ( - dict_subset, eval_xpath, eval_xpath_getindex, extract_text, - match_language, ) -from searx.network import get +from searx.enginelib.traits import EngineTraits +from searx.exceptions import SearxEngineAPIException + +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits -# about about = { "website": 'https://lite.duckduckgo.com/lite/', "wikidata_id": 'Q12805', - "official_api_documentation": 'https://duckduckgo.com/api', "use_official_api": False, "require_api_key": False, "results": 'HTML', } +send_accept_language_header = True +"""DuckDuckGo-Lite tries to guess user's prefered language from the HTTP +``Accept-Language``. Optional the user can select a region filter (but not a +language). +""" + # engine dependent config categories = ['general', 'web'] paging = True -supported_languages_url = 'https://duckduckgo.com/util/u588.js' time_range_support = True -send_accept_language_header = True +safesearch = True # user can't select but the results are filtered -language_aliases = { - 'ar-SA': 'ar-XA', - 'es-419': 'es-XL', - 'ja': 'jp-JP', - 'ko': 'kr-KR', - 'sl-SI': 'sl-SL', - 'zh-TW': 'tzh-TW', - 'zh-HK': 'tzh-HK', -} +url = 'https://lite.duckduckgo.com/lite/' +# url_ping = 'https://duckduckgo.com/t/sl_l' time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'} +form_data = {'v': 'l', 'api': 'd.js', 'o': 'json'} -# search-url -url = 'https://lite.duckduckgo.com/lite/' -url_ping = 'https://duckduckgo.com/t/sl_l' -# match query's language to a region code that duckduckgo will accept -def get_region_code(lang, lang_list=None): - if lang == 'all': - return None +def cache_vqd(query, value): + """Caches a ``vqd`` value from a query. - lang_code = match_language(lang, lang_list or [], language_aliases, 'wt-WT') - lang_parts = lang_code.split('-') + The vqd value depends on the query string and is needed for the follow up + pages or the images loaded by a XMLHttpRequest: - # country code goes first - return lang_parts[1].lower() + '-' + lang_parts[0].lower() + - DuckDuckGo Web: `https://links.duckduckgo.com/d.js?q=...&vqd=...` + - DuckDuckGo Images: `https://duckduckgo.com/i.js??q=...&vqd=...` + + """ + c = redisdb.client() + if c: + logger.debug("cache vqd value: %s", value) + key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query) + c.set(key, value, ex=600) + + +def get_vqd(query, headers): + """Returns the ``vqd`` that fits to the *query*. If there is no ``vqd`` cached + (:py:obj:`cache_vqd`) the query is sent to DDG to get a vqd value from the + response. + + """ + value = None + c = redisdb.client() + if c: + key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query) + value = c.get(key) + if value: + value = value.decode('utf-8') + logger.debug("re-use cached vqd value: %s", value) + return value + + query_url = 'https://duckduckgo.com/?{query}&iar=images'.format(query=urlencode({'q': query})) + res = network.get(query_url, headers=headers) + content = res.text + if content.find('vqd=\'') == -1: + raise SearxEngineAPIException('Request failed') + value = content[content.find('vqd=\'') + 5 :] + value = value[: value.find('\'')] + logger.debug("new vqd value: %s", value) + cache_vqd(query, value) + return value + + +def get_ddg_lang(eng_traits: EngineTraits, sxng_locale, default='en_US'): + """Get DuckDuckGo's language identifier from SearXNG's locale. + + DuckDuckGo defines its lanaguages by region codes (see + :py:obj:`fetch_traits`). + + To get region and language of a DDG service use: + + .. code: python + + eng_region = traits.get_region(params['searxng_locale'], traits.all_locale) + eng_lang = get_ddg_lang(traits, params['searxng_locale']) + + It might confuse, but the ``l`` value of the cookie is what SearXNG calls + the *region*: + + .. code:: python + + # !ddi paris :es-AR --> {'ad': 'es_AR', 'ah': 'ar-es', 'l': 'ar-es'} + params['cookies']['ad'] = eng_lang + params['cookies']['ah'] = eng_region + params['cookies']['l'] = eng_region + + .. hint:: + + `DDG-lite `__ does not offer a language + selection to the user, only a region can be selected by the user + (``eng_region`` from the example above). DDG-lite stores the selected + region in a cookie:: + + params['cookies']['kl'] = eng_region # 'ar-es' + + """ + return eng_traits.custom['lang_region'].get(sxng_locale, eng_traits.get_language(sxng_locale, default)) + + +ddg_reg_map = { + 'tw-tzh': 'zh_TW', + 'hk-tzh': 'zh_HK', + 'ct-ca': 'skip', # ct-ca and es-ca both map to ca_ES + 'es-ca': 'ca_ES', + 'id-en': 'id_ID', + 'no-no': 'nb_NO', + 'jp-jp': 'ja_JP', + 'kr-kr': 'ko_KR', + 'xa-ar': 'ar_SA', + 'sl-sl': 'sl_SI', + 'th-en': 'th_TH', + 'vn-en': 'vi_VN', +} + +ddg_lang_map = { + # use ar --> ar_EG (Egypt's arabic) + "ar_DZ": 'lang_region', + "ar_JO": 'lang_region', + "ar_SA": 'lang_region', + # use bn --> bn_BD + 'bn_IN': 'lang_region', + # use de --> de_DE + 'de_CH': 'lang_region', + # use en --> en_US, + 'en_AU': 'lang_region', + 'en_CA': 'lang_region', + 'en_GB': 'lang_region', + # Esperanto + 'eo_XX': 'eo', + # use es --> es_ES, + 'es_AR': 'lang_region', + 'es_CL': 'lang_region', + 'es_CO': 'lang_region', + 'es_CR': 'lang_region', + 'es_EC': 'lang_region', + 'es_MX': 'lang_region', + 'es_PE': 'lang_region', + 'es_UY': 'lang_region', + 'es_VE': 'lang_region', + # use fr --> rf_FR + 'fr_CA': 'lang_region', + 'fr_CH': 'lang_region', + 'fr_BE': 'lang_region', + # use nl --> nl_NL + 'nl_BE': 'lang_region', + # use pt --> pt_PT + 'pt_BR': 'lang_region', + # skip these languages + 'od_IN': 'skip', + 'io_XX': 'skip', + 'tokipona_XX': 'skip', +} def request(query, params): + eng_region = traits.get_region(params['searxng_locale'], traits.all_locale) + # eng_lang = get_ddg_lang(traits, params['searxng_locale']) + params['url'] = url params['method'] = 'POST' - params['data']['q'] = query # The API is not documented, so we do some reverse engineering and emulate @@ -88,23 +224,19 @@ def request(query, params): params['data']['s'] = offset params['data']['dc'] = offset + 1 + # request needs a vqd argument + params['data']['vqd'] = get_vqd(query, params["headers"]) + # initial page does not have additional data in the input form if params['pageno'] > 1: - # request the second page (and more pages) needs 'o' and 'api' arguments - params['data']['o'] = 'json' - params['data']['api'] = 'd.js' - # initial page does not have additional data in the input form - if params['pageno'] > 2: - # request the third page (and more pages) some more arguments - params['data']['nextParams'] = '' - params['data']['v'] = '' - params['data']['vqd'] = '' + params['data']['o'] = form_data.get('o', 'json') + params['data']['api'] = form_data.get('api', 'd.js') + params['data']['nextParams'] = form_data.get('nextParams', '') + params['data']['v'] = form_data.get('v', 'l') - region_code = get_region_code(params['language'], supported_languages) - if region_code: - params['data']['kl'] = region_code - params['cookies']['kl'] = region_code + params['data']['kl'] = eng_region + params['cookies']['kl'] = eng_region params['data']['df'] = '' if params['time_range'] in time_range_dict: @@ -116,26 +248,40 @@ def request(query, params): return params -# get response from search-request def response(resp): - headers_ping = dict_subset(resp.request.headers, ['User-Agent', 'Accept-Encoding', 'Accept', 'Cookie']) - get(url_ping, headers=headers_ping) - if resp.status_code == 303: return [] results = [] - doc = fromstring(resp.text) + doc = lxml.html.fromstring(resp.text) result_table = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table') - if not len(result_table) >= 3: + + if len(result_table) == 2: + # some locales (at least China) does not have a "next page" button and + # the layout of the HTML tables is different. + result_table = result_table[1] + elif not len(result_table) >= 3: # no more results return [] - result_table = result_table[2] + else: + result_table = result_table[2] + # update form data from response + form = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table//input/..') + if len(form): + + form = form[0] + form_data['v'] = eval_xpath(form, '//input[@name="v"]/@value')[0] + form_data['api'] = eval_xpath(form, '//input[@name="api"]/@value')[0] + form_data['o'] = eval_xpath(form, '//input[@name="o"]/@value')[0] + logger.debug('form_data: %s', form_data) + + value = eval_xpath(form, '//input[@name="vqd"]/@value')[0] + query = resp.search_params['data']['q'] + cache_vqd(query, value) tr_rows = eval_xpath(result_table, './/tr') - # In the last is the form of the 'previous/next page' links tr_rows = tr_rows[:-1] @@ -172,15 +318,105 @@ def response(resp): return results -# get supported languages from their site -def _fetch_supported_languages(resp): +def fetch_traits(engine_traits: EngineTraits): + """Fetch languages & regions from DuckDuckGo. - # response is a js file with regions as an embedded object - response_page = resp.text - response_page = response_page[response_page.find('regions:{') + 8 :] - response_page = response_page[: response_page.find('}') + 1] + SearXNG's ``all`` locale maps DuckDuckGo's "Alle regions" (``wt-wt``). + DuckDuckGo's language "Browsers prefered language" (``wt_WT``) makes no + sense in a SearXNG request since SearXNG's ``all`` will not add a + ``Accept-Language`` HTTP header. The value in ``engine_traits.all_locale`` + is ``wt-wt`` (the region). - regions_json = loads(response_page) - supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys()) + Beside regions DuckDuckGo also defines its lanaguages by region codes. By + example these are the english languages in DuckDuckGo: - return list(supported_languages) + - en_US + - en_AU + - en_CA + - en_GB + + The function :py:obj:`get_ddg_lang` evaluates DuckDuckGo's language from + SearXNG's locale. + + """ + # pylint: disable=too-many-branches, too-many-statements + # fetch regions + + engine_traits.all_locale = 'wt-wt' + + # updated from u588 to u661 / should be updated automatically? + resp = network.get('https://duckduckgo.com/util/u661.js') + + if not resp.ok: + print("ERROR: response from DuckDuckGo is not OK.") + + pos = resp.text.find('regions:{') + 8 + js_code = resp.text[pos:] + pos = js_code.find('}') + 1 + regions = json.loads(js_code[:pos]) + + for eng_tag, name in regions.items(): + + if eng_tag == 'wt-wt': + engine_traits.all_locale = 'wt-wt' + continue + + region = ddg_reg_map.get(eng_tag) + if region == 'skip': + continue + + if not region: + eng_territory, eng_lang = eng_tag.split('-') + region = eng_lang + '_' + eng_territory.upper() + + try: + sxng_tag = locales.region_tag(babel.Locale.parse(region)) + except babel.UnknownLocaleError: + print("ERROR: %s (%s) -> %s is unknown by babel" % (name, eng_tag, region)) + continue + + conflict = engine_traits.regions.get(sxng_tag) + if conflict: + if conflict != eng_tag: + print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag)) + continue + engine_traits.regions[sxng_tag] = eng_tag + + # fetch languages + + engine_traits.custom['lang_region'] = {} + + pos = resp.text.find('languages:{') + 10 + js_code = resp.text[pos:] + pos = js_code.find('}') + 1 + js_code = '{"' + js_code[1:pos].replace(':', '":').replace(',', ',"') + languages = json.loads(js_code) + + for eng_lang, name in languages.items(): + + if eng_lang == 'wt_WT': + continue + + babel_tag = ddg_lang_map.get(eng_lang, eng_lang) + if babel_tag == 'skip': + continue + + try: + + if babel_tag == 'lang_region': + sxng_tag = locales.region_tag(babel.Locale.parse(eng_lang)) + engine_traits.custom['lang_region'][sxng_tag] = eng_lang + continue + + sxng_tag = locales.language_tag(babel.Locale.parse(babel_tag)) + + except babel.UnknownLocaleError: + print("ERROR: language %s (%s) is unknown by babel" % (name, eng_lang)) + continue + + conflict = engine_traits.languages.get(sxng_tag) + if conflict: + if conflict != eng_lang: + print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_lang)) + continue + engine_traits.languages[sxng_tag] = eng_lang diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py index 7ed0de35c..39fed87e7 100644 --- a/searx/engines/duckduckgo_definitions.py +++ b/searx/engines/duckduckgo_definitions.py @@ -1,22 +1,33 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""DuckDuckGo (Instant Answer API) +""" +DuckDuckGo Instant Answer API +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The `DDG-API `__ is no longer documented but from +reverse engineering we can see that some services (e.g. instant answers) still +in use from the DDG search engine. + +As far we can say the *instant answers* API does not support languages, or at +least we could not find out how language support should work. It seems that +most of the features are based on English terms. """ -import json +from typing import TYPE_CHECKING + from urllib.parse import urlencode, urlparse, urljoin from lxml import html from searx.data import WIKIDATA_UNITS -from searx.engines.duckduckgo import language_aliases -from searx.engines.duckduckgo import ( # pylint: disable=unused-import - _fetch_supported_languages, - supported_languages_url, -) -from searx.utils import extract_text, html_to_text, match_language, get_string_replaces_function +from searx.utils import extract_text, html_to_text, get_string_replaces_function from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom +if TYPE_CHECKING: + import logging + + logger: logging.Logger + # about about = { "website": 'https://duckduckgo.com/', @@ -37,7 +48,7 @@ replace_http_by_https = get_string_replaces_function({'http:': 'https:'}) def is_broken_text(text): - """duckduckgo may return something like "http://somewhere Related website" + """duckduckgo may return something like ``http://somewhere Related website`` The href URL is broken, the "Related website" may contains some HTML. @@ -62,8 +73,6 @@ def result_to_text(text, htmlResult): def request(query, params): params['url'] = URL.format(query=urlencode({'q': query})) - language = match_language(params['language'], supported_languages, language_aliases) - language = language.split('-')[0] return params @@ -71,7 +80,7 @@ def response(resp): # pylint: disable=too-many-locals, too-many-branches, too-many-statements results = [] - search_res = json.loads(resp.text) + search_res = resp.json() # search_res.get('Entity') possible values (not exhaustive) : # * continent / country / department / location / waterfall @@ -235,7 +244,7 @@ def unit_to_str(unit): def area_to_str(area): - """parse {'unit': 'http://www.wikidata.org/entity/Q712226', 'amount': '+20.99'}""" + """parse ``{'unit': 'https://www.wikidata.org/entity/Q712226', 'amount': '+20.99'}``""" unit = unit_to_str(area.get('unit')) if unit is not None: try: diff --git a/searx/engines/duckduckgo_images.py b/searx/engines/duckduckgo_images.py index 19f649ef4..d8a6f1340 100644 --- a/searx/engines/duckduckgo_images.py +++ b/searx/engines/duckduckgo_images.py @@ -1,26 +1,30 @@ # SPDX-License-Identifier: AGPL-3.0-or-later """ - DuckDuckGo (Images) +DuckDuckGo Images +~~~~~~~~~~~~~~~~~ """ -from json import loads +from typing import TYPE_CHECKING from urllib.parse import urlencode -from searx.exceptions import SearxEngineAPIException -from searx.engines.duckduckgo import get_region_code -from searx.engines.duckduckgo import ( # pylint: disable=unused-import - _fetch_supported_languages, - supported_languages_url, + +from searx.engines.duckduckgo import fetch_traits # pylint: disable=unused-import +from searx.engines.duckduckgo import ( + get_ddg_lang, + get_vqd, ) -from searx.network import get +from searx.enginelib.traits import EngineTraits + +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits # about about = { "website": 'https://duckduckgo.com/', "wikidata_id": 'Q12805', - "official_api_documentation": { - 'url': 'https://duckduckgo.com/api', - 'comment': 'but images are not supported', - }, "use_official_api": False, "require_api_key": False, "results": 'JSON (site requires js to get images)', @@ -32,70 +36,64 @@ paging = True safesearch = True send_accept_language_header = True -# search-url -images_url = 'https://duckduckgo.com/i.js?{query}&s={offset}&p={safesearch}&o=json&vqd={vqd}' -site_url = 'https://duckduckgo.com/?{query}&iar=images&iax=1&ia=images' +safesearch_cookies = {0: '-2', 1: None, 2: '1'} +safesearch_args = {0: '1', 1: None, 2: '1'} -# run query in site to get vqd number needed for requesting images -# TODO: find a way to get this number without an extra request (is it a hash of the query?) -def get_vqd(query, headers): - query_url = site_url.format(query=urlencode({'q': query})) - res = get(query_url, headers=headers) - content = res.text - if content.find('vqd=\'') == -1: - raise SearxEngineAPIException('Request failed') - vqd = content[content.find('vqd=\'') + 5 :] - vqd = vqd[: vqd.find('\'')] - return vqd - - -# do search-request def request(query, params): - # to avoid running actual external requests when testing - if 'is_test' not in params: - vqd = get_vqd(query, params['headers']) - else: - vqd = '12345' - offset = (params['pageno'] - 1) * 50 + eng_region = traits.get_region(params['searxng_locale'], traits.all_locale) + eng_lang = get_ddg_lang(traits, params['searxng_locale']) - safesearch = params['safesearch'] - 1 + args = { + 'q': query, + 'o': 'json', + # 'u': 'bing', + 'l': eng_region, + 'vqd': get_vqd(query, params["headers"]), + } - region_code = get_region_code(params['language'], lang_list=supported_languages) - if region_code: - params['url'] = images_url.format( - query=urlencode({'q': query, 'l': region_code}), offset=offset, safesearch=safesearch, vqd=vqd - ) - else: - params['url'] = images_url.format(query=urlencode({'q': query}), offset=offset, safesearch=safesearch, vqd=vqd) + if params['pageno'] > 1: + args['s'] = (params['pageno'] - 1) * 100 + + params['cookies']['ad'] = eng_lang # zh_CN + params['cookies']['ah'] = eng_region # "us-en,de-de" + params['cookies']['l'] = eng_region # "hk-tzh" + logger.debug("cookies: %s", params['cookies']) + + safe_search = safesearch_cookies.get(params['safesearch']) + if safe_search is not None: + params['cookies']['p'] = safe_search # "-2", "1" + safe_search = safesearch_args.get(params['safesearch']) + if safe_search is not None: + args['p'] = safe_search # "-1", "1" + + args = urlencode(args) + params['url'] = 'https://duckduckgo.com/i.js?{args}&f={f}'.format(args=args, f=',,,,,') + + params['headers']['Accept'] = 'application/json, text/javascript, */*; q=0.01' + params['headers']['Referer'] = 'https://duckduckgo.com/' + params['headers']['X-Requested-With'] = 'XMLHttpRequest' + logger.debug("headers: %s", params['headers']) return params -# get response from search-request def response(resp): results = [] + res_json = resp.json() - content = resp.text - res_json = loads(content) - - # parse results for result in res_json['results']: - title = result['title'] - url = result['url'] - thumbnail = result['thumbnail'] - image = result['image'] - - # append result results.append( { 'template': 'images.html', - 'title': title, + 'title': result['title'], 'content': '', - 'thumbnail_src': thumbnail, - 'img_src': image, - 'url': url, + 'thumbnail_src': result['thumbnail'], + 'img_src': result['image'], + 'url': result['url'], + 'img_format': '%s x %s' % (result['width'], result['height']), + 'source': result['source'], } ) diff --git a/searx/engines/duckduckgo_weather.py b/searx/engines/duckduckgo_weather.py index 0540cbcb5..4f0ce1b49 100644 --- a/searx/engines/duckduckgo_weather.py +++ b/searx/engines/duckduckgo_weather.py @@ -1,13 +1,29 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""DuckDuckGo Weather""" +""" +DuckDuckGo Weather +~~~~~~~~~~~~~~~~~~ +""" +from typing import TYPE_CHECKING from json import loads from urllib.parse import quote from datetime import datetime from flask_babel import gettext +from searx.engines.duckduckgo import fetch_traits # pylint: disable=unused-import +from searx.engines.duckduckgo import get_ddg_lang +from searx.enginelib.traits import EngineTraits + +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits + + about = { "website": 'https://duckduckgo.com/', "wikidata_id": 'Q12805', @@ -17,9 +33,11 @@ about = { "results": "JSON", } -categories = ["others"] +send_accept_language_header = True -url = "https://duckduckgo.com/js/spice/forecast/{query}/{lang}" +# engine dependent config +categories = ["others"] +URL = "https://duckduckgo.com/js/spice/forecast/{query}/{lang}" def generate_condition_table(condition): @@ -72,8 +90,17 @@ def generate_day_table(day): def request(query, params): - params["url"] = url.format(query=quote(query), lang=params['language'].split('-')[0]) + eng_region = traits.get_region(params['searxng_locale'], traits.all_locale) + eng_lang = get_ddg_lang(traits, params['searxng_locale']) + + # !ddw paris :es-AR --> {'ad': 'es_AR', 'ah': 'ar-es', 'l': 'ar-es'} + params['cookies']['ad'] = eng_lang + params['cookies']['ah'] = eng_region + params['cookies']['l'] = eng_region + logger.debug("cookies: %s", params['cookies']) + + params["url"] = URL.format(query=quote(query), lang=eng_lang.split('_')[0]) return params diff --git a/searx/engines/gentoo.py b/searx/engines/gentoo.py index 856c93710..f0cb6a794 100644 --- a/searx/engines/gentoo.py +++ b/searx/engines/gentoo.py @@ -25,6 +25,7 @@ base_url = 'https://wiki.gentoo.org' # xpath queries xpath_results = '//ul[@class="mw-search-results"]/li' xpath_link = './/div[@class="mw-search-result-heading"]/a' +xpath_content = './/div[@class="searchresult"]' # cut 'en' from 'en-US', 'de' from 'de-CH', and so on @@ -77,8 +78,6 @@ main_langs = { 'uk': 'Українська', 'zh': '简体中文', } -supported_languages = dict(lang_urls, **main_langs) - # do search-request def request(query, params): @@ -118,7 +117,8 @@ def response(resp): link = result.xpath(xpath_link)[0] href = urljoin(base_url, link.attrib.get('href')) title = extract_text(link) + content = extract_text(result.xpath(xpath_content)) - results.append({'url': href, 'title': title}) + results.append({'url': href, 'title': title, 'content': content}) return results diff --git a/searx/engines/google.py b/searx/engines/google.py index bdb351432..708068f3a 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -1,34 +1,39 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""This is the implementation of the google WEB engine. Some of this -implementations are shared by other engines: +"""This is the implementation of the Google WEB engine. Some of this +implementations (manly the :py:obj:`get_google_info`) are shared by other +engines: - :ref:`google images engine` - :ref:`google news engine` - :ref:`google videos engine` - -The google WEB engine itself has a special setup option: - -.. code:: yaml - - - name: google - ... - use_mobile_ui: false - -``use_mobile_ui``: (default: ``false``) - Enables to use *mobile endpoint* to bypass the google blocking (see - :issue:`159`). On the mobile UI of Google Search, the button :guilabel:`More - results` is not affected by Google rate limiting and we can still do requests - while actively blocked by the original Google search. By activate - ``use_mobile_ui`` this behavior is simulated by adding the parameter - ``async=use_ac:true,_fmt:pc`` to the :py:func:`request`. +- :ref:`google scholar engine` +- :ref:`google autocomplete` """ +from typing import TYPE_CHECKING + +import re from urllib.parse import urlencode from lxml import html -from searx.utils import match_language, extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex +import babel +import babel.core +import babel.languages + +from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex +from searx.locales import language_tag, region_tag, get_offical_locales +from searx import network from searx.exceptions import SearxEngineCaptchaException +from searx.enginelib.traits import EngineTraits + +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits + # about about = { @@ -45,64 +50,6 @@ categories = ['general', 'web'] paging = True time_range_support = True safesearch = True -send_accept_language_header = True -use_mobile_ui = False -supported_languages_url = 'https://www.google.com/preferences?#languages' - -# based on https://en.wikipedia.org/wiki/List_of_Google_domains and tests -google_domains = { - 'BG': 'google.bg', # Bulgaria - 'CZ': 'google.cz', # Czech Republic - 'DE': 'google.de', # Germany - 'DK': 'google.dk', # Denmark - 'AT': 'google.at', # Austria - 'CH': 'google.ch', # Switzerland - 'GR': 'google.gr', # Greece - 'AU': 'google.com.au', # Australia - 'CA': 'google.ca', # Canada - 'GB': 'google.co.uk', # United Kingdom - 'ID': 'google.co.id', # Indonesia - 'IE': 'google.ie', # Ireland - 'IN': 'google.co.in', # India - 'MY': 'google.com.my', # Malaysia - 'NZ': 'google.co.nz', # New Zealand - 'PH': 'google.com.ph', # Philippines - 'SG': 'google.com.sg', # Singapore - 'US': 'google.com', # United States (google.us) redirects to .com - 'ZA': 'google.co.za', # South Africa - 'AR': 'google.com.ar', # Argentina - 'CL': 'google.cl', # Chile - 'ES': 'google.es', # Spain - 'MX': 'google.com.mx', # Mexico - 'EE': 'google.ee', # Estonia - 'FI': 'google.fi', # Finland - 'BE': 'google.be', # Belgium - 'FR': 'google.fr', # France - 'IL': 'google.co.il', # Israel - 'HR': 'google.hr', # Croatia - 'HU': 'google.hu', # Hungary - 'IT': 'google.it', # Italy - 'JP': 'google.co.jp', # Japan - 'KR': 'google.co.kr', # South Korea - 'LT': 'google.lt', # Lithuania - 'LV': 'google.lv', # Latvia - 'NO': 'google.no', # Norway - 'NL': 'google.nl', # Netherlands - 'PL': 'google.pl', # Poland - 'BR': 'google.com.br', # Brazil - 'PT': 'google.pt', # Portugal - 'RO': 'google.ro', # Romania - 'RU': 'google.ru', # Russia - 'SK': 'google.sk', # Slovakia - 'SI': 'google.si', # Slovenia - 'SE': 'google.se', # Sweden - 'TH': 'google.co.th', # Thailand - 'TR': 'google.com.tr', # Turkey - 'UA': 'google.com.ua', # Ukraine - 'CN': 'google.com.hk', # There is no google.cn, we use .com.hk for zh-CN - 'HK': 'google.com.hk', # Hong Kong - 'TW': 'google.com.tw', # Taiwan -} time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'} @@ -112,50 +59,50 @@ filter_mapping = {0: 'off', 1: 'medium', 2: 'high'} # specific xpath variables # ------------------------ -results_xpath = './/div[@data-sokoban-container]' +results_xpath = './/div[contains(@jscontroller, "SC7lYd")]' title_xpath = './/a/h3[1]' href_xpath = './/a[h3]/@href' -content_xpath = './/div[@data-content-feature=1]' - -# google *sections* are no usual *results*, we ignore them -g_section_with_header = './g-section-with-header' - +content_xpath = './/div[@data-sncf]' # Suggestions are links placed in a *card-section*, we extract only the text # from the links not the links itself. suggestion_xpath = '//div[contains(@class, "EIaa9b")]//a' +# UI_ASYNC = 'use_ac:true,_fmt:html' # returns a HTTP 500 when user search for +# # celebrities like '!google natasha allegri' +# # or '!google chris evans' +UI_ASYNC = 'use_ac:true,_fmt:prog' +"""Format of the response from UI's async request.""" -def get_lang_info(params, lang_list, custom_aliases, supported_any_language): - """Composing various language properties for the google engines. + +def get_google_info(params, eng_traits): + """Composing various (language) properties for the google engines (:ref:`google + API`). This function is called by the various google engines (:ref:`google web engine`, :ref:`google images engine`, :ref:`google news engine` and :ref:`google videos engine`). - :param dict param: request parameters of the engine + :param dict param: Request parameters of the engine. At least + a ``searxng_locale`` key should be in the dictionary. - :param list lang_list: list of supported languages of the engine - :py:obj:`ENGINES_LANGUAGES[engine-name] ` - - :param dict lang_list: custom aliases for non standard language codes - (used when calling :py:func:`searx.utils.match_language`) - - :param bool supported_any_language: When a language is not specified, the - language interpretation is left up to Google to decide how the search - results should be delivered. This argument is ``True`` for the google - engine and ``False`` for the other engines (google-images, -news, - -scholar, -videos). + :param eng_traits: Engine's traits fetched from google preferences + (:py:obj:`searx.enginelib.traits.EngineTraits`) :rtype: dict :returns: Py-Dictionary with the key/value pairs: language: - Return value from :py:func:`searx.utils.match_language` + The language code that is used by google (e.g. ``lang_en`` or + ``lang_zh-TW``) country: - The country code (e.g. US, AT, CA, FR, DE ..) + The country code that is used by google (e.g. ``US`` or ``TW``) + + locale: + A instance of :py:obj:`babel.core.Locale` build from the + ``searxng_locale`` value. subdomain: Google subdomain :py:obj:`google_domains` that fits to the country @@ -165,52 +112,67 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language): Py-Dictionary with additional request arguments (can be passed to :py:func:`urllib.parse.urlencode`). + - ``hl`` parameter: specifies the interface language of user interface. + - ``lr`` parameter: restricts search results to documents written in + a particular language. + - ``cr`` parameter: restricts search results to documents + originating in a particular country. + - ``ie`` parameter: sets the character encoding scheme that should + be used to interpret the query string ('utf8'). + - ``oe`` parameter: sets the character encoding scheme that should + be used to decode the XML result ('utf8'). + headers: Py-Dictionary with additional HTTP headers (can be passed to request's headers) + + - ``Accept: '*/*`` + """ + ret_val = { 'language': None, 'country': None, 'subdomain': None, 'params': {}, 'headers': {}, + 'cookies': {}, + 'locale': None, } - # language ... + sxng_locale = params.get('searxng_locale', 'all') + try: + locale = babel.Locale.parse(sxng_locale, sep='-') + except babel.core.UnknownLocaleError: + locale = None - _lang = params['language'] - _any_language = _lang.lower() == 'all' - if _any_language: - _lang = 'en-US' - language = match_language(_lang, lang_list, custom_aliases) - ret_val['language'] = language + eng_lang = eng_traits.get_language(sxng_locale, 'lang_en') + lang_code = eng_lang.split('_')[-1] # lang_zh-TW --> zh-TW / lang_en --> en + country = eng_traits.get_region(sxng_locale, eng_traits.all_locale) - # country ... + # Test zh_hans & zh_hant --> in the topmost links in the result list of list + # TW and HK you should a find wiktionary.org zh_hant link. In the result + # list of zh-CN should not be no hant link instead you should find + # zh.m.wikipedia.org/zh somewhere in the top. - _l = _lang.split('-') - if len(_l) == 2: - country = _l[1] - else: - country = _l[0].upper() - if country == 'EN': - country = 'US' + # '!go 日 :zh-TW' --> https://zh.m.wiktionary.org/zh-hant/%E6%97%A5 + # '!go 日 :zh-CN' --> https://zh.m.wikipedia.org/zh/%E6%97%A5 + + ret_val['language'] = eng_lang ret_val['country'] = country - - # subdomain ... - - ret_val['subdomain'] = 'www.' + google_domains.get(country.upper(), 'google.com') - - # params & headers - - lang_country = '%s-%s' % (language, country) # (en-US, en-EN, de-DE, de-AU, fr-FR ..) + ret_val['locale'] = locale + ret_val['subdomain'] = eng_traits.custom['supported_domains'].get(country.upper(), 'www.google.com') # hl parameter: - # https://developers.google.com/custom-search/docs/xml_results#hlsp The - # Interface Language: + # The hl parameter specifies the interface language (host language) of + # your user interface. To improve the performance and the quality of your + # search results, you are strongly encouraged to set this parameter + # explicitly. + # https://developers.google.com/custom-search/docs/xml_results#hlsp + # The Interface Language: # https://developers.google.com/custom-search/docs/xml_results_appendices#interfaceLanguages - ret_val['params']['hl'] = lang_list.get(lang_country, language) + ret_val['params']['hl'] = lang_code # lr parameter: # The lr (language restrict) parameter restricts search results to @@ -218,22 +180,72 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language): # https://developers.google.com/custom-search/docs/xml_results#lrsp # Language Collection Values: # https://developers.google.com/custom-search/docs/xml_results_appendices#languageCollections + # + # To select 'all' languages an empty 'lr' value is used. + # + # Different to other google services, Google Schloar supports to select more + # than one language. The languages are seperated by a pipe '|' (logical OR). + # By example: &lr=lang_zh-TW%7Clang_de selects articles written in + # traditional chinese OR german language. - if _any_language and supported_any_language: + ret_val['params']['lr'] = eng_lang + if sxng_locale == 'all': + ret_val['params']['lr'] = '' - # interpretation is left up to Google (based on whoogle) - # - # - add parameter ``source=lnt`` - # - don't use parameter ``lr`` - # - don't add a ``Accept-Language`` HTTP header. + # cr parameter: + # The cr parameter restricts search results to documents originating in a + # particular country. + # https://developers.google.com/custom-search/docs/xml_results#crsp - ret_val['params']['source'] = 'lnt' + ret_val['params']['cr'] = 'country' + country + if sxng_locale == 'all': + ret_val['params']['cr'] = '' - else: + # gl parameter: (mandatory by Geeogle News) + # The gl parameter value is a two-letter country code. For WebSearch + # results, the gl parameter boosts search results whose country of origin + # matches the parameter value. See the Country Codes section for a list of + # valid values. + # Specifying a gl parameter value in WebSearch requests should improve the + # relevance of results. This is particularly true for international + # customers and, even more specifically, for customers in English-speaking + # countries other than the United States. + # https://developers.google.com/custom-search/docs/xml_results#glsp - # restricts search results to documents written in a particular - # language. - ret_val['params']['lr'] = "lang_" + lang_list.get(lang_country, language) + ret_val['params']['gl'] = country + + # ie parameter: + # The ie parameter sets the character encoding scheme that should be used + # to interpret the query string. The default ie value is latin1. + # https://developers.google.com/custom-search/docs/xml_results#iesp + + ret_val['params']['ie'] = 'utf8' + + # oe parameter: + # The oe parameter sets the character encoding scheme that should be used + # to decode the XML result. The default oe value is latin1. + # https://developers.google.com/custom-search/docs/xml_results#oesp + + ret_val['params']['oe'] = 'utf8' + + # num parameter: + # The num parameter identifies the number of search results to return. + # The default num value is 10, and the maximum value is 20. If you request + # more than 20 results, only 20 results will be returned. + # https://developers.google.com/custom-search/docs/xml_results#numsp + + # HINT: seems to have no effect (tested in google WEB & Images) + # ret_val['params']['num'] = 20 + + # HTTP headers + + ret_val['headers']['Accept'] = '*/*' + + # Cookies + + # - https://github.com/searxng/searxng/pull/1679#issuecomment-1235432746 + # - https://github.com/searxng/searxng/issues/1555 + ret_val['cookies']['CONSENT'] = "YES+" return ret_val @@ -245,33 +257,34 @@ def detect_google_sorry(resp): def request(query, params): """Google search request""" - + # pylint: disable=line-too-long offset = (params['pageno'] - 1) * 10 - - lang_info = get_lang_info(params, supported_languages, language_aliases, True) - - additional_parameters = {} - if use_mobile_ui: - additional_parameters = { - 'asearch': 'arc', - 'async': 'use_ac:true,_fmt:prog', - } + google_info = get_google_info(params, traits) # https://www.google.de/search?q=corona&hl=de&lr=lang_de&start=0&tbs=qdr%3Ad&safe=medium query_url = ( 'https://' - + lang_info['subdomain'] + + google_info['subdomain'] + '/search' + "?" + urlencode( { 'q': query, - **lang_info['params'], - 'ie': "utf8", - 'oe': "utf8", - 'start': offset, + **google_info['params'], 'filter': '0', - **additional_parameters, + 'start': offset, + # 'vet': '12ahUKEwik3ZbIzfn7AhXMX_EDHbUDBh0QxK8CegQIARAC..i', + # 'ved': '2ahUKEwik3ZbIzfn7AhXMX_EDHbUDBh0Q_skCegQIARAG', + # 'cs' : 1, + # 'sa': 'N', + # 'yv': 3, + # 'prmd': 'vin', + # 'ei': 'GASaY6TxOcy_xc8PtYeY6AE', + # 'sa': 'N', + # 'sstk': 'AcOHfVkD7sWCSAheZi-0tx_09XDO55gTWY0JNq3_V26cNN-c8lfD45aZYPI8s_Bqp8s57AHz5pxchDtAGCA_cikAWSjy9kw3kgg' + # formally known as use_mobile_ui + 'asearch': 'arc', + 'async': UI_ASYNC, } ) ) @@ -282,25 +295,38 @@ def request(query, params): query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]}) params['url'] = query_url - params['cookies']['CONSENT'] = "YES+" - params['headers'].update(lang_info['headers']) - if use_mobile_ui: - params['headers']['Accept'] = '*/*' - else: - params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' - + params['cookies'] = google_info['cookies'] + params['headers'].update(google_info['headers']) return params +# =26;[3,"dimg_ZNMiZPCqE4apxc8P3a2tuAQ_137"]a87; +# ...6T+9Nl4cnD+gr9OK8I56/tX3l86nWYw//2Q==26; +RE_DATA_IMAGE = re.compile(r'"(dimg_[^"]*)"[^;]*;(data:image[^;]*;[^;]*);') + + +def _parse_data_images(dom): + data_image_map = {} + for img_id, data_image in RE_DATA_IMAGE.findall(dom.text_content()): + end_pos = data_image.rfind('=') + if end_pos > 0: + data_image = data_image[: end_pos + 1] + data_image_map[img_id] = data_image + logger.debug('data:image objects --> %s', list(data_image_map.keys())) + return data_image_map + + def response(resp): """Get response from google's search request""" - + # pylint: disable=too-many-branches, too-many-statements detect_google_sorry(resp) results = [] # convert the text to dom dom = html.fromstring(resp.text) + data_image_map = _parse_data_images(dom) + # results --> answer answer_list = eval_xpath(dom, '//div[contains(@class, "LGOjhe")]') if answer_list: @@ -309,25 +335,9 @@ def response(resp): else: logger.debug("did not find 'answer'") - # results --> number_of_results - if not use_mobile_ui: - try: - _txt = eval_xpath_getindex(dom, '//div[@id="result-stats"]//text()', 0) - _digit = ''.join([n for n in _txt if n.isdigit()]) - number_of_results = int(_digit) - results.append({'number_of_results': number_of_results}) - except Exception as e: # pylint: disable=broad-except - logger.debug("did not 'number_of_results'") - logger.error(e, exc_info=True) - # parse results - for result in eval_xpath_list(dom, results_xpath): - - # google *sections* - if extract_text(eval_xpath(result, g_section_with_header)): - logger.debug("ignoring ") - continue + for result in eval_xpath_list(dom, results_xpath): # pylint: disable=too-many-nested-blocks try: title_tag = eval_xpath_getindex(result, title_xpath, 0, default=None) @@ -336,16 +346,30 @@ def response(resp): logger.debug('ignoring item from the result_xpath list: missing title') continue title = extract_text(title_tag) + url = eval_xpath_getindex(result, href_xpath, 0, None) if url is None: + logger.debug('ignoring item from the result_xpath list: missing url of title "%s"', title) continue - content = extract_text(eval_xpath_getindex(result, content_xpath, 0, default=None), allow_none=True) - if content is None: + + content_nodes = eval_xpath(result, content_xpath) + content = extract_text(content_nodes) + + if not content: logger.debug('ignoring item from the result_xpath list: missing content of title "%s"', title) continue - logger.debug('add link to results: %s', title) - results.append({'url': url, 'title': title, 'content': content}) + img_src = content_nodes[0].xpath('.//img/@src') + if img_src: + img_src = img_src[0] + if img_src.startswith('data:image'): + img_id = content_nodes[0].xpath('.//img/@id') + if img_id: + img_src = data_image_map.get(img_id[0]) + else: + img_src = None + + results.append({'url': url, 'title': title, 'content': content, 'img_src': img_src}) except Exception as e: # pylint: disable=broad-except logger.error(e, exc_info=True) @@ -361,15 +385,107 @@ def response(resp): # get supported languages from their site -def _fetch_supported_languages(resp): - ret_val = {} + + +skip_countries = [ + # official language of google-country not in google-languages + 'AL', # Albanien (sq) + 'AZ', # Aserbaidschan (az) + 'BD', # Bangladesch (bn) + 'BN', # Brunei Darussalam (ms) + 'BT', # Bhutan (dz) + 'ET', # Äthiopien (am) + 'GE', # Georgien (ka, os) + 'GL', # Grönland (kl) + 'KH', # Kambodscha (km) + 'LA', # Laos (lo) + 'LK', # Sri Lanka (si, ta) + 'ME', # Montenegro (sr) + 'MK', # Nordmazedonien (mk, sq) + 'MM', # Myanmar (my) + 'MN', # Mongolei (mn) + 'MV', # Malediven (dv) // dv_MV is unknown by babel + 'MY', # Malaysia (ms) + 'NP', # Nepal (ne) + 'TJ', # Tadschikistan (tg) + 'TM', # Turkmenistan (tk) + 'UZ', # Usbekistan (uz) +] + + +def fetch_traits(engine_traits: EngineTraits, add_domains: bool = True): + """Fetch languages from Google.""" + # pylint: disable=import-outside-toplevel, too-many-branches + + engine_traits.custom['supported_domains'] = {} + + resp = network.get('https://www.google.com/preferences') + if not resp.ok: + raise RuntimeError("Response from Google's preferences is not OK.") + dom = html.fromstring(resp.text) - radio_buttons = eval_xpath_list(dom, '//*[@id="langSec"]//input[@name="lr"]') + # supported language codes - for x in radio_buttons: - name = x.get("data-name") - code = x.get("value").split('_')[-1] - ret_val[code] = {"name": name} + lang_map = {'no': 'nb'} + for x in eval_xpath_list(dom, '//*[@id="langSec"]//input[@name="lr"]'): - return ret_val + eng_lang = x.get("value").split('_')[-1] + try: + locale = babel.Locale.parse(lang_map.get(eng_lang, eng_lang), sep='-') + except babel.UnknownLocaleError: + print("ERROR: %s -> %s is unknown by babel" % (x.get("data-name"), eng_lang)) + continue + sxng_lang = language_tag(locale) + + conflict = engine_traits.languages.get(sxng_lang) + if conflict: + if conflict != eng_lang: + print("CONFLICT: babel %s --> %s, %s" % (sxng_lang, conflict, eng_lang)) + continue + engine_traits.languages[sxng_lang] = 'lang_' + eng_lang + + # alias languages + engine_traits.languages['zh'] = 'lang_zh-CN' + + # supported region codes + + for x in eval_xpath_list(dom, '//*[@name="region"]/..//input[@name="region"]'): + eng_country = x.get("value") + + if eng_country in skip_countries: + continue + if eng_country == 'ZZ': + engine_traits.all_locale = 'ZZ' + continue + + sxng_locales = get_offical_locales(eng_country, engine_traits.languages.keys(), regional=True) + + if not sxng_locales: + print("ERROR: can't map from google country %s (%s) to a babel region." % (x.get('data-name'), eng_country)) + continue + + for sxng_locale in sxng_locales: + engine_traits.regions[region_tag(sxng_locale)] = eng_country + + # alias regions + engine_traits.regions['zh-CN'] = 'HK' + + # supported domains + + if add_domains: + resp = network.get('https://www.google.com/supported_domains') + if not resp.ok: + raise RuntimeError("Response from https://www.google.com/supported_domains is not OK.") + + for domain in resp.text.split(): + domain = domain.strip() + if not domain or domain in [ + '.google.com', + ]: + continue + region = domain.split('.')[-1].upper() + engine_traits.custom['supported_domains'][region] = 'www' + domain + if region == 'HK': + # There is no google.cn, we use .com.hk for zh-CN + engine_traits.custom['supported_domains']['CN'] = 'www' + domain diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py index 528f8d21d..e6445b1c4 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -1,31 +1,38 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""This is the implementation of the google images engine using the google -internal API used the Google Go Android app. +"""This is the implementation of the Google Images engine using the internal +Google API used by the Google Go Android app. This internal API offer results in -- JSON (_fmt:json) -- Protobuf (_fmt:pb) -- Protobuf compressed? (_fmt:pc) -- HTML (_fmt:html) -- Protobuf encoded in JSON (_fmt:jspb). +- JSON (``_fmt:json``) +- Protobuf_ (``_fmt:pb``) +- Protobuf_ compressed? (``_fmt:pc``) +- HTML (``_fmt:html``) +- Protobuf_ encoded in JSON (``_fmt:jspb``). +.. _Protobuf: https://en.wikipedia.org/wiki/Protocol_Buffers """ +from typing import TYPE_CHECKING + from urllib.parse import urlencode from json import loads +from searx.engines.google import fetch_traits # pylint: disable=unused-import from searx.engines.google import ( - get_lang_info, + get_google_info, time_range_dict, detect_google_sorry, ) -# pylint: disable=unused-import -from searx.engines.google import supported_languages_url, _fetch_supported_languages +if TYPE_CHECKING: + import logging + from searx.enginelib.traits import EngineTraits + + logger: logging.Logger + traits: EngineTraits -# pylint: enable=unused-import # about about = { @@ -40,7 +47,6 @@ about = { # engine dependent config categories = ['images', 'web'] paging = True -use_locale_domain = True time_range_support = True safesearch = True send_accept_language_header = True @@ -51,20 +57,18 @@ filter_mapping = {0: 'images', 1: 'active', 2: 'active'} def request(query, params): """Google-Image search request""" - lang_info = get_lang_info(params, supported_languages, language_aliases, False) + google_info = get_google_info(params, traits) query_url = ( 'https://' - + lang_info['subdomain'] + + google_info['subdomain'] + '/search' + "?" + urlencode( { 'q': query, 'tbm': "isch", - **lang_info['params'], - 'ie': "utf8", - 'oe': "utf8", + **google_info['params'], 'asearch': 'isch', 'async': '_fmt:json,p:1,ijn:' + str(params['pageno']), } @@ -77,9 +81,8 @@ def request(query, params): query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]}) params['url'] = query_url - params['headers'].update(lang_info['headers']) - params['headers']['User-Agent'] = 'NSTN/3.60.474802233.release Dalvik/2.1.0 (Linux; U; Android 12; US) gzip' - params['headers']['Accept'] = '*/*' + params['cookies'] = google_info['cookies'] + params['headers'].update(google_info['headers']) return params @@ -111,7 +114,11 @@ def response(resp): copyright_notice = item["result"].get('iptc', {}).get('copyright_notice') if copyright_notice: - result_item['source'] += ' / ' + copyright_notice + result_item['source'] += ' | ' + copyright_notice + + freshness_date = item["result"].get("freshness_date") + if freshness_date: + result_item['source'] += ' | ' + freshness_date file_size = item.get('gsa', {}).get('file_size') if file_size: diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py index 1ada2d64d..ae55ca9cb 100644 --- a/searx/engines/google_news.py +++ b/searx/engines/google_news.py @@ -1,24 +1,40 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""This is the implementation of the google news engine. The google news API -ignores some parameters from the common :ref:`google API`: +"""This is the implementation of the Google News engine. -- num_ : the number of search results is ignored +Google News has a different region handling compared to Google WEB. + +- the ``ceid`` argument has to be set (:py:obj:`ceid_list`) +- the hl_ argument has to be set correctly (and different to Google WEB) +- the gl_ argument is mandatory + +If one of this argument is not set correctly, the request is redirected to +CONSENT dialog:: + + https://consent.google.com/m?continue= + +The google news API ignores some parameters from the common :ref:`google API`: + +- num_ : the number of search results is ignored / there is no paging all + results for a query term are in the first response. - save_ : is ignored / Google-News results are always *SafeSearch* +.. _hl: https://developers.google.com/custom-search/docs/xml_results#hlsp +.. _gl: https://developers.google.com/custom-search/docs/xml_results#glsp .. _num: https://developers.google.com/custom-search/docs/xml_results#numsp .. _save: https://developers.google.com/custom-search/docs/xml_results#safesp - """ -# pylint: disable=invalid-name +from typing import TYPE_CHECKING import binascii import re from urllib.parse import urlencode from base64 import b64decode from lxml import html +import babel +from searx import locales from searx.utils import ( eval_xpath, eval_xpath_list, @@ -26,18 +42,19 @@ from searx.utils import ( extract_text, ) -# pylint: disable=unused-import +from searx.engines.google import fetch_traits as _fetch_traits # pylint: disable=unused-import from searx.engines.google import ( - supported_languages_url, - _fetch_supported_languages, -) - -# pylint: enable=unused-import - -from searx.engines.google import ( - get_lang_info, + get_google_info, detect_google_sorry, ) +from searx.enginelib.traits import EngineTraits + +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits # about about = { @@ -49,70 +66,77 @@ about = { "results": 'HTML', } -# compared to other google engines google-news has a different time range -# support. The time range is included in the search term. -time_range_dict = { - 'day': 'when:1d', - 'week': 'when:7d', - 'month': 'when:1m', - 'year': 'when:1y', -} - # engine dependent config - categories = ['news'] paging = False -use_locale_domain = True -time_range_support = True +time_range_support = False # Google-News results are always *SafeSearch*. Option 'safesearch' is set to # False here, otherwise checker will report safesearch-errors:: # # safesearch : results are identitical for safesearch=0 and safesearch=2 -safesearch = False -send_accept_language_header = True +safesearch = True +# send_accept_language_header = True def request(query, params): """Google-News search request""" - lang_info = get_lang_info(params, supported_languages, language_aliases, False) + sxng_locale = params.get('searxng_locale', 'en-US') + ceid = locales.get_engine_locale(sxng_locale, traits.custom['ceid'], default='US:en') + google_info = get_google_info(params, traits) + google_info['subdomain'] = 'news.google.com' # google news has only one domain - # google news has only one domain - lang_info['subdomain'] = 'news.google.com' + ceid_region, ceid_lang = ceid.split(':') + ceid_lang, ceid_suffix = ( + ceid_lang.split('-') + + [ + None, + ] + )[:2] - ceid = "%s:%s" % (lang_info['country'], lang_info['language']) + google_info['params']['hl'] = ceid_lang - # google news redirects en to en-US - if lang_info['params']['hl'] == 'en': - lang_info['params']['hl'] = 'en-US' + if ceid_suffix and ceid_suffix not in ['Hans', 'Hant']: - # Very special to google-news compared to other google engines, the time - # range is included in the search term. - if params['time_range']: - query += ' ' + time_range_dict[params['time_range']] + if ceid_region.lower() == ceid_lang: + google_info['params']['hl'] = ceid_lang + '-' + ceid_region + else: + google_info['params']['hl'] = ceid_lang + '-' + ceid_suffix + + elif ceid_region.lower() != ceid_lang: + + if ceid_region in ['AT', 'BE', 'CH', 'IL', 'SA', 'IN', 'BD', 'PT']: + google_info['params']['hl'] = ceid_lang + else: + google_info['params']['hl'] = ceid_lang + '-' + ceid_region + + google_info['params']['lr'] = 'lang_' + ceid_lang.split('-')[0] + google_info['params']['gl'] = ceid_region query_url = ( 'https://' - + lang_info['subdomain'] - + '/search' - + "?" - + urlencode({'q': query, **lang_info['params'], 'ie': "utf8", 'oe': "utf8", 'gl': lang_info['country']}) + + google_info['subdomain'] + + "/search?" + + urlencode( + { + 'q': query, + **google_info['params'], + } + ) + # ceid includes a ':' character which must not be urlencoded + ('&ceid=%s' % ceid) - ) # ceid includes a ':' character which must not be urlencoded + ) + params['url'] = query_url - - params['cookies']['CONSENT'] = "YES+" - params['headers'].update(lang_info['headers']) - params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' - + params['cookies'] = google_info['cookies'] + params['headers'].update(google_info['headers']) return params def response(resp): """Get response from google's search request""" results = [] - detect_google_sorry(resp) # convert the text to dom @@ -152,8 +176,8 @@ def response(resp): # The pub_date is mostly a string like 'yesertday', not a real # timezone date or time. Therefore we can't use publishedDate. - pub_date = extract_text(eval_xpath(result, './article/div[1]/div[1]/time')) - pub_origin = extract_text(eval_xpath(result, './article/div[1]/div[1]/a')) + pub_date = extract_text(eval_xpath(result, './article//time')) + pub_origin = extract_text(eval_xpath(result, './article//a[@data-n-tid]')) content = ' / '.join([x for x in [pub_origin, pub_date] if x]) @@ -174,3 +198,127 @@ def response(resp): # return results return results + + +ceid_list = [ + 'AE:ar', + 'AR:es-419', + 'AT:de', + 'AU:en', + 'BD:bn', + 'BE:fr', + 'BE:nl', + 'BG:bg', + 'BR:pt-419', + 'BW:en', + 'CA:en', + 'CA:fr', + 'CH:de', + 'CH:fr', + 'CL:es-419', + 'CN:zh-Hans', + 'CO:es-419', + 'CU:es-419', + 'CZ:cs', + 'DE:de', + 'EG:ar', + 'ES:es', + 'ET:en', + 'FR:fr', + 'GB:en', + 'GH:en', + 'GR:el', + 'HK:zh-Hant', + 'HU:hu', + 'ID:en', + 'ID:id', + 'IE:en', + 'IL:en', + 'IL:he', + 'IN:bn', + 'IN:en', + 'IN:hi', + 'IN:ml', + 'IN:mr', + 'IN:ta', + 'IN:te', + 'IT:it', + 'JP:ja', + 'KE:en', + 'KR:ko', + 'LB:ar', + 'LT:lt', + 'LV:en', + 'LV:lv', + 'MA:fr', + 'MX:es-419', + 'MY:en', + 'NA:en', + 'NG:en', + 'NL:nl', + 'NO:no', + 'NZ:en', + 'PE:es-419', + 'PH:en', + 'PK:en', + 'PL:pl', + 'PT:pt-150', + 'RO:ro', + 'RS:sr', + 'RU:ru', + 'SA:ar', + 'SE:sv', + 'SG:en', + 'SI:sl', + 'SK:sk', + 'SN:fr', + 'TH:th', + 'TR:tr', + 'TW:zh-Hant', + 'TZ:en', + 'UA:ru', + 'UA:uk', + 'UG:en', + 'US:en', + 'US:es-419', + 'VE:es-419', + 'VN:vi', + 'ZA:en', + 'ZW:en', +] +"""List of region/language combinations supported by Google News. Values of the +``ceid`` argument of the Google News REST API.""" + + +_skip_values = [ + 'ET:en', # english (ethiopia) + 'ID:en', # english (indonesia) + 'LV:en', # english (latvia) +] + +_ceid_locale_map = {'NO:no': 'nb-NO'} + + +def fetch_traits(engine_traits: EngineTraits): + _fetch_traits(engine_traits, add_domains=False) + + engine_traits.custom['ceid'] = {} + + for ceid in ceid_list: + if ceid in _skip_values: + continue + + region, lang = ceid.split(':') + x = lang.split('-') + if len(x) > 1: + if x[1] not in ['Hant', 'Hans']: + lang = x[0] + + sxng_locale = _ceid_locale_map.get(ceid, lang + '-' + region) + try: + locale = babel.Locale.parse(sxng_locale, sep='-') + except babel.UnknownLocaleError: + print("ERROR: %s -> %s is unknown by babel" % (ceid, sxng_locale)) + continue + + engine_traits.custom['ceid'][locales.region_tag(locale)] = ceid diff --git a/searx/engines/google_scholar.py b/searx/engines/google_scholar.py index c07cd4cea..6f33d1e1a 100644 --- a/searx/engines/google_scholar.py +++ b/searx/engines/google_scholar.py @@ -1,19 +1,18 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""Google (Scholar) +"""This is the implementation of the Google Scholar engine. -For detailed description of the *REST-full* API see: `Query Parameter -Definitions`_. - -.. _Query Parameter Definitions: - https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions +Compared to other Google services the Scholar engine has a simple GET REST-API +and there does not exists `async` API. Even though the API slightly vintage we +can make use of the :ref:`google API` to assemble the arguments of the GET +request. """ -# pylint: disable=invalid-name +from typing import TYPE_CHECKING +from typing import Optional from urllib.parse import urlencode from datetime import datetime -from typing import Optional from lxml import html from searx.utils import ( @@ -23,19 +22,21 @@ from searx.utils import ( extract_text, ) +from searx.exceptions import SearxEngineCaptchaException + +from searx.engines.google import fetch_traits # pylint: disable=unused-import from searx.engines.google import ( - get_lang_info, + get_google_info, time_range_dict, - detect_google_sorry, ) +from searx.enginelib.traits import EngineTraits -# pylint: disable=unused-import -from searx.engines.google import ( - supported_languages_url, - _fetch_supported_languages, -) +if TYPE_CHECKING: + import logging -# pylint: enable=unused-import + logger: logging.Logger + +traits: EngineTraits # about about = { @@ -51,53 +52,62 @@ about = { categories = ['science', 'scientific publications'] paging = True language_support = True -use_locale_domain = True time_range_support = True safesearch = False send_accept_language_header = True -def time_range_url(params): - """Returns a URL query component for a google-Scholar time range based on - ``params['time_range']``. Google-Scholar does only support ranges in years. - To have any effect, all the Searx ranges (*day*, *week*, *month*, *year*) - are mapped to *year*. If no range is set, an empty string is returned. - Example:: +def time_range_args(params): + """Returns a dictionary with a time range arguments based on + ``params['time_range']``. + + Google Scholar supports a detailed search by year. Searching by *last + month* or *last week* (as offered by SearXNG) is uncommon for scientific + publications and is not supported by Google Scholar. + + To limit the result list when the users selects a range, all the SearXNG + ranges (*day*, *week*, *month*, *year*) are mapped to *year*. If no range + is set an empty dictionary of arguments is returned. Example; when + user selects a time range (current year minus one in 2022): + + .. code:: python + + { 'as_ylo' : 2021 } - &as_ylo=2019 """ - # as_ylo=2016&as_yhi=2019 - ret_val = '' + ret_val = {} if params['time_range'] in time_range_dict: - ret_val = urlencode({'as_ylo': datetime.now().year - 1}) - return '&' + ret_val + ret_val['as_ylo'] = datetime.now().year - 1 + return ret_val + + +def detect_google_captcha(dom): + """In case of CAPTCHA Google Scholar open its own *not a Robot* dialog and is + not redirected to ``sorry.google.com``. + """ + if eval_xpath(dom, "//form[@id='gs_captcha_f']"): + raise SearxEngineCaptchaException() def request(query, params): """Google-Scholar search request""" - offset = (params['pageno'] - 1) * 10 - lang_info = get_lang_info(params, supported_languages, language_aliases, False) - + google_info = get_google_info(params, traits) # subdomain is: scholar.google.xy - lang_info['subdomain'] = lang_info['subdomain'].replace("www.", "scholar.") + google_info['subdomain'] = google_info['subdomain'].replace("www.", "scholar.") - query_url = ( - 'https://' - + lang_info['subdomain'] - + '/scholar' - + "?" - + urlencode({'q': query, **lang_info['params'], 'ie': "utf8", 'oe': "utf8", 'start': offset}) - ) + args = { + 'q': query, + **google_info['params'], + 'start': (params['pageno'] - 1) * 10, + 'as_sdt': '2007', # include patents / to disable set '0,5' + 'as_vis': '0', # include citations / to disable set '1' + } + args.update(time_range_args(params)) - query_url += time_range_url(params) - params['url'] = query_url - - params['cookies']['CONSENT'] = "YES+" - params['headers'].update(lang_info['headers']) - params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' - - # params['google_subdomain'] = subdomain + params['url'] = 'https://' + google_info['subdomain'] + '/scholar?' + urlencode(args) + params['cookies'] = google_info['cookies'] + params['headers'].update(google_info['headers']) return params @@ -138,19 +148,15 @@ def parse_gs_a(text: Optional[str]): def response(resp): # pylint: disable=too-many-locals - """Get response from google's search request""" + """Parse response from Google Scholar""" results = [] - detect_google_sorry(resp) - - # which subdomain ? - # subdomain = resp.search_params.get('google_subdomain') - # convert the text to dom dom = html.fromstring(resp.text) + detect_google_captcha(dom) # parse results - for result in eval_xpath_list(dom, '//div[@data-cid]'): + for result in eval_xpath_list(dom, '//div[@data-rp]'): title = extract_text(eval_xpath(result, './/h3[1]//a')) @@ -158,7 +164,7 @@ def response(resp): # pylint: disable=too-many-locals # this is a [ZITATION] block continue - pub_type = extract_text(eval_xpath(result, './/span[@class="gs_ct1"]')) + pub_type = extract_text(eval_xpath(result, './/span[@class="gs_ctg2"]')) if pub_type: pub_type = pub_type[1:-1].lower() diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py index fc574bd48..985189df5 100644 --- a/searx/engines/google_videos.py +++ b/searx/engines/google_videos.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""This is the implementation of the google videos engine. +"""This is the implementation of the Google Videos engine. .. admonition:: Content-Security-Policy (CSP) @@ -14,9 +14,8 @@ """ -# pylint: disable=invalid-name +from typing import TYPE_CHECKING -import re from urllib.parse import urlencode from lxml import html @@ -27,20 +26,22 @@ from searx.utils import ( extract_text, ) +from searx.engines.google import fetch_traits # pylint: disable=unused-import from searx.engines.google import ( - get_lang_info, + get_google_info, time_range_dict, filter_mapping, - g_section_with_header, - title_xpath, suggestion_xpath, detect_google_sorry, ) +from searx.enginelib.traits import EngineTraits -# pylint: disable=unused-import -from searx.engines.google import supported_languages_url, _fetch_supported_languages +if TYPE_CHECKING: + import logging -# pylint: enable=unused-import + logger: logging.Logger + +traits: EngineTraits # about about = { @@ -55,70 +56,32 @@ about = { # engine dependent config categories = ['videos', 'web'] -paging = False +paging = True language_support = True -use_locale_domain = True time_range_support = True safesearch = True -send_accept_language_header = True - -RE_CACHE = {} - - -def _re(regexpr): - """returns compiled regular expression""" - RE_CACHE[regexpr] = RE_CACHE.get(regexpr, re.compile(regexpr)) - return RE_CACHE[regexpr] - - -def scrap_out_thumbs_src(dom): - ret_val = {} - thumb_name = 'dimg_' - for script in eval_xpath_list(dom, '//script[contains(., "google.ldi={")]'): - _script = script.text - # "dimg_35":"https://i.ytimg.c....", - _dimurl = _re("s='([^']*)").findall(_script) - for k, v in _re('(' + thumb_name + '[0-9]*)":"(http[^"]*)').findall(_script): - v = v.replace(r'\u003d', '=') - v = v.replace(r'\u0026', '&') - ret_val[k] = v - logger.debug("found %s imgdata for: %s", thumb_name, ret_val.keys()) - return ret_val - - -def scrap_out_thumbs(dom): - """Scrap out thumbnail data from ')] q_initial_props = loads(text) q_locales = q_initial_props.get('locales') - q_valid_locales = [] + eng_tag_list = set() for country, v in q_locales.items(): for lang in v['langs']: @@ -261,25 +267,18 @@ def _fetch_supported_languages(resp): # qwant-news does not support all locales from qwant-web: continue - q_valid_locales.append(_locale) + eng_tag_list.add(_locale) - supported_languages = {} - - for q_locale in q_valid_locales: + for eng_tag in eng_tag_list: try: - locale = babel.Locale.parse(q_locale, sep='_') - except babel.core.UnknownLocaleError: - print("ERROR: can't determine babel locale of quant's locale %s" % q_locale) + sxng_tag = region_tag(babel.Locale.parse(eng_tag, sep='_')) + except babel.UnknownLocaleError: + print("ERROR: can't determine babel locale of quant's locale %s" % eng_tag) continue - # note: supported_languages (dict) - # - # dict's key is a string build up from a babel.Locale object / the - # notation 'xx-XX' (and 'xx') conforms to SearXNG's locale (and - # language) notation and dict's values are the locale strings used by - # the engine. - - searxng_locale = locale.language + '-' + locale.territory # --> params['language'] - supported_languages[searxng_locale] = q_locale - - return supported_languages + conflict = engine_traits.regions.get(sxng_tag) + if conflict: + if conflict != eng_tag: + print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag)) + continue + engine_traits.regions[sxng_tag] = eng_tag diff --git a/searx/engines/sepiasearch.py b/searx/engines/sepiasearch.py index 9c45d6c43..72157b253 100644 --- a/searx/engines/sepiasearch.py +++ b/searx/engines/sepiasearch.py @@ -1,70 +1,80 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -""" - SepiaSearch (Videos) +# lint: pylint +"""SepiaSearch uses the same languages as :py:obj:`Peertube +` and the response is identical to the response from the +peertube engines. + """ -from json import loads -from dateutil import parser, relativedelta +from typing import TYPE_CHECKING + from urllib.parse import urlencode from datetime import datetime -# about +from searx.engines.peertube import fetch_traits # pylint: disable=unused-import +from searx.engines.peertube import ( + # pylint: disable=unused-import + video_response, + safesearch_table, + time_range_table, +) +from searx.enginelib.traits import EngineTraits + +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits + about = { + # pylint: disable=line-too-long "website": 'https://sepiasearch.org', "wikidata_id": None, - "official_api_documentation": "https://framagit.org/framasoft/peertube/search-index/-/tree/master/server/controllers/api", # NOQA + "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos', "use_official_api": True, "require_api_key": False, "results": 'JSON', } +# engine dependent config categories = ['videos'] paging = True + +base_url = 'https://sepiasearch.org' + time_range_support = True safesearch = True -supported_languages = [ - # fmt: off - 'en', 'fr', 'ja', 'eu', 'ca', 'cs', 'eo', 'el', - 'de', 'it', 'nl', 'es', 'oc', 'gd', 'zh', 'pt', - 'sv', 'pl', 'fi', 'ru' - # fmt: on -] -base_url = 'https://sepiasearch.org/api/v1/search/videos' - -safesearch_table = {0: 'both', 1: 'false', 2: 'false'} - -time_range_table = { - 'day': relativedelta.relativedelta(), - 'week': relativedelta.relativedelta(weeks=-1), - 'month': relativedelta.relativedelta(months=-1), - 'year': relativedelta.relativedelta(years=-1), -} - - -def minute_to_hm(minute): - if isinstance(minute, int): - return "%d:%02d" % (divmod(minute, 60)) - return None def request(query, params): + """Assemble request for the SepiaSearch API""" + + if not query: + return False + + # eng_region = traits.get_region(params['searxng_locale'], 'en_US') + eng_lang = traits.get_language(params['searxng_locale'], None) + params['url'] = ( - base_url - + '?' + base_url.rstrip("/") + + "/api/v1/search/videos?" + urlencode( { 'search': query, 'start': (params['pageno'] - 1) * 10, 'count': 10, - 'sort': '-match', + # -createdAt: sort by date ascending / createdAt: date descending + 'sort': '-match', # sort by *match descending* 'nsfw': safesearch_table[params['safesearch']], } ) ) - language = params['language'].split('-')[0] - if language in supported_languages: - params['url'] += '&languageOneOf[]=' + language + if eng_lang is not None: + params['url'] += '&languageOneOf[]=' + eng_lang + params['url'] += '&boostLanguages[]=' + eng_lang + if params['time_range'] in time_range_table: time = datetime.now().date() + time_range_table[params['time_range']] params['url'] += '&startDate=' + time.isoformat() @@ -73,34 +83,4 @@ def request(query, params): def response(resp): - results = [] - - search_results = loads(resp.text) - - if 'data' not in search_results: - return [] - - for result in search_results['data']: - title = result['name'] - content = result['description'] - thumbnail = result['thumbnailUrl'] - publishedDate = parser.parse(result['publishedAt']) - author = result.get('account', {}).get('displayName') - length = minute_to_hm(result.get('duration')) - url = result['url'] - - results.append( - { - 'url': url, - 'title': title, - 'content': content, - 'author': author, - 'length': length, - 'template': 'videos.html', - 'publishedDate': publishedDate, - 'iframe_src': result.get('embedUrl'), - 'thumbnail': thumbnail, - } - ) - - return results + return video_response(resp) diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py index f857f7b6d..2813d0bf3 100644 --- a/searx/engines/startpage.py +++ b/searx/engines/startpage.py @@ -1,28 +1,108 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""Startpage (Web) +"""Startpage's language & region selectors are a mess .. + +.. _startpage regions: + +Startpage regions +================= + +In the list of regions there are tags we need to map to common region tags:: + + pt-BR_BR --> pt_BR + zh-CN_CN --> zh_Hans_CN + zh-TW_TW --> zh_Hant_TW + zh-TW_HK --> zh_Hant_HK + en-GB_GB --> en_GB + +and there is at least one tag with a three letter language tag (ISO 639-2):: + + fil_PH --> fil_PH + +The locale code ``no_NO`` from Startpage does not exists and is mapped to +``nb-NO``:: + + babel.core.UnknownLocaleError: unknown locale 'no_NO' + +For reference see languages-subtag at iana; ``no`` is the macrolanguage [1]_ and +W3C recommends subtag over macrolanguage [2]_. + +.. [1] `iana: language-subtag-registry + `_ :: + + type: language + Subtag: nb + Description: Norwegian Bokmål + Added: 2005-10-16 + Suppress-Script: Latn + Macrolanguage: no + +.. [2] + Use macrolanguages with care. Some language subtags have a Scope field set to + macrolanguage, i.e. this primary language subtag encompasses a number of more + specific primary language subtags in the registry. ... As we recommended for + the collection subtags mentioned above, in most cases you should try to use + the more specific subtags ... `W3: The primary language subtag + `_ + +.. _startpage languages: + +Startpage languages +=================== + +:py:obj:`send_accept_language_header`: + The displayed name in Startpage's settings page depend on the location of the + IP when ``Accept-Language`` HTTP header is unset. In :py:obj:`fetch_traits` + we use:: + + 'Accept-Language': "en-US,en;q=0.5", + .. + + to get uniform names independent from the IP). + +.. _startpage categories: + +Startpage categories +==================== + +Startpage's category (for Web-search, News, Videos, ..) is set by +:py:obj:`startpage_categ` in settings.yml:: + + - name: startpage + engine: startpage + startpage_categ: web + ... + +.. hint:: + + The default category is ``web`` .. and other categories than ``web`` are not + yet implemented. """ +from typing import TYPE_CHECKING +from collections import OrderedDict import re -from time import time - -from urllib.parse import urlencode from unicodedata import normalize, combining +from time import time from datetime import datetime, timedelta -from dateutil import parser -from lxml import html -from babel import Locale -from babel.localedata import locale_identifiers +import dateutil.parser +import lxml.html +import babel -from searx.network import get -from searx.utils import extract_text, eval_xpath, match_language -from searx.exceptions import ( - SearxEngineResponseException, - SearxEngineCaptchaException, -) +from searx import network +from searx.utils import extract_text, eval_xpath, gen_useragent +from searx.exceptions import SearxEngineCaptchaException +from searx.locales import region_tag +from searx.enginelib.traits import EngineTraits +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits # about about = { @@ -34,18 +114,28 @@ about = { "results": 'HTML', } +startpage_categ = 'web' +"""Startpage's category, visit :ref:`startpage categories`. +""" + +send_accept_language_header = True +"""Startpage tries to guess user's language and territory from the HTTP +``Accept-Language``. Optional the user can select a search-language (can be +different to the UI language) and a region filter. +""" + # engine dependent config categories = ['general', 'web'] -# there is a mechanism to block "bot" search -# (probably the parameter qid), require -# storing of qid's between mulitble search-calls - paging = True -supported_languages_url = 'https://www.startpage.com/do/settings' +time_range_support = True +safesearch = True + +time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'} +safesearch_dict = {0: '0', 1: '1', 2: '1'} # search-url -base_url = 'https://startpage.com/' -search_url = base_url + 'sp/search?' +base_url = 'https://www.startpage.com' +search_url = base_url + '/sp/search' # specific xpath variables # ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"] @@ -53,92 +143,193 @@ search_url = base_url + 'sp/search?' results_xpath = '//div[@class="w-gl__result__main"]' link_xpath = './/a[@class="w-gl__result-title result-link"]' content_xpath = './/p[@class="w-gl__description"]' +search_form_xpath = '//form[@id="search"]' +"""XPath of Startpage's origin search form + +.. code: html + +
+ + + + + + +
+""" # timestamp of the last fetch of 'sc' code sc_code_ts = 0 sc_code = '' +sc_code_cache_sec = 30 +"""Time in seconds the sc-code is cached in memory :py:obj:`get_sc_code`.""" -def raise_captcha(resp): +def get_sc_code(searxng_locale, params): + """Get an actual ``sc`` argument from Startpage's search form (HTML page). - if str(resp.url).startswith('https://www.startpage.com/sp/captcha'): - raise SearxEngineCaptchaException() + Startpage puts a ``sc`` argument on every HTML :py:obj:`search form + `. Without this argument Startpage considers the request + is from a bot. We do not know what is encoded in the value of the ``sc`` + argument, but it seems to be a kind of a *time-stamp*. - -def get_sc_code(headers): - """Get an actual `sc` argument from startpage's home page. - - Startpage puts a `sc` argument on every link. Without this argument - startpage considers the request is from a bot. We do not know what is - encoded in the value of the `sc` argument, but it seems to be a kind of a - *time-stamp*. This *time-stamp* is valid for a few hours. - - This function scrap a new *time-stamp* from startpage's home page every hour - (3000 sec). + Startpage's search form generates a new sc-code on each request. This + function scrap a new sc-code from Startpage's home page every + :py:obj:`sc_code_cache_sec` seconds. """ global sc_code_ts, sc_code # pylint: disable=global-statement - if time() > (sc_code_ts + 3000): - logger.debug("query new sc time-stamp ...") + if sc_code and (time() < (sc_code_ts + sc_code_cache_sec)): + logger.debug("get_sc_code: reuse '%s'", sc_code) + return sc_code - resp = get(base_url, headers=headers) - raise_captcha(resp) - dom = html.fromstring(resp.text) + headers = {**params['headers']} + headers['Origin'] = base_url + headers['Referer'] = base_url + '/' + # headers['Connection'] = 'keep-alive' + # headers['Accept-Encoding'] = 'gzip, deflate, br' + # headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8' + # headers['User-Agent'] = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:105.0) Gecko/20100101 Firefox/105.0' - try: - # - sc_code = eval_xpath(dom, '//input[@name="sc"]/@value')[0] - except IndexError as exc: - # suspend startpage API --> https://github.com/searxng/searxng/pull/695 - raise SearxEngineResponseException( - suspended_time=7 * 24 * 3600, message="PR-695: query new sc time-stamp failed!" - ) from exc + # add Accept-Language header + if searxng_locale == 'all': + searxng_locale = 'en-US' + locale = babel.Locale.parse(searxng_locale, sep='-') - sc_code_ts = time() - logger.debug("new value is: %s", sc_code) + if send_accept_language_header: + ac_lang = locale.language + if locale.territory: + ac_lang = "%s-%s,%s;q=0.9,*;q=0.5" % ( + locale.language, + locale.territory, + locale.language, + ) + headers['Accept-Language'] = ac_lang + get_sc_url = base_url + '/?sc=%s' % (sc_code) + logger.debug("query new sc time-stamp ... %s", get_sc_url) + logger.debug("headers: %s", headers) + resp = network.get(get_sc_url, headers=headers) + + # ?? x = network.get('https://www.startpage.com/sp/cdn/images/filter-chevron.svg', headers=headers) + # ?? https://www.startpage.com/sp/cdn/images/filter-chevron.svg + # ?? ping-back URL: https://www.startpage.com/sp/pb?sc=TLsB0oITjZ8F21 + + if str(resp.url).startswith('https://www.startpage.com/sp/captcha'): + raise SearxEngineCaptchaException( + message="get_sc_code: got redirected to https://www.startpage.com/sp/captcha", + ) + + dom = lxml.html.fromstring(resp.text) + + try: + sc_code = eval_xpath(dom, search_form_xpath + '//input[@name="sc"]/@value')[0] + except IndexError as exc: + logger.debug("suspend startpage API --> https://github.com/searxng/searxng/pull/695") + raise SearxEngineCaptchaException( + message="get_sc_code: [PR-695] query new sc time-stamp failed! (%s)" % resp.url, + ) from exc + + sc_code_ts = time() + logger.debug("get_sc_code: new value is: %s", sc_code) return sc_code -# do search-request def request(query, params): + """Assemble a Startpage request. - # pylint: disable=line-too-long - # The format string from Startpage's FFox add-on [1]:: - # - # https://www.startpage.com/do/dsearch?query={searchTerms}&cat=web&pl=ext-ff&language=__MSG_extensionUrlLanguage__&extVersion=1.3.0 - # - # [1] https://addons.mozilla.org/en-US/firefox/addon/startpage-private-search/ + To avoid CAPTCHA we need to send a well formed HTTP POST request with a + cookie. We need to form a request that is identical to the request build by + Startpage's search form: + - in the cookie the **region** is selected + - in the HTTP POST data the **language** is selected + + Additionally the arguments form Startpage's search form needs to be set in + HTML POST data / compare ```` elements: :py:obj:`search_form_xpath`. + """ + if startpage_categ == 'web': + return _request_cat_web(query, params) + + logger.error("Startpages's category '%' is not yet implemented.", startpage_categ) + return params + + +def _request_cat_web(query, params): + + engine_region = traits.get_region(params['searxng_locale'], 'en-US') + engine_language = traits.get_language(params['searxng_locale'], 'en') + + # build arguments args = { 'query': query, - 'page': params['pageno'], 'cat': 'web', - # 'pl': 'ext-ff', - # 'extVersion': '1.3.0', - # 'abp': "-1", - 'sc': get_sc_code(params['headers']), + 't': 'device', + 'sc': get_sc_code(params['searxng_locale'], params), # hint: this func needs HTTP headers, + 'with_date': time_range_dict.get(params['time_range'], ''), } - # set language if specified - if params['language'] != 'all': - lang_code = match_language(params['language'], supported_languages, fallback=None) - if lang_code: - language_name = supported_languages[lang_code]['alias'] - args['language'] = language_name - args['lui'] = language_name + if engine_language: + args['language'] = engine_language + args['lui'] = engine_language + + args['abp'] = '1' + if params['pageno'] > 1: + args['page'] = params['pageno'] + + # build cookie + lang_homepage = 'en' + cookie = OrderedDict() + cookie['date_time'] = 'world' + cookie['disable_family_filter'] = safesearch_dict[params['safesearch']] + cookie['disable_open_in_new_window'] = '0' + cookie['enable_post_method'] = '1' # hint: POST + cookie['enable_proxy_safety_suggest'] = '1' + cookie['enable_stay_control'] = '1' + cookie['instant_answers'] = '1' + cookie['lang_homepage'] = 's/device/%s/' % lang_homepage + cookie['num_of_results'] = '10' + cookie['suggestions'] = '1' + cookie['wt_unit'] = 'celsius' + + if engine_language: + cookie['language'] = engine_language + cookie['language_ui'] = engine_language + + if engine_region: + cookie['search_results_region'] = engine_region + + params['cookies']['preferences'] = 'N1N'.join(["%sEEE%s" % x for x in cookie.items()]) + logger.debug('cookie preferences: %s', params['cookies']['preferences']) + + # POST request + logger.debug("data: %s", args) + params['data'] = args + params['method'] = 'POST' + params['url'] = search_url + params['headers']['Origin'] = base_url + params['headers']['Referer'] = base_url + '/' + # is the Accept header needed? + # params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' - params['url'] = search_url + urlencode(args) return params # get response from search-request def response(resp): - results = [] + dom = lxml.html.fromstring(resp.text) - dom = html.fromstring(resp.text) + if startpage_categ == 'web': + return _response_cat_web(dom) + + logger.error("Startpages's category '%' is not yet implemented.", startpage_categ) + return [] + + +def _response_cat_web(dom): + results = [] # parse results for result in eval_xpath(dom, results_xpath): @@ -173,7 +364,7 @@ def response(resp): content = content[date_pos:] try: - published_date = parser.parse(date_string, dayfirst=True) + published_date = dateutil.parser.parse(date_string, dayfirst=True) except ValueError: pass @@ -199,62 +390,103 @@ def response(resp): return results -# get supported languages from their site -def _fetch_supported_languages(resp): - # startpage's language selector is a mess each option has a displayed name - # and a value, either of which may represent the language name in the native - # script, the language name in English, an English transliteration of the - # native name, the English name of the writing script used by the language, - # or occasionally something else entirely. +def fetch_traits(engine_traits: EngineTraits): + """Fetch :ref:`languages ` and :ref:`regions ` from Startpage.""" + # pylint: disable=too-many-branches - # this cases are so special they need to be hardcoded, a couple of them are misspellings - language_names = { - 'english_uk': 'en-GB', - 'fantizhengwen': ['zh-TW', 'zh-HK'], - 'hangul': 'ko', - 'malayam': 'ml', - 'norsk': 'nb', - 'sinhalese': 'si', - 'sudanese': 'su', + headers = { + 'User-Agent': gen_useragent(), + 'Accept-Language': "en-US,en;q=0.5", # bing needs to set the English language } + resp = network.get('https://www.startpage.com/do/settings', headers=headers) - # get the English name of every language known by babel - language_names.update( - { - # fmt: off - name.lower(): lang_code - # pylint: disable=protected-access - for lang_code, name in Locale('en')._data['languages'].items() - # fmt: on - } - ) + if not resp.ok: + print("ERROR: response from Startpage is not OK.") + + dom = lxml.html.fromstring(resp.text) + + # regions + + sp_region_names = [] + for option in dom.xpath('//form[@name="settings"]//select[@name="search_results_region"]/option'): + sp_region_names.append(option.get('value')) + + for eng_tag in sp_region_names: + if eng_tag == 'all': + continue + babel_region_tag = {'no_NO': 'nb_NO'}.get(eng_tag, eng_tag) # norway + + if '-' in babel_region_tag: + l, r = babel_region_tag.split('-') + r = r.split('_')[-1] + sxng_tag = region_tag(babel.Locale.parse(l + '_' + r, sep='_')) + + else: + try: + sxng_tag = region_tag(babel.Locale.parse(babel_region_tag, sep='_')) + + except babel.UnknownLocaleError: + print("ERROR: can't determine babel locale of startpage's locale %s" % eng_tag) + continue + + conflict = engine_traits.regions.get(sxng_tag) + if conflict: + if conflict != eng_tag: + print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag)) + continue + engine_traits.regions[sxng_tag] = eng_tag + + # languages + + catalog_engine2code = {name.lower(): lang_code for lang_code, name in babel.Locale('en').languages.items()} # get the native name of every language known by babel - for lang_code in filter(lambda lang_code: lang_code.find('_') == -1, locale_identifiers()): - native_name = Locale(lang_code).get_language_name().lower() + + for lang_code in filter(lambda lang_code: lang_code.find('_') == -1, babel.localedata.locale_identifiers()): + native_name = babel.Locale(lang_code).get_language_name().lower() # add native name exactly as it is - language_names[native_name] = lang_code + catalog_engine2code[native_name] = lang_code # add "normalized" language name (i.e. français becomes francais and español becomes espanol) unaccented_name = ''.join(filter(lambda c: not combining(c), normalize('NFKD', native_name))) if len(unaccented_name) == len(unaccented_name.encode()): # add only if result is ascii (otherwise "normalization" didn't work) - language_names[unaccented_name] = lang_code + catalog_engine2code[unaccented_name] = lang_code + + # values that can't be determined by babel's languages names + + catalog_engine2code.update( + { + # traditional chinese used in .. + 'fantizhengwen': 'zh_Hant', + # Korean alphabet + 'hangul': 'ko', + # Malayalam is one of 22 scheduled languages of India. + 'malayam': 'ml', + 'norsk': 'nb', + 'sinhalese': 'si', + } + ) + + skip_eng_tags = { + 'english_uk', # SearXNG lang 'en' already maps to 'english' + } - dom = html.fromstring(resp.text) - sp_lang_names = [] for option in dom.xpath('//form[@name="settings"]//select[@name="language"]/option'): - sp_lang_names.append((option.get('value'), extract_text(option).lower())) - supported_languages = {} - for sp_option_value, sp_option_text in sp_lang_names: - lang_code = language_names.get(sp_option_value) or language_names.get(sp_option_text) - if isinstance(lang_code, str): - supported_languages[lang_code] = {'alias': sp_option_value} - elif isinstance(lang_code, list): - for _lc in lang_code: - supported_languages[_lc] = {'alias': sp_option_value} - else: - print('Unknown language option in Startpage: {} ({})'.format(sp_option_value, sp_option_text)) + eng_tag = option.get('value') + if eng_tag in skip_eng_tags: + continue + name = extract_text(option).lower() - return supported_languages + sxng_tag = catalog_engine2code.get(eng_tag) + if sxng_tag is None: + sxng_tag = catalog_engine2code[name] + + conflict = engine_traits.languages.get(sxng_tag) + if conflict: + if conflict != eng_tag: + print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag)) + continue + engine_traits.languages[sxng_tag] = eng_tag diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index 8d3b0839a..6ea77f092 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -1,9 +1,12 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""Wikidata +"""This module implements the Wikidata engine. Some implementations are shared +from :ref:`wikipedia engine`. + """ # pylint: disable=missing-class-docstring +from typing import TYPE_CHECKING from hashlib import md5 from urllib.parse import urlencode, unquote from json import loads @@ -13,12 +16,17 @@ from babel.dates import format_datetime, format_date, format_time, get_datetime_ from searx.data import WIKIDATA_UNITS from searx.network import post, get -from searx.utils import match_language, searx_useragent, get_string_replaces_function +from searx.utils import searx_useragent, get_string_replaces_function from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom -from searx.engines.wikipedia import ( # pylint: disable=unused-import - _fetch_supported_languages, - supported_languages_url, -) +from searx.engines.wikipedia import fetch_traits as _fetch_traits +from searx.enginelib.traits import EngineTraits + +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits # about about = { @@ -154,33 +162,35 @@ def send_wikidata_query(query, method='GET'): def request(query, params): - language = params['language'].split('-')[0] - if language == 'all': - language = 'en' - else: - language = match_language(params['language'], supported_languages, language_aliases).split('-')[0] + + # wikidata does not support zh-classical (zh_Hans) / zh-TW, zh-HK and zh-CN + # mapped to zh + sxng_lang = params['searxng_locale'].split('-')[0] + language = traits.get_language(sxng_lang, 'en') query, attributes = get_query(query, language) + logger.debug("request --> language %s // len(attributes): %s", language, len(attributes)) params['method'] = 'POST' params['url'] = SPARQL_ENDPOINT_URL params['data'] = {'query': query} params['headers'] = get_headers() - params['language'] = language params['attributes'] = attributes + return params def response(resp): + results = [] jsonresponse = loads(resp.content.decode()) - language = resp.search_params['language'].lower() + language = resp.search_params['language'] attributes = resp.search_params['attributes'] + logger.debug("request --> language %s // len(attributes): %s", language, len(attributes)) seen_entities = set() - for result in jsonresponse.get('results', {}).get('bindings', []): attribute_result = {key: value['value'] for key, value in result.items()} entity_url = attribute_result['item'] @@ -756,3 +766,15 @@ def init(engine_settings=None): # pylint: disable=unused-argument lang = result['name']['xml:lang'] entity_id = result['item']['value'].replace('http://www.wikidata.org/entity/', '') WIKIDATA_PROPERTIES[(entity_id, lang)] = name.capitalize() + + +def fetch_traits(engine_traits: EngineTraits): + """Use languages evaluated from :py:obj:`wikipedia.fetch_traits + ` except zh-classical (zh_Hans) what + is not supported by wikidata.""" + + _fetch_traits(engine_traits) + # wikidata does not support zh-classical (zh_Hans) + engine_traits.languages.pop('zh_Hans') + # wikidata does not have net-locations for the languages + engine_traits.custom['wiki_netloc'] = {} diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py index ca841e8b3..9d2d30afa 100644 --- a/searx/engines/wikipedia.py +++ b/searx/engines/wikipedia.py @@ -1,13 +1,26 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -""" - Wikipedia (Web) +# lint: pylint +"""This module implements the Wikipedia engine. Some of this implementations +are shared by other engines: + +- :ref:`wikidata engine` + +The list of supported languages is fetched from the article linked by +:py:obj:`wikipedia_article_depth`. Unlike traditional search engines, wikipedia +does not support one Wikipedia for all the languages, but there is one Wikipedia +for every language (:py:obj:`fetch_traits`). """ -from urllib.parse import quote -from json import loads -from lxml.html import fromstring -from searx.utils import match_language, searx_useragent -from searx.network import raise_for_httperror +import urllib.parse +import babel + +from lxml import html + +from searx import network +from searx.locales import language_tag +from searx.enginelib.traits import EngineTraits + +traits: EngineTraits # about about = { @@ -19,32 +32,40 @@ about = { "results": 'JSON', } - send_accept_language_header = True -# search-url -search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}' -supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias' -language_variants = {"zh": ("zh-cn", "zh-hk", "zh-mo", "zh-my", "zh-sg", "zh-tw")} +wikipedia_article_depth = 'https://meta.wikimedia.org/wiki/Wikipedia_article_depth' +"""The *editing depth* of Wikipedia is one of several possible rough indicators +of the encyclopedia's collaborative quality, showing how frequently its articles +are updated. The measurement of depth was introduced after some limitations of +the classic measurement of article count were realized. +""" + +# example: https://zh-classical.wikipedia.org/api/rest_v1/page/summary/日 +rest_v1_summary_url = 'https://{wiki_netloc}/api/rest_v1/page/summary/{title}' +"""`wikipedia rest_v1 summary API`_: The summary response includes an extract of +the first paragraph of the page in plain text and HTML as well as the type of +page. This is useful for page previews (fka. Hovercards, aka. Popups) on the web +and link previews in the apps. + +.. _wikipedia rest_v1 summary API: https://en.wikipedia.org/api/rest_v1/#/Page%20content/get_page_summary__title_ + +""" -# set language in base_url -def url_lang(lang): - lang_pre = lang.split('-')[0] - if lang_pre == 'all' or lang_pre not in supported_languages and lang_pre not in language_aliases: - return 'en' - return match_language(lang, supported_languages, language_aliases).split('-')[0] - - -# do search-request def request(query, params): + """Assemble a request (`wikipedia rest_v1 summary API`_).""" if query.islower(): query = query.title() - language = url_lang(params['language']) - params['url'] = search_url.format(title=quote(query), language=language) + engine_language = traits.get_language(params['searxng_locale'], 'en') + wiki_netloc = traits.custom['wiki_netloc'].get(engine_language, 'https://en.wikipedia.org/wiki/') + title = urllib.parse.quote(query) + + # '!wikipedia 日 :zh-TW' --> https://zh-classical.wikipedia.org/ + # '!wikipedia 日 :zh' --> https://zh.wikipedia.org/ + params['url'] = rest_v1_summary_url.format(wiki_netloc=wiki_netloc, title=title) - params['headers']['User-Agent'] = searx_useragent() params['raise_for_httperror'] = False params['soft_max_redirects'] = 2 @@ -53,13 +74,14 @@ def request(query, params): # get response from search-request def response(resp): + + results = [] if resp.status_code == 404: return [] - if resp.status_code == 400: try: - api_result = loads(resp.text) - except: + api_result = resp.json() + except Exception: # pylint: disable=broad-except pass else: if ( @@ -68,49 +90,135 @@ def response(resp): ): return [] - raise_for_httperror(resp) - - results = [] - api_result = loads(resp.text) - - # skip disambiguation pages - if api_result.get('type') != 'standard': - return [] + network.raise_for_httperror(resp) + api_result = resp.json() title = api_result['title'] wikipedia_link = api_result['content_urls']['desktop']['page'] + results.append({'url': wikipedia_link, 'title': title, 'content': api_result.get('description', '')}) - results.append({'url': wikipedia_link, 'title': title}) - - results.append( - { - 'infobox': title, - 'id': wikipedia_link, - 'content': api_result.get('extract', ''), - 'img_src': api_result.get('thumbnail', {}).get('source'), - 'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}], - } - ) + if api_result.get('type') == 'standard': + results.append( + { + 'infobox': title, + 'id': wikipedia_link, + 'content': api_result.get('extract', ''), + 'img_src': api_result.get('thumbnail', {}).get('source'), + 'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}], + } + ) return results -# get supported languages from their site -def _fetch_supported_languages(resp): - supported_languages = {} - dom = fromstring(resp.text) - tables = dom.xpath('//table[contains(@class,"sortable")]') - for table in tables: - # exclude header row - trs = table.xpath('.//tr')[1:] - for tr in trs: - td = tr.xpath('./td') - code = td[3].xpath('./a')[0].text - name = td[1].xpath('./a')[0].text - english_name = td[1].xpath('./a')[0].text - articles = int(td[4].xpath('./a')[0].text.replace(',', '')) - # exclude languages with too few articles - if articles >= 100: - supported_languages[code] = {"name": name, "english_name": english_name} +# Nonstandard language codes +# +# These Wikipedias use language codes that do not conform to the ISO 639 +# standard (which is how wiki subdomains are chosen nowadays). - return supported_languages +lang_map = { + 'be-tarask': 'bel', + 'ak': 'aka', + 'als': 'gsw', + 'bat-smg': 'sgs', + 'cbk-zam': 'cbk', + 'fiu-vro': 'vro', + 'map-bms': 'map', + 'nrm': 'nrf', + 'roa-rup': 'rup', + 'nds-nl': 'nds', + #'simple: – invented code used for the Simple English Wikipedia (not the official IETF code en-simple) + 'zh-min-nan': 'nan', + 'zh-yue': 'yue', + 'an': 'arg', + 'zh-classical': 'zh-Hant', # babel maps classical to zh-Hans (for whatever reason) +} + +unknown_langs = [ + 'an', # Aragonese + 'ba', # Bashkir + 'bar', # Bavarian + 'bcl', # Central Bicolano + 'be-tarask', # Belarusian variant / Belarusian is already covered by 'be' + 'bpy', # Bishnupriya Manipuri is unknown by babel + 'hif', # Fiji Hindi + 'ilo', # Ilokano + 'li', # Limburgish + 'sco', # Scots (sco) is not known by babel, Scottish Gaelic (gd) is known by babel + 'sh', # Serbo-Croatian + 'simple', # simple english is not know as a natural language different to english (babel) + 'vo', # Volapük + 'wa', # Walloon +] + + +def fetch_traits(engine_traits: EngineTraits): + """Fetch languages from Wikipedia. + + The location of the Wikipedia address of a language is mapped in a + :py:obj:`custom field ` + (``wiki_netloc``). Here is a reduced example: + + .. code:: python + + traits.custom['wiki_netloc'] = { + "en": "en.wikipedia.org", + .. + "gsw": "als.wikipedia.org", + .. + "zh": "zh.wikipedia.org", + "zh-classical": "zh-classical.wikipedia.org" + } + + """ + + engine_traits.custom['wiki_netloc'] = {} + + # insert alias to map from a region like zh-CN to a language zh_Hans + engine_traits.languages['zh_Hans'] = 'zh' + + resp = network.get(wikipedia_article_depth) + if not resp.ok: + print("ERROR: response from Wikipedia is not OK.") + + dom = html.fromstring(resp.text) + for row in dom.xpath('//table[contains(@class,"sortable")]//tbody/tr'): + + cols = row.xpath('./td') + if not cols: + continue + cols = [c.text_content().strip() for c in cols] + + depth = float(cols[3].replace('-', '0').replace(',', '')) + articles = int(cols[4].replace(',', '').replace(',', '')) + + if articles < 10000: + # exclude languages with too few articles + continue + + if int(depth) < 20: + # Rough indicator of a Wikipedia’s quality, showing how frequently + # its articles are updated. + continue + + eng_tag = cols[2] + wiki_url = row.xpath('./td[3]/a/@href')[0] + wiki_url = urllib.parse.urlparse(wiki_url) + + if eng_tag in unknown_langs: + continue + + try: + sxng_tag = language_tag(babel.Locale.parse(lang_map.get(eng_tag, eng_tag), sep='-')) + except babel.UnknownLocaleError: + print("ERROR: %s [%s] is unknown by babel" % (cols[0], eng_tag)) + continue + + conflict = engine_traits.languages.get(sxng_tag) + if conflict: + if conflict != eng_tag: + print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag)) + continue + + engine_traits.languages[sxng_tag] = eng_tag + engine_traits.custom['wiki_netloc'][eng_tag] = wiki_url.netloc diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py index c13ce6d78..0fdeacec2 100644 --- a/searx/engines/yahoo.py +++ b/searx/engines/yahoo.py @@ -17,8 +17,10 @@ from searx.utils import ( eval_xpath_getindex, eval_xpath_list, extract_text, - match_language, ) +from searx.enginelib.traits import EngineTraits + +traits: EngineTraits # about about = { @@ -34,8 +36,7 @@ about = { categories = ['general', 'web'] paging = True time_range_support = True -supported_languages_url = 'https://search.yahoo.com/preferences/languages' -"""Supported languages are read from Yahoo preference page.""" +# send_accept_language_header = True time_range_dict = { 'day': ('1d', 'd'), @@ -43,15 +44,10 @@ time_range_dict = { 'month': ('1m', 'm'), } -language_aliases = { - 'zh-HK': 'zh_chs', - 'zh-CN': 'zh_chs', # dead since 2015 / routed to hk.search.yahoo.com - 'zh-TW': 'zh_cht', -} - lang2domain = { 'zh_chs': 'hk.search.yahoo.com', 'zh_cht': 'tw.search.yahoo.com', + 'any': 'search.yahoo.com', 'en': 'search.yahoo.com', 'bg': 'search.yahoo.com', 'cs': 'search.yahoo.com', @@ -67,21 +63,23 @@ lang2domain = { } """Map language to domain""" - -def _get_language(params): - - lang = language_aliases.get(params['language']) - if lang is None: - lang = match_language(params['language'], supported_languages, language_aliases) - lang = lang.split('-')[0] - logger.debug("params['language']: %s --> %s", params['language'], lang) - return lang +locale_aliases = { + 'zh': 'zh_Hans', + 'zh-HK': 'zh_Hans', + 'zh-CN': 'zh_Hans', # dead since 2015 / routed to hk.search.yahoo.com + 'zh-TW': 'zh_Hant', +} def request(query, params): """build request""" + + lang = locale_aliases.get(params['language'], None) + if not lang: + lang = params['language'].split('-')[0] + lang = traits.get_language(lang, traits.all_locale) + offset = (params['pageno'] - 1) * 7 + 1 - lang = _get_language(params) age, btf = time_range_dict.get(params['time_range'], ('', '')) args = urlencode( @@ -154,13 +152,37 @@ def response(resp): return results -# get supported languages from their site -def _fetch_supported_languages(resp): - supported_languages = [] +def fetch_traits(engine_traits: EngineTraits): + """Fetch languages from yahoo""" + + # pylint: disable=import-outside-toplevel + import babel + from searx import network + from searx.locales import language_tag + + engine_traits.all_locale = 'any' + + resp = network.get('https://search.yahoo.com/preferences/languages') + if not resp.ok: + print("ERROR: response from peertube is not OK.") + dom = html.fromstring(resp.text) offset = len('lang_') - for val in eval_xpath_list(dom, '//div[contains(@class, "lang-item")]/input/@value'): - supported_languages.append(val[offset:]) + eng2sxng = {'zh_chs': 'zh_Hans', 'zh_cht': 'zh_Hant'} - return supported_languages + for val in eval_xpath_list(dom, '//div[contains(@class, "lang-item")]/input/@value'): + eng_tag = val[offset:] + + try: + sxng_tag = language_tag(babel.Locale.parse(eng2sxng.get(eng_tag, eng_tag))) + except babel.UnknownLocaleError: + print('ERROR: unknown language --> %s' % eng_tag) + continue + + conflict = engine_traits.languages.get(sxng_tag) + if conflict: + if conflict != eng_tag: + print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag)) + continue + engine_traits.languages[sxng_tag] = eng_tag diff --git a/searx/locales.py b/searx/locales.py index 9e06bf39d..ffa5e731c 100644 --- a/searx/locales.py +++ b/searx/locales.py @@ -4,11 +4,11 @@ """Initialize :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`. """ -from typing import Set +from typing import Set, Optional, List import os import pathlib -from babel import Locale +import babel from babel.support import Translations import babel.languages import babel.core @@ -134,7 +134,7 @@ def locales_initialize(directory=None): flask_babel.get_translations = get_translations for tag, descr in ADDITIONAL_TRANSLATIONS.items(): - locale = Locale.parse(LOCALE_BEST_MATCH[tag], sep='-') + locale = babel.Locale.parse(LOCALE_BEST_MATCH[tag], sep='-') LOCALE_NAMES[tag] = descr if locale.text_direction == 'rtl': RTL_LOCALES.add(tag) @@ -142,7 +142,7 @@ def locales_initialize(directory=None): for tag in LOCALE_BEST_MATCH: descr = LOCALE_NAMES.get(tag) if not descr: - locale = Locale.parse(tag, sep='-') + locale = babel.Locale.parse(tag, sep='-') LOCALE_NAMES[tag] = get_locale_descr(locale, tag.replace('-', '_')) if locale.text_direction == 'rtl': RTL_LOCALES.add(tag) @@ -154,12 +154,77 @@ def locales_initialize(directory=None): tag = dirname.replace('_', '-') descr = LOCALE_NAMES.get(tag) if not descr: - locale = Locale.parse(dirname) + locale = babel.Locale.parse(dirname) LOCALE_NAMES[tag] = get_locale_descr(locale, dirname) if locale.text_direction == 'rtl': RTL_LOCALES.add(tag) +def region_tag(locale: babel.Locale) -> str: + """Returns SearXNG's region tag from the locale (e.g. zh-TW , en-US).""" + if not locale.territory: + raise ValueError('%s missed a territory') + return locale.language + '-' + locale.territory + + +def language_tag(locale: babel.Locale) -> str: + """Returns SearXNG's language tag from the locale and if exits, the tag + includes the script name (e.g. en, zh_Hant). + """ + sxng_lang = locale.language + if locale.script: + sxng_lang += '_' + locale.script + return sxng_lang + + +def get_locale(locale_tag: str) -> Optional[babel.Locale]: + """Returns a :py:obj:`babel.Locale` object parsed from argument + ``locale_tag``""" + try: + locale = babel.Locale.parse(locale_tag, sep='-') + return locale + + except babel.core.UnknownLocaleError: + return None + + +def get_offical_locales( + territory: str, languages=None, regional: bool = False, de_facto: bool = True +) -> Set[babel.Locale]: + """Returns a list of :py:obj:`babel.Locale` with languages from + :py:obj:`babel.languages.get_official_languages`. + + :param territory: The territory (country or region) code. + + :param languages: A list of language codes the languages from + :py:obj:`babel.languages.get_official_languages` should be in + (intersection). If this argument is ``None``, all official languages in + this territory are used. + + :param regional: If the regional flag is set, then languages which are + regionally official are also returned. + + :param de_facto: If the de_facto flag is set to `False`, then languages + which are “de facto” official are not returned. + + """ + ret_val = set() + o_languages = babel.languages.get_official_languages(territory, regional=regional, de_facto=de_facto) + + if languages: + languages = [l.lower() for l in languages] + o_languages = set(l for l in o_languages if l.lower() in languages) + + for lang in o_languages: + try: + locale = babel.Locale.parse(lang + '_' + territory) + ret_val.add(locale) + except babel.UnknownLocaleError: + continue + + return ret_val + + def get_engine_locale(searxng_locale, engine_locales, default=None): """Return engine's language (aka locale) string that best fits to argument ``searxng_locale``. @@ -177,6 +242,10 @@ def get_engine_locale(searxng_locale, engine_locales, default=None): ... 'pl-PL' : 'pl_PL', 'pt-PT' : 'pt_PT' + .. + 'zh' : 'zh' + 'zh_Hans' : 'zh' + 'zh_Hant' : 'zh-classical' } .. hint:: @@ -210,13 +279,13 @@ def get_engine_locale(searxng_locale, engine_locales, default=None): engine. """ - # pylint: disable=too-many-branches + # pylint: disable=too-many-branches, too-many-return-statements engine_locale = engine_locales.get(searxng_locale) if engine_locale is not None: - # There was a 1:1 mapping (e.g. "fr-BE --> fr_BE" or "fr --> fr_FR"), no - # need to narrow language nor territory. + # There was a 1:1 mapping (e.g. a region "fr-BE --> fr_BE" or a language + # "zh --> zh"), no need to narrow language-script nor territory. return engine_locale try: @@ -227,6 +296,12 @@ def get_engine_locale(searxng_locale, engine_locales, default=None): except babel.core.UnknownLocaleError: return default + searxng_lang = language_tag(locale) + engine_locale = engine_locales.get(searxng_lang) + if engine_locale is not None: + # There was a 1:1 mapping (e.g. "zh-HK --> zh_Hant" or "zh-CN --> zh_Hans") + return engine_locale + # SearXNG's selected locale is not supported by the engine .. if locale.territory: @@ -247,10 +322,6 @@ def get_engine_locale(searxng_locale, engine_locales, default=None): if locale.language: - searxng_lang = locale.language - if locale.script: - searxng_lang += '_' + locale.script - terr_lang_dict = {} for territory, langs in babel.core.get_global("territory_languages").items(): if not langs.get(searxng_lang, {}).get('official_status'): @@ -303,3 +374,98 @@ def get_engine_locale(searxng_locale, engine_locales, default=None): engine_locale = default return default + + +def match_locale(searxng_locale: str, locale_tag_list: List[str], fallback: Optional[str] = None) -> Optional[str]: + """Return tag from ``locale_tag_list`` that best fits to ``searxng_locale``. + + :param str searxng_locale: SearXNG's internal representation of locale (de, + de-DE, fr-BE, zh, zh-CN, zh-TW ..). + + :param list locale_tag_list: The list of locale tags to select from + + :param str fallback: fallback locale tag (if unset --> ``None``) + + The rules to find a match are implemented in :py:obj:`get_engine_locale`, + the ``engine_locales`` is build up by :py:obj:`build_engine_locales`. + + .. hint:: + + The *SearXNG locale* string and the members of ``locale_tag_list`` has to + be known by babel! The :py:obj:`ADDITIONAL_TRANSLATIONS` are used in the + UI and are not known by babel --> will be ignored. + """ + + # searxng_locale = 'es' + # locale_tag_list = ['es-AR', 'es-ES', 'es-MX'] + + if not searxng_locale: + return fallback + + locale = get_locale(searxng_locale) + if locale is None: + return fallback + + # normalize to a SearXNG locale that can be passed to get_engine_locale + + searxng_locale = language_tag(locale) + if locale.territory: + searxng_locale = region_tag(locale) + + # clean up locale_tag_list + + tag_list = [] + for tag in locale_tag_list: + if tag in ('all', 'auto') or tag in ADDITIONAL_TRANSLATIONS: + continue + tag_list.append(tag) + + # emulate fetch_traits + engine_locales = build_engine_locales(tag_list) + return get_engine_locale(searxng_locale, engine_locales, default=fallback) + + +def build_engine_locales(tag_list: List[str]): + """From a list of locale tags a dictionary is build that can be passed by + argument ``engine_locales`` to :py:obj:`get_engine_locale`. This function + is mainly used by :py:obj:`match_locale` and is similar to what the + ``fetch_traits(..)`` function of engines do. + + If there are territory codes in the ``tag_list`` that have a *script code* + additional keys are added to the returned dictionary. + + .. code:: python + + >>> import locales + >>> engine_locales = locales.build_engine_locales(['en', 'en-US', 'zh', 'zh-CN', 'zh-TW']) + >>> engine_locales + { + 'en': 'en', 'en-US': 'en-US', + 'zh': 'zh', 'zh-CN': 'zh-CN', 'zh_Hans': 'zh-CN', + 'zh-TW': 'zh-TW', 'zh_Hant': 'zh-TW' + } + >>> get_engine_locale('zh-Hans', engine_locales) + 'zh-CN' + + This function is a good example to understand the language/region model + of SearXNG: + + SearXNG only distinguishes between **search languages** and **search + regions**, by adding the *script-tags*, languages with *script-tags* can + be assigned to the **regions** that SearXNG supports. + + """ + engine_locales = {} + + for tag in tag_list: + locale = get_locale(tag) + if locale is None: + logger.warn("build_engine_locales: skip locale tag %s / unknown by babel", tag) + continue + if locale.territory: + engine_locales[region_tag(locale)] = tag + if locale.script: + engine_locales[language_tag(locale)] = tag + else: + engine_locales[language_tag(locale)] = tag + return engine_locales diff --git a/searx/preferences.py b/searx/preferences.py index 0eac8441c..5cee83a03 100644 --- a/searx/preferences.py +++ b/searx/preferences.py @@ -13,7 +13,7 @@ from typing import Iterable, Dict, List import flask from searx import settings, autocomplete -from searx.engines import Engine +from searx.enginelib import Engine from searx.plugins import Plugin from searx.locales import LOCALE_NAMES from searx.webutils import VALID_LANGUAGE_CODE diff --git a/searx/query.py b/searx/query.py index dbc52ec75..751308baa 100644 --- a/searx/query.py +++ b/searx/query.py @@ -4,7 +4,7 @@ from abc import abstractmethod, ABC import re from searx import settings -from searx.languages import language_codes +from searx.sxng_locales import sxng_locales from searx.engines import categories, engines, engine_shortcuts from searx.external_bang import get_bang_definition_and_autocomplete from searx.search import EngineRef @@ -84,7 +84,7 @@ class LanguageParser(QueryPartParser): found = False # check if any language-code is equal with # declared language-codes - for lc in language_codes: + for lc in sxng_locales: lang_id, lang_name, country, english_name, _flag = map(str.lower, lc) # if correct language-code is found @@ -125,7 +125,7 @@ class LanguageParser(QueryPartParser): self.raw_text_query.autocomplete_list.append(lang) return - for lc in language_codes: + for lc in sxng_locales: if lc[0] not in settings['search']['languages']: continue lang_id, lang_name, country, english_name, _flag = map(str.lower, lc) diff --git a/searx/search/processors/__init__.py b/searx/search/processors/__init__.py index a270b4ef5..1390de456 100644 --- a/searx/search/processors/__init__.py +++ b/searx/search/processors/__init__.py @@ -30,7 +30,10 @@ from .abstract import EngineProcessor logger = logger.getChild('search.processors') PROCESSORS: Dict[str, EngineProcessor] = {} -"""Cache request processores, stored by *engine-name* (:py:func:`initialize`)""" +"""Cache request processores, stored by *engine-name* (:py:func:`initialize`) + +:meta hide-value: +""" def get_processor_class(engine_type): diff --git a/searx/search/processors/abstract.py b/searx/search/processors/abstract.py index d74616db0..5f1882ca4 100644 --- a/searx/search/processors/abstract.py +++ b/searx/search/processors/abstract.py @@ -138,7 +138,8 @@ class EngineProcessor(ABC): return False def get_params(self, search_query, engine_category): - """Returns a set of *request params* or ``None`` if request is not supported. + """Returns a set of (see :ref:`request params `) or + ``None`` if request is not supported. Not supported conditions (``None`` is returned): @@ -159,11 +160,20 @@ class EngineProcessor(ABC): params['safesearch'] = search_query.safesearch params['time_range'] = search_query.time_range params['engine_data'] = search_query.engine_data.get(self.engine_name, {}) + params['searxng_locale'] = search_query.lang + + # deprecated / vintage --> use params['searxng_locale'] + # + # Conditions related to engine's traits are implemented in engine.traits + # module. Don't do 'locale' decissions here in the abstract layer of the + # search processor, just pass the value from user's choice unchanged to + # the engine request. if hasattr(self.engine, 'language') and self.engine.language: params['language'] = self.engine.language else: params['language'] = search_query.lang + return params @abstractmethod diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py index 242718416..697533d8c 100644 --- a/searx/search/processors/online.py +++ b/searx/search/processors/online.py @@ -51,6 +51,9 @@ class OnlineProcessor(EngineProcessor): super().initialize() def get_params(self, search_query, engine_category): + """Returns a set of :ref:`request params ` or ``None`` + if request is not supported. + """ params = super().get_params(search_query, engine_category) if params is None: return None @@ -184,11 +187,6 @@ class OnlineProcessor(EngineProcessor): self.handle_exception(result_container, e, suspend=True) self.logger.exception('CAPTCHA') except SearxEngineTooManyRequestsException as e: - if "google" in self.engine_name: - self.logger.warn( - "Set to 'true' the use_mobile_ui parameter in the 'engines:'" - " section of your settings.yml file if google is blocked for you." - ) self.handle_exception(result_container, e, suspend=True) self.logger.exception('Too many requests') except SearxEngineAccessDeniedException as e: @@ -223,7 +221,7 @@ class OnlineProcessor(EngineProcessor): 'test': ['unique_results'], } - if getattr(self.engine, 'supported_languages', []): + if getattr(self.engine, 'traits', False): tests['lang_fr'] = { 'matrix': {'query': 'paris', 'lang': 'fr'}, 'result_container': ['not_empty', ('has_language', 'fr')], diff --git a/searx/search/processors/online_currency.py b/searx/search/processors/online_currency.py index 92398239f..7cb4205c9 100644 --- a/searx/search/processors/online_currency.py +++ b/searx/search/processors/online_currency.py @@ -38,8 +38,8 @@ class OnlineCurrencyProcessor(OnlineProcessor): engine_type = 'online_currency' def get_params(self, search_query, engine_category): - """Returns a set of *request params* or ``None`` if search query does not match - to :py:obj:`parser_re`.""" + """Returns a set of :ref:`request params ` + or ``None`` if search query does not match to :py:obj:`parser_re`.""" params = super().get_params(search_query, engine_category) if params is None: diff --git a/searx/search/processors/online_dictionary.py b/searx/search/processors/online_dictionary.py index fbfc9df8e..6145a47d1 100644 --- a/searx/search/processors/online_dictionary.py +++ b/searx/search/processors/online_dictionary.py @@ -18,8 +18,9 @@ class OnlineDictionaryProcessor(OnlineProcessor): engine_type = 'online_dictionary' def get_params(self, search_query, engine_category): - """Returns a set of *request params* or ``None`` if search query does not match - to :py:obj:`parser_re`.""" + """Returns a set of :ref:`request params ` or + ``None`` if search query does not match to :py:obj:`parser_re`. + """ params = super().get_params(search_query, engine_category) if params is None: return None diff --git a/searx/search/processors/online_url_search.py b/searx/search/processors/online_url_search.py index 6383fa37f..a1dd6a018 100644 --- a/searx/search/processors/online_url_search.py +++ b/searx/search/processors/online_url_search.py @@ -20,9 +20,10 @@ class OnlineUrlSearchProcessor(OnlineProcessor): engine_type = 'online_url_search' def get_params(self, search_query, engine_category): - """Returns a set of *request params* or ``None`` if search query does not match - to at least one of :py:obj:`re_search_urls`. + """Returns a set of :ref:`request params ` or ``None`` if + search query does not match to :py:obj:`re_search_urls`. """ + params = super().get_params(search_query, engine_category) if params is None: return None diff --git a/searx/settings.yml b/searx/settings.yml index 841457b5e..fabd87bad 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -731,22 +731,9 @@ engines: - name: google engine: google shortcut: go - # see https://docs.searxng.org/src/searx.engines.google.html#module-searx.engines.google - use_mobile_ui: false # additional_tests: # android: *test_android - # - name: google italian - # engine: google - # shortcut: goit - # use_mobile_ui: false - # language: it - - # - name: google mobile ui - # engine: google - # shortcut: gomui - # use_mobile_ui: true - - name: google images engine: google_images shortcut: goi @@ -1758,9 +1745,8 @@ engines: engine: peertube shortcut: ptb paging: true - # https://instances.joinpeertube.org/instances - base_url: https://peertube.biz/ - # base_url: https://tube.tardis.world/ + # alternatives see: https://instances.joinpeertube.org/instances + # base_url: https://tube.4aem.com categories: videos disabled: true timeout: 6.0 diff --git a/searx/settings_defaults.py b/searx/settings_defaults.py index 6e98076ff..7f657aa54 100644 --- a/searx/settings_defaults.py +++ b/searx/settings_defaults.py @@ -12,13 +12,13 @@ import logging from base64 import b64decode from os.path import dirname, abspath -from searx.languages import language_codes as languages +from .sxng_locales import sxng_locales searx_dir = abspath(dirname(__file__)) logger = logging.getLogger('searx') OUTPUT_FORMATS = ['html', 'csv', 'json', 'rss'] -LANGUAGE_CODES = ['all', 'auto'] + list(l[0] for l in languages) +SXNG_LOCALE_TAGS = ['all', 'auto'] + list(l[0] for l in sxng_locales) SIMPLE_STYLE = ('auto', 'light', 'dark') CATEGORIES_AS_TABS = { 'general': {}, @@ -156,8 +156,8 @@ SCHEMA = { 'safe_search': SettingsValue((0, 1, 2), 0), 'autocomplete': SettingsValue(str, ''), 'autocomplete_min': SettingsValue(int, 4), - 'default_lang': SettingsValue(tuple(LANGUAGE_CODES + ['']), ''), - 'languages': SettingSublistValue(LANGUAGE_CODES, LANGUAGE_CODES), + 'default_lang': SettingsValue(tuple(SXNG_LOCALE_TAGS + ['']), ''), + 'languages': SettingSublistValue(SXNG_LOCALE_TAGS, SXNG_LOCALE_TAGS), 'ban_time_on_fail': SettingsValue(numbers.Real, 5), 'max_ban_time_on_fail': SettingsValue(numbers.Real, 120), 'suspended_times': { diff --git a/searx/languages.py b/searx/sxng_locales.py similarity index 51% rename from searx/languages.py rename to searx/sxng_locales.py index 377e7495b..0600a9c91 100644 --- a/searx/languages.py +++ b/searx/sxng_locales.py @@ -1,73 +1,120 @@ # -*- coding: utf-8 -*- -# list of language codes -# this file is generated automatically by utils/fetch_languages.py -language_codes = ( - ('af-ZA', 'Afrikaans', 'Suid-Afrika', 'Afrikaans', '\U0001f1ff\U0001f1e6'), - ('ar-EG', 'العربية', 'مصر', 'Arabic', '\U0001f1ea\U0001f1ec'), - ('be-BY', 'Беларуская', 'Беларусь', 'Belarusian', '\U0001f1e7\U0001f1fe'), +'''List of SearXNG's locale codes. + +This file is generated automatically by:: + + ./manage pyenv.cmd searxng_extra/update/update_engine_traits.py +''' + +sxng_locales = ( + ('ar', 'العربية', '', 'Arabic', '\U0001f310'), + ('bg', 'Български', '', 'Bulgarian', '\U0001f310'), ('bg-BG', 'Български', 'България', 'Bulgarian', '\U0001f1e7\U0001f1ec'), + ('ca', 'Català', '', 'Catalan', '\U0001f310'), ('ca-ES', 'Català', 'Espanya', 'Catalan', '\U0001f1ea\U0001f1f8'), + ('cs', 'Čeština', '', 'Czech', '\U0001f310'), ('cs-CZ', 'Čeština', 'Česko', 'Czech', '\U0001f1e8\U0001f1ff'), + ('da', 'Dansk', '', 'Danish', '\U0001f310'), ('da-DK', 'Dansk', 'Danmark', 'Danish', '\U0001f1e9\U0001f1f0'), ('de', 'Deutsch', '', 'German', '\U0001f310'), ('de-AT', 'Deutsch', 'Österreich', 'German', '\U0001f1e6\U0001f1f9'), ('de-CH', 'Deutsch', 'Schweiz', 'German', '\U0001f1e8\U0001f1ed'), ('de-DE', 'Deutsch', 'Deutschland', 'German', '\U0001f1e9\U0001f1ea'), + ('el', 'Ελληνικά', '', 'Greek', '\U0001f310'), ('el-GR', 'Ελληνικά', 'Ελλάδα', 'Greek', '\U0001f1ec\U0001f1f7'), ('en', 'English', '', 'English', '\U0001f310'), ('en-AU', 'English', 'Australia', 'English', '\U0001f1e6\U0001f1fa'), ('en-CA', 'English', 'Canada', 'English', '\U0001f1e8\U0001f1e6'), ('en-GB', 'English', 'United Kingdom', 'English', '\U0001f1ec\U0001f1e7'), ('en-IE', 'English', 'Ireland', 'English', '\U0001f1ee\U0001f1ea'), + ('en-IN', 'English', 'India', 'English', '\U0001f1ee\U0001f1f3'), ('en-MY', 'English', 'Malaysia', 'English', '\U0001f1f2\U0001f1fe'), ('en-NZ', 'English', 'New Zealand', 'English', '\U0001f1f3\U0001f1ff'), + ('en-PH', 'English', 'Philippines', 'English', '\U0001f1f5\U0001f1ed'), ('en-US', 'English', 'United States', 'English', '\U0001f1fa\U0001f1f8'), + ('en-ZA', 'English', 'South Africa', 'English', '\U0001f1ff\U0001f1e6'), ('es', 'Español', '', 'Spanish', '\U0001f310'), ('es-AR', 'Español', 'Argentina', 'Spanish', '\U0001f1e6\U0001f1f7'), ('es-CL', 'Español', 'Chile', 'Spanish', '\U0001f1e8\U0001f1f1'), ('es-ES', 'Español', 'España', 'Spanish', '\U0001f1ea\U0001f1f8'), ('es-MX', 'Español', 'México', 'Spanish', '\U0001f1f2\U0001f1fd'), + ('es-US', 'Español', 'Estados Unidos', 'Spanish', '\U0001f1fa\U0001f1f8'), + ('et', 'Eesti', '', 'Estonian', '\U0001f310'), ('et-EE', 'Eesti', 'Eesti', 'Estonian', '\U0001f1ea\U0001f1ea'), - ('fa-IR', 'فارسی', 'ایران', 'Persian', '\U0001f1ee\U0001f1f7'), + ('fi', 'Suomi', '', 'Finnish', '\U0001f310'), ('fi-FI', 'Suomi', 'Suomi', 'Finnish', '\U0001f1eb\U0001f1ee'), - ('fil-PH', 'Filipino', 'Pilipinas', 'Filipino', '\U0001f1f5\U0001f1ed'), ('fr', 'Français', '', 'French', '\U0001f310'), ('fr-BE', 'Français', 'Belgique', 'French', '\U0001f1e7\U0001f1ea'), ('fr-CA', 'Français', 'Canada', 'French', '\U0001f1e8\U0001f1e6'), ('fr-CH', 'Français', 'Suisse', 'French', '\U0001f1e8\U0001f1ed'), ('fr-FR', 'Français', 'France', 'French', '\U0001f1eb\U0001f1f7'), - ('he-IL', 'עברית', 'ישראל', 'Hebrew', '\U0001f1ee\U0001f1f1'), - ('hi-IN', 'हिन्दी', 'भारत', 'Hindi', '\U0001f1ee\U0001f1f3'), - ('hr-HR', 'Hrvatski', 'Hrvatska', 'Croatian', '\U0001f1ed\U0001f1f7'), + ('he', 'עברית', '', 'Hebrew', '\U0001f1ee\U0001f1f7'), + ('hi', 'हिन्दी', '', 'Hindi', '\U0001f310'), + ('hr', 'Hrvatski', '', 'Croatian', '\U0001f310'), + ('hu', 'Magyar', '', 'Hungarian', '\U0001f310'), ('hu-HU', 'Magyar', 'Magyarország', 'Hungarian', '\U0001f1ed\U0001f1fa'), + ('id', 'Indonesia', '', 'Indonesian', '\U0001f310'), ('id-ID', 'Indonesia', 'Indonesia', 'Indonesian', '\U0001f1ee\U0001f1e9'), - ('is-IS', 'Íslenska', 'Ísland', 'Icelandic', '\U0001f1ee\U0001f1f8'), + ('is', 'Íslenska', '', 'Icelandic', '\U0001f310'), + ('it', 'Italiano', '', 'Italian', '\U0001f310'), + ('it-CH', 'Italiano', 'Svizzera', 'Italian', '\U0001f1e8\U0001f1ed'), ('it-IT', 'Italiano', 'Italia', 'Italian', '\U0001f1ee\U0001f1f9'), + ('ja', '日本語', '', 'Japanese', '\U0001f310'), ('ja-JP', '日本語', '日本', 'Japanese', '\U0001f1ef\U0001f1f5'), + ('ko', '한국어', '', 'Korean', '\U0001f310'), ('ko-KR', '한국어', '대한민국', 'Korean', '\U0001f1f0\U0001f1f7'), - ('lt-LT', 'Lietuvių', 'Lietuva', 'Lithuanian', '\U0001f1f1\U0001f1f9'), - ('lv-LV', 'Latviešu', 'Latvija', 'Latvian', '\U0001f1f1\U0001f1fb'), + ('lt', 'Lietuvių', '', 'Lithuanian', '\U0001f310'), + ('lv', 'Latviešu', '', 'Latvian', '\U0001f310'), + ('nb', 'Norsk Bokmål', '', 'Norwegian Bokmål', '\U0001f310'), + ('nb-NO', 'Norsk Bokmål', 'Norge', 'Norwegian Bokmål', '\U0001f1f3\U0001f1f4'), ('nl', 'Nederlands', '', 'Dutch', '\U0001f310'), ('nl-BE', 'Nederlands', 'België', 'Dutch', '\U0001f1e7\U0001f1ea'), ('nl-NL', 'Nederlands', 'Nederland', 'Dutch', '\U0001f1f3\U0001f1f1'), - ('no-NO', 'Norsk', '', 'Norwegian (Bokmål)', '\U0001f1f3\U0001f1f4'), + ('pl', 'Polski', '', 'Polish', '\U0001f310'), ('pl-PL', 'Polski', 'Polska', 'Polish', '\U0001f1f5\U0001f1f1'), ('pt', 'Português', '', 'Portuguese', '\U0001f310'), ('pt-BR', 'Português', 'Brasil', 'Portuguese', '\U0001f1e7\U0001f1f7'), ('pt-PT', 'Português', 'Portugal', 'Portuguese', '\U0001f1f5\U0001f1f9'), + ('ro', 'Română', '', 'Romanian', '\U0001f310'), ('ro-RO', 'Română', 'România', 'Romanian', '\U0001f1f7\U0001f1f4'), + ('ru', 'Русский', '', 'Russian', '\U0001f310'), ('ru-RU', 'Русский', 'Россия', 'Russian', '\U0001f1f7\U0001f1fa'), - ('sk-SK', 'Slovenčina', 'Slovensko', 'Slovak', '\U0001f1f8\U0001f1f0'), - ('sl-SI', 'Slovenščina', 'Slovenija', 'Slovenian', '\U0001f1f8\U0001f1ee'), - ('sr-RS', 'Српски', 'Србија', 'Serbian', '\U0001f1f7\U0001f1f8'), + ('sk', 'Slovenčina', '', 'Slovak', '\U0001f310'), + ('sl', 'Slovenščina', '', 'Slovenian', '\U0001f310'), + ('sr', 'Српски', '', 'Serbian', '\U0001f310'), + ('sv', 'Svenska', '', 'Swedish', '\U0001f310'), ('sv-SE', 'Svenska', 'Sverige', 'Swedish', '\U0001f1f8\U0001f1ea'), - ('sw-TZ', 'Kiswahili', 'Tanzania', 'Swahili', '\U0001f1f9\U0001f1ff'), + ('th', 'ไทย', '', 'Thai', '\U0001f310'), ('th-TH', 'ไทย', 'ไทย', 'Thai', '\U0001f1f9\U0001f1ed'), + ('tr', 'Türkçe', '', 'Turkish', '\U0001f310'), ('tr-TR', 'Türkçe', 'Türkiye', 'Turkish', '\U0001f1f9\U0001f1f7'), - ('uk-UA', 'Українська', 'Україна', 'Ukrainian', '\U0001f1fa\U0001f1e6'), - ('vi-VN', 'Tiếng Việt', 'Việt Nam', 'Vietnamese', '\U0001f1fb\U0001f1f3'), + ('uk', 'Українська', '', 'Ukrainian', '\U0001f310'), + ('vi', 'Tiếng Việt', '', 'Vietnamese', '\U0001f310'), ('zh', '中文', '', 'Chinese', '\U0001f310'), ('zh-CN', '中文', '中国', 'Chinese', '\U0001f1e8\U0001f1f3'), - ('zh-HK', '中文', '中國香港', 'Chinese', '\U0001f1ed\U0001f1f0'), + ('zh-HK', '中文', '中國香港特別行政區', 'Chinese', '\U0001f1ed\U0001f1f0'), ('zh-TW', '中文', '台灣', 'Chinese', '\U0001f1f9\U0001f1fc'), ) +''' +A list of five-digit tuples: + +0. SearXNG's internal locale tag (a language or region tag) +1. Name of the language (:py:obj:`babel.core.Locale.get_language_name`) +2. For region tags the name of the region (:py:obj:`babel.core.Locale.get_territory_name`). + Empty string for language tags. +3. English language name (from :py:obj:`babel.core.Locale.english_name`) +4. Unicode flag (emoji) that fits to SearXNG's internal region tag. Languages + are represented by a globe (🌐) + +.. code:: python + + ('en', 'English', '', 'English', '🌐'), + ('en-CA', 'English', 'Canada', 'English', '🇨🇦'), + ('en-US', 'English', 'United States', 'English', '🇺🇸'), + .. + ('fr', 'Français', '', 'French', '🌐'), + ('fr-BE', 'Français', 'Belgique', 'French', '🇧🇪'), + ('fr-CA', 'Français', 'Canada', 'French', '🇨🇦'), + +:meta hide-value: +''' diff --git a/searx/templates/simple/filters/languages.html b/searx/templates/simple/filters/languages.html index 54e07e209..a42a304a5 100644 --- a/searx/templates/simple/filters/languages.html +++ b/searx/templates/simple/filters/languages.html @@ -1,12 +1,12 @@ diff --git a/searx/templates/simple/preferences.html b/searx/templates/simple/preferences.html index 9626b04d4..a0cc8efc2 100644 --- a/searx/templates/simple/preferences.html +++ b/searx/templates/simple/preferences.html @@ -115,10 +115,10 @@ {{ _('Search language') }}

{{- '' -}} {{- '' -}}

diff --git a/searx/utils.py b/searx/utils.py index e6180906b..161983011 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -18,13 +18,11 @@ from urllib.parse import urljoin, urlparse from lxml import html from lxml.etree import ElementBase, XPath, XPathError, XPathSyntaxError, _ElementStringResult, _ElementUnicodeResult -from babel.core import get_global - from searx import settings from searx.data import USER_AGENTS, data_dir from searx.version import VERSION_TAG -from searx.languages import language_codes +from searx.sxng_locales import sxng_locales from searx.exceptions import SearxXPathSyntaxException, SearxEngineXPathException from searx import logger @@ -53,8 +51,8 @@ _LANG_TO_LC_CACHE: Dict[str, Dict[str, str]] = {} _FASTTEXT_MODEL: Optional["fasttext.FastText._FastText"] = None """fasttext model to predict laguage of a search term""" -SEARCH_LANGUAGE_CODES = frozenset([searxng_locale[0].split('-')[0] for searxng_locale in language_codes]) -"""Languages supported by most searxng engines (:py:obj:`searx.languages.language_codes`).""" +SEARCH_LANGUAGE_CODES = frozenset([searxng_locale[0].split('-')[0] for searxng_locale in sxng_locales]) +"""Languages supported by most searxng engines (:py:obj:`searx.sxng_locales.sxng_locales`).""" class _NotSetClass: # pylint: disable=too-few-public-methods @@ -355,102 +353,16 @@ def is_valid_lang(lang) -> Optional[Tuple[bool, str, str]]: is_abbr = len(lang) == 2 lang = lang.lower() if is_abbr: - for l in language_codes: + for l in sxng_locales: if l[0][:2] == lang: return (True, l[0][:2], l[3].lower()) return None - for l in language_codes: + for l in sxng_locales: if l[1].lower() == lang or l[3].lower() == lang: return (True, l[0][:2], l[3].lower()) return None -def _get_lang_to_lc_dict(lang_list: List[str]) -> Dict[str, str]: - key = str(lang_list) - value = _LANG_TO_LC_CACHE.get(key, None) - if value is None: - value = {} - for lang in lang_list: - value.setdefault(lang.split('-')[0], lang) - _LANG_TO_LC_CACHE[key] = value - return value - - -# babel's get_global contains all sorts of miscellaneous locale and territory related data -# see get_global in: https://github.com/python-babel/babel/blob/master/babel/core.py -def _get_from_babel(lang_code: str, key): - match = get_global(key).get(lang_code.replace('-', '_')) - # for some keys, such as territory_aliases, match may be a list - if isinstance(match, str): - return match.replace('_', '-') - return match - - -def _match_language(lang_code: str, lang_list=[], custom_aliases={}) -> Optional[str]: # pylint: disable=W0102 - """auxiliary function to match lang_code in lang_list""" - # replace language code with a custom alias if necessary - if lang_code in custom_aliases: - lang_code = custom_aliases[lang_code] - - if lang_code in lang_list: - return lang_code - - # try to get the most likely country for this language - subtags = _get_from_babel(lang_code, 'likely_subtags') - if subtags: - if subtags in lang_list: - return subtags - subtag_parts = subtags.split('-') - new_code = subtag_parts[0] + '-' + subtag_parts[-1] - if new_code in custom_aliases: - new_code = custom_aliases[new_code] - if new_code in lang_list: - return new_code - - # try to get the any supported country for this language - return _get_lang_to_lc_dict(lang_list).get(lang_code) - - -def match_language( # pylint: disable=W0102 - locale_code, lang_list=[], custom_aliases={}, fallback: Optional[str] = 'en-US' -) -> Optional[str]: - """get the language code from lang_list that best matches locale_code""" - # try to get language from given locale_code - language = _match_language(locale_code, lang_list, custom_aliases) - if language: - return language - - locale_parts = locale_code.split('-') - lang_code = locale_parts[0] - - # if locale_code has script, try matching without it - if len(locale_parts) > 2: - language = _match_language(lang_code + '-' + locale_parts[-1], lang_list, custom_aliases) - if language: - return language - - # try to get language using an equivalent country code - if len(locale_parts) > 1: - country_alias = _get_from_babel(locale_parts[-1], 'territory_aliases') - if country_alias: - language = _match_language(lang_code + '-' + country_alias[0], lang_list, custom_aliases) - if language: - return language - - # try to get language using an equivalent language code - alias = _get_from_babel(lang_code, 'language_aliases') - if alias: - language = _match_language(alias, lang_list, custom_aliases) - if language: - return language - - if lang_code != locale_code: - # try to get language from given language without giving the country - language = _match_language(lang_code, lang_list, custom_aliases) - - return language or fallback - - def load_module(filename: str, module_dir: str) -> types.ModuleType: modname = splitext(filename)[0] modpath = join(module_dir, filename) diff --git a/searx/webapp.py b/searx/webapp.py index 95c33f704..4ed6c2eb7 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -89,7 +89,6 @@ from searx.utils import ( html_to_text, gen_useragent, dict_subset, - match_language, ) from searx.version import VERSION_STRING, GIT_URL, GIT_BRANCH from searx.query import RawTextQuery @@ -117,12 +116,13 @@ from searx.locales import ( RTL_LOCALES, localeselector, locales_initialize, + match_locale, ) # renaming names from searx imports ... from searx.autocomplete import search_autocomplete, backends as autocomplete_backends -from searx.languages import language_codes as languages from searx.redisdb import initialize as redis_initialize +from searx.sxng_locales import sxng_locales from searx.search import SearchWithPlugins, initialize as search_initialize from searx.network import stream as http_stream, set_context_network_name from searx.search.checker import get_result as checker_get_result @@ -227,7 +227,7 @@ def _get_browser_language(req, lang_list): if '-' in lang: lang_parts = lang.split('-') lang = "{}-{}".format(lang_parts[0], lang_parts[-1].upper()) - locale = match_language(lang, lang_list, fallback=None) + locale = match_locale(lang, lang_list, fallback=None) if locale is not None: return locale return 'en' @@ -407,7 +407,7 @@ def get_client_settings(): def render(template_name: str, **kwargs): - + # pylint: disable=too-many-statements kwargs['client_settings'] = str( base64.b64encode( bytes( @@ -438,17 +438,20 @@ def render(template_name: str, **kwargs): kwargs['OTHER_CATEGORY'] = OTHER_CATEGORY # i18n - kwargs['language_codes'] = [l for l in languages if l[0] in settings['search']['languages']] + kwargs['sxng_locales'] = [l for l in sxng_locales if l[0] in settings['search']['languages']] locale = request.preferences.get_value('locale') kwargs['locale_rfc5646'] = _get_locale_rfc5646(locale) if locale in RTL_LOCALES and 'rtl' not in kwargs: kwargs['rtl'] = True + if 'current_language' not in kwargs: - kwargs['current_language'] = match_language( - request.preferences.get_value('language'), settings['search']['languages'] - ) + _locale = request.preferences.get_value('language') + if _locale in ('auto', 'all'): + kwargs['current_language'] = _locale + else: + kwargs['current_language'] = match_locale(_locale, settings['search']['languages']) # values from settings kwargs['search_formats'] = [x for x in settings['search']['formats'] if x != 'html'] @@ -810,6 +813,13 @@ def search(): ) ) + if search_query.lang in ('auto', 'all'): + current_language = search_query.lang + else: + current_language = match_locale( + search_query.lang, settings['search']['languages'], fallback=request.preferences.get_value("language") + ) + # search_query.lang contains the user choice (all, auto, en, ...) # when the user choice is "auto", search.search_query.lang contains the detected language # otherwise it is equals to search_query.lang @@ -832,12 +842,8 @@ def search(): result_container.unresponsive_engines ), current_locale = request.preferences.get_value("locale"), - current_language = match_language( - search_query.lang, - settings['search']['languages'], - fallback=request.preferences.get_value("language") - ), - search_language = match_language( + current_language = current_language, + search_language = match_locale( search.search_query.lang, settings['search']['languages'], fallback=request.preferences.get_value("language") @@ -907,16 +913,11 @@ def autocompleter(): # and there is a query part if len(raw_text_query.autocomplete_list) == 0 and len(sug_prefix) > 0: - # get language from cookie - language = request.preferences.get_value('language') - if not language or language == 'all': - language = 'en' - else: - language = language.split('-')[0] + # get SearXNG's locale and autocomplete backend from cookie + sxng_locale = request.preferences.get_value('language') + backend_name = request.preferences.get_value('autocomplete') - # run autocompletion - raw_results = search_autocomplete(request.preferences.get_value('autocomplete'), sug_prefix, language) - for result in raw_results: + for result in search_autocomplete(backend_name, sug_prefix, sxng_locale): # attention: this loop will change raw_text_query object and this is # the reason why the sug_prefix was stored before (see above) if result != sug_prefix: @@ -1001,7 +1002,9 @@ def preferences(): 'rate80': rate80, 'rate95': rate95, 'warn_timeout': e.timeout > settings['outgoing']['request_timeout'], - 'supports_selected_language': _is_selected_language_supported(e, request.preferences), + 'supports_selected_language': e.traits.is_locale_supported( + str(request.preferences.get_value('language') or 'all') + ), 'result_count': result_count, } # end of stats @@ -1052,7 +1055,9 @@ def preferences(): # supports supports = {} for _, e in filtered_engines.items(): - supports_selected_language = _is_selected_language_supported(e, request.preferences) + supports_selected_language = e.traits.is_locale_supported( + str(request.preferences.get_value('language') or 'all') + ) safesearch = e.safesearch time_range_support = e.time_range_support for checker_test_name in checker_results.get(e.name, {}).get('errors', {}): @@ -1099,16 +1104,6 @@ def preferences(): ) -def _is_selected_language_supported(engine, preferences: Preferences): # pylint: disable=redefined-outer-name - language = preferences.get_value('language') - if language == 'all': - return True - x = match_language( - language, getattr(engine, 'supported_languages', []), getattr(engine, 'language_aliases', {}), None - ) - return bool(x) - - @app.route('/image_proxy', methods=['GET']) def image_proxy(): # pylint: disable=too-many-return-statements, too-many-branches @@ -1327,10 +1322,7 @@ def config(): if not request.preferences.validate_token(engine): continue - supported_languages = engine.supported_languages - if isinstance(engine.supported_languages, dict): - supported_languages = list(engine.supported_languages.keys()) - + _languages = engine.traits.languages.keys() _engines.append( { 'name': name, @@ -1339,7 +1331,8 @@ def config(): 'enabled': not engine.disabled, 'paging': engine.paging, 'language_support': engine.language_support, - 'supported_languages': supported_languages, + 'languages': list(_languages), + 'regions': list(engine.traits.regions.keys()), 'safesearch': engine.safesearch, 'time_range_support': engine.time_range_support, 'timeout': engine.timeout, diff --git a/searx/webutils.py b/searx/webutils.py index 6c023ebc3..e62b0d695 100644 --- a/searx/webutils.py +++ b/searx/webutils.py @@ -1,4 +1,6 @@ # -*- coding: utf-8 -*- +from __future__ import annotations + import os import pathlib import csv @@ -8,7 +10,7 @@ import re import inspect import itertools from datetime import datetime, timedelta -from typing import Iterable, List, Tuple, Dict +from typing import Iterable, List, Tuple, Dict, TYPE_CHECKING from io import StringIO from codecs import getincrementalencoder @@ -16,7 +18,10 @@ from codecs import getincrementalencoder from flask_babel import gettext, format_date from searx import logger, settings -from searx.engines import Engine, OTHER_CATEGORY +from searx.engines import OTHER_CATEGORY + +if TYPE_CHECKING: + from searx.enginelib import Engine VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$') diff --git a/searxng_extra/update/update_engine_descriptions.py b/searxng_extra/update/update_engine_descriptions.py index 6052bf084..66bc303db 100755 --- a/searxng_extra/update/update_engine_descriptions.py +++ b/searxng_extra/update/update_engine_descriptions.py @@ -18,8 +18,8 @@ from os.path import join from lxml.html import fromstring from searx.engines import wikidata, set_loggers -from searx.utils import extract_text, match_language -from searx.locales import LOCALE_NAMES, locales_initialize +from searx.utils import extract_text +from searx.locales import LOCALE_NAMES, locales_initialize, match_locale from searx import searx_dir from searx.utils import gen_useragent, detect_language import searx.search @@ -225,9 +225,9 @@ def fetch_website_description(engine_name, website): fetched_lang, desc = get_website_description(website, lang, WIKIPEDIA_LANGUAGES[lang]) if fetched_lang is None or desc is None: continue - matched_lang = match_language(fetched_lang, LANGUAGES, fallback=None) + matched_lang = match_locale(fetched_lang, LANGUAGES, fallback=None) if matched_lang is None: - fetched_wikipedia_lang = match_language(fetched_lang, WIKIPEDIA_LANGUAGES.values(), fallback=None) + fetched_wikipedia_lang = match_locale(fetched_lang, WIKIPEDIA_LANGUAGES.values(), fallback=None) matched_lang = wikipedia_languages_r.get(fetched_wikipedia_lang) if matched_lang is not None: update_description(engine_name, matched_lang, desc, website, replace=False) diff --git a/searxng_extra/update/update_engine_traits.py b/searxng_extra/update/update_engine_traits.py new file mode 100755 index 000000000..7449912dc --- /dev/null +++ b/searxng_extra/update/update_engine_traits.py @@ -0,0 +1,198 @@ +#!/usr/bin/env python +# lint: pylint +# SPDX-License-Identifier: AGPL-3.0-or-later +"""Update :py:obj:`searx.enginelib.traits.EngineTraitsMap` and :origin:`searx/languages.py` + +:py:obj:`searx.enginelib.traits.EngineTraitsMap.ENGINE_TRAITS_FILE`: + Persistence of engines traits, fetched from the engines. + +:origin:`searx/languages.py` + Is generated from intersecting each engine's supported traits. + +The script :origin:`searxng_extra/update/update_engine_traits.py` is called in +the :origin:`CI Update data ... <.github/workflows/data-update.yml>` + +""" + +# pylint: disable=invalid-name +from unicodedata import lookup +from pathlib import Path +from pprint import pformat +import babel + +from searx import settings, searx_dir +from searx import network +from searx.engines import load_engines +from searx.enginelib.traits import EngineTraitsMap + +# Output files. +languages_file = Path(searx_dir) / 'sxng_locales.py' +languages_file_header = """\ +# -*- coding: utf-8 -*- +'''List of SearXNG's locale codes. + +This file is generated automatically by:: + + ./manage pyenv.cmd searxng_extra/update/update_engine_traits.py +''' + +sxng_locales = ( +""" +languages_file_footer = """, +) +''' +A list of five-digit tuples: + +0. SearXNG's internal locale tag (a language or region tag) +1. Name of the language (:py:obj:`babel.core.Locale.get_language_name`) +2. For region tags the name of the region (:py:obj:`babel.core.Locale.get_territory_name`). + Empty string for language tags. +3. English language name (from :py:obj:`babel.core.Locale.english_name`) +4. Unicode flag (emoji) that fits to SearXNG's internal region tag. Languages + are represented by a globe (\U0001F310) + +.. code:: python + + ('en', 'English', '', 'English', '\U0001f310'), + ('en-CA', 'English', 'Canada', 'English', '\U0001f1e8\U0001f1e6'), + ('en-US', 'English', 'United States', 'English', '\U0001f1fa\U0001f1f8'), + .. + ('fr', 'Français', '', 'French', '\U0001f310'), + ('fr-BE', 'Français', 'Belgique', 'French', '\U0001f1e7\U0001f1ea'), + ('fr-CA', 'Français', 'Canada', 'French', '\U0001f1e8\U0001f1e6'), + +:meta hide-value: +''' +""" + + +lang2emoji = { + 'ha': '\U0001F1F3\U0001F1EA', # Hausa / Niger + 'bs': '\U0001F1E7\U0001F1E6', # Bosnian / Bosnia & Herzegovina + 'jp': '\U0001F1EF\U0001F1F5', # Japanese + 'ua': '\U0001F1FA\U0001F1E6', # Ukrainian + 'he': '\U0001F1EE\U0001F1F7', # Hebrew +} + + +def main(): + load_engines(settings['engines']) + # traits_map = EngineTraitsMap.from_data() + traits_map = fetch_traits_map() + sxng_tag_list = filter_locales(traits_map) + write_languages_file(sxng_tag_list) + + +def fetch_traits_map(): + """Fetchs supported languages for each engine and writes json file with those.""" + network.set_timeout_for_thread(10.0) + + def log(msg): + print(msg) + + traits_map = EngineTraitsMap.fetch_traits(log=log) + print("fetched properties from %s engines" % len(traits_map)) + print("write json file: %s" % traits_map.ENGINE_TRAITS_FILE) + traits_map.save_data() + return traits_map + + +def filter_locales(traits_map: EngineTraitsMap): + """Filter language & region tags by a threshold.""" + + min_eng_per_region = 11 + min_eng_per_lang = 13 + + _ = {} + for eng in traits_map.values(): + for reg in eng.regions.keys(): + _[reg] = _.get(reg, 0) + 1 + + regions = set(k for k, v in _.items() if v >= min_eng_per_region) + lang_from_region = set(k.split('-')[0] for k in regions) + + _ = {} + for eng in traits_map.values(): + for lang in eng.languages.keys(): + # ignore script types like zh_Hant, zh_Hans or sr_Latin, pa_Arab (they + # already counted by existence of 'zh' or 'sr', 'pa') + if '_' in lang: + # print("ignore %s" % lang) + continue + _[lang] = _.get(lang, 0) + 1 + + languages = set(k for k, v in _.items() if v >= min_eng_per_lang) + + sxng_tag_list = set() + sxng_tag_list.update(regions) + sxng_tag_list.update(lang_from_region) + sxng_tag_list.update(languages) + + return sxng_tag_list + + +def write_languages_file(sxng_tag_list): + + language_codes = [] + + for sxng_tag in sorted(sxng_tag_list): + sxng_locale: babel.Locale = babel.Locale.parse(sxng_tag, sep='-') + + flag = get_unicode_flag(sxng_locale) or '' + + item = ( + sxng_tag, + sxng_locale.get_language_name().title(), + sxng_locale.get_territory_name() or '', + sxng_locale.english_name.split(' (')[0], + UnicodeEscape(flag), + ) + + language_codes.append(item) + + language_codes = tuple(language_codes) + + with open(languages_file, 'w', encoding='utf-8') as new_file: + file_content = "{header} {language_codes}{footer}".format( + header=languages_file_header, + language_codes=pformat(language_codes, width=120, indent=4)[1:-1], + footer=languages_file_footer, + ) + new_file.write(file_content) + new_file.close() + + +class UnicodeEscape(str): + """Escape unicode string in :py:obj:`pprint.pformat`""" + + def __repr__(self): + return "'" + "".join([chr(c) for c in self.encode('unicode-escape')]) + "'" + + +def get_unicode_flag(locale: babel.Locale): + """Determine a unicode flag (emoji) that fits to the ``locale``""" + + emoji = lang2emoji.get(locale.language) + if emoji: + return emoji + + if not locale.territory: + return '\U0001F310' + + emoji = lang2emoji.get(locale.territory.lower()) + if emoji: + return emoji + + try: + c1 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + locale.territory[0]) + c2 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + locale.territory[1]) + # print("OK : %s --> %s%s" % (locale, c1, c2)) + except KeyError as exc: + print("ERROR: %s --> %s" % (locale, exc)) + return None + + return c1 + c2 + + +if __name__ == "__main__": + main() diff --git a/searxng_extra/update/update_languages.py b/searxng_extra/update/update_languages.py deleted file mode 100755 index 87b13b276..000000000 --- a/searxng_extra/update/update_languages.py +++ /dev/null @@ -1,313 +0,0 @@ -#!/usr/bin/env python -# lint: pylint - -# SPDX-License-Identifier: AGPL-3.0-or-later -"""This script generates languages.py from intersecting each engine's supported -languages. - -Output files: :origin:`searx/data/engines_languages.json` and -:origin:`searx/languages.py` (:origin:`CI Update data ... -<.github/workflows/data-update.yml>`). - -""" - -# pylint: disable=invalid-name -from unicodedata import lookup -import json -from pathlib import Path -from pprint import pformat -from babel import Locale, UnknownLocaleError -from babel.languages import get_global -from babel.core import parse_locale - -from searx import settings, searx_dir -from searx.engines import load_engines, engines -from searx.network import set_timeout_for_thread - -# Output files. -engines_languages_file = Path(searx_dir) / 'data' / 'engines_languages.json' -languages_file = Path(searx_dir) / 'languages.py' - - -# Fetches supported languages for each engine and writes json file with those. -def fetch_supported_languages(): - set_timeout_for_thread(10.0) - - engines_languages = {} - names = list(engines) - names.sort() - - for engine_name in names: - if hasattr(engines[engine_name], 'fetch_supported_languages'): - engines_languages[engine_name] = engines[engine_name].fetch_supported_languages() - print("fetched %s languages from engine %s" % (len(engines_languages[engine_name]), engine_name)) - if type(engines_languages[engine_name]) == list: # pylint: disable=unidiomatic-typecheck - engines_languages[engine_name] = sorted(engines_languages[engine_name]) - - print("fetched languages from %s engines" % len(engines_languages)) - - # write json file - with open(engines_languages_file, 'w', encoding='utf-8') as f: - json.dump(engines_languages, f, indent=2, sort_keys=True) - - return engines_languages - - -# Get babel Locale object from lang_code if possible. -def get_locale(lang_code): - try: - locale = Locale.parse(lang_code, sep='-') - return locale - except (UnknownLocaleError, ValueError): - return None - - -lang2emoji = { - 'ha': '\U0001F1F3\U0001F1EA', # Hausa / Niger - 'bs': '\U0001F1E7\U0001F1E6', # Bosnian / Bosnia & Herzegovina - 'jp': '\U0001F1EF\U0001F1F5', # Japanese - 'ua': '\U0001F1FA\U0001F1E6', # Ukrainian - 'he': '\U0001F1EE\U0001F1F7', # Hebrew -} - - -def get_unicode_flag(lang_code): - """Determine a unicode flag (emoji) that fits to the ``lang_code``""" - - emoji = lang2emoji.get(lang_code.lower()) - if emoji: - return emoji - - if len(lang_code) == 2: - return '\U0001F310' - - language = territory = script = variant = '' - try: - language, territory, script, variant = parse_locale(lang_code, '-') - except ValueError as exc: - print(exc) - - # https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2 - if not territory: - # https://www.unicode.org/emoji/charts/emoji-list.html#country-flag - emoji = lang2emoji.get(language) - if not emoji: - print( - "%s --> language: %s / territory: %s / script: %s / variant: %s" - % (lang_code, language, territory, script, variant) - ) - return emoji - - emoji = lang2emoji.get(territory.lower()) - if emoji: - return emoji - - try: - c1 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + territory[0]) - c2 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + territory[1]) - # print("%s --> territory: %s --> %s%s" %(lang_code, territory, c1, c2 )) - except KeyError as exc: - print("%s --> territory: %s --> %s" % (lang_code, territory, exc)) - return None - - return c1 + c2 - - -def get_territory_name(lang_code): - country_name = None - locale = get_locale(lang_code) - try: - if locale is not None: - country_name = locale.get_territory_name() - except FileNotFoundError as exc: - print("ERROR: %s --> %s" % (locale, exc)) - return country_name - - -# Join all language lists. -def join_language_lists(engines_languages): - language_list = {} - for engine_name in engines_languages: - for lang_code in engines_languages[engine_name]: - - # apply custom fixes if necessary - if lang_code in getattr(engines[engine_name], 'language_aliases', {}).values(): - lang_code = next( - lc for lc, alias in engines[engine_name].language_aliases.items() if lang_code == alias - ) - - locale = get_locale(lang_code) - - # ensure that lang_code uses standard language and country codes - if locale and locale.territory: - lang_code = "{lang}-{country}".format(lang=locale.language, country=locale.territory) - short_code = lang_code.split('-')[0] - - # add language without country if not in list - if short_code not in language_list: - if locale: - # get language's data from babel's Locale object - language_name = locale.get_language_name().title() - english_name = locale.english_name.split(' (')[0] - elif short_code in engines_languages['wikipedia']: - # get language's data from wikipedia if not known by babel - language_name = engines_languages['wikipedia'][short_code]['name'] - english_name = engines_languages['wikipedia'][short_code]['english_name'] - else: - language_name = None - english_name = None - - # add language to list - language_list[short_code] = { - 'name': language_name, - 'english_name': english_name, - 'counter': set(), - 'countries': {}, - } - - # add language with country if not in list - if lang_code != short_code and lang_code not in language_list[short_code]['countries']: - country_name = '' - if locale: - # get country name from babel's Locale object - try: - country_name = locale.get_territory_name() - except FileNotFoundError as exc: - print("ERROR: %s --> %s" % (locale, exc)) - locale = None - - language_list[short_code]['countries'][lang_code] = { - 'country_name': country_name, - 'counter': set(), - } - - # count engine for both language_country combination and language alone - language_list[short_code]['counter'].add(engine_name) - if lang_code != short_code: - language_list[short_code]['countries'][lang_code]['counter'].add(engine_name) - - return language_list - - -# Filter language list so it only includes the most supported languages and countries -def filter_language_list(all_languages): - min_engines_per_lang = 12 - min_engines_per_country = 7 - # pylint: disable=consider-using-dict-items, consider-iterating-dictionary - main_engines = [ - engine_name - for engine_name in engines.keys() - if 'general' in engines[engine_name].categories - and engines[engine_name].supported_languages - and not engines[engine_name].disabled - ] - - # filter list to include only languages supported by most engines or all default general engines - filtered_languages = { - code: lang - for code, lang in all_languages.items() - if ( - len(lang['counter']) >= min_engines_per_lang - or all(main_engine in lang['counter'] for main_engine in main_engines) - ) - } - - def _copy_lang_data(lang, country_name=None): - new_dict = {} - new_dict['name'] = all_languages[lang]['name'] - new_dict['english_name'] = all_languages[lang]['english_name'] - if country_name: - new_dict['country_name'] = country_name - return new_dict - - # for each language get country codes supported by most engines or at least one country code - filtered_languages_with_countries = {} - for lang, lang_data in filtered_languages.items(): - countries = lang_data['countries'] - filtered_countries = {} - - # get language's country codes with enough supported engines - for lang_country, country_data in countries.items(): - if len(country_data['counter']) >= min_engines_per_country: - filtered_countries[lang_country] = _copy_lang_data(lang, country_data['country_name']) - - # add language without countries too if there's more than one country to choose from - if len(filtered_countries) > 1: - filtered_countries[lang] = _copy_lang_data(lang, None) - elif len(filtered_countries) == 1: - lang_country = next(iter(filtered_countries)) - - # if no country has enough engines try to get most likely country code from babel - if not filtered_countries: - lang_country = None - subtags = get_global('likely_subtags').get(lang) - if subtags: - country_code = subtags.split('_')[-1] - if len(country_code) == 2: - lang_country = "{lang}-{country}".format(lang=lang, country=country_code) - - if lang_country: - filtered_countries[lang_country] = _copy_lang_data(lang, None) - else: - filtered_countries[lang] = _copy_lang_data(lang, None) - - filtered_languages_with_countries.update(filtered_countries) - - return filtered_languages_with_countries - - -class UnicodeEscape(str): - """Escape unicode string in :py:obj:`pprint.pformat`""" - - def __repr__(self): - return "'" + "".join([chr(c) for c in self.encode('unicode-escape')]) + "'" - - -# Write languages.py. -def write_languages_file(languages): - file_headers = ( - "# -*- coding: utf-8 -*-", - "# list of language codes", - "# this file is generated automatically by utils/fetch_languages.py", - "language_codes = (\n", - ) - - language_codes = [] - - for code in sorted(languages): - - name = languages[code]['name'] - if name is None: - print("ERROR: languages['%s'] --> %s" % (code, languages[code])) - continue - - flag = get_unicode_flag(code) or '' - item = ( - code, - languages[code]['name'].split(' (')[0], - get_territory_name(code) or '', - languages[code].get('english_name') or '', - UnicodeEscape(flag), - ) - - language_codes.append(item) - - language_codes = tuple(language_codes) - - with open(languages_file, 'w', encoding='utf-8') as new_file: - file_content = "{file_headers} {language_codes},\n)\n".format( - # fmt: off - file_headers = '\n'.join(file_headers), - language_codes = pformat(language_codes, indent=4)[1:-1] - # fmt: on - ) - new_file.write(file_content) - new_file.close() - - -if __name__ == "__main__": - load_engines(settings['engines']) - _engines_languages = fetch_supported_languages() - _all_languages = join_language_lists(_engines_languages) - _filtered_languages = filter_language_list(_all_languages) - write_languages_file(_filtered_languages) diff --git a/searxng_extra/update/update_osm_keys_tags.py b/searxng_extra/update/update_osm_keys_tags.py index 72197498d..72f3d61c5 100755 --- a/searxng_extra/update/update_osm_keys_tags.py +++ b/searxng_extra/update/update_osm_keys_tags.py @@ -50,7 +50,7 @@ from pathlib import Path from searx import searx_dir from searx.network import set_timeout_for_thread from searx.engines import wikidata, set_loggers -from searx.languages import language_codes +from searx.sxng_locales import sxng_locales from searx.engines.openstreetmap import get_key_rank, VALUE_TO_LINK set_loggers(wikidata, 'wikidata') @@ -76,7 +76,7 @@ GROUP BY ?key ?item ?itemLabel ORDER BY ?key ?item ?itemLabel """ -LANGUAGES = [l[0].lower() for l in language_codes] +LANGUAGES = [l[0].lower() for l in sxng_locales] PRESET_KEYS = { ('wikidata',): {'en': 'Wikidata'}, diff --git a/tests/unit/test_locales.py b/tests/unit/test_locales.py new file mode 100644 index 000000000..61561c17b --- /dev/null +++ b/tests/unit/test_locales.py @@ -0,0 +1,111 @@ +# -*- coding: utf-8 -*- +# SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +"""Test some code from module :py:obj:`searx.locales`""" + +from searx import locales +from searx.sxng_locales import sxng_locales +from tests import SearxTestCase + + +class TestLocales(SearxTestCase): + """Implemented tests: + + - :py:obj:`searx.locales.match_locale` + """ + + def test_match_locale(self): + + locale_tag_list = [x[0] for x in sxng_locales] + + # Test SearXNG search languages + + self.assertEqual(locales.match_locale('de', locale_tag_list), 'de') + self.assertEqual(locales.match_locale('fr', locale_tag_list), 'fr') + self.assertEqual(locales.match_locale('zh', locale_tag_list), 'zh') + + # Test SearXNG search regions + + self.assertEqual(locales.match_locale('ca-es', locale_tag_list), 'ca-ES') + self.assertEqual(locales.match_locale('de-at', locale_tag_list), 'de-AT') + self.assertEqual(locales.match_locale('de-de', locale_tag_list), 'de-DE') + self.assertEqual(locales.match_locale('en-UK', locale_tag_list), 'en-GB') + self.assertEqual(locales.match_locale('fr-be', locale_tag_list), 'fr-BE') + self.assertEqual(locales.match_locale('fr-be', locale_tag_list), 'fr-BE') + self.assertEqual(locales.match_locale('fr-ca', locale_tag_list), 'fr-CA') + self.assertEqual(locales.match_locale('fr-ch', locale_tag_list), 'fr-CH') + self.assertEqual(locales.match_locale('zh-cn', locale_tag_list), 'zh-CN') + self.assertEqual(locales.match_locale('zh-tw', locale_tag_list), 'zh-TW') + self.assertEqual(locales.match_locale('zh-hk', locale_tag_list), 'zh-HK') + + # Test language script code + + self.assertEqual(locales.match_locale('zh-hans', locale_tag_list), 'zh-CN') + self.assertEqual(locales.match_locale('zh-hans-cn', locale_tag_list), 'zh-CN') + self.assertEqual(locales.match_locale('zh-hant', locale_tag_list), 'zh-TW') + self.assertEqual(locales.match_locale('zh-hant-tw', locale_tag_list), 'zh-TW') + + # Test individual locale lists + + self.assertEqual(locales.match_locale('es', [], fallback='fallback'), 'fallback') + + self.assertEqual(locales.match_locale('de', ['de-CH', 'de-DE']), 'de-DE') + self.assertEqual(locales.match_locale('de', ['de-CH', 'de-DE']), 'de-DE') + self.assertEqual(locales.match_locale('es', ['ES']), 'ES') + self.assertEqual(locales.match_locale('es', ['es-AR', 'es-ES', 'es-MX']), 'es-ES') + self.assertEqual(locales.match_locale('es-AR', ['es-AR', 'es-ES', 'es-MX']), 'es-AR') + self.assertEqual(locales.match_locale('es-CO', ['es-AR', 'es-ES']), 'es-ES') + self.assertEqual(locales.match_locale('es-CO', ['es-AR']), 'es-AR') + + # Tests from the commit message of 9ae409a05a + + # Assumption: + # A. When a user selects a language the results should be optimized according to + # the selected language. + # + # B. When user selects a language and a territory the results should be + # optimized with first priority on territory and second on language. + + # Assume we have an engine that supports the follwoing locales: + locale_tag_list = ['zh-CN', 'zh-HK', 'nl-BE', 'fr-CA'] + + # Examples (Assumption A.) + # ------------------------ + + # A user selects region 'zh-TW' which should end in zh_HK. + # hint: CN is 'Hans' and HK ('Hant') fits better to TW ('Hant') + self.assertEqual(locales.match_locale('zh-TW', locale_tag_list), 'zh-HK') + + # A user selects only the language 'zh' which should end in CN + self.assertEqual(locales.match_locale('zh', locale_tag_list), 'zh-CN') + + # A user selects only the language 'fr' which should end in fr_CA + self.assertEqual(locales.match_locale('fr', locale_tag_list), 'fr-CA') + + # The difference in priority on the territory is best shown with a + # engine that supports the following locales: + locale_tag_list = ['fr-FR', 'fr-CA', 'en-GB', 'nl-BE'] + + # A user selects only a language + self.assertEqual(locales.match_locale('en', locale_tag_list), 'en-GB') + + # hint: the engine supports fr_FR and fr_CA since no territory is given, + # fr_FR takes priority .. + self.assertEqual(locales.match_locale('fr', locale_tag_list), 'fr-FR') + + # Examples (Assumption B.) + # ------------------------ + + # A user selects region 'fr-BE' which should end in nl-BE + self.assertEqual(locales.match_locale('fr-BE', locale_tag_list), 'nl-BE') + + # If the user selects a language and there are two locales like the + # following: + + locale_tag_list = ['fr-BE', 'fr-CH'] + + # The get_engine_locale selects the locale by looking at the "population + # percent" and this percentage has an higher amount in BE (68.%) + # compared to CH (21%) + + self.assertEqual(locales.match_locale('fr', locale_tag_list), 'fr-BE') diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 6f51f1ee3..2ad4593a1 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -87,39 +87,6 @@ class TestUtils(SearxTestCase): html = '

Lorem ipsumdolor sit amet

' self.assertEqual(utils.html_to_text(html), "Lorem ipsum") - def test_match_language(self): - self.assertEqual(utils.match_language('es', ['es']), 'es') - self.assertEqual(utils.match_language('es', [], fallback='fallback'), 'fallback') - self.assertEqual(utils.match_language('ja', ['jp'], {'ja': 'jp'}), 'jp') - - # handle script tags - self.assertEqual(utils.match_language('zh-CN', ['zh-Hans-CN', 'zh-Hant-TW']), 'zh-Hans-CN') - self.assertEqual(utils.match_language('zh-TW', ['zh-Hans-CN', 'zh-Hant-TW']), 'zh-Hant-TW') - self.assertEqual(utils.match_language('zh-Hans-CN', ['zh-CN', 'zh-TW']), 'zh-CN') - self.assertEqual(utils.match_language('zh-Hant-TW', ['zh-CN', 'zh-TW']), 'zh-TW') - self.assertEqual(utils.match_language('zh-Hans', ['zh-CN', 'zh-TW', 'zh-HK']), 'zh-CN') - self.assertEqual(utils.match_language('zh-Hant', ['zh-CN', 'zh-TW', 'zh-HK']), 'zh-TW') - - aliases = {'en-GB': 'en-UK', 'he': 'iw'} - - # guess country - self.assertEqual(utils.match_language('de-DE', ['de']), 'de') - self.assertEqual(utils.match_language('de', ['de-DE']), 'de-DE') - self.assertEqual(utils.match_language('es-CO', ['es-AR', 'es-ES', 'es-MX']), 'es-ES') - self.assertEqual(utils.match_language('es-CO', ['es-MX']), 'es-MX') - self.assertEqual(utils.match_language('en-UK', ['en-AU', 'en-GB', 'en-US']), 'en-GB') - self.assertEqual(utils.match_language('en-GB', ['en-AU', 'en-UK', 'en-US'], aliases), 'en-UK') - - # language aliases - self.assertEqual(utils.match_language('iw', ['he']), 'he') - self.assertEqual(utils.match_language('he', ['iw'], aliases), 'iw') - self.assertEqual(utils.match_language('iw-IL', ['he']), 'he') - self.assertEqual(utils.match_language('he-IL', ['iw'], aliases), 'iw') - self.assertEqual(utils.match_language('iw', ['he-IL']), 'he-IL') - self.assertEqual(utils.match_language('he', ['iw-IL'], aliases), 'iw-IL') - self.assertEqual(utils.match_language('iw-IL', ['he-IL']), 'he-IL') - self.assertEqual(utils.match_language('he-IL', ['iw-IL'], aliases), 'iw-IL') - def test_ecma_unscape(self): self.assertEqual(utils.ecma_unescape('text%20with%20space'), 'text with space') self.assertEqual(utils.ecma_unescape('text using %xx: %F3'), 'text using %xx: ó') diff --git a/utils/templates/etc/searxng/settings.yml b/utils/templates/etc/searxng/settings.yml index d8b659b1a..04d25b9d3 100644 --- a/utils/templates/etc/searxng/settings.yml +++ b/utils/templates/etc/searxng/settings.yml @@ -52,9 +52,6 @@ enabled_plugins: engines: - - name: google - use_mobile_ui: true - # - name: fdroid # disabled: false #