forked from zaclys/searxng
[mod] replace utils.match_language by locales.match_locale
This patch replaces the *full of magic* ``utils.match_language`` function with ``locales.match_locale``. The ``locales.match_locale`` function is based on ``locales.build_engine_locales``, introduced in 9ae409a0 [1].
In the past SearXNG only supported searching by language, not by region. This changed a long time ago: regions were added to the SearXNG core, but not to the engines. ``utils.match_language`` was the function that handled the different aspects of languages/regions in the SearXNG core and the supported *languages* of the engines. It did so with some magic and works well for most use cases, but fails in some edge cases. To resolve the competition between languages and regions in the SearXNG core, ``locales.build_engine_locales`` was introduced in 9ae409a0 [1].
With the last patches all engines have been migrated to ``fetch_traits`` and to a language/region concept that is based on ``locales.build_engine_locales``. To summarize: there is no longer a need for ``utils.match_language``. [1] https://github.com/searxng/searxng/pull/1652 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
4d4aa13e1f
commit
16f0db4493
108
searx/locales.py
108
searx/locales.py
|
@ -4,7 +4,7 @@
|
||||||
"""Initialize :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`.
|
"""Initialize :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import Set
|
from typing import Set, Optional, List
|
||||||
import os
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
|
|
||||||
|
@ -177,6 +177,17 @@ def language_tag(locale: babel.Locale) -> str:
|
||||||
return sxng_lang
|
return sxng_lang
|
||||||
|
|
||||||
|
|
||||||
|
def get_locale(locale_tag: str) -> Optional[babel.Locale]:
    """Returns a :py:obj:`babel.Locale` object parsed from argument
    ``locale_tag``.

    :param str locale_tag: locale tag using ``-`` as separator (``de``,
        ``de-DE``, ``zh-Hans-CN`` ..)

    :return: the parsed locale, or ``None`` if ``locale_tag`` is unknown to
        babel or is not a well-formed locale identifier.
    """
    try:
        return babel.Locale.parse(locale_tag, sep='-')
    except (babel.core.UnknownLocaleError, ValueError):
        # UnknownLocaleError: babel has no locale data for this tag.
        # ValueError: the tag is not a syntactically valid locale identifier
        # (tags may come from untrusted input, e.g. HTTP request headers).
        return None
|
||||||
|
|
||||||
|
|
||||||
def get_offical_locales(
|
def get_offical_locales(
|
||||||
territory: str, languages=None, regional: bool = False, de_facto: bool = True
|
territory: str, languages=None, regional: bool = False, de_facto: bool = True
|
||||||
) -> Set[babel.Locale]:
|
) -> Set[babel.Locale]:
|
||||||
|
@ -363,3 +374,98 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
|
||||||
engine_locale = default
|
engine_locale = default
|
||||||
|
|
||||||
return default
|
return default
|
||||||
|
|
||||||
|
|
||||||
|
def match_locale(searxng_locale: str, locale_tag_list: List[str], fallback: Optional[str] = None) -> Optional[str]:
    """Pick the tag from ``locale_tag_list`` that fits best to
    ``searxng_locale``.

    :param str searxng_locale: SearXNG's internal representation of locale (de,
        de-DE, fr-BE, zh, zh-CN, zh-TW ..).

    :param list locale_tag_list: The list of locale tags to select from

    :param str fallback: fallback locale tag (if unset --> ``None``)

    The matching itself is delegated to :py:obj:`get_engine_locale`; the
    ``engine_locales`` argument it needs is built from ``locale_tag_list`` by
    :py:obj:`build_engine_locales`.

    .. hint::

       The *SearXNG locale* string and the members of ``locale_tag_list`` have
       to be known by babel!  The :py:obj:`ADDITIONAL_TRANSLATIONS` are used in
       the UI and are not known by babel --> they are ignored, as are the
       pseudo tags ``all`` and ``auto``.
    """
    # Example:
    #   searxng_locale  = 'es'
    #   locale_tag_list = ['es-AR', 'es-ES', 'es-MX']

    parsed = get_locale(searxng_locale) if searxng_locale else None
    if parsed is None:
        # empty tag or a tag that babel can't parse
        return fallback

    # normalize to a SearXNG locale that can be passed to get_engine_locale:
    # a region tag when a territory is given, a language tag otherwise
    normalized = region_tag(parsed) if parsed.territory else language_tag(parsed)

    # clean up locale_tag_list: drop pseudo tags and UI-only translations
    usable_tags = [
        candidate
        for candidate in locale_tag_list
        if candidate not in ('all', 'auto') and candidate not in ADDITIONAL_TRANSLATIONS
    ]

    # emulate fetch_traits
    return get_engine_locale(normalized, build_engine_locales(usable_tags), default=fallback)
|
||||||
|
|
||||||
|
|
||||||
|
def build_engine_locales(tag_list: List[str]):
    """From a list of locale tags a dictionary is built that can be passed by
    argument ``engine_locales`` to :py:obj:`get_engine_locale`.  This function
    is mainly used by :py:obj:`match_locale` and is similar to what the
    ``fetch_traits(..)`` function of engines does.

    If there are territory codes in the ``tag_list`` that have a *script code*,
    additional keys are added to the returned dictionary.

    .. code:: python

       >>> import locales
       >>> engine_locales = locales.build_engine_locales(['en', 'en-US', 'zh', 'zh-CN', 'zh-TW'])
       >>> engine_locales
       {
           'en': 'en', 'en-US': 'en-US',
           'zh': 'zh', 'zh-CN': 'zh-CN', 'zh_Hans': 'zh-CN',
           'zh-TW': 'zh-TW', 'zh_Hant': 'zh-TW'
       }
       >>> get_engine_locale('zh-Hans', engine_locales)
       'zh-CN'

    This function is a good example to understand the language/region model
    of SearXNG:

      SearXNG only distinguishes between **search languages** and **search
      regions**, by adding the *script-tags*, languages with *script-tags* can
      be assigned to the **regions** that SearXNG supports.

    :param list tag_list: locale tags (of an engine); tags that babel can't
        parse are skipped with a warning.

    :return: dictionary that maps SearXNG's region / language tags to the
        tags from ``tag_list``.
    """
    engine_locales = {}

    for tag in tag_list:
        locale = get_locale(tag)
        if locale is None:
            # Logger.warn is a deprecated alias, use Logger.warning
            logger.warning("build_engine_locales: skip locale tag %s / unknown by babel", tag)
            continue

        if locale.territory:
            engine_locales[region_tag(locale)] = tag
            if locale.script:
                # region with a script code: also reachable by its
                # language(+script) tag, e.g. zh_Hans --> zh-CN
                engine_locales[language_tag(locale)] = tag
        else:
            engine_locales[language_tag(locale)] = tag

    return engine_locales
|
||||||
|
|
|
@ -18,8 +18,6 @@ from urllib.parse import urljoin, urlparse
|
||||||
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from lxml.etree import ElementBase, XPath, XPathError, XPathSyntaxError, _ElementStringResult, _ElementUnicodeResult
|
from lxml.etree import ElementBase, XPath, XPathError, XPathSyntaxError, _ElementStringResult, _ElementUnicodeResult
|
||||||
from babel.core import get_global
|
|
||||||
|
|
||||||
|
|
||||||
from searx import settings
|
from searx import settings
|
||||||
from searx.data import USER_AGENTS, data_dir
|
from searx.data import USER_AGENTS, data_dir
|
||||||
|
@ -365,92 +363,6 @@ def is_valid_lang(lang) -> Optional[Tuple[bool, str, str]]:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def _get_lang_to_lc_dict(lang_list: List[str]) -> Dict[str, str]:
    """Map each language prefix (the part before the first ``-``) to the first
    entry of ``lang_list`` carrying that prefix.

    Results are memoized in ``_LANG_TO_LC_CACHE``, keyed by ``str(lang_list)``.
    """
    cache_key = str(lang_list)
    cached = _LANG_TO_LC_CACHE.get(cache_key, None)
    if cached is not None:
        return cached

    mapping = {}
    for entry in lang_list:
        prefix = entry.split('-')[0]
        # first entry wins, later entries with the same prefix are ignored
        if prefix not in mapping:
            mapping[prefix] = entry

    _LANG_TO_LC_CACHE[cache_key] = mapping
    return mapping
|
|
||||||
|
|
||||||
|
|
||||||
# babel's get_global contains all sorts of miscellaneous locale and territory related data
# see get_global in: https://github.com/python-babel/babel/blob/master/babel/core.py
def _get_from_babel(lang_code: str, key):
    """Look up ``lang_code`` in babel's global data set named ``key``.

    The ``-`` separators of ``lang_code`` are turned into ``_`` for the
    lookup; a string result is returned with ``_`` turned back into ``-``,
    any other result (including ``None``) is returned unchanged.
    """
    babel_code = lang_code.replace('-', '_')
    found = get_global(key).get(babel_code)
    if not isinstance(found, str):
        # for some keys, such as territory_aliases, the value may be a list
        return found
    return found.replace('_', '-')
|
|
||||||
|
|
||||||
|
|
||||||
def _match_language(lang_code: str, lang_list=[], custom_aliases={}) -> Optional[str]:  # pylint: disable=W0102
    """Auxiliary function to match ``lang_code`` in ``lang_list``.

    Lookup order: the (aliased) code itself, babel's *likely subtags* of the
    code, the ``language-territory`` form of those subtags (aliased), and
    finally any entry of ``lang_list`` that shares the language prefix.
    Returns ``None`` when nothing matches.
    """
    # replace language code with a custom alias if necessary
    lang_code = custom_aliases[lang_code] if lang_code in custom_aliases else lang_code
    if lang_code in lang_list:
        return lang_code

    # try to get the most likely country for this language
    likely = _get_from_babel(lang_code, 'likely_subtags')
    if likely:
        if likely in lang_list:
            return likely

        # reduce e.g. language-script-territory to language-territory
        pieces = likely.split('-')
        candidate = pieces[0] + '-' + pieces[-1]
        candidate = custom_aliases[candidate] if candidate in custom_aliases else candidate
        if candidate in lang_list:
            return candidate

    # try to get any supported country for this language
    prefix_map = _get_lang_to_lc_dict(lang_list)
    return prefix_map.get(lang_code)
|
|
||||||
|
|
||||||
|
|
||||||
def match_language(  # pylint: disable=W0102
    locale_code, lang_list=[], custom_aliases={}, fallback: Optional[str] = 'en-US'
) -> Optional[str]:
    """Get the language code from ``lang_list`` that best matches
    ``locale_code``; ``fallback`` is returned when nothing matches."""

    def _lookup(code):
        return _match_language(code, lang_list, custom_aliases)

    # try to get language from given locale_code
    found = _lookup(locale_code)
    if found:
        return found

    parts = locale_code.split('-')
    lang_code, last_part = parts[0], parts[-1]

    # if locale_code has script, try matching without it
    if len(parts) > 2:
        found = _lookup(lang_code + '-' + last_part)
        if found:
            return found

    # try to get language using an equivalent country code
    if len(parts) > 1:
        country_alias = _get_from_babel(last_part, 'territory_aliases')
        if country_alias:
            found = _lookup(lang_code + '-' + country_alias[0])
            if found:
                return found

    # try to get language using an equivalent language code
    language_alias = _get_from_babel(lang_code, 'language_aliases')
    if language_alias:
        found = _lookup(language_alias)
        if found:
            return found

    if lang_code == locale_code:
        # no country was given and every attempt above failed
        return fallback

    # try to get language from given language without giving the country
    return _lookup(lang_code) or fallback
|
|
||||||
|
|
||||||
|
|
||||||
def load_module(filename: str, module_dir: str) -> types.ModuleType:
|
def load_module(filename: str, module_dir: str) -> types.ModuleType:
|
||||||
modname = splitext(filename)[0]
|
modname = splitext(filename)[0]
|
||||||
modpath = join(module_dir, filename)
|
modpath = join(module_dir, filename)
|
||||||
|
|
|
@ -89,7 +89,6 @@ from searx.utils import (
|
||||||
html_to_text,
|
html_to_text,
|
||||||
gen_useragent,
|
gen_useragent,
|
||||||
dict_subset,
|
dict_subset,
|
||||||
match_language,
|
|
||||||
)
|
)
|
||||||
from searx.version import VERSION_STRING, GIT_URL, GIT_BRANCH
|
from searx.version import VERSION_STRING, GIT_URL, GIT_BRANCH
|
||||||
from searx.query import RawTextQuery
|
from searx.query import RawTextQuery
|
||||||
|
@ -117,6 +116,7 @@ from searx.locales import (
|
||||||
RTL_LOCALES,
|
RTL_LOCALES,
|
||||||
localeselector,
|
localeselector,
|
||||||
locales_initialize,
|
locales_initialize,
|
||||||
|
match_locale,
|
||||||
)
|
)
|
||||||
|
|
||||||
# renaming names from searx imports ...
|
# renaming names from searx imports ...
|
||||||
|
@ -227,7 +227,7 @@ def _get_browser_language(req, lang_list):
|
||||||
if '-' in lang:
|
if '-' in lang:
|
||||||
lang_parts = lang.split('-')
|
lang_parts = lang.split('-')
|
||||||
lang = "{}-{}".format(lang_parts[0], lang_parts[-1].upper())
|
lang = "{}-{}".format(lang_parts[0], lang_parts[-1].upper())
|
||||||
locale = match_language(lang, lang_list, fallback=None)
|
locale = match_locale(lang, lang_list, fallback=None)
|
||||||
if locale is not None:
|
if locale is not None:
|
||||||
return locale
|
return locale
|
||||||
return 'en'
|
return 'en'
|
||||||
|
@ -407,7 +407,7 @@ def get_client_settings():
|
||||||
|
|
||||||
|
|
||||||
def render(template_name: str, **kwargs):
|
def render(template_name: str, **kwargs):
|
||||||
|
# pylint: disable=too-many-statements
|
||||||
kwargs['client_settings'] = str(
|
kwargs['client_settings'] = str(
|
||||||
base64.b64encode(
|
base64.b64encode(
|
||||||
bytes(
|
bytes(
|
||||||
|
@ -445,10 +445,13 @@ def render(template_name: str, **kwargs):
|
||||||
|
|
||||||
if locale in RTL_LOCALES and 'rtl' not in kwargs:
|
if locale in RTL_LOCALES and 'rtl' not in kwargs:
|
||||||
kwargs['rtl'] = True
|
kwargs['rtl'] = True
|
||||||
|
|
||||||
if 'current_language' not in kwargs:
|
if 'current_language' not in kwargs:
|
||||||
kwargs['current_language'] = match_language(
|
_locale = request.preferences.get_value('language')
|
||||||
request.preferences.get_value('language'), settings['search']['languages']
|
if _locale in ('auto', 'all'):
|
||||||
)
|
kwargs['current_language'] = _locale
|
||||||
|
else:
|
||||||
|
kwargs['current_language'] = match_locale(_locale, settings['search']['languages'])
|
||||||
|
|
||||||
# values from settings
|
# values from settings
|
||||||
kwargs['search_formats'] = [x for x in settings['search']['formats'] if x != 'html']
|
kwargs['search_formats'] = [x for x in settings['search']['formats'] if x != 'html']
|
||||||
|
@ -810,6 +813,13 @@ def search():
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if search_query.lang in ('auto', 'all'):
|
||||||
|
current_language = search_query.lang
|
||||||
|
else:
|
||||||
|
current_language = match_locale(
|
||||||
|
search_query.lang, settings['search']['languages'], fallback=request.preferences.get_value("language")
|
||||||
|
)
|
||||||
|
|
||||||
# search_query.lang contains the user choice (all, auto, en, ...)
|
# search_query.lang contains the user choice (all, auto, en, ...)
|
||||||
# when the user choice is "auto", search.search_query.lang contains the detected language
|
# when the user choice is "auto", search.search_query.lang contains the detected language
|
||||||
# otherwise it is equals to search_query.lang
|
# otherwise it is equals to search_query.lang
|
||||||
|
@ -832,12 +842,8 @@ def search():
|
||||||
result_container.unresponsive_engines
|
result_container.unresponsive_engines
|
||||||
),
|
),
|
||||||
current_locale = request.preferences.get_value("locale"),
|
current_locale = request.preferences.get_value("locale"),
|
||||||
current_language = match_language(
|
current_language = current_language,
|
||||||
search_query.lang,
|
search_language = match_locale(
|
||||||
settings['search']['languages'],
|
|
||||||
fallback=request.preferences.get_value("language")
|
|
||||||
),
|
|
||||||
search_language = match_language(
|
|
||||||
search.search_query.lang,
|
search.search_query.lang,
|
||||||
settings['search']['languages'],
|
settings['search']['languages'],
|
||||||
fallback=request.preferences.get_value("language")
|
fallback=request.preferences.get_value("language")
|
||||||
|
|
|
@ -18,8 +18,8 @@ from os.path import join
|
||||||
from lxml.html import fromstring
|
from lxml.html import fromstring
|
||||||
|
|
||||||
from searx.engines import wikidata, set_loggers
|
from searx.engines import wikidata, set_loggers
|
||||||
from searx.utils import extract_text, match_language
|
from searx.utils import extract_text
|
||||||
from searx.locales import LOCALE_NAMES, locales_initialize
|
from searx.locales import LOCALE_NAMES, locales_initialize, match_locale
|
||||||
from searx import searx_dir
|
from searx import searx_dir
|
||||||
from searx.utils import gen_useragent, detect_language
|
from searx.utils import gen_useragent, detect_language
|
||||||
import searx.search
|
import searx.search
|
||||||
|
@ -225,9 +225,9 @@ def fetch_website_description(engine_name, website):
|
||||||
fetched_lang, desc = get_website_description(website, lang, WIKIPEDIA_LANGUAGES[lang])
|
fetched_lang, desc = get_website_description(website, lang, WIKIPEDIA_LANGUAGES[lang])
|
||||||
if fetched_lang is None or desc is None:
|
if fetched_lang is None or desc is None:
|
||||||
continue
|
continue
|
||||||
matched_lang = match_language(fetched_lang, LANGUAGES, fallback=None)
|
matched_lang = match_locale(fetched_lang, LANGUAGES, fallback=None)
|
||||||
if matched_lang is None:
|
if matched_lang is None:
|
||||||
fetched_wikipedia_lang = match_language(fetched_lang, WIKIPEDIA_LANGUAGES.values(), fallback=None)
|
fetched_wikipedia_lang = match_locale(fetched_lang, WIKIPEDIA_LANGUAGES.values(), fallback=None)
|
||||||
matched_lang = wikipedia_languages_r.get(fetched_wikipedia_lang)
|
matched_lang = wikipedia_languages_r.get(fetched_wikipedia_lang)
|
||||||
if matched_lang is not None:
|
if matched_lang is not None:
|
||||||
update_description(engine_name, matched_lang, desc, website, replace=False)
|
update_description(engine_name, matched_lang, desc, website, replace=False)
|
||||||
|
|
|
@ -0,0 +1,111 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""Test some code from module :py:obj:`searx.locales`"""
|
||||||
|
|
||||||
|
from searx import locales
|
||||||
|
from searx.sxng_locales import sxng_locales
|
||||||
|
from tests import SearxTestCase
|
||||||
|
|
||||||
|
|
||||||
|
class TestLocales(SearxTestCase):
    """Implemented tests:

    - :py:obj:`searx.locales.match_locale`
    """

    def _assert_matches(self, locale_tag_list, expectations):
        """Assert for each ``(searxng_locale, expected_tag)`` pair that
        ``match_locale`` selects ``expected_tag`` from ``locale_tag_list``.

        Each pair runs in its own sub-test, so one failing case does not hide
        the remaining ones.
        """
        for searxng_locale, expected_tag in expectations:
            with self.subTest(searxng_locale=searxng_locale):
                self.assertEqual(locales.match_locale(searxng_locale, locale_tag_list), expected_tag)

    def test_match_locale(self):

        locale_tag_list = [x[0] for x in sxng_locales]

        # Test SearXNG search languages

        self._assert_matches(
            locale_tag_list,
            [
                ('de', 'de'),
                ('fr', 'fr'),
                ('zh', 'zh'),
            ],
        )

        # Test SearXNG search regions

        self._assert_matches(
            locale_tag_list,
            [
                ('ca-es', 'ca-ES'),
                ('de-at', 'de-AT'),
                ('de-de', 'de-DE'),
                ('en-UK', 'en-GB'),
                ('fr-be', 'fr-BE'),
                ('fr-ca', 'fr-CA'),
                ('fr-ch', 'fr-CH'),
                ('zh-cn', 'zh-CN'),
                ('zh-tw', 'zh-TW'),
                ('zh-hk', 'zh-HK'),
            ],
        )

        # Test language script code

        self._assert_matches(
            locale_tag_list,
            [
                ('zh-hans', 'zh-CN'),
                ('zh-hans-cn', 'zh-CN'),
                ('zh-hant', 'zh-TW'),
                ('zh-hant-tw', 'zh-TW'),
            ],
        )

        # Test individual locale lists

        self.assertEqual(locales.match_locale('es', [], fallback='fallback'), 'fallback')

        self._assert_matches(['de-CH', 'de-DE'], [('de', 'de-DE')])
        self._assert_matches(['ES'], [('es', 'ES')])
        self._assert_matches(['es-AR', 'es-ES', 'es-MX'], [('es', 'es-ES'), ('es-AR', 'es-AR')])
        self._assert_matches(['es-AR', 'es-ES'], [('es-CO', 'es-ES')])
        self._assert_matches(['es-AR'], [('es-CO', 'es-AR')])

        # Tests from the commit message of 9ae409a05a

        # Assumption:
        # A. When a user selects a language the results should be optimized according to
        #    the selected language.
        #
        # B. When user selects a language and a territory the results should be
        #    optimized with first priority on territory and second on language.

        # Assume we have an engine that supports the following locales:
        locale_tag_list = ['zh-CN', 'zh-HK', 'nl-BE', 'fr-CA']

        # Examples (Assumption A.)
        # ------------------------

        self._assert_matches(
            locale_tag_list,
            [
                # A user selects region 'zh-TW' which should end in zh_HK.
                # hint: CN is 'Hans' and HK ('Hant') fits better to TW ('Hant')
                ('zh-TW', 'zh-HK'),
                # A user selects only the language 'zh' which should end in CN
                ('zh', 'zh-CN'),
                # A user selects only the language 'fr' which should end in fr_CA
                ('fr', 'fr-CA'),
            ],
        )

        # The difference in priority on the territory is best shown with a
        # engine that supports the following locales:
        locale_tag_list = ['fr-FR', 'fr-CA', 'en-GB', 'nl-BE']

        self._assert_matches(
            locale_tag_list,
            [
                # A user selects only a language
                ('en', 'en-GB'),
                # hint: the engine supports fr_FR and fr_CA since no territory is given,
                # fr_FR takes priority ..
                ('fr', 'fr-FR'),
            ],
        )

        # Examples (Assumption B.)
        # ------------------------

        # A user selects region 'fr-BE' which should end in nl-BE
        self._assert_matches(locale_tag_list, [('fr-BE', 'nl-BE')])

        # If the user selects a language and there are two locales like the
        # following:

        locale_tag_list = ['fr-BE', 'fr-CH']

        # The get_engine_locale selects the locale by looking at the "population
        # percent" and this percentage has an higher amount in BE (68.%)
        # compared to CH (21%)

        self._assert_matches(locale_tag_list, [('fr', 'fr-BE')])
|
|
@ -87,39 +87,6 @@ class TestUtils(SearxTestCase):
|
||||||
html = '<p><b>Lorem ipsum</i>dolor sit amet</p>'
|
html = '<p><b>Lorem ipsum</i>dolor sit amet</p>'
|
||||||
self.assertEqual(utils.html_to_text(html), "Lorem ipsum")
|
self.assertEqual(utils.html_to_text(html), "Lorem ipsum")
|
||||||
|
|
||||||
def test_match_language(self):
    """Check ``utils.match_language`` for plain tags, script tags, guessed
    countries and language aliases."""
    aliases = {'en-GB': 'en-UK', 'he': 'iw'}

    # each case: (positional arguments, keyword arguments, expected result)
    cases = [
        (('es', ['es']), {}, 'es'),
        (('es', []), {'fallback': 'fallback'}, 'fallback'),
        (('ja', ['jp'], {'ja': 'jp'}), {}, 'jp'),
        # handle script tags
        (('zh-CN', ['zh-Hans-CN', 'zh-Hant-TW']), {}, 'zh-Hans-CN'),
        (('zh-TW', ['zh-Hans-CN', 'zh-Hant-TW']), {}, 'zh-Hant-TW'),
        (('zh-Hans-CN', ['zh-CN', 'zh-TW']), {}, 'zh-CN'),
        (('zh-Hant-TW', ['zh-CN', 'zh-TW']), {}, 'zh-TW'),
        (('zh-Hans', ['zh-CN', 'zh-TW', 'zh-HK']), {}, 'zh-CN'),
        (('zh-Hant', ['zh-CN', 'zh-TW', 'zh-HK']), {}, 'zh-TW'),
        # guess country
        (('de-DE', ['de']), {}, 'de'),
        (('de', ['de-DE']), {}, 'de-DE'),
        (('es-CO', ['es-AR', 'es-ES', 'es-MX']), {}, 'es-ES'),
        (('es-CO', ['es-MX']), {}, 'es-MX'),
        (('en-UK', ['en-AU', 'en-GB', 'en-US']), {}, 'en-GB'),
        (('en-GB', ['en-AU', 'en-UK', 'en-US'], aliases), {}, 'en-UK'),
        # language aliases
        (('iw', ['he']), {}, 'he'),
        (('he', ['iw'], aliases), {}, 'iw'),
        (('iw-IL', ['he']), {}, 'he'),
        (('he-IL', ['iw'], aliases), {}, 'iw'),
        (('iw', ['he-IL']), {}, 'he-IL'),
        (('he', ['iw-IL'], aliases), {}, 'iw-IL'),
        (('iw-IL', ['he-IL']), {}, 'he-IL'),
        (('he-IL', ['iw-IL'], aliases), {}, 'iw-IL'),
    ]

    for positional, keywords, expected in cases:
        self.assertEqual(utils.match_language(*positional, **keywords), expected)
|
|
||||||
|
|
||||||
def test_ecma_unscape(self):
|
def test_ecma_unscape(self):
|
||||||
self.assertEqual(utils.ecma_unescape('text%20with%20space'), 'text with space')
|
self.assertEqual(utils.ecma_unescape('text%20with%20space'), 'text with space')
|
||||||
self.assertEqual(utils.ecma_unescape('text using %xx: %F3'), 'text using %xx: ó')
|
self.assertEqual(utils.ecma_unescape('text using %xx: %F3'), 'text using %xx: ó')
|
||||||
|
|
Loading…
Reference in New Issue