forked from zaclys/searxng
[mod] remove obsolete EngineTraits.supported_languages
All engines have been migrated from ``supported_languages`` to the ``fetch_traits`` concept. There is no longer a need for the obsolete code that implements the ``supported_languages`` concept. Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
96a2eec3b5
commit
4d4aa13e1f
@ -19,9 +19,6 @@ from searx.engines import (
|
||||
from searx.network import get as http_get
|
||||
from searx.exceptions import SearxEngineResponseException
|
||||
|
||||
# a fetch_supported_languages() for XPath engines isn't available right now
|
||||
# _brave = ENGINES_LANGUAGES['brave'].keys()
|
||||
|
||||
|
||||
def get(*args, **kwargs):
|
||||
if 'timeout' not in kwargs:
|
||||
@ -225,14 +222,6 @@ def search_autocomplete(backend_name, query, sxng_locale):
|
||||
backend = backends.get(backend_name)
|
||||
if backend is None:
|
||||
return []
|
||||
|
||||
if engines[backend_name].traits.data_type != "traits_v1":
|
||||
# vintage / deprecated
|
||||
if not sxng_locale or sxng_locale == 'all':
|
||||
sxng_locale = 'en'
|
||||
else:
|
||||
sxng_locale = sxng_locale.split('-')[0]
|
||||
|
||||
try:
|
||||
return backend(query, sxng_locale)
|
||||
except (HTTPError, SearxEngineResponseException):
|
||||
|
@ -49,8 +49,7 @@
|
||||
"uk": "\u0423\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430",
|
||||
"zh": "\u4e2d\u6587\uff08\u7e41\u9ad4\uff09"
|
||||
},
|
||||
"regions": {},
|
||||
"supported_languages": {}
|
||||
"regions": {}
|
||||
},
|
||||
"bing": {
|
||||
"all_locale": null,
|
||||
@ -146,8 +145,7 @@
|
||||
"zh-CN": "zh-CN",
|
||||
"zh-HK": "zh-HK",
|
||||
"zh-TW": "zh-TW"
|
||||
},
|
||||
"supported_languages": {}
|
||||
}
|
||||
},
|
||||
"bing images": {
|
||||
"all_locale": null,
|
||||
@ -243,8 +241,7 @@
|
||||
"zh-CN": "zh-CN",
|
||||
"zh-HK": "zh-HK",
|
||||
"zh-TW": "zh-TW"
|
||||
},
|
||||
"supported_languages": {}
|
||||
}
|
||||
},
|
||||
"bing news": {
|
||||
"all_locale": "en-WW",
|
||||
@ -316,8 +313,7 @@
|
||||
"it-IT": "it-IT",
|
||||
"pt-BR": "pt-BR",
|
||||
"zh-CN": "zh-CN"
|
||||
},
|
||||
"supported_languages": {}
|
||||
}
|
||||
},
|
||||
"bing videos": {
|
||||
"all_locale": null,
|
||||
@ -413,8 +409,7 @@
|
||||
"zh-CN": "zh-CN",
|
||||
"zh-HK": "zh-HK",
|
||||
"zh-TW": "zh-TW"
|
||||
},
|
||||
"supported_languages": {}
|
||||
}
|
||||
},
|
||||
"dailymotion": {
|
||||
"all_locale": null,
|
||||
@ -491,8 +486,7 @@
|
||||
"vi-VN": "vi_VN",
|
||||
"zh-CN": "zh_CN",
|
||||
"zh-TW": "zh_TW"
|
||||
},
|
||||
"supported_languages": {}
|
||||
}
|
||||
},
|
||||
"duckduckgo": {
|
||||
"all_locale": "wt-wt",
|
||||
@ -656,8 +650,7 @@
|
||||
"zh-CN": "cn-zh",
|
||||
"zh-HK": "hk-tzh",
|
||||
"zh-TW": "tw-tzh"
|
||||
},
|
||||
"supported_languages": {}
|
||||
}
|
||||
},
|
||||
"duckduckgo images": {
|
||||
"all_locale": "wt-wt",
|
||||
@ -821,8 +814,7 @@
|
||||
"zh-CN": "cn-zh",
|
||||
"zh-HK": "hk-tzh",
|
||||
"zh-TW": "tw-tzh"
|
||||
},
|
||||
"supported_languages": {}
|
||||
}
|
||||
},
|
||||
"duckduckgo weather": {
|
||||
"all_locale": "wt-wt",
|
||||
@ -986,8 +978,7 @@
|
||||
"zh-CN": "cn-zh",
|
||||
"zh-HK": "hk-tzh",
|
||||
"zh-TW": "tw-tzh"
|
||||
},
|
||||
"supported_languages": {}
|
||||
}
|
||||
},
|
||||
"google": {
|
||||
"all_locale": "ZZ",
|
||||
@ -1439,8 +1430,7 @@
|
||||
"zh-HK": "HK",
|
||||
"zh-SG": "SG",
|
||||
"zh-TW": "TW"
|
||||
},
|
||||
"supported_languages": {}
|
||||
}
|
||||
},
|
||||
"google images": {
|
||||
"all_locale": "ZZ",
|
||||
@ -1892,8 +1882,7 @@
|
||||
"zh-HK": "HK",
|
||||
"zh-SG": "SG",
|
||||
"zh-TW": "TW"
|
||||
},
|
||||
"supported_languages": {}
|
||||
}
|
||||
},
|
||||
"google news": {
|
||||
"all_locale": "ZZ",
|
||||
@ -2238,8 +2227,7 @@
|
||||
"zh-HK": "HK",
|
||||
"zh-SG": "SG",
|
||||
"zh-TW": "TW"
|
||||
},
|
||||
"supported_languages": {}
|
||||
}
|
||||
},
|
||||
"google scholar": {
|
||||
"all_locale": "ZZ",
|
||||
@ -2691,8 +2679,7 @@
|
||||
"zh-HK": "HK",
|
||||
"zh-SG": "SG",
|
||||
"zh-TW": "TW"
|
||||
},
|
||||
"supported_languages": {}
|
||||
}
|
||||
},
|
||||
"google videos": {
|
||||
"all_locale": "ZZ",
|
||||
@ -3144,8 +3131,7 @@
|
||||
"zh-HK": "HK",
|
||||
"zh-SG": "SG",
|
||||
"zh-TW": "TW"
|
||||
},
|
||||
"supported_languages": {}
|
||||
}
|
||||
},
|
||||
"peertube": {
|
||||
"all_locale": null,
|
||||
@ -3174,8 +3160,7 @@
|
||||
"zh_Hans": "zh",
|
||||
"zh_Hant": "zh"
|
||||
},
|
||||
"regions": {},
|
||||
"supported_languages": {}
|
||||
"regions": {}
|
||||
},
|
||||
"qwant": {
|
||||
"all_locale": null,
|
||||
@ -3222,8 +3207,7 @@
|
||||
"th-TH": "th_TH",
|
||||
"zh-CN": "zh_CN",
|
||||
"zh-HK": "zh_HK"
|
||||
},
|
||||
"supported_languages": {}
|
||||
}
|
||||
},
|
||||
"qwant images": {
|
||||
"all_locale": null,
|
||||
@ -3270,8 +3254,7 @@
|
||||
"th-TH": "th_TH",
|
||||
"zh-CN": "zh_CN",
|
||||
"zh-HK": "zh_HK"
|
||||
},
|
||||
"supported_languages": {}
|
||||
}
|
||||
},
|
||||
"qwant news": {
|
||||
"all_locale": null,
|
||||
@ -3303,8 +3286,7 @@
|
||||
"nl-BE": "nl_BE",
|
||||
"nl-NL": "nl_NL",
|
||||
"pt-PT": "pt_PT"
|
||||
},
|
||||
"supported_languages": {}
|
||||
}
|
||||
},
|
||||
"qwant videos": {
|
||||
"all_locale": null,
|
||||
@ -3351,8 +3333,7 @@
|
||||
"th-TH": "th_TH",
|
||||
"zh-CN": "zh_CN",
|
||||
"zh-HK": "zh_HK"
|
||||
},
|
||||
"supported_languages": {}
|
||||
}
|
||||
},
|
||||
"sepiasearch": {
|
||||
"all_locale": null,
|
||||
@ -3381,8 +3362,7 @@
|
||||
"zh_Hans": "zh",
|
||||
"zh_Hant": "zh"
|
||||
},
|
||||
"regions": {},
|
||||
"supported_languages": {}
|
||||
"regions": {}
|
||||
},
|
||||
"startpage": {
|
||||
"all_locale": null,
|
||||
@ -3521,8 +3501,7 @@
|
||||
"zh-CN": "zh-CN_CN",
|
||||
"zh-HK": "zh-TW_HK",
|
||||
"zh-TW": "zh-TW_TW"
|
||||
},
|
||||
"supported_languages": {}
|
||||
}
|
||||
},
|
||||
"wikidata": {
|
||||
"all_locale": null,
|
||||
@ -3610,8 +3589,7 @@
|
||||
"zh": "zh",
|
||||
"zh_Hant": "zh-classical"
|
||||
},
|
||||
"regions": {},
|
||||
"supported_languages": {}
|
||||
"regions": {}
|
||||
},
|
||||
"wikipedia": {
|
||||
"all_locale": null,
|
||||
@ -3779,8 +3757,7 @@
|
||||
"zh_Hans": "zh",
|
||||
"zh_Hant": "zh-classical"
|
||||
},
|
||||
"regions": {},
|
||||
"supported_languages": {}
|
||||
"regions": {}
|
||||
},
|
||||
"yahoo": {
|
||||
"all_locale": "any",
|
||||
@ -3820,7 +3797,6 @@
|
||||
"zh_Hans": "zh_chs",
|
||||
"zh_Hant": "zh_cht"
|
||||
},
|
||||
"regions": {},
|
||||
"supported_languages": {}
|
||||
"regions": {}
|
||||
}
|
||||
}
|
@ -134,10 +134,3 @@ class Engine: # pylint: disable=too-few-public-methods
|
||||
require_api_key: true
|
||||
results: HTML
|
||||
"""
|
||||
|
||||
# deprecated properties
|
||||
|
||||
_fetch_supported_languages: Callable # deprecated use fetch_traits
|
||||
supported_languages: Union[List[str], Dict[str, str]] # deprecated use traits
|
||||
language_aliases: Dict[str, str] # deprecated not needed when using triats
|
||||
supported_languages_url: str # deprecated not needed when using triats
|
||||
|
@ -13,11 +13,9 @@ used.
|
||||
from __future__ import annotations
|
||||
import json
|
||||
import dataclasses
|
||||
from typing import Dict, Union, List, Callable, Optional, TYPE_CHECKING
|
||||
from typing import Dict, Union, Callable, Optional, TYPE_CHECKING
|
||||
from typing_extensions import Literal, Self
|
||||
|
||||
from babel.localedata import locale_identifiers
|
||||
|
||||
from searx import locales
|
||||
from searx.data import data_dir, ENGINE_TRAITS
|
||||
|
||||
@ -79,18 +77,8 @@ class EngineTraits:
|
||||
language").
|
||||
"""
|
||||
|
||||
data_type: Literal['traits_v1', 'supported_languages'] = 'traits_v1'
|
||||
"""Data type, default is 'traits_v1' for vintage use 'supported_languages'.
|
||||
|
||||
.. hint::
|
||||
|
||||
For the transition period until the *fetch* functions of all the engines
|
||||
are converted there will be the data_type 'supported_languages', which
|
||||
maps the old logic unchanged 1:1.
|
||||
|
||||
Instances of data_type 'supported_languages' do not implement methods
|
||||
like ``self.get_language(..)`` and ``self.get_region(..)``
|
||||
|
||||
data_type: Literal['traits_v1'] = 'traits_v1'
|
||||
"""Data type, default is 'traits_v1'.
|
||||
"""
|
||||
|
||||
custom: Dict[str, Dict] = dataclasses.field(default_factory=dict)
|
||||
@ -139,16 +127,6 @@ class EngineTraits:
|
||||
if self.data_type == 'traits_v1':
|
||||
return bool(self.get_region(searxng_locale) or self.get_language(searxng_locale))
|
||||
|
||||
if self.data_type == 'supported_languages': # vintage / deprecated
|
||||
# pylint: disable=import-outside-toplevel
|
||||
from searx.utils import match_language
|
||||
|
||||
if searxng_locale == 'all':
|
||||
return True
|
||||
x = match_language(searxng_locale, self.supported_languages, self.language_aliases, None)
|
||||
return bool(x)
|
||||
|
||||
# return bool(self.get_supported_language(searxng_locale))
|
||||
raise TypeError('engine traits of type %s is unknown' % self.data_type)
|
||||
|
||||
def copy(self):
|
||||
@ -178,10 +156,6 @@ class EngineTraits:
|
||||
|
||||
if self.data_type == 'traits_v1':
|
||||
self._set_traits_v1(engine)
|
||||
|
||||
elif self.data_type == 'supported_languages': # vintage / deprecated
|
||||
self._set_supported_languages(engine)
|
||||
|
||||
else:
|
||||
raise TypeError('engine traits of type %s is unknown' % self.data_type)
|
||||
|
||||
@ -215,106 +189,6 @@ class EngineTraits:
|
||||
# set the copied & modified traits in engine's namespace
|
||||
engine.traits = traits
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# The code below is deprecated and can hopefully be deleted one day
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
supported_languages: Union[List[str], Dict[str, str]] = dataclasses.field(default_factory=dict)
|
||||
"""depricated: does not work for engines that do support languages based on a
|
||||
region. With this type it is not guaranteed that the key values can be
|
||||
parsed by :py:obj:`babel.Locale.parse`!
|
||||
"""
|
||||
|
||||
# language_aliases: Dict[str, str] = dataclasses.field(default_factory=dict)
|
||||
# """depricated: does not work for engines that do support languages based on a
|
||||
# region. With this type it is not guaranteed that the key values can be
|
||||
# parsed by :py:obj:`babel.Locale.parse`!
|
||||
# """
|
||||
|
||||
BABEL_LANGS = [
|
||||
lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0]
|
||||
for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())
|
||||
]
|
||||
|
||||
# def get_supported_language(self, searxng_locale, default=None): # vintage / deprecated
|
||||
# """Return engine's language string that *best fits* to SearXNG's locale."""
|
||||
# if searxng_locale == 'all' and self.all_locale is not None:
|
||||
# return self.all_locale
|
||||
# return locales.get_engine_locale(searxng_locale, self.supported_languages, default=default)
|
||||
|
||||
@classmethod # vintage / deprecated
|
||||
def fetch_supported_languages(cls, engine: Engine) -> Union[Self, None]:
|
||||
"""DEPRECATED: Calls a function ``_fetch_supported_languages`` from engine's
|
||||
namespace to fetch languages from the origin engine. If function does
|
||||
not exists, ``None`` is returned.
|
||||
"""
|
||||
|
||||
# pylint: disable=import-outside-toplevel
|
||||
from searx import network
|
||||
from searx.utils import gen_useragent
|
||||
|
||||
fetch_languages = getattr(engine, '_fetch_supported_languages', None)
|
||||
if fetch_languages is None:
|
||||
return None
|
||||
|
||||
# The headers has been moved here from commit 9b6ffed06: Some engines (at
|
||||
# least bing and startpage) return a different result list of supported
|
||||
# languages depending on the IP location where the HTTP request comes from.
|
||||
# The IP based results (from bing) can be avoided by setting a
|
||||
# 'Accept-Language' in the HTTP request.
|
||||
|
||||
headers = {
|
||||
'User-Agent': gen_useragent(),
|
||||
'Accept-Language': "en-US,en;q=0.5", # bing needs to set the English language
|
||||
}
|
||||
resp = network.get(engine.supported_languages_url, headers=headers)
|
||||
supported_languages = fetch_languages(resp)
|
||||
if isinstance(supported_languages, list):
|
||||
supported_languages.sort()
|
||||
|
||||
engine_traits = cls()
|
||||
engine_traits.data_type = 'supported_languages'
|
||||
engine_traits.supported_languages = supported_languages
|
||||
return engine_traits
|
||||
|
||||
def _set_supported_languages(self, engine: Engine): # vintage / deprecated
|
||||
traits = self.copy()
|
||||
|
||||
# pylint: disable=import-outside-toplevel
|
||||
from searx.utils import match_language
|
||||
|
||||
_msg = "settings.yml - engine: '%s' / %s: '%s' not supported"
|
||||
|
||||
if hasattr(engine, 'language'):
|
||||
if engine.language not in self.supported_languages:
|
||||
raise ValueError(_msg % (engine.name, 'language', engine.language))
|
||||
|
||||
if isinstance(self.supported_languages, dict):
|
||||
traits.supported_languages = {engine.language: self.supported_languages[engine.language]}
|
||||
else:
|
||||
traits.supported_languages = [engine.language]
|
||||
|
||||
engine.language_support = bool(traits.supported_languages)
|
||||
engine.supported_languages = traits.supported_languages
|
||||
|
||||
# find custom aliases for non standard language codes
|
||||
traits.language_aliases = {} # pylint: disable=attribute-defined-outside-init
|
||||
|
||||
for engine_lang in getattr(engine, 'language_aliases', {}):
|
||||
iso_lang = match_language(engine_lang, self.BABEL_LANGS, fallback=None)
|
||||
if (
|
||||
iso_lang
|
||||
and iso_lang != engine_lang
|
||||
and not engine_lang.startswith(iso_lang)
|
||||
and iso_lang not in self.supported_languages
|
||||
):
|
||||
traits.language_aliases[iso_lang] = engine_lang
|
||||
|
||||
engine.language_aliases = traits.language_aliases
|
||||
|
||||
# set the copied & modified traits in engine's namespace
|
||||
engine.traits = traits
|
||||
|
||||
|
||||
class EngineTraitsMap(Dict[str, EngineTraits]):
|
||||
"""A python dictionary to map :class:`EngineTraits` by engine name."""
|
||||
@ -352,17 +226,6 @@ class EngineTraitsMap(Dict[str, EngineTraits]):
|
||||
log("%-20s: SearXNG regions --> %s" % (engine_name, len(traits.regions)))
|
||||
obj[engine_name] = traits
|
||||
|
||||
# vintage / deprecated
|
||||
_traits = EngineTraits.fetch_supported_languages(engine)
|
||||
if _traits is not None:
|
||||
log("%-20s: %s supported_languages (deprecated)" % (engine_name, len(_traits.supported_languages)))
|
||||
if traits is not None:
|
||||
traits.supported_languages = _traits.supported_languages
|
||||
obj[engine_name] = traits
|
||||
else:
|
||||
obj[engine_name] = _traits
|
||||
continue
|
||||
|
||||
return obj
|
||||
|
||||
def set_traits(self, engine: Engine):
|
||||
|
@ -43,8 +43,6 @@ ENGINE_DEFAULT_ARGS = {
|
||||
"send_accept_language_header": False,
|
||||
"tokens": [],
|
||||
"about": {},
|
||||
"supported_languages": [], # deprecated use traits
|
||||
"language_aliases": {}, # deprecated not needed when using traits
|
||||
}
|
||||
# set automatically when an engine does not have any tab category
|
||||
OTHER_CATEGORY = 'other'
|
||||
|
@ -25,6 +25,7 @@ base_url = 'https://wiki.gentoo.org'
|
||||
# xpath queries
|
||||
xpath_results = '//ul[@class="mw-search-results"]/li'
|
||||
xpath_link = './/div[@class="mw-search-result-heading"]/a'
|
||||
xpath_content = './/div[@class="searchresult"]'
|
||||
|
||||
|
||||
# cut 'en' from 'en-US', 'de' from 'de-CH', and so on
|
||||
@ -77,8 +78,6 @@ main_langs = {
|
||||
'uk': 'Українська',
|
||||
'zh': '简体中文',
|
||||
}
|
||||
supported_languages = dict(lang_urls, **main_langs)
|
||||
|
||||
|
||||
# do search-request
|
||||
def request(query, params):
|
||||
@ -118,7 +117,8 @@ def response(resp):
|
||||
link = result.xpath(xpath_link)[0]
|
||||
href = urljoin(base_url, link.attrib.get('href'))
|
||||
title = extract_text(link)
|
||||
content = extract_text(result.xpath(xpath_content))
|
||||
|
||||
results.append({'url': href, 'title': title})
|
||||
results.append({'url': href, 'title': title, 'content': content})
|
||||
|
||||
return results
|
||||
|
@ -221,7 +221,7 @@ class OnlineProcessor(EngineProcessor):
|
||||
'test': ['unique_results'],
|
||||
}
|
||||
|
||||
if getattr(self.engine, 'supported_languages', []):
|
||||
if getattr(self.engine, 'traits', False):
|
||||
tests['lang_fr'] = {
|
||||
'matrix': {'query': 'paris', 'lang': 'fr'},
|
||||
'result_container': ['not_empty', ('has_language', 'fr')],
|
||||
|
@ -1317,11 +1317,6 @@ def config():
|
||||
continue
|
||||
|
||||
_languages = engine.traits.languages.keys()
|
||||
if engine.traits.data_type == 'supported_languages': # vintage / deprecated
|
||||
_languages = engine.traits.supported_languages
|
||||
if isinstance(_languages, dict):
|
||||
_languages = _languages.keys()
|
||||
|
||||
_engines.append(
|
||||
{
|
||||
'name': name,
|
||||
|
Loading…
Reference in New Issue
Block a user