[mod] update_languages.py - review of fetch_supported_languages()

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2022-04-08 13:24:17 +02:00
parent 05e8622bf5
commit 50f9e4eec1
2 changed files with 36 additions and 24 deletions

View file

@ -19,8 +19,7 @@ from os.path import realpath, dirname
from babel.localedata import locale_identifiers from babel.localedata import locale_identifiers
from searx import logger, settings from searx import logger, settings
from searx.data import ENGINES_LANGUAGES from searx.data import ENGINES_LANGUAGES
from searx.network import get from searx.utils import load_module, match_language
from searx.utils import load_module, match_language, gen_useragent
logger = logger.getChild('engines') logger = logger.getChild('engines')
@ -219,18 +218,6 @@ def set_language_attributes(engine: Engine):
# language_support # language_support
engine.language_support = len(engine.supported_languages) > 0 engine.language_support = len(engine.supported_languages) > 0
# assign language fetching method if auxiliary method exists
if hasattr(engine, '_fetch_supported_languages'):
headers = {
'User-Agent': gen_useragent(),
'Accept-Language': "en-US,en;q=0.5", # bing needs to set the English language
}
engine.fetch_supported_languages = (
# pylint: disable=protected-access
lambda: engine._fetch_supported_languages(get(engine.supported_languages_url, headers=headers))
)
def update_attributes_for_tor(engine: Engine) -> bool: def update_attributes_for_tor(engine: Engine) -> bool:
if using_tor_proxy(engine) and hasattr(engine, 'onion_url'): if using_tor_proxy(engine) and hasattr(engine, 'onion_url'):
engine.search_url = engine.onion_url + getattr(engine, 'search_path', '') engine.search_url = engine.onion_url + getattr(engine, 'search_path', '')

View file

@ -1,6 +1,5 @@
#!/usr/bin/env python #!/usr/bin/env python
# lint: pylint # lint: pylint
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
"""This script generates languages.py from intersecting each engine's supported """This script generates languages.py from intersecting each engine's supported
languages. languages.
@ -22,31 +21,57 @@ from babel.core import parse_locale
from searx import settings, searx_dir from searx import settings, searx_dir
from searx.engines import load_engines, engines from searx.engines import load_engines, engines
from searx.network import set_timeout_for_thread from searx.network import set_timeout_for_thread, get
from searx.utils import gen_useragent
# Output files. # Output files.
engines_languages_file = Path(searx_dir) / 'data' / 'engines_languages.json' engines_languages_file = Path(searx_dir) / 'data' / 'engines_languages.json'
languages_file = Path(searx_dir) / 'languages.py' languages_file = Path(searx_dir) / 'languages.py'
# Fetches supported languages for each engine and writes json file with those.
def fetch_supported_languages(): def fetch_supported_languages():
set_timeout_for_thread(10.0) """Fetches supported languages for each engine and writes json file with those.
"""
set_timeout_for_thread(10.0)
engines_languages = {} engines_languages = {}
names = list(engines) names = list(engines)
names.sort() names.sort()
# The headers have been moved here from commit 9b6ffed06: Some engines (at
# least bing) return a different list of supported languages depending on
# the IP location the HTTP request comes from. The IP-based results (from
# bing) can be avoided by setting an 'Accept-Language' header in the HTTP
# request.
headers = {
'User-Agent': gen_useragent(),
'Accept-Language': "en-US,en;q=0.5", # bing needs to set the English language
}
for engine_name in names: for engine_name in names:
if hasattr(engines[engine_name], 'fetch_supported_languages'): if not hasattr(engines[engine_name], '_fetch_supported_languages'):
engines_languages[engine_name] = engines[engine_name].fetch_supported_languages() continue
print("fetched %s languages from engine %s" % (len(engines_languages[engine_name]), engine_name))
if type(engines_languages[engine_name]) == list: # pylint: disable=unidiomatic-typecheck func = engines[engine_name]._fetch_supported_languages # pylint: disable=protected-access
engines_languages[engine_name] = sorted(engines_languages[engine_name]) url = engines[engine_name].supported_languages_url
resp = get(url, headers=headers)
l = func(resp)
if isinstance(l, list):
l.sort()
print("%s: fetched language %s containing %s items" % (
engine_name,
l.__class__.__name__,
len(l)
))
engines_languages[engine_name] = l
print("fetched languages from %s engines" % len(engines_languages)) print("fetched languages from %s engines" % len(engines_languages))
print("write json file: %s" % (engines_languages_file))
# write json file
with open(engines_languages_file, 'w', encoding='utf-8') as f: with open(engines_languages_file, 'w', encoding='utf-8') as f:
json.dump(engines_languages, f, indent=2, sort_keys=True) json.dump(engines_languages, f, indent=2, sort_keys=True)