From 864aeebc2ff08f195144567471c70c51533a3f6e Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Fri, 8 Apr 2022 13:24:17 +0200 Subject: [PATCH] [mod] update_languages.py - review of fetch_supported_languages() Signed-off-by: Markus Heiser --- searx/engines/__init__.py | 15 +------- searxng_extra/update/update_languages.py | 45 ++++++++++++++++++------ 2 files changed, 36 insertions(+), 24 deletions(-) diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index ae132f48d..f23bd8cdf 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -19,8 +19,7 @@ from os.path import realpath, dirname from babel.localedata import locale_identifiers from searx import logger, settings from searx.data import ENGINES_LANGUAGES -from searx.network import get -from searx.utils import load_module, match_language, gen_useragent +from searx.utils import load_module, match_language logger = logger.getChild('engines') @@ -219,18 +218,6 @@ def set_language_attributes(engine: Engine): # language_support engine.language_support = len(engine.supported_languages) > 0 - # assign language fetching method if auxiliary method exists - if hasattr(engine, '_fetch_supported_languages'): - headers = { - 'User-Agent': gen_useragent(), - 'Accept-Language': "en-US,en;q=0.5", # bing needs to set the English language - } - engine.fetch_supported_languages = ( - # pylint: disable=protected-access - lambda: engine._fetch_supported_languages(get(engine.supported_languages_url, headers=headers)) - ) - - def update_attributes_for_tor(engine: Engine) -> bool: if using_tor_proxy(engine) and hasattr(engine, 'onion_url'): engine.search_url = engine.onion_url + getattr(engine, 'search_path', '') diff --git a/searxng_extra/update/update_languages.py b/searxng_extra/update/update_languages.py index 95ee5bd3e..613158349 100755 --- a/searxng_extra/update/update_languages.py +++ b/searxng_extra/update/update_languages.py @@ -1,6 +1,5 @@ #!/usr/bin/env python # lint: pylint - # SPDX-License-Identifier: AGPL-3.0-or-later """This script generates languages.py from intersecting each engine's supported languages. @@ -22,31 +21,57 @@ from babel.core import parse_locale from searx import settings, searx_dir from searx.engines import load_engines, engines -from searx.network import set_timeout_for_thread +from searx.network import set_timeout_for_thread, get +from searx.utils import gen_useragent # Output files. engines_languages_file = Path(searx_dir) / 'data' / 'engines_languages.json' languages_file = Path(searx_dir) / 'languages.py' -# Fetchs supported languages for each engine and writes json file with those. def fetch_supported_languages(): - set_timeout_for_thread(10.0) + """Fetchs supported languages for each engine and writes json file with those. + """ + set_timeout_for_thread(10.0) engines_languages = {} names = list(engines) names.sort() + # The headers has been moved here from commit 9b6ffed06: Some engines (at + # least bing) return a different result list of supported languages + # depending on the IP location where the HTTP request comes from. The IP + # based results (from bing) can be avoided by setting a 'Accept-Language' in + # the HTTP request. + + headers = { + 'User-Agent': gen_useragent(), + 'Accept-Language': "en-US,en;q=0.5", # bing needs to set the English language + } + for engine_name in names: - if hasattr(engines[engine_name], 'fetch_supported_languages'): - engines_languages[engine_name] = engines[engine_name].fetch_supported_languages() - print("fetched %s languages from engine %s" % (len(engines_languages[engine_name]), engine_name)) - if type(engines_languages[engine_name]) == list: # pylint: disable=unidiomatic-typecheck - engines_languages[engine_name] = sorted(engines_languages[engine_name]) + if not hasattr(engines[engine_name], '_fetch_supported_languages'): + continue + + func = engines[engine_name]._fetch_supported_languages # pylint: disable=protected-access + url = engines[engine_name].supported_languages_url + resp = get(url, headers=headers) + + l = func(resp) + if isinstance(l, list): + l.sort() + + print("%s: fetched language %s containing %s items" % ( + engine_name, + l.__class__.__name__, + len(l) + )) + + engines_languages[engine_name] = l print("fetched languages from %s engines" % len(engines_languages)) + print("write json file: %s" % (engines_languages_file)) - # write json file with open(engines_languages_file, 'w', encoding='utf-8') as f: json.dump(engines_languages, f, indent=2, sort_keys=True)