mirror of
https://github.com/searxng/searxng
synced 2024-01-01 19:24:07 +01:00
[mod] update_languages.py - review of fetch_supported_languages()
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
bf83e08152
commit
864aeebc2f
2 changed files with 36 additions and 24 deletions
|
@ -19,8 +19,7 @@ from os.path import realpath, dirname
|
||||||
from babel.localedata import locale_identifiers
|
from babel.localedata import locale_identifiers
|
||||||
from searx import logger, settings
|
from searx import logger, settings
|
||||||
from searx.data import ENGINES_LANGUAGES
|
from searx.data import ENGINES_LANGUAGES
|
||||||
from searx.network import get
|
from searx.utils import load_module, match_language
|
||||||
from searx.utils import load_module, match_language, gen_useragent
|
|
||||||
|
|
||||||
|
|
||||||
logger = logger.getChild('engines')
|
logger = logger.getChild('engines')
|
||||||
|
@ -219,18 +218,6 @@ def set_language_attributes(engine: Engine):
|
||||||
# language_support
|
# language_support
|
||||||
engine.language_support = len(engine.supported_languages) > 0
|
engine.language_support = len(engine.supported_languages) > 0
|
||||||
|
|
||||||
# assign language fetching method if auxiliary method exists
|
|
||||||
if hasattr(engine, '_fetch_supported_languages'):
|
|
||||||
headers = {
|
|
||||||
'User-Agent': gen_useragent(),
|
|
||||||
'Accept-Language': "en-US,en;q=0.5", # bing needs to set the English language
|
|
||||||
}
|
|
||||||
engine.fetch_supported_languages = (
|
|
||||||
# pylint: disable=protected-access
|
|
||||||
lambda: engine._fetch_supported_languages(get(engine.supported_languages_url, headers=headers))
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def update_attributes_for_tor(engine: Engine) -> bool:
|
def update_attributes_for_tor(engine: Engine) -> bool:
|
||||||
if using_tor_proxy(engine) and hasattr(engine, 'onion_url'):
|
if using_tor_proxy(engine) and hasattr(engine, 'onion_url'):
|
||||||
engine.search_url = engine.onion_url + getattr(engine, 'search_path', '')
|
engine.search_url = engine.onion_url + getattr(engine, 'search_path', '')
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# lint: pylint
|
# lint: pylint
|
||||||
|
|
||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
"""This script generates languages.py from intersecting each engine's supported
|
"""This script generates languages.py from intersecting each engine's supported
|
||||||
languages.
|
languages.
|
||||||
|
@ -22,31 +21,57 @@ from babel.core import parse_locale
|
||||||
|
|
||||||
from searx import settings, searx_dir
|
from searx import settings, searx_dir
|
||||||
from searx.engines import load_engines, engines
|
from searx.engines import load_engines, engines
|
||||||
from searx.network import set_timeout_for_thread
|
from searx.network import set_timeout_for_thread, get
|
||||||
|
from searx.utils import gen_useragent
|
||||||
|
|
||||||
# Output files.
|
# Output files.
|
||||||
engines_languages_file = Path(searx_dir) / 'data' / 'engines_languages.json'
|
engines_languages_file = Path(searx_dir) / 'data' / 'engines_languages.json'
|
||||||
languages_file = Path(searx_dir) / 'languages.py'
|
languages_file = Path(searx_dir) / 'languages.py'
|
||||||
|
|
||||||
|
|
||||||
# Fetches supported languages for each engine and writes a json file with those.
|
|
||||||
def fetch_supported_languages():
|
def fetch_supported_languages():
|
||||||
set_timeout_for_thread(10.0)
|
"""Fetches supported languages for each engine and writes a json file with those.
|
||||||
|
|
||||||
|
"""
|
||||||
|
set_timeout_for_thread(10.0)
|
||||||
engines_languages = {}
|
engines_languages = {}
|
||||||
names = list(engines)
|
names = list(engines)
|
||||||
names.sort()
|
names.sort()
|
||||||
|
|
||||||
|
# The headers have been moved here from commit 9b6ffed06: Some engines (at
|
||||||
|
# least bing) return a different result list of supported languages
|
||||||
|
# depending on the IP location where the HTTP request comes from. The IP
|
||||||
|
# based results (from bing) can be avoided by setting an 'Accept-Language' header in
|
||||||
|
# the HTTP request.
|
||||||
|
|
||||||
|
headers = {
|
||||||
|
'User-Agent': gen_useragent(),
|
||||||
|
'Accept-Language': "en-US,en;q=0.5", # bing needs to set the English language
|
||||||
|
}
|
||||||
|
|
||||||
for engine_name in names:
|
for engine_name in names:
|
||||||
if hasattr(engines[engine_name], 'fetch_supported_languages'):
|
if not hasattr(engines[engine_name], '_fetch_supported_languages'):
|
||||||
engines_languages[engine_name] = engines[engine_name].fetch_supported_languages()
|
continue
|
||||||
print("fetched %s languages from engine %s" % (len(engines_languages[engine_name]), engine_name))
|
|
||||||
if type(engines_languages[engine_name]) == list: # pylint: disable=unidiomatic-typecheck
|
func = engines[engine_name]._fetch_supported_languages # pylint: disable=protected-access
|
||||||
engines_languages[engine_name] = sorted(engines_languages[engine_name])
|
url = engines[engine_name].supported_languages_url
|
||||||
|
resp = get(url, headers=headers)
|
||||||
|
|
||||||
|
l = func(resp)
|
||||||
|
if isinstance(l, list):
|
||||||
|
l.sort()
|
||||||
|
|
||||||
|
print("%s: fetched language %s containing %s items" % (
|
||||||
|
engine_name,
|
||||||
|
l.__class__.__name__,
|
||||||
|
len(l)
|
||||||
|
))
|
||||||
|
|
||||||
|
engines_languages[engine_name] = l
|
||||||
|
|
||||||
print("fetched languages from %s engines" % len(engines_languages))
|
print("fetched languages from %s engines" % len(engines_languages))
|
||||||
|
print("write json file: %s" % (engines_languages_file))
|
||||||
|
|
||||||
# write json file
|
|
||||||
with open(engines_languages_file, 'w', encoding='utf-8') as f:
|
with open(engines_languages_file, 'w', encoding='utf-8') as f:
|
||||||
json.dump(engines_languages, f, indent=2, sort_keys=True)
|
json.dump(engines_languages, f, indent=2, sort_keys=True)
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue