mirror of https://github.com/searxng/searxng.git
Merge pull request #676 from return42/fix-bing-lang
Fix issues on running update_languages.py
This commit is contained in:
commit
21d7c8b367
|
@ -193,7 +193,7 @@ def set_language_attributes(engine):
|
||||||
if hasattr(engine, '_fetch_supported_languages'):
|
if hasattr(engine, '_fetch_supported_languages'):
|
||||||
headers = {
|
headers = {
|
||||||
'User-Agent': gen_useragent(),
|
'User-Agent': gen_useragent(),
|
||||||
'Accept-Language': 'ja-JP,ja;q=0.8,en-US;q=0.5,en;q=0.3', # bing needs a non-English language
|
'Accept-Language': "en-US,en;q=0.5", # bing needs to set the English language
|
||||||
}
|
}
|
||||||
engine.fetch_supported_languages = (
|
engine.fetch_supported_languages = (
|
||||||
# pylint: disable=protected-access
|
# pylint: disable=protected-access
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from urllib.parse import urlencode
|
from urllib.parse import urlencode, urlparse, parse_qs
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from searx.utils import eval_xpath, extract_text, match_language
|
from searx.utils import eval_xpath, extract_text, match_language
|
||||||
|
|
||||||
|
@ -25,7 +25,7 @@ paging = True
|
||||||
time_range_support = False
|
time_range_support = False
|
||||||
safesearch = False
|
safesearch = False
|
||||||
supported_languages_url = 'https://www.bing.com/account/general'
|
supported_languages_url = 'https://www.bing.com/account/general'
|
||||||
language_aliases = {'zh-CN': 'zh-CHS', 'zh-TW': 'zh-CHT', 'zh-HK': 'zh-CHT'}
|
language_aliases = {}
|
||||||
|
|
||||||
# search-url
|
# search-url
|
||||||
base_url = 'https://www.bing.com/'
|
base_url = 'https://www.bing.com/'
|
||||||
|
@ -127,18 +127,27 @@ def response(resp):
|
||||||
|
|
||||||
# get supported languages from their site
|
# get supported languages from their site
|
||||||
def _fetch_supported_languages(resp):
|
def _fetch_supported_languages(resp):
|
||||||
|
|
||||||
lang_tags = set()
|
lang_tags = set()
|
||||||
|
|
||||||
setmkt = re.compile('setmkt=([^&]*)')
|
|
||||||
dom = html.fromstring(resp.text)
|
dom = html.fromstring(resp.text)
|
||||||
lang_links = eval_xpath(dom, "//li/a[contains(@href, 'setmkt')]")
|
lang_links = eval_xpath(dom, '//div[@id="language-section"]//li')
|
||||||
|
|
||||||
for a in lang_links:
|
for _li in lang_links:
|
||||||
href = eval_xpath(a, './@href')[0]
|
|
||||||
match = setmkt.search(href)
|
href = eval_xpath(_li, './/@href')[0]
|
||||||
l_tag = match.groups()[0]
|
(_scheme, _netloc, _path, _params, query, _fragment) = urlparse(href)
|
||||||
_lang, _nation = l_tag.split('-', 1)
|
query = parse_qs(query, keep_blank_values=True)
|
||||||
l_tag = _lang.lower() + '-' + _nation.upper()
|
|
||||||
lang_tags.add(l_tag)
|
# fmt: off
|
||||||
|
setlang = query.get('setlang', [None, ])[0]
|
||||||
|
# example: 'mn-Cyrl-MN' --> ['mn', 'Cyrl-MN']
|
||||||
|
lang, nation = (setlang.split('-', maxsplit=1) + [None,])[:2] # fmt: skip
|
||||||
|
# fmt: on
|
||||||
|
|
||||||
|
if not nation:
|
||||||
|
nation = lang.upper()
|
||||||
|
tag = lang + '-' + nation
|
||||||
|
lang_tags.add(tag)
|
||||||
|
|
||||||
return list(lang_tags)
|
return list(lang_tags)
|
||||||
|
|
|
@ -35,6 +35,8 @@ def fetch_supported_languages():
|
||||||
if type(engines_languages[engine_name]) == list:
|
if type(engines_languages[engine_name]) == list:
|
||||||
engines_languages[engine_name] = sorted(engines_languages[engine_name])
|
engines_languages[engine_name] = sorted(engines_languages[engine_name])
|
||||||
|
|
||||||
|
print("fetched languages from %s engines" % len(engines_languages))
|
||||||
|
|
||||||
# write json file
|
# write json file
|
||||||
with open(engines_languages_file, 'w', encoding='utf-8') as f:
|
with open(engines_languages_file, 'w', encoding='utf-8') as f:
|
||||||
json.dump(engines_languages, f, indent=2, sort_keys=True)
|
json.dump(engines_languages, f, indent=2, sort_keys=True)
|
||||||
|
@ -97,7 +99,11 @@ def join_language_lists(engines_languages):
|
||||||
country_name = ''
|
country_name = ''
|
||||||
if locale:
|
if locale:
|
||||||
# get country name from babel's Locale object
|
# get country name from babel's Locale object
|
||||||
country_name = locale.get_territory_name()
|
try:
|
||||||
|
country_name = locale.get_territory_name()
|
||||||
|
except FileNotFoundError as exc:
|
||||||
|
print("ERROR: %s --> %s" % (locale, exc))
|
||||||
|
locale = None
|
||||||
|
|
||||||
language_list[short_code]['countries'][lang_code] = {'country_name': country_name, 'counter': set()}
|
language_list[short_code]['countries'][lang_code] = {'country_name': country_name, 'counter': set()}
|
||||||
|
|
||||||
|
@ -186,17 +192,24 @@ def write_languages_file(languages):
|
||||||
"language_codes =",
|
"language_codes =",
|
||||||
)
|
)
|
||||||
|
|
||||||
language_codes = tuple(
|
language_codes = []
|
||||||
[
|
|
||||||
(
|
for code in sorted(languages):
|
||||||
code,
|
|
||||||
languages[code]['name'].split(' (')[0],
|
name = languages[code]['name']
|
||||||
languages[code].get('country_name') or '',
|
if name is None:
|
||||||
languages[code].get('english_name') or '',
|
print("ERROR: languages['%s'] --> %s" % (code, languages[code]))
|
||||||
)
|
continue
|
||||||
for code in sorted(languages)
|
item = (
|
||||||
]
|
code,
|
||||||
)
|
languages[code]['name'].split(' (')[0],
|
||||||
|
languages[code].get('country_name') or '',
|
||||||
|
languages[code].get('english_name') or '',
|
||||||
|
)
|
||||||
|
|
||||||
|
language_codes.append(item)
|
||||||
|
|
||||||
|
language_codes = tuple(language_codes)
|
||||||
|
|
||||||
with open(languages_file, 'w') as new_file:
|
with open(languages_file, 'w') as new_file:
|
||||||
file_content = "{file_headers} \\\n{language_codes}".format(
|
file_content = "{file_headers} \\\n{language_codes}".format(
|
||||||
|
|
Loading…
Reference in New Issue