[fix] check language aliases when setting search language

This commit is contained in:
Marc Abonce Seguin 2018-11-25 23:32:48 -06:00
parent 3c95d64ff8
commit 5568f24d6c
16 changed files with 29 additions and 11 deletions

View file

@@ -55,7 +55,7 @@ def request(query, params):
query=urlencode({'q': query}),
offset=offset)
-language = match_language(params['language'], supported_languages).lower()
+language = match_language(params['language'], supported_languages, language_aliases).lower()
params['cookies']['SRCHHPGUSR'] = \
'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')

View file

@@ -48,7 +48,7 @@ def request(query, params):
'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
# language cookie
-language = match_language(params['language'], supported_languages).lower()
+language = match_language(params['language'], supported_languages, language_aliases).lower()
params['cookies']['_EDGE_S'] = 'mkt=' + language + '&F=1'
# query and paging

View file

@@ -166,7 +166,7 @@ def extract_text_from_dom(result, xpath):
def request(query, params):
offset = (params['pageno'] - 1) * 10
-language = match_language(params['language'], supported_languages)
+language = match_language(params['language'], supported_languages, language_aliases)
language_array = language.split('-')
if params['language'].find('-') > 0:
country = params['language'].split('-')[1]

View file

@@ -51,7 +51,7 @@ def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}),
search_options=urlencode(search_options))
-language = match_language(params['language'], supported_languages).split('-')[0]
+language = match_language(params['language'], supported_languages, language_aliases).split('-')[0]
if language:
params['url'] += '&lr=lang_' + language

View file

@@ -46,7 +46,7 @@ def request(query, params):
offset=offset)
# add language tag
-language = match_language(params['language'], supported_languages)
+language = match_language(params['language'], supported_languages, language_aliases)
params['url'] += '&locale=' + language.replace('-', '_').lower()
return params

View file

@@ -36,7 +36,7 @@ regex_img_url_remove_start = re.compile(b'^https?://i\.swisscows\.ch/\?link=')
# do search-request
def request(query, params):
-region = match_language(params['language'], supported_languages)
+region = match_language(params['language'], supported_languages, language_aliases)
ui_language = region.split('-')[0]
search_path = search_string.format(

View file

@@ -68,7 +68,7 @@ def response(resp):
html = fromstring(resp.text)
search_results = html.xpath(wikidata_ids_xpath)
-language = match_language(resp.search_params['language'], supported_languages).split('-')[0]
+language = match_language(resp.search_params['language'], supported_languages, language_aliases).split('-')[0]
# TODO: make requests asynchronous to avoid timeout when result_count > 1
for search_result in search_results[:result_count]:

View file

@@ -31,7 +31,7 @@ supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
# set language in base_url
def url_lang(lang):
-return match_language(lang, supported_languages).split('-')[0]
+return match_language(lang, supported_languages, language_aliases).split('-')[0]
# do search-request