forked from zaclys/searxng
Merge pull request #1446 from MarcAbonce/language_aliases_fix
[fix] Fix queries in Hebrew and Norwegian so they give results in the right language
commit 491792c1a5
File diff suppressed because it is too large
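The common thread in the engine changes below: searx normalizes user language preferences to ISO 639-1 codes ('he' for Hebrew, 'nb' for Norwegian Bokmål), but some engines only understand their own legacy codes (Google still uses 'iw' for Hebrew, Wikipedia uses 'no' for Bokmål). Every call to match_language therefore gains a third argument, a per-engine language_aliases dict. A minimal sketch of the resolution order, assuming a plain-dict lookup and an 'en' fallback (the real helper in searx also does fuzzy region matching):

def match_language(query_lang, supported_languages, language_aliases=None):
    # Sketch only: resolve a standard ISO 639-1 code to whatever code
    # the engine actually expects.
    language_aliases = language_aliases or {}
    if query_lang in language_aliases:
        # An engine-specific alias wins: e.g. Google's 'iw' for Hebrew.
        return language_aliases[query_lang]
    if query_lang in supported_languages:
        return query_lang
    return 'en'  # assumed fallback for this sketch

# Alias tables taken from this PR's tests:
assert match_language('he', ['en', 'fr', 'zh-CN', 'iw'], {'he': 'iw'}) == 'iw'
assert match_language('nb', ['fr', 'en', 'no'], {'nb': 'no'}) == 'no'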
@@ -113,8 +113,7 @@ def load_engine(engine_data):
                iso_lang not in getattr(engine, 'supported_languages'):
                 language_aliases[iso_lang] = engine_lang
 
-        if language_aliases:
-            setattr(engine, 'language_aliases', language_aliases)
+        setattr(engine, 'language_aliases', language_aliases)
 
     # assign language fetching method if auxiliary method exists
     if hasattr(engine, '_fetch_supported_languages'):
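Note on the hunk above: load_engine now always attaches a language_aliases attribute, even when the dict stays empty, so every engine module gets a language_aliases global it can pass to match_language unconditionally. A hedged reconstruction of the surrounding loop (only the context lines and the setattr appear in the diff; the loop header and the normalize() call are assumptions for illustration):

language_aliases = {}
for engine_lang in getattr(engine, 'supported_languages'):
    # Hypothetical normalization step: map the engine's code to a
    # standard ISO 639-1 code, e.g. 'iw' -> 'he', 'no' -> 'nb'.
    iso_lang = normalize(engine_lang)
    if iso_lang and iso_lang != engine_lang and \
            iso_lang not in getattr(engine, 'supported_languages'):
        # Record the reverse mapping: standard code -> engine code.
        language_aliases[iso_lang] = engine_lang

# Unconditional now; an empty dict is a valid (and safe) alias table.
setattr(engine, 'language_aliases', language_aliases)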
@@ -55,7 +55,7 @@ def request(query, params):
         query=urlencode({'q': query}),
         offset=offset)
 
-    language = match_language(params['language'], supported_languages).lower()
+    language = match_language(params['language'], supported_languages, language_aliases).lower()
 
     params['cookies']['SRCHHPGUSR'] = \
         'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
@@ -48,7 +48,7 @@ def request(query, params):
         'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
 
     # language cookie
-    language = match_language(params['language'], supported_languages).lower()
+    language = match_language(params['language'], supported_languages, language_aliases).lower()
     params['cookies']['_EDGE_S'] = 'mkt=' + language + '&F=1'
 
     # query and paging
@@ -166,7 +166,7 @@ def extract_text_from_dom(result, xpath):
 def request(query, params):
     offset = (params['pageno'] - 1) * 10
 
-    language = match_language(params['language'], supported_languages)
+    language = match_language(params['language'], supported_languages, language_aliases)
     language_array = language.split('-')
     if params['language'].find('-') > 0:
         country = params['language'].split('-')[1]
@@ -381,10 +381,10 @@ def attributes_to_html(attributes):
 def _fetch_supported_languages(resp):
     supported_languages = {}
     dom = html.fromstring(resp.text)
-    options = dom.xpath('//table//td/font/label/span')
+    options = dom.xpath('//*[@id="langSec"]//input[@name="lr"]')
     for option in options:
-        code = option.xpath('./@id')[0][1:]
-        name = option.text.title()
+        code = option.xpath('./@value')[0].split('_')[-1]
+        name = option.xpath('./@data-name')[0].title()
         supported_languages[code] = {"name": name}
 
     return supported_languages
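The XPath rewrite above tracks a redesign of Google's preferences page: language options are no longer <span id="tzh-CN"> labels inside a table but <input name="lr" value="lang_zh-CN" data-name="..."> elements under #langSec, so the code is now read from the value attribute ('lang_zh-CN' -> 'zh-CN') and the display name from data-name. A standalone run of the new parsing logic against a snippet of the test fixture from this PR (lxml assumed installed):

from lxml import html

page = html.fromstring(u'''
<div id="langSec"><div>
    <input name="lr" data-name="english" value="lang_en" />
    <input name="lr" data-name="中文 (简体)" value="lang_zh-CN" />
</div></div>''')

supported_languages = {}
for option in page.xpath('//*[@id="langSec"]//input[@name="lr"]'):
    # 'lang_zh-CN' -> 'zh-CN': keep the part after the 'lang_' prefix.
    code = option.xpath('./@value')[0].split('_')[-1]
    name = option.xpath('./@data-name')[0].title()
    supported_languages[code] = {"name": name}

print(supported_languages)
# {'en': {'name': 'English'}, 'zh-CN': {'name': '中文 (简体)'}}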
@@ -51,7 +51,7 @@ def request(query, params):
     params['url'] = search_url.format(query=urlencode({'q': query}),
                                       search_options=urlencode(search_options))
 
-    language = match_language(params['language'], supported_languages).split('-')[0]
+    language = match_language(params['language'], supported_languages, language_aliases).split('-')[0]
     if language:
         params['url'] += '&lr=lang_' + language
 
@@ -46,7 +46,7 @@ def request(query, params):
         offset=offset)
 
     # add language tag
-    language = match_language(params['language'], supported_languages)
+    language = match_language(params['language'], supported_languages, language_aliases)
     params['url'] += '&locale=' + language.replace('-', '_').lower()
 
     return params
@@ -36,7 +36,7 @@ regex_img_url_remove_start = re.compile(b'^https?://i\.swisscows\.ch/\?link=')
 
 # do search-request
 def request(query, params):
-    region = match_language(params['language'], supported_languages)
+    region = match_language(params['language'], supported_languages, language_aliases)
     ui_language = region.split('-')[0]
 
     search_path = search_string.format(
@@ -68,7 +68,7 @@ def response(resp):
     html = fromstring(resp.text)
     search_results = html.xpath(wikidata_ids_xpath)
 
-    language = match_language(resp.search_params['language'], supported_languages).split('-')[0]
+    language = match_language(resp.search_params['language'], supported_languages, language_aliases).split('-')[0]
 
     # TODO: make requests asynchronous to avoid timeout when result_count > 1
     for search_result in search_results[:result_count]:
@@ -31,7 +31,7 @@ supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
 
 # set language in base_url
 def url_lang(lang):
-    return match_language(lang, supported_languages).split('-')[0]
+    return match_language(lang, supported_languages, language_aliases).split('-')[0]
 
 
 # do search-request
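Effect of the wikipedia change, illustrated with the alias table from the test further down ('nb' aliased to 'no'). Previously 'nb' matched none of Wikipedia's codes and the query fell back to the default language; now it resolves to the no.wikipedia.org subdomain. A sketch, reusing the match_language sketch from the top of this page:

# Assumed module globals, mirroring the wikipedia test setup below:
supported_languages = ['fr', 'en', 'no']
language_aliases = {'nb': 'no'}

def url_lang(lang):
    return match_language(lang, supported_languages, language_aliases).split('-')[0]

# url_lang('nb') -> 'no', so searx builds https://no.wikipedia.org/...
# instead of falling back to English results.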
@@ -9,6 +9,7 @@ class TestBingImagesEngine(SearxTestCase):
 
     def test_request(self):
         bing_images.supported_languages = ['fr-FR', 'en-US']
+        bing_images.language_aliases = {}
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 1
@@ -9,6 +9,7 @@ class TestBingVideosEngine(SearxTestCase):
 
     def test_request(self):
         bing_videos.supported_languages = ['fr-FR', 'en-US']
+        bing_videos.language_aliases = {}
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 1
@@ -15,7 +15,8 @@ class TestGoogleEngine(SearxTestCase):
         return response
 
     def test_request(self):
-        google.supported_languages = ['en', 'fr', 'zh-CN']
+        google.supported_languages = ['en', 'fr', 'zh-CN', 'iw']
+        google.language_aliases = {'he': 'iw'}
 
         query = 'test_query'
         dicto = defaultdict(dict)
@@ -41,6 +42,12 @@ class TestGoogleEngine(SearxTestCase):
         self.assertIn('zh-CN', params['url'])
         self.assertIn('zh-CN', params['headers']['Accept-Language'])
 
+        dicto['language'] = 'he'
+        params = google.request(query, dicto)
+        self.assertIn('google.com', params['url'])
+        self.assertIn('iw', params['url'])
+        self.assertIn('iw', params['headers']['Accept-Language'])
+
     def test_response(self):
         self.assertRaises(AttributeError, google.response, None)
         self.assertRaises(AttributeError, google.response, [])
@@ -198,29 +205,13 @@ class TestGoogleEngine(SearxTestCase):
         html = u"""
         <html>
             <body>
-                <table>
-                    <tbody>
-                        <tr>
-                            <td>
-                                <font>
-                                    <label>
-                                        <span id="ten">English</span>
-                                    </label>
-                                </font>
-                            </td>
-                            <td>
-                                <font>
-                                    <label>
-                                        <span id="tzh-CN">中文 (简体)</span>
-                                    </label>
-                                    <label>
-                                        <span id="tzh-TW">中文 (繁體)</span>
-                                    </label>
-                                </font>
-                            </td>
-                        </tr>
-                    </tbody>
-                </table>
+                <div id="langSec">
+                    <div>
+                        <input name="lr" data-name="english" value="lang_en" />
+                        <input name="lr" data-name="中文 (简体)" value="lang_zh-CN" />
+                        <input name="lr" data-name="中文 (繁體)" value="lang_zh-TW" />
+                    </div>
+                </div>
             </body>
         </html>
         """
@@ -10,6 +10,7 @@ class TestGoogleNewsEngine(SearxTestCase):
 
     def test_request(self):
         google_news.supported_languages = ['en-US', 'fr-FR']
+        google_news.language_aliases = {}
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 1
@@ -8,6 +8,7 @@ class TestQwantEngine(SearxTestCase):
 
     def test_request(self):
         qwant.supported_languages = ['en-US', 'fr-CA', 'fr-FR']
+        qwant.language_aliases = {}
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 0
@@ -8,6 +8,7 @@ class TestSwisscowsEngine(SearxTestCase):
 
     def test_request(self):
         swisscows.supported_languages = ['de-AT', 'de-DE']
+        swisscows.language_aliases = {}
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 1
@@ -27,6 +27,7 @@ class TestWikidataEngine(SearxTestCase):
         self.assertRaises(AttributeError, wikidata.response, '[]')
 
         wikidata.supported_languages = ['en', 'es']
+        wikidata.language_aliases = {}
         response = mock.Mock(text='<html></html>', search_params={"language": "en"})
         self.assertEqual(wikidata.response(response), [])
 
@@ -8,7 +8,8 @@ from searx.testing import SearxTestCase
 class TestWikipediaEngine(SearxTestCase):
 
     def test_request(self):
-        wikipedia.supported_languages = ['fr', 'en']
+        wikipedia.supported_languages = ['fr', 'en', 'no']
+        wikipedia.language_aliases = {'nb': 'no'}
 
         query = 'test_query'
         dicto = defaultdict(dict)
@@ -25,9 +26,13 @@ class TestWikipediaEngine(SearxTestCase):
         self.assertIn('Test_Query', params['url'])
         self.assertNotIn('test_query', params['url'])
 
+        dicto['language'] = 'nb'
+        params = wikipedia.request(query, dicto)
+        self.assertIn('no.wikipedia.org', params['url'])
+
         dicto['language'] = 'xx'
         params = wikipedia.request(query, dicto)
-        self.assertIn('en', params['url'])
+        self.assertIn('en.wikipedia.org', params['url'])
 
     def test_response(self):
         dicto = defaultdict(dict)