mirror of
				https://github.com/searxng/searxng
				synced 2024-01-01 19:24:07 +01:00 
			
		
		
		
	Merge pull request #1061 from a01200356/bing
[fix] Language support for Bing Images and Videos
This commit is contained in:
		
						commit
						c8a66a090a
					
				
					 11 changed files with 114 additions and 39 deletions
				
			
		| 
						 | 
					@ -13,7 +13,7 @@ python:
 | 
				
			||||||
before_install:
 | 
					before_install:
 | 
				
			||||||
  - "export DISPLAY=:99.0"
 | 
					  - "export DISPLAY=:99.0"
 | 
				
			||||||
  - "sh -e /etc/init.d/xvfb start"
 | 
					  - "sh -e /etc/init.d/xvfb start"
 | 
				
			||||||
  - npm install less less-plugin-clean-css grunt-cli
 | 
					  - npm install less@2.7 less-plugin-clean-css grunt-cli
 | 
				
			||||||
  - export PATH=`pwd`/node_modules/.bin:$PATH
 | 
					  - export PATH=`pwd`/node_modules/.bin:$PATH
 | 
				
			||||||
  - ./manage.sh install_geckodriver ~/drivers
 | 
					  - ./manage.sh install_geckodriver ~/drivers
 | 
				
			||||||
  - export PATH=~/drivers:$PATH
 | 
					  - export PATH=~/drivers:$PATH
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							| 
						 | 
					@ -18,7 +18,6 @@
 | 
				
			||||||
from lxml import html
 | 
					from lxml import html
 | 
				
			||||||
from json import loads
 | 
					from json import loads
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
from searx.engines.bing import _fetch_supported_languages, supported_languages_url
 | 
					 | 
				
			||||||
from searx.url_utils import urlencode
 | 
					from searx.url_utils import urlencode
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# engine dependent config
 | 
					# engine dependent config
 | 
				
			||||||
| 
						 | 
					@ -26,6 +25,8 @@ categories = ['images']
 | 
				
			||||||
paging = True
 | 
					paging = True
 | 
				
			||||||
safesearch = True
 | 
					safesearch = True
 | 
				
			||||||
time_range_support = True
 | 
					time_range_support = True
 | 
				
			||||||
 | 
					language_support = True
 | 
				
			||||||
 | 
					supported_languages_url = 'https://www.bing.com/account/general'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# search-url
 | 
					# search-url
 | 
				
			||||||
base_url = 'https://www.bing.com/'
 | 
					base_url = 'https://www.bing.com/'
 | 
				
			||||||
| 
						 | 
					@ -45,23 +46,41 @@ safesearch_types = {2: 'STRICT',
 | 
				
			||||||
_quote_keys_regex = re.compile('({|,)([a-z][a-z0-9]*):(")', re.I | re.U)
 | 
					_quote_keys_regex = re.compile('({|,)([a-z][a-z0-9]*):(")', re.I | re.U)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# get supported region code
 | 
				
			||||||
 | 
					def get_region_code(lang, lang_list=None):
 | 
				
			||||||
 | 
					    region = None
 | 
				
			||||||
 | 
					    if lang in (lang_list or supported_languages):
 | 
				
			||||||
 | 
					        region = lang
 | 
				
			||||||
 | 
					    elif lang.startswith('no'):
 | 
				
			||||||
 | 
					        region = 'nb-NO'
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        # try to get a supported country code with language
 | 
				
			||||||
 | 
					        lang = lang.split('-')[0]
 | 
				
			||||||
 | 
					        for lc in (lang_list or supported_languages):
 | 
				
			||||||
 | 
					            if lang == lc.split('-')[0]:
 | 
				
			||||||
 | 
					                region = lc
 | 
				
			||||||
 | 
					                break
 | 
				
			||||||
 | 
					    if region:
 | 
				
			||||||
 | 
					        return region.lower()
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        return 'en-us'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# do search-request
 | 
					# do search-request
 | 
				
			||||||
def request(query, params):
 | 
					def request(query, params):
 | 
				
			||||||
    offset = (params['pageno'] - 1) * 10 + 1
 | 
					    offset = (params['pageno'] - 1) * 10 + 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # required for cookie
 | 
					 | 
				
			||||||
    if params['language'] == 'all':
 | 
					 | 
				
			||||||
        language = 'en-US'
 | 
					 | 
				
			||||||
    else:
 | 
					 | 
				
			||||||
        language = params['language']
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    search_path = search_string.format(
 | 
					    search_path = search_string.format(
 | 
				
			||||||
        query=urlencode({'q': query}),
 | 
					        query=urlencode({'q': query}),
 | 
				
			||||||
        offset=offset)
 | 
					        offset=offset)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    language = get_region_code(params['language'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    params['cookies']['SRCHHPGUSR'] = \
 | 
					    params['cookies']['SRCHHPGUSR'] = \
 | 
				
			||||||
        'NEWWND=0&NRSLT=-1&SRCHLANG=' + language.split('-')[0] +\
 | 
					        'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
 | 
				
			||||||
        '&ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
 | 
					
 | 
				
			||||||
 | 
					    params['cookies']['_EDGE_S'] = 'mkt=' + language +\
 | 
				
			||||||
 | 
					        '&ui=' + language + '&F=1'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    params['url'] = base_url + search_path
 | 
					    params['url'] = base_url + search_path
 | 
				
			||||||
    if params['time_range'] in time_range_dict:
 | 
					    if params['time_range'] in time_range_dict:
 | 
				
			||||||
| 
						 | 
					@ -106,3 +125,22 @@ def response(resp):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # return results
 | 
					    # return results
 | 
				
			||||||
    return results
 | 
					    return results
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# get supported languages from their site
 | 
				
			||||||
 | 
					def _fetch_supported_languages(resp):
 | 
				
			||||||
 | 
					    supported_languages = []
 | 
				
			||||||
 | 
					    dom = html.fromstring(resp.text)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    regions_xpath = '//div[@id="region-section-content"]' \
 | 
				
			||||||
 | 
					                    + '//ul[@class="b_vList"]/li/a/@href'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    regions = dom.xpath(regions_xpath)
 | 
				
			||||||
 | 
					    for region in regions:
 | 
				
			||||||
 | 
					        code = re.search('setmkt=[^\&]+', region).group()[7:]
 | 
				
			||||||
 | 
					        if code == 'nb-NO':
 | 
				
			||||||
 | 
					            code = 'no-NO'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        supported_languages.append(code)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return supported_languages
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -12,6 +12,7 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from json import loads
 | 
					from json import loads
 | 
				
			||||||
from lxml import html
 | 
					from lxml import html
 | 
				
			||||||
 | 
					from searx.engines.bing_images import _fetch_supported_languages, supported_languages_url, get_region_code
 | 
				
			||||||
from searx.engines.xpath import extract_text
 | 
					from searx.engines.xpath import extract_text
 | 
				
			||||||
from searx.url_utils import urlencode
 | 
					from searx.url_utils import urlencode
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -21,6 +22,7 @@ paging = True
 | 
				
			||||||
safesearch = True
 | 
					safesearch = True
 | 
				
			||||||
time_range_support = True
 | 
					time_range_support = True
 | 
				
			||||||
number_of_results = 10
 | 
					number_of_results = 10
 | 
				
			||||||
 | 
					language_support = True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
search_url = 'https://www.bing.com/videos/asyncv2?{query}&async=content&'\
 | 
					search_url = 'https://www.bing.com/videos/asyncv2?{query}&async=content&'\
 | 
				
			||||||
             'first={offset}&count={number_of_results}&CW=1366&CH=25&FORM=R5VR5'
 | 
					             'first={offset}&count={number_of_results}&CW=1366&CH=25&FORM=R5VR5'
 | 
				
			||||||
| 
						 | 
					@ -45,7 +47,8 @@ def request(query, params):
 | 
				
			||||||
        'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
 | 
					        'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # language cookie
 | 
					    # language cookie
 | 
				
			||||||
    params['cookies']['_EDGE_S'] = 'mkt=' + params['language'].lower() + '&F=1'
 | 
					    region = get_region_code(params['language'], lang_list=supported_languages)
 | 
				
			||||||
 | 
					    params['cookies']['_EDGE_S'] = 'mkt=' + region + '&F=1'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # query and paging
 | 
					    # query and paging
 | 
				
			||||||
    params['url'] = search_url.format(query=urlencode({'q': query}),
 | 
					    params['url'] = search_url.format(query=urlencode({'q': query}),
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -134,4 +134,4 @@ def _fetch_supported_languages(resp):
 | 
				
			||||||
    regions_json = loads(response_page)
 | 
					    regions_json = loads(response_page)
 | 
				
			||||||
    supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys())
 | 
					    supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return supported_languages
 | 
					    return list(supported_languages)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -118,7 +118,7 @@ def _fetch_supported_languages(resp):
 | 
				
			||||||
    dom = fromstring(resp.text)
 | 
					    dom = fromstring(resp.text)
 | 
				
			||||||
    options = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
 | 
					    options = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
 | 
				
			||||||
    for option in options:
 | 
					    for option in options:
 | 
				
			||||||
        code = option.xpath('./@data-val')[0]
 | 
					        code = option.xpath('./@data-search-language')[0]
 | 
				
			||||||
        if code.startswith('nb-'):
 | 
					        if code.startswith('nb-'):
 | 
				
			||||||
            code = code.replace('nb', 'no', 1)
 | 
					            code = code.replace('nb', 'no', 1)
 | 
				
			||||||
        supported_languages.append(code)
 | 
					        supported_languages.append(code)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -5,6 +5,11 @@
 | 
				
			||||||
language_codes = (
 | 
					language_codes = (
 | 
				
			||||||
    (u"ar-SA", u"العربية", u"", u"Arabic"),
 | 
					    (u"ar-SA", u"العربية", u"", u"Arabic"),
 | 
				
			||||||
    (u"bg-BG", u"Български", u"", u"Bulgarian"),
 | 
					    (u"bg-BG", u"Български", u"", u"Bulgarian"),
 | 
				
			||||||
 | 
					    (u"ca", u"Català", u"", u"Catalan"),
 | 
				
			||||||
 | 
					    (u"ca-AD", u"Català", u"Andorra", u"Catalan"),
 | 
				
			||||||
 | 
					    (u"ca-CT", u"Català", u"", u"Catalan"),
 | 
				
			||||||
 | 
					    (u"ca-ES", u"Català", u"Espanya", u"Catalan"),
 | 
				
			||||||
 | 
					    (u"ca-FR", u"Català", u"França", u"Catalan"),
 | 
				
			||||||
    (u"cs-CZ", u"Čeština", u"", u"Czech"),
 | 
					    (u"cs-CZ", u"Čeština", u"", u"Czech"),
 | 
				
			||||||
    (u"da-DK", u"Dansk", u"", u"Danish"),
 | 
					    (u"da-DK", u"Dansk", u"", u"Danish"),
 | 
				
			||||||
    (u"de", u"Deutsch", u"", u"German"),
 | 
					    (u"de", u"Deutsch", u"", u"German"),
 | 
				
			||||||
| 
						 | 
					@ -15,9 +20,7 @@ language_codes = (
 | 
				
			||||||
    (u"en", u"English", u"", u"English"),
 | 
					    (u"en", u"English", u"", u"English"),
 | 
				
			||||||
    (u"en-AU", u"English", u"Australia", u"English"),
 | 
					    (u"en-AU", u"English", u"Australia", u"English"),
 | 
				
			||||||
    (u"en-CA", u"English", u"Canada", u"English"),
 | 
					    (u"en-CA", u"English", u"Canada", u"English"),
 | 
				
			||||||
    (u"en-CY", u"English", u"Cyprus", u"English"),
 | 
					 | 
				
			||||||
    (u"en-GB", u"English", u"United Kingdom", u"English"),
 | 
					    (u"en-GB", u"English", u"United Kingdom", u"English"),
 | 
				
			||||||
    (u"en-GD", u"English", u"Grenada", u"English"),
 | 
					 | 
				
			||||||
    (u"en-ID", u"English", u"Indonesia", u"English"),
 | 
					    (u"en-ID", u"English", u"Indonesia", u"English"),
 | 
				
			||||||
    (u"en-IE", u"English", u"Ireland", u"English"),
 | 
					    (u"en-IE", u"English", u"Ireland", u"English"),
 | 
				
			||||||
    (u"en-IN", u"English", u"India", u"English"),
 | 
					    (u"en-IN", u"English", u"India", u"English"),
 | 
				
			||||||
| 
						 | 
					@ -28,6 +31,7 @@ language_codes = (
 | 
				
			||||||
    (u"en-US", u"English", u"United States", u"English"),
 | 
					    (u"en-US", u"English", u"United States", u"English"),
 | 
				
			||||||
    (u"en-ZA", u"English", u"South Africa", u"English"),
 | 
					    (u"en-ZA", u"English", u"South Africa", u"English"),
 | 
				
			||||||
    (u"es", u"Español", u"", u"Spanish"),
 | 
					    (u"es", u"Español", u"", u"Spanish"),
 | 
				
			||||||
 | 
					    (u"es-AD", u"Español", u"Andorra", u"Spanish"),
 | 
				
			||||||
    (u"es-AR", u"Español", u"Argentina", u"Spanish"),
 | 
					    (u"es-AR", u"Español", u"Argentina", u"Spanish"),
 | 
				
			||||||
    (u"es-CL", u"Español", u"Chile", u"Spanish"),
 | 
					    (u"es-CL", u"Español", u"Chile", u"Spanish"),
 | 
				
			||||||
    (u"es-CO", u"Español", u"Colombia", u"Spanish"),
 | 
					    (u"es-CO", u"Español", u"Colombia", u"Spanish"),
 | 
				
			||||||
| 
						 | 
					@ -38,38 +42,32 @@ language_codes = (
 | 
				
			||||||
    (u"et-EE", u"Eesti", u"", u"Estonian"),
 | 
					    (u"et-EE", u"Eesti", u"", u"Estonian"),
 | 
				
			||||||
    (u"fi-FI", u"Suomi", u"", u"Finnish"),
 | 
					    (u"fi-FI", u"Suomi", u"", u"Finnish"),
 | 
				
			||||||
    (u"fr", u"Français", u"", u"French"),
 | 
					    (u"fr", u"Français", u"", u"French"),
 | 
				
			||||||
 | 
					    (u"fr-AD", u"Français", u"Andorre", u"French"),
 | 
				
			||||||
    (u"fr-BE", u"Français", u"Belgique", u"French"),
 | 
					    (u"fr-BE", u"Français", u"Belgique", u"French"),
 | 
				
			||||||
    (u"fr-CA", u"Français", u"Canada", u"French"),
 | 
					    (u"fr-CA", u"Français", u"Canada", u"French"),
 | 
				
			||||||
    (u"fr-CH", u"Français", u"Suisse", u"French"),
 | 
					    (u"fr-CH", u"Français", u"Suisse", u"French"),
 | 
				
			||||||
    (u"fr-FR", u"Français", u"France", u"French"),
 | 
					    (u"fr-FR", u"Français", u"France", u"French"),
 | 
				
			||||||
    (u"he-IL", u"עברית", u"", u"Hebrew"),
 | 
					    (u"he-IL", u"עברית", u"", u"Hebrew"),
 | 
				
			||||||
    (u"hr-HR", u"Hrvatski", u"", u"Croatian"),
 | 
					 | 
				
			||||||
    (u"hu-HU", u"Magyar", u"", u"Hungarian"),
 | 
					    (u"hu-HU", u"Magyar", u"", u"Hungarian"),
 | 
				
			||||||
    (u"id-ID", u"Bahasa Indonesia", u"", u"Indonesian"),
 | 
					 | 
				
			||||||
    (u"it", u"Italiano", u"", u"Italian"),
 | 
					    (u"it", u"Italiano", u"", u"Italian"),
 | 
				
			||||||
    (u"it-CH", u"Italiano", u"Svizzera", u"Italian"),
 | 
					    (u"it-CH", u"Italiano", u"Svizzera", u"Italian"),
 | 
				
			||||||
    (u"it-IT", u"Italiano", u"Italia", u"Italian"),
 | 
					    (u"it-IT", u"Italiano", u"Italia", u"Italian"),
 | 
				
			||||||
    (u"ja-JP", u"日本語", u"", u"Japanese"),
 | 
					    (u"ja-JP", u"日本語", u"", u"Japanese"),
 | 
				
			||||||
    (u"ko-KR", u"한국어", u"", u"Korean"),
 | 
					    (u"ko-KR", u"한국어", u"", u"Korean"),
 | 
				
			||||||
    (u"lt-LT", u"Lietuvių", u"", u"Lithuanian"),
 | 
					 | 
				
			||||||
    (u"lv-LV", u"Latviešu", u"", u"Latvian"),
 | 
					 | 
				
			||||||
    (u"ms-MY", u"Bahasa Melayu", u"", u"Malay"),
 | 
					 | 
				
			||||||
    (u"nl", u"Nederlands", u"", u"Dutch"),
 | 
					    (u"nl", u"Nederlands", u"", u"Dutch"),
 | 
				
			||||||
    (u"nl-BE", u"Nederlands", u"België", u"Dutch"),
 | 
					    (u"nl-BE", u"Nederlands", u"België", u"Dutch"),
 | 
				
			||||||
    (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
 | 
					    (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
 | 
				
			||||||
    (u"no-NO", u"Norsk", u"", u"Norwegian"),
 | 
					    (u"no-NO", u"Norsk", u"", u"Norwegian"),
 | 
				
			||||||
    (u"pl-PL", u"Polski", u"", u"Polish"),
 | 
					    (u"pl-PL", u"Polski", u"", u"Polish"),
 | 
				
			||||||
    (u"pt", u"Português", u"", u"Portuguese"),
 | 
					    (u"pt", u"Português", u"", u"Portuguese"),
 | 
				
			||||||
 | 
					    (u"pt-AD", u"Português", u"Andorra", u"Portuguese"),
 | 
				
			||||||
    (u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
 | 
					    (u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
 | 
				
			||||||
    (u"pt-PT", u"Português", u"Portugal", u"Portuguese"),
 | 
					    (u"pt-PT", u"Português", u"Portugal", u"Portuguese"),
 | 
				
			||||||
    (u"ro-RO", u"Română", u"", u"Romanian"),
 | 
					    (u"ro-RO", u"Română", u"", u"Romanian"),
 | 
				
			||||||
    (u"ru-RU", u"Русский", u"", u"Russian"),
 | 
					    (u"ru-RU", u"Русский", u"", u"Russian"),
 | 
				
			||||||
    (u"sk-SK", u"Slovenčina", u"", u"Slovak"),
 | 
					 | 
				
			||||||
    (u"sl", u"Slovenščina", u"", u"Slovenian"),
 | 
					 | 
				
			||||||
    (u"sv-SE", u"Svenska", u"", u"Swedish"),
 | 
					    (u"sv-SE", u"Svenska", u"", u"Swedish"),
 | 
				
			||||||
    (u"th-TH", u"ไทย", u"", u"Thai"),
 | 
					    (u"th-TH", u"ไทย", u"", u"Thai"),
 | 
				
			||||||
    (u"tr-TR", u"Türkçe", u"", u"Turkish"),
 | 
					    (u"tr-TR", u"Türkçe", u"", u"Turkish"),
 | 
				
			||||||
    (u"vi-VN", u"Tiếng Việt", u"", u"Vietnamese"),
 | 
					 | 
				
			||||||
    (u"zh", u"中文", u"", u"Chinese"),
 | 
					    (u"zh", u"中文", u"", u"Chinese"),
 | 
				
			||||||
    (u"zh-CN", u"中文", u"中国", u"Chinese"),
 | 
					    (u"zh-CN", u"中文", u"中国", u"Chinese"),
 | 
				
			||||||
    (u"zh-HK", u"中文", u"香港", u"Chinese"),
 | 
					    (u"zh-HK", u"中文", u"香港", u"Chinese"),
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -8,10 +8,12 @@ from searx.testing import SearxTestCase
 | 
				
			||||||
class TestBingImagesEngine(SearxTestCase):
 | 
					class TestBingImagesEngine(SearxTestCase):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_request(self):
 | 
					    def test_request(self):
 | 
				
			||||||
 | 
					        bing_images.supported_languages = ['fr-FR', 'en-US']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        query = 'test_query'
 | 
					        query = 'test_query'
 | 
				
			||||||
        dicto = defaultdict(dict)
 | 
					        dicto = defaultdict(dict)
 | 
				
			||||||
        dicto['pageno'] = 1
 | 
					        dicto['pageno'] = 1
 | 
				
			||||||
        dicto['language'] = 'fr_FR'
 | 
					        dicto['language'] = 'fr-FR'
 | 
				
			||||||
        dicto['safesearch'] = 1
 | 
					        dicto['safesearch'] = 1
 | 
				
			||||||
        dicto['time_range'] = ''
 | 
					        dicto['time_range'] = ''
 | 
				
			||||||
        params = bing_images.request(query, dicto)
 | 
					        params = bing_images.request(query, dicto)
 | 
				
			||||||
| 
						 | 
					@ -19,12 +21,19 @@ class TestBingImagesEngine(SearxTestCase):
 | 
				
			||||||
        self.assertTrue(query in params['url'])
 | 
					        self.assertTrue(query in params['url'])
 | 
				
			||||||
        self.assertTrue('bing.com' in params['url'])
 | 
					        self.assertTrue('bing.com' in params['url'])
 | 
				
			||||||
        self.assertTrue('SRCHHPGUSR' in params['cookies'])
 | 
					        self.assertTrue('SRCHHPGUSR' in params['cookies'])
 | 
				
			||||||
        self.assertTrue('fr' in params['cookies']['SRCHHPGUSR'])
 | 
					        self.assertTrue('DEMOTE' in params['cookies']['SRCHHPGUSR'])
 | 
				
			||||||
 | 
					        self.assertTrue('_EDGE_S' in params['cookies'])
 | 
				
			||||||
 | 
					        self.assertTrue('fr-fr' in params['cookies']['_EDGE_S'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        dicto['language'] = 'fr'
 | 
				
			||||||
 | 
					        params = bing_images.request(query, dicto)
 | 
				
			||||||
 | 
					        self.assertTrue('_EDGE_S' in params['cookies'])
 | 
				
			||||||
 | 
					        self.assertTrue('fr-fr' in params['cookies']['_EDGE_S'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        dicto['language'] = 'all'
 | 
					        dicto['language'] = 'all'
 | 
				
			||||||
        params = bing_images.request(query, dicto)
 | 
					        params = bing_images.request(query, dicto)
 | 
				
			||||||
        self.assertIn('SRCHHPGUSR', params['cookies'])
 | 
					        self.assertTrue('_EDGE_S' in params['cookies'])
 | 
				
			||||||
        self.assertIn('en', params['cookies']['SRCHHPGUSR'])
 | 
					        self.assertTrue('en-us' in params['cookies']['_EDGE_S'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_response(self):
 | 
					    def test_response(self):
 | 
				
			||||||
        self.assertRaises(AttributeError, bing_images.response, None)
 | 
					        self.assertRaises(AttributeError, bing_images.response, None)
 | 
				
			||||||
| 
						 | 
					@ -82,3 +91,28 @@ class TestBingImagesEngine(SearxTestCase):
 | 
				
			||||||
        self.assertEqual(results[0]['content'], '')
 | 
					        self.assertEqual(results[0]['content'], '')
 | 
				
			||||||
        self.assertEqual(results[0]['thumbnail_src'], 'thumb_url')
 | 
					        self.assertEqual(results[0]['thumbnail_src'], 'thumb_url')
 | 
				
			||||||
        self.assertEqual(results[0]['img_src'], 'img_url')
 | 
					        self.assertEqual(results[0]['img_src'], 'img_url')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_fetch_supported_languages(self):
 | 
				
			||||||
 | 
					        html = """
 | 
				
			||||||
 | 
					        <div>
 | 
				
			||||||
 | 
					            <div id="region-section-content">
 | 
				
			||||||
 | 
					                <ul class="b_vList">
 | 
				
			||||||
 | 
					                    <li>
 | 
				
			||||||
 | 
					                        <a href="https://bing...&setmkt=de-DE&s...">Germany</a>
 | 
				
			||||||
 | 
					                        <a href="https://bing...&setmkt=nb-NO&s...">Norway</a>
 | 
				
			||||||
 | 
					                    </li>
 | 
				
			||||||
 | 
					                </ul>
 | 
				
			||||||
 | 
					                <ul class="b_vList">
 | 
				
			||||||
 | 
					                    <li>
 | 
				
			||||||
 | 
					                        <a href="https://bing...&setmkt=es-AR&s...">Argentina</a>
 | 
				
			||||||
 | 
					                    </li>
 | 
				
			||||||
 | 
					                </ul>
 | 
				
			||||||
 | 
					            </div>
 | 
				
			||||||
 | 
					        </div>
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        response = mock.Mock(text=html)
 | 
				
			||||||
 | 
					        languages = list(bing_images._fetch_supported_languages(response))
 | 
				
			||||||
 | 
					        self.assertEqual(len(languages), 3)
 | 
				
			||||||
 | 
					        self.assertIn('de-DE', languages)
 | 
				
			||||||
 | 
					        self.assertIn('no-NO', languages)
 | 
				
			||||||
 | 
					        self.assertIn('es-AR', languages)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -8,6 +8,8 @@ from searx.testing import SearxTestCase
 | 
				
			||||||
class TestBingVideosEngine(SearxTestCase):
 | 
					class TestBingVideosEngine(SearxTestCase):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_request(self):
 | 
					    def test_request(self):
 | 
				
			||||||
 | 
					        bing_videos.supported_languages = ['fr-FR', 'en-US']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        query = 'test_query'
 | 
					        query = 'test_query'
 | 
				
			||||||
        dicto = defaultdict(dict)
 | 
					        dicto = defaultdict(dict)
 | 
				
			||||||
        dicto['pageno'] = 1
 | 
					        dicto['pageno'] = 1
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -139,9 +139,9 @@ class TestSwisscowsEngine(SearxTestCase):
 | 
				
			||||||
            <div id="regions-popup">
 | 
					            <div id="regions-popup">
 | 
				
			||||||
                <div>
 | 
					                <div>
 | 
				
			||||||
                    <ul>
 | 
					                    <ul>
 | 
				
			||||||
                        <li><a data-val="browser"></a></li>
 | 
					                        <li><a data-search-language="browser"></a></li>
 | 
				
			||||||
                        <li><a data-val="de-CH"></a></li>
 | 
					                        <li><a data-search-language="de-CH"></a></li>
 | 
				
			||||||
                        <li><a data-val="fr-CH"></a></li>
 | 
					                        <li><a data-search-language="fr-CH"></a></li>
 | 
				
			||||||
                    </ul>
 | 
					                    </ul>
 | 
				
			||||||
                </div>
 | 
					                </div>
 | 
				
			||||||
            </div>
 | 
					            </div>
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -8,13 +8,13 @@
 | 
				
			||||||
# are written in current directory to avoid overwriting in case something goes wrong.
 | 
					# are written in current directory to avoid overwriting in case something goes wrong.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from requests import get
 | 
					from requests import get
 | 
				
			||||||
from urllib import urlencode
 | 
					 | 
				
			||||||
from lxml.html import fromstring
 | 
					from lxml.html import fromstring
 | 
				
			||||||
from json import loads, dumps
 | 
					from json import loads, dump
 | 
				
			||||||
import io
 | 
					import io
 | 
				
			||||||
from sys import path
 | 
					from sys import path
 | 
				
			||||||
path.append('../searx')  # noqa
 | 
					path.append('../searx')  # noqa
 | 
				
			||||||
from searx import settings
 | 
					from searx import settings
 | 
				
			||||||
 | 
					from searx.url_utils import urlencode
 | 
				
			||||||
from searx.engines import initialize_engines, engines
 | 
					from searx.engines import initialize_engines, engines
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Geonames API for country names.
 | 
					# Geonames API for country names.
 | 
				
			||||||
| 
						 | 
					@ -70,7 +70,7 @@ def get_country_name(locale):
 | 
				
			||||||
    json = loads(response.text)
 | 
					    json = loads(response.text)
 | 
				
			||||||
    content = json.get('geonames', None)
 | 
					    content = json.get('geonames', None)
 | 
				
			||||||
    if content is None or len(content) != 1:
 | 
					    if content is None or len(content) != 1:
 | 
				
			||||||
        print "No country name found for " + locale[0] + "-" + locale[1]
 | 
					        print("No country name found for " + locale[0] + "-" + locale[1])
 | 
				
			||||||
        return ''
 | 
					        return ''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return content[0].get('countryName', '')
 | 
					    return content[0].get('countryName', '')
 | 
				
			||||||
| 
						 | 
					@ -84,11 +84,11 @@ def fetch_supported_languages():
 | 
				
			||||||
            try:
 | 
					            try:
 | 
				
			||||||
                engines_languages[engine_name] = engines[engine_name].fetch_supported_languages()
 | 
					                engines_languages[engine_name] = engines[engine_name].fetch_supported_languages()
 | 
				
			||||||
            except Exception as e:
 | 
					            except Exception as e:
 | 
				
			||||||
                print e
 | 
					                print(e)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # write json file
 | 
					    # write json file
 | 
				
			||||||
    with io.open(engines_languages_file, "w", encoding="utf-8") as f:
 | 
					    with io.open(engines_languages_file, "w", encoding="utf-8") as f:
 | 
				
			||||||
        f.write(unicode(dumps(engines_languages, ensure_ascii=False, encoding="utf-8")))
 | 
					        dump(engines_languages, f, ensure_ascii=False)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Join all language lists.
 | 
					# Join all language lists.
 | 
				
			||||||
| 
						 | 
					@ -97,7 +97,7 @@ def join_language_lists():
 | 
				
			||||||
    global languages
 | 
					    global languages
 | 
				
			||||||
    # include wikipedia first for more accurate language names
 | 
					    # include wikipedia first for more accurate language names
 | 
				
			||||||
    languages = {code: lang for code, lang
 | 
					    languages = {code: lang for code, lang
 | 
				
			||||||
                 in engines_languages['wikipedia'].iteritems()
 | 
					                 in engines_languages['wikipedia'].items()
 | 
				
			||||||
                 if valid_code(code)}
 | 
					                 if valid_code(code)}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for engine_name in engines_languages:
 | 
					    for engine_name in engines_languages:
 | 
				
			||||||
| 
						 | 
					@ -121,7 +121,7 @@ def join_language_lists():
 | 
				
			||||||
    # filter list to include only languages supported by most engines
 | 
					    # filter list to include only languages supported by most engines
 | 
				
			||||||
    min_supported_engines = int(0.70 * len(engines_languages))
 | 
					    min_supported_engines = int(0.70 * len(engines_languages))
 | 
				
			||||||
    languages = {code: lang for code, lang
 | 
					    languages = {code: lang for code, lang
 | 
				
			||||||
                 in languages.iteritems()
 | 
					                 in languages.items()
 | 
				
			||||||
                 if len(lang.get('counter', [])) >= min_supported_engines or
 | 
					                 if len(lang.get('counter', [])) >= min_supported_engines or
 | 
				
			||||||
                 len(languages.get(code.split('-')[0], {}).get('counter', [])) >= min_supported_engines}
 | 
					                 len(languages.get(code.split('-')[0], {}).get('counter', [])) >= min_supported_engines}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -165,7 +165,7 @@ def filter_single_country_languages():
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Write languages.py.
 | 
					# Write languages.py.
 | 
				
			||||||
def write_languages_file():
 | 
					def write_languages_file():
 | 
				
			||||||
    new_file = open(languages_file, 'w')
 | 
					    new_file = open(languages_file, 'wb')
 | 
				
			||||||
    file_content = '# -*- coding: utf-8 -*-\n'\
 | 
					    file_content = '# -*- coding: utf-8 -*-\n'\
 | 
				
			||||||
                   + '# list of language codes\n'\
 | 
					                   + '# list of language codes\n'\
 | 
				
			||||||
                   + '# this file is generated automatically by utils/update_search_languages.py\n'\
 | 
					                   + '# this file is generated automatically by utils/update_search_languages.py\n'\
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		
		Reference in a new issue