forked from zaclys/searxng
		
	[mod] add 'Accept-Language' HTTP header to online processors
Most engines that support languages (and regions) use the Accept-Language header sent by the web browser to build a response that fits the language (and region). - add new engine option: send_accept_language_header Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
		
							parent
							
								
									a2badb4fe4
								
							
						
					
					
						commit
						8df1f0c47e
					
				
					 21 changed files with 52 additions and 26 deletions
				
			
		| 
						 | 
				
			
			@ -440,6 +440,7 @@ engine is shown.  Most of the options have a default value or even are optional.
 | 
			
		|||
     engine: example
 | 
			
		||||
     shortcut: demo
 | 
			
		||||
     base_url: 'https://{language}.example.com/'
 | 
			
		||||
     send_accept_language_header: false
 | 
			
		||||
     categories: general
 | 
			
		||||
     timeout: 3.0
 | 
			
		||||
     api_key: 'apikey'
 | 
			
		||||
| 
						 | 
				
			
			@ -488,6 +489,13 @@ engine is shown.  Most of the options have a default value or even are optional.
 | 
			
		|||
  use multiple sites using only one engine, or updating the site URL without
 | 
			
		||||
  touching the code.
 | 
			
		||||
 | 
			
		||||
``send_accept_language_header`` :
 | 
			
		||||
  Several engines that support languages (or regions) deal with the HTTP header
 | 
			
		||||
  ``Accept-Language`` to build a response that fits the locale.  When this
 | 
			
		||||
  option is activated, the language (locale) that is selected by the user is used
 | 
			
		||||
  to build and send an ``Accept-Language`` header in the request to the origin
 | 
			
		||||
  search engine.
 | 
			
		||||
 | 
			
		||||
``categories`` : optional
 | 
			
		||||
  Define in which categories this engine will be active.  Most of the time, it is
 | 
			
		||||
  defined in the code of the engine, but in a few cases it is useful, like when
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -44,6 +44,7 @@ ENGINE_DEFAULT_ARGS = {
 | 
			
		|||
    "enable_http": False,
 | 
			
		||||
    "using_tor_proxy": False,
 | 
			
		||||
    "display_error_messages": True,
 | 
			
		||||
    "send_accept_language_header": False,
 | 
			
		||||
    "tokens": [],
 | 
			
		||||
    "about": {},
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -25,6 +25,7 @@ categories = ['general', 'web']
 | 
			
		|||
paging = True
 | 
			
		||||
time_range_support = False
 | 
			
		||||
safesearch = False
 | 
			
		||||
send_accept_language_header = True
 | 
			
		||||
supported_languages_url = 'https://www.bing.com/account/general'
 | 
			
		||||
language_aliases = {}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -68,7 +69,6 @@ def request(query, params):
 | 
			
		|||
        logger.debug("headers.Referer --> %s", referer)
 | 
			
		||||
 | 
			
		||||
    params['url'] = base_url + search_path
 | 
			
		||||
    params['headers']['Accept-Language'] = "en-US,en;q=0.5"
 | 
			
		||||
    params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
 | 
			
		||||
    return params
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -31,6 +31,7 @@ categories = ['images', 'web']
 | 
			
		|||
paging = True
 | 
			
		||||
safesearch = True
 | 
			
		||||
time_range_support = True
 | 
			
		||||
send_accept_language_header = True
 | 
			
		||||
supported_languages_url = 'https://www.bing.com/account/general'
 | 
			
		||||
number_of_results = 28
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -34,6 +34,7 @@ about = {
 | 
			
		|||
categories = ['news']
 | 
			
		||||
paging = True
 | 
			
		||||
time_range_support = True
 | 
			
		||||
send_accept_language_header = True
 | 
			
		||||
 | 
			
		||||
# search-url
 | 
			
		||||
base_url = 'https://www.bing.com/'
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -30,6 +30,7 @@ categories = ['videos', 'web']
 | 
			
		|||
paging = True
 | 
			
		||||
safesearch = True
 | 
			
		||||
time_range_support = True
 | 
			
		||||
send_accept_language_header = True
 | 
			
		||||
number_of_results = 28
 | 
			
		||||
 | 
			
		||||
base_url = 'https://www.bing.com/'
 | 
			
		||||
| 
						 | 
				
			
			@ -70,10 +71,6 @@ def request(query, params):
 | 
			
		|||
    if params['time_range'] in time_range_dict:
 | 
			
		||||
        params['url'] += time_range_string.format(interval=time_range_dict[params['time_range']])
 | 
			
		||||
 | 
			
		||||
    # bing videos did not like "older" versions < 70.0.1 when selecting other
 | 
			
		||||
    # languages than 'en' .. very strange ?!?!
 | 
			
		||||
    params['headers']['User-Agent'] = 'Mozilla/5.0 (X11; Linux x86_64; rv:73.0.1) Gecko/20100101 Firefox/73.0.1'
 | 
			
		||||
 | 
			
		||||
    return params
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -20,6 +20,7 @@ from json import loads
 | 
			
		|||
from urllib.parse import urlencode
 | 
			
		||||
 | 
			
		||||
engine_type = 'online'
 | 
			
		||||
send_accept_language_header = True
 | 
			
		||||
categories = ['general']
 | 
			
		||||
disabled = True
 | 
			
		||||
timeout = 2.0
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -31,6 +31,7 @@ categories = ['general', 'web']
 | 
			
		|||
paging = True
 | 
			
		||||
supported_languages_url = 'https://duckduckgo.com/util/u588.js'
 | 
			
		||||
time_range_support = True
 | 
			
		||||
send_accept_language_header = True
 | 
			
		||||
 | 
			
		||||
language_aliases = {
 | 
			
		||||
    'ar-SA': 'ar-XA',
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -27,6 +27,8 @@ about = {
 | 
			
		|||
    "results": 'JSON',
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
send_accept_language_header = True
 | 
			
		||||
 | 
			
		||||
URL = 'https://api.duckduckgo.com/' + '?{query}&format=json&pretty=0&no_redirect=1&d=1'
 | 
			
		||||
 | 
			
		||||
WIKIDATA_PREFIX = ['http://www.wikidata.org/entity/', 'https://www.wikidata.org/entity/']
 | 
			
		||||
| 
						 | 
				
			
			@ -62,7 +64,6 @@ def request(query, params):
 | 
			
		|||
    params['url'] = URL.format(query=urlencode({'q': query}))
 | 
			
		||||
    language = match_language(params['language'], supported_languages, language_aliases)
 | 
			
		||||
    language = language.split('-')[0]
 | 
			
		||||
    params['headers']['Accept-Language'] = language
 | 
			
		||||
    return params
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -30,6 +30,7 @@ about = {
 | 
			
		|||
categories = ['images', 'web']
 | 
			
		||||
paging = True
 | 
			
		||||
safesearch = True
 | 
			
		||||
send_accept_language_header = True
 | 
			
		||||
 | 
			
		||||
# search-url
 | 
			
		||||
images_url = 'https://duckduckgo.com/i.js?{query}&s={offset}&p={safesearch}&o=json&vqd={vqd}'
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -45,6 +45,7 @@ categories = ['general', 'web']
 | 
			
		|||
paging = True
 | 
			
		||||
time_range_support = True
 | 
			
		||||
safesearch = True
 | 
			
		||||
send_accept_language_header = True
 | 
			
		||||
use_mobile_ui = False
 | 
			
		||||
supported_languages_url = 'https://www.google.com/preferences?#languages'
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -241,16 +242,6 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
 | 
			
		|||
        # language.
 | 
			
		||||
        ret_val['params']['lr'] = "lang_" + lang_list.get(lang_country, language)
 | 
			
		||||
 | 
			
		||||
        # Accept-Language: fr-CH, fr;q=0.8, en;q=0.6, *;q=0.5
 | 
			
		||||
        ret_val['headers']['Accept-Language'] = ','.join(
 | 
			
		||||
            [
 | 
			
		||||
                lang_country,
 | 
			
		||||
                language + ';q=0.8,',
 | 
			
		||||
                'en;q=0.6',
 | 
			
		||||
                '*;q=0.5',
 | 
			
		||||
            ]
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    return ret_val
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -51,6 +51,7 @@ paging = False
 | 
			
		|||
use_locale_domain = True
 | 
			
		||||
time_range_support = True
 | 
			
		||||
safesearch = True
 | 
			
		||||
send_accept_language_header = True
 | 
			
		||||
 | 
			
		||||
filter_mapping = {0: 'images', 1: 'active', 2: 'active'}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -125,7 +126,6 @@ def request(query, params):
 | 
			
		|||
    """Google-Video search request"""
 | 
			
		||||
 | 
			
		||||
    lang_info = get_lang_info(params, supported_languages, language_aliases, False)
 | 
			
		||||
    logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
 | 
			
		||||
 | 
			
		||||
    query_url = (
 | 
			
		||||
        'https://'
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -70,13 +70,13 @@ time_range_support = True
 | 
			
		|||
#
 | 
			
		||||
#  safesearch : results are identical for safesearch=0 and safesearch=2
 | 
			
		||||
safesearch = False
 | 
			
		||||
send_accept_language_header = True
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def request(query, params):
 | 
			
		||||
    """Google-News search request"""
 | 
			
		||||
 | 
			
		||||
    lang_info = get_lang_info(params, supported_languages, language_aliases, False)
 | 
			
		||||
    logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
 | 
			
		||||
 | 
			
		||||
    # google news has only one domain
 | 
			
		||||
    lang_info['subdomain'] = 'news.google.com'
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -22,6 +22,8 @@ about = {
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
categories = ["files", "apps"]
 | 
			
		||||
send_accept_language_header = True
 | 
			
		||||
 | 
			
		||||
search_url = "https://play.google.com/store/search?{query}&c=apps"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -52,6 +52,7 @@ language_support = True
 | 
			
		|||
use_locale_domain = True
 | 
			
		||||
time_range_support = True
 | 
			
		||||
safesearch = False
 | 
			
		||||
send_accept_language_header = True
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def time_range_url(params):
 | 
			
		||||
| 
						 | 
				
			
			@ -75,7 +76,6 @@ def request(query, params):
 | 
			
		|||
 | 
			
		||||
    offset = (params['pageno'] - 1) * 10
 | 
			
		||||
    lang_info = get_lang_info(params, supported_languages, language_aliases, False)
 | 
			
		||||
    logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
 | 
			
		||||
 | 
			
		||||
    # subdomain is: scholar.google.xy
 | 
			
		||||
    lang_info['subdomain'] = lang_info['subdomain'].replace("www.", "scholar.")
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -60,6 +60,7 @@ language_support = True
 | 
			
		|||
use_locale_domain = True
 | 
			
		||||
time_range_support = True
 | 
			
		||||
safesearch = True
 | 
			
		||||
send_accept_language_header = True
 | 
			
		||||
 | 
			
		||||
RE_CACHE = {}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -111,7 +112,6 @@ def request(query, params):
 | 
			
		|||
    """Google-Video search request"""
 | 
			
		||||
 | 
			
		||||
    lang_info = get_lang_info(params, supported_languages, language_aliases, False)
 | 
			
		||||
    logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
 | 
			
		||||
 | 
			
		||||
    query_url = (
 | 
			
		||||
        'https://'
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -30,6 +30,7 @@ about = {
 | 
			
		|||
categories = ['map']
 | 
			
		||||
paging = False
 | 
			
		||||
language_support = True
 | 
			
		||||
send_accept_language_header = True
 | 
			
		||||
 | 
			
		||||
# search-url
 | 
			
		||||
base_url = 'https://nominatim.openstreetmap.org/'
 | 
			
		||||
| 
						 | 
				
			
			@ -142,9 +143,8 @@ def request(query, params):
 | 
			
		|||
    params['url'] = base_url + search_string.format(query=urlencode({'q': query}))
 | 
			
		||||
    params['route'] = route_re.match(query)
 | 
			
		||||
    params['headers']['User-Agent'] = searx_useragent()
 | 
			
		||||
 | 
			
		||||
    accept_language = 'en' if params['language'] == 'all' else params['language']
 | 
			
		||||
    params['headers']['Accept-Language'] = accept_language
 | 
			
		||||
    if 'Accept-Language' not in params['headers']:
 | 
			
		||||
        params['headers']['Accept-Language'] = 'en'
 | 
			
		||||
    return params
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -19,6 +19,9 @@ about = {
 | 
			
		|||
    "results": 'JSON',
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
send_accept_language_header = True
 | 
			
		||||
 | 
			
		||||
# search-url
 | 
			
		||||
search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
 | 
			
		||||
supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
 | 
			
		||||
| 
						 | 
				
			
			@ -41,9 +44,6 @@ def request(query, params):
 | 
			
		|||
    language = url_lang(params['language'])
 | 
			
		||||
    params['url'] = search_url.format(title=quote(query), language=language)
 | 
			
		||||
 | 
			
		||||
    if params['language'].lower() in language_variants.get(language, []):
 | 
			
		||||
        params['headers']['Accept-Language'] = params['language'].lower()
 | 
			
		||||
 | 
			
		||||
    params['headers']['User-Agent'] = searx_useragent()
 | 
			
		||||
    params['raise_for_httperror'] = False
 | 
			
		||||
    params['soft_max_redirects'] = 2
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,6 +1,7 @@
 | 
			
		|||
# SPDX-License-Identifier: AGPL-3.0-or-later
 | 
			
		||||
 | 
			
		||||
import typing
 | 
			
		||||
import babel
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class EngineRef:
 | 
			
		||||
| 
						 | 
				
			
			@ -29,6 +30,7 @@ class SearchQuery:
 | 
			
		|||
        'query',
 | 
			
		||||
        'engineref_list',
 | 
			
		||||
        'lang',
 | 
			
		||||
        'locale',
 | 
			
		||||
        'safesearch',
 | 
			
		||||
        'pageno',
 | 
			
		||||
        'time_range',
 | 
			
		||||
| 
						 | 
				
			
			@ -59,6 +61,13 @@ class SearchQuery:
 | 
			
		|||
        self.external_bang = external_bang
 | 
			
		||||
        self.engine_data = engine_data or {}
 | 
			
		||||
 | 
			
		||||
        self.locale = None
 | 
			
		||||
        if self.lang:
 | 
			
		||||
            try:
 | 
			
		||||
                self.locale = babel.Locale.parse(self.lang, sep='-')
 | 
			
		||||
            except babel.core.UnknownLocaleError:
 | 
			
		||||
                pass
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def categories(self):
 | 
			
		||||
        return list(set(map(lambda engineref: engineref.category, self.engineref_list)))
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -60,6 +60,17 @@ class OnlineProcessor(EngineProcessor):
 | 
			
		|||
        # add a user agent
 | 
			
		||||
        params['headers']['User-Agent'] = gen_useragent()
 | 
			
		||||
 | 
			
		||||
        # add Accept-Language header
 | 
			
		||||
        if self.engine.send_accept_language_header and search_query.locale:
 | 
			
		||||
            ac_lang = search_query.locale.language
 | 
			
		||||
            if search_query.locale.territory:
 | 
			
		||||
                ac_lang = "%s-%s,%s;q=0.9,*;q=0.5" % (
 | 
			
		||||
                    search_query.locale.language,
 | 
			
		||||
                    search_query.locale.territory,
 | 
			
		||||
                    search_query.locale.language,
 | 
			
		||||
                )
 | 
			
		||||
            params['headers']['Accept-Language'] = ac_lang
 | 
			
		||||
 | 
			
		||||
        return params
 | 
			
		||||
 | 
			
		||||
    def _send_http_request(self, params):
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -748,6 +748,7 @@ engines:
 | 
			
		|||
 | 
			
		||||
  - name: google play movies
 | 
			
		||||
    engine: xpath
 | 
			
		||||
    send_accept_language_header: true
 | 
			
		||||
    search_url: https://play.google.com/store/search?q={query}&c=movies
 | 
			
		||||
    results_xpath: '//div[@class="ImZGtf mpg5gc"]'
 | 
			
		||||
    title_xpath: './/div[@class="RZEgze"]//div[@class="kCSSQe"]//a'
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		
		Reference in a new issue