mirror of
https://github.com/searxng/searxng
synced 2024-01-01 18:24:07 +00:00
Merge pull request #1560 from return42/http-accept-language
[mod] add 'Accept-Language' HTTP header to online processors
This commit is contained in:
commit
7c9c112484
@ -440,6 +440,7 @@ engine is shown. Most of the options have a default value or even are optional.
|
|||||||
engine: example
|
engine: example
|
||||||
shortcut: demo
|
shortcut: demo
|
||||||
base_url: 'https://{language}.example.com/'
|
base_url: 'https://{language}.example.com/'
|
||||||
|
send_accept_language_header: false
|
||||||
categories: general
|
categories: general
|
||||||
timeout: 3.0
|
timeout: 3.0
|
||||||
api_key: 'apikey'
|
api_key: 'apikey'
|
||||||
@ -488,6 +489,13 @@ engine is shown. Most of the options have a default value or even are optional.
|
|||||||
use multiple sites using only one engine, or updating the site URL without
|
use multiple sites using only one engine, or updating the site URL without
|
||||||
touching the code.
|
touching the code.
|
||||||
|
|
||||||
|
``send_accept_language_header`` :
|
||||||
|
Several engines that support languages (or regions) deal with the HTTP header
|
||||||
|
``Accept-Language`` to build a response that fits the locale. When this
|
||||||
|
option is activated, the language (locale) that is selected by the user is used
|
||||||
|
to build and send an ``Accept-Language`` header in the request to the origin
|
||||||
|
search engine.
|
||||||
|
|
||||||
``categories`` : optional
|
``categories`` : optional
|
||||||
Define in which categories this engine will be active. Most of the time, it is
|
Define in which categories this engine will be active. Most of the time, it is
|
||||||
defined in the code of the engine, but in a few cases it is useful, like when
|
defined in the code of the engine, but in a few cases it is useful, like when
|
||||||
|
@ -44,6 +44,7 @@ ENGINE_DEFAULT_ARGS = {
|
|||||||
"enable_http": False,
|
"enable_http": False,
|
||||||
"using_tor_proxy": False,
|
"using_tor_proxy": False,
|
||||||
"display_error_messages": True,
|
"display_error_messages": True,
|
||||||
|
"send_accept_language_header": False,
|
||||||
"tokens": [],
|
"tokens": [],
|
||||||
"about": {},
|
"about": {},
|
||||||
}
|
}
|
||||||
|
@ -25,6 +25,7 @@ categories = ['general', 'web']
|
|||||||
paging = True
|
paging = True
|
||||||
time_range_support = False
|
time_range_support = False
|
||||||
safesearch = False
|
safesearch = False
|
||||||
|
send_accept_language_header = True
|
||||||
supported_languages_url = 'https://www.bing.com/account/general'
|
supported_languages_url = 'https://www.bing.com/account/general'
|
||||||
language_aliases = {}
|
language_aliases = {}
|
||||||
|
|
||||||
@ -68,7 +69,6 @@ def request(query, params):
|
|||||||
logger.debug("headers.Referer --> %s", referer)
|
logger.debug("headers.Referer --> %s", referer)
|
||||||
|
|
||||||
params['url'] = base_url + search_path
|
params['url'] = base_url + search_path
|
||||||
params['headers']['Accept-Language'] = "en-US,en;q=0.5"
|
|
||||||
params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
|
params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
@ -31,6 +31,7 @@ categories = ['images', 'web']
|
|||||||
paging = True
|
paging = True
|
||||||
safesearch = True
|
safesearch = True
|
||||||
time_range_support = True
|
time_range_support = True
|
||||||
|
send_accept_language_header = True
|
||||||
supported_languages_url = 'https://www.bing.com/account/general'
|
supported_languages_url = 'https://www.bing.com/account/general'
|
||||||
number_of_results = 28
|
number_of_results = 28
|
||||||
|
|
||||||
|
@ -34,6 +34,7 @@ about = {
|
|||||||
categories = ['news']
|
categories = ['news']
|
||||||
paging = True
|
paging = True
|
||||||
time_range_support = True
|
time_range_support = True
|
||||||
|
send_accept_language_header = True
|
||||||
|
|
||||||
# search-url
|
# search-url
|
||||||
base_url = 'https://www.bing.com/'
|
base_url = 'https://www.bing.com/'
|
||||||
|
@ -30,6 +30,7 @@ categories = ['videos', 'web']
|
|||||||
paging = True
|
paging = True
|
||||||
safesearch = True
|
safesearch = True
|
||||||
time_range_support = True
|
time_range_support = True
|
||||||
|
send_accept_language_header = True
|
||||||
number_of_results = 28
|
number_of_results = 28
|
||||||
|
|
||||||
base_url = 'https://www.bing.com/'
|
base_url = 'https://www.bing.com/'
|
||||||
@ -70,10 +71,6 @@ def request(query, params):
|
|||||||
if params['time_range'] in time_range_dict:
|
if params['time_range'] in time_range_dict:
|
||||||
params['url'] += time_range_string.format(interval=time_range_dict[params['time_range']])
|
params['url'] += time_range_string.format(interval=time_range_dict[params['time_range']])
|
||||||
|
|
||||||
# bing videos did not like "older" versions < 70.0.1 when selectin other
|
|
||||||
# languages then 'en' .. very strange ?!?!
|
|
||||||
params['headers']['User-Agent'] = 'Mozilla/5.0 (X11; Linux x86_64; rv:73.0.1) Gecko/20100101 Firefox/73.0.1'
|
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
@ -20,6 +20,7 @@ from json import loads
|
|||||||
from urllib.parse import urlencode
|
from urllib.parse import urlencode
|
||||||
|
|
||||||
engine_type = 'online'
|
engine_type = 'online'
|
||||||
|
send_accept_language_header = True
|
||||||
categories = ['general']
|
categories = ['general']
|
||||||
disabled = True
|
disabled = True
|
||||||
timeout = 2.0
|
timeout = 2.0
|
||||||
|
@ -31,6 +31,7 @@ categories = ['general', 'web']
|
|||||||
paging = True
|
paging = True
|
||||||
supported_languages_url = 'https://duckduckgo.com/util/u588.js'
|
supported_languages_url = 'https://duckduckgo.com/util/u588.js'
|
||||||
time_range_support = True
|
time_range_support = True
|
||||||
|
send_accept_language_header = True
|
||||||
|
|
||||||
language_aliases = {
|
language_aliases = {
|
||||||
'ar-SA': 'ar-XA',
|
'ar-SA': 'ar-XA',
|
||||||
|
@ -27,6 +27,8 @@ about = {
|
|||||||
"results": 'JSON',
|
"results": 'JSON',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
send_accept_language_header = True
|
||||||
|
|
||||||
URL = 'https://api.duckduckgo.com/' + '?{query}&format=json&pretty=0&no_redirect=1&d=1'
|
URL = 'https://api.duckduckgo.com/' + '?{query}&format=json&pretty=0&no_redirect=1&d=1'
|
||||||
|
|
||||||
WIKIDATA_PREFIX = ['http://www.wikidata.org/entity/', 'https://www.wikidata.org/entity/']
|
WIKIDATA_PREFIX = ['http://www.wikidata.org/entity/', 'https://www.wikidata.org/entity/']
|
||||||
@ -62,7 +64,6 @@ def request(query, params):
|
|||||||
params['url'] = URL.format(query=urlencode({'q': query}))
|
params['url'] = URL.format(query=urlencode({'q': query}))
|
||||||
language = match_language(params['language'], supported_languages, language_aliases)
|
language = match_language(params['language'], supported_languages, language_aliases)
|
||||||
language = language.split('-')[0]
|
language = language.split('-')[0]
|
||||||
params['headers']['Accept-Language'] = language
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
@ -30,6 +30,7 @@ about = {
|
|||||||
categories = ['images', 'web']
|
categories = ['images', 'web']
|
||||||
paging = True
|
paging = True
|
||||||
safesearch = True
|
safesearch = True
|
||||||
|
send_accept_language_header = True
|
||||||
|
|
||||||
# search-url
|
# search-url
|
||||||
images_url = 'https://duckduckgo.com/i.js?{query}&s={offset}&p={safesearch}&o=json&vqd={vqd}'
|
images_url = 'https://duckduckgo.com/i.js?{query}&s={offset}&p={safesearch}&o=json&vqd={vqd}'
|
||||||
|
@ -45,6 +45,7 @@ categories = ['general', 'web']
|
|||||||
paging = True
|
paging = True
|
||||||
time_range_support = True
|
time_range_support = True
|
||||||
safesearch = True
|
safesearch = True
|
||||||
|
send_accept_language_header = True
|
||||||
use_mobile_ui = False
|
use_mobile_ui = False
|
||||||
supported_languages_url = 'https://www.google.com/preferences?#languages'
|
supported_languages_url = 'https://www.google.com/preferences?#languages'
|
||||||
|
|
||||||
@ -241,16 +242,6 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
|
|||||||
# language.
|
# language.
|
||||||
ret_val['params']['lr'] = "lang_" + lang_list.get(lang_country, language)
|
ret_val['params']['lr'] = "lang_" + lang_list.get(lang_country, language)
|
||||||
|
|
||||||
# Accept-Language: fr-CH, fr;q=0.8, en;q=0.6, *;q=0.5
|
|
||||||
ret_val['headers']['Accept-Language'] = ','.join(
|
|
||||||
[
|
|
||||||
lang_country,
|
|
||||||
language + ';q=0.8,',
|
|
||||||
'en;q=0.6',
|
|
||||||
'*;q=0.5',
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
return ret_val
|
return ret_val
|
||||||
|
|
||||||
|
|
||||||
|
@ -51,6 +51,7 @@ paging = False
|
|||||||
use_locale_domain = True
|
use_locale_domain = True
|
||||||
time_range_support = True
|
time_range_support = True
|
||||||
safesearch = True
|
safesearch = True
|
||||||
|
send_accept_language_header = True
|
||||||
|
|
||||||
filter_mapping = {0: 'images', 1: 'active', 2: 'active'}
|
filter_mapping = {0: 'images', 1: 'active', 2: 'active'}
|
||||||
|
|
||||||
@ -125,7 +126,6 @@ def request(query, params):
|
|||||||
"""Google-Video search request"""
|
"""Google-Video search request"""
|
||||||
|
|
||||||
lang_info = get_lang_info(params, supported_languages, language_aliases, False)
|
lang_info = get_lang_info(params, supported_languages, language_aliases, False)
|
||||||
logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
|
|
||||||
|
|
||||||
query_url = (
|
query_url = (
|
||||||
'https://'
|
'https://'
|
||||||
|
@ -70,13 +70,13 @@ time_range_support = True
|
|||||||
#
|
#
|
||||||
# safesearch : results are identical for safesearch=0 and safesearch=2
|
# safesearch : results are identical for safesearch=0 and safesearch=2
|
||||||
safesearch = False
|
safesearch = False
|
||||||
|
send_accept_language_header = True
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
"""Google-News search request"""
|
"""Google-News search request"""
|
||||||
|
|
||||||
lang_info = get_lang_info(params, supported_languages, language_aliases, False)
|
lang_info = get_lang_info(params, supported_languages, language_aliases, False)
|
||||||
logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
|
|
||||||
|
|
||||||
# google news has only one domain
|
# google news has only one domain
|
||||||
lang_info['subdomain'] = 'news.google.com'
|
lang_info['subdomain'] = 'news.google.com'
|
||||||
|
@ -22,6 +22,8 @@ about = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
categories = ["files", "apps"]
|
categories = ["files", "apps"]
|
||||||
|
send_accept_language_header = True
|
||||||
|
|
||||||
search_url = "https://play.google.com/store/search?{query}&c=apps"
|
search_url = "https://play.google.com/store/search?{query}&c=apps"
|
||||||
|
|
||||||
|
|
||||||
|
@ -52,6 +52,7 @@ language_support = True
|
|||||||
use_locale_domain = True
|
use_locale_domain = True
|
||||||
time_range_support = True
|
time_range_support = True
|
||||||
safesearch = False
|
safesearch = False
|
||||||
|
send_accept_language_header = True
|
||||||
|
|
||||||
|
|
||||||
def time_range_url(params):
|
def time_range_url(params):
|
||||||
@ -75,7 +76,6 @@ def request(query, params):
|
|||||||
|
|
||||||
offset = (params['pageno'] - 1) * 10
|
offset = (params['pageno'] - 1) * 10
|
||||||
lang_info = get_lang_info(params, supported_languages, language_aliases, False)
|
lang_info = get_lang_info(params, supported_languages, language_aliases, False)
|
||||||
logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
|
|
||||||
|
|
||||||
# subdomain is: scholar.google.xy
|
# subdomain is: scholar.google.xy
|
||||||
lang_info['subdomain'] = lang_info['subdomain'].replace("www.", "scholar.")
|
lang_info['subdomain'] = lang_info['subdomain'].replace("www.", "scholar.")
|
||||||
|
@ -60,6 +60,7 @@ language_support = True
|
|||||||
use_locale_domain = True
|
use_locale_domain = True
|
||||||
time_range_support = True
|
time_range_support = True
|
||||||
safesearch = True
|
safesearch = True
|
||||||
|
send_accept_language_header = True
|
||||||
|
|
||||||
RE_CACHE = {}
|
RE_CACHE = {}
|
||||||
|
|
||||||
@ -111,7 +112,6 @@ def request(query, params):
|
|||||||
"""Google-Video search request"""
|
"""Google-Video search request"""
|
||||||
|
|
||||||
lang_info = get_lang_info(params, supported_languages, language_aliases, False)
|
lang_info = get_lang_info(params, supported_languages, language_aliases, False)
|
||||||
logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language'])
|
|
||||||
|
|
||||||
query_url = (
|
query_url = (
|
||||||
'https://'
|
'https://'
|
||||||
|
@ -30,6 +30,7 @@ about = {
|
|||||||
categories = ['map']
|
categories = ['map']
|
||||||
paging = False
|
paging = False
|
||||||
language_support = True
|
language_support = True
|
||||||
|
send_accept_language_header = True
|
||||||
|
|
||||||
# search-url
|
# search-url
|
||||||
base_url = 'https://nominatim.openstreetmap.org/'
|
base_url = 'https://nominatim.openstreetmap.org/'
|
||||||
@ -142,9 +143,8 @@ def request(query, params):
|
|||||||
params['url'] = base_url + search_string.format(query=urlencode({'q': query}))
|
params['url'] = base_url + search_string.format(query=urlencode({'q': query}))
|
||||||
params['route'] = route_re.match(query)
|
params['route'] = route_re.match(query)
|
||||||
params['headers']['User-Agent'] = searx_useragent()
|
params['headers']['User-Agent'] = searx_useragent()
|
||||||
|
if 'Accept-Language' not in params['headers']:
|
||||||
accept_language = 'en' if params['language'] == 'all' else params['language']
|
params['headers']['Accept-Language'] = 'en'
|
||||||
params['headers']['Accept-Language'] = accept_language
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
@ -19,6 +19,9 @@ about = {
|
|||||||
"results": 'JSON',
|
"results": 'JSON',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
send_accept_language_header = True
|
||||||
|
|
||||||
# search-url
|
# search-url
|
||||||
search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
|
search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
|
||||||
supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
|
supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
|
||||||
@ -41,9 +44,6 @@ def request(query, params):
|
|||||||
language = url_lang(params['language'])
|
language = url_lang(params['language'])
|
||||||
params['url'] = search_url.format(title=quote(query), language=language)
|
params['url'] = search_url.format(title=quote(query), language=language)
|
||||||
|
|
||||||
if params['language'].lower() in language_variants.get(language, []):
|
|
||||||
params['headers']['Accept-Language'] = params['language'].lower()
|
|
||||||
|
|
||||||
params['headers']['User-Agent'] = searx_useragent()
|
params['headers']['User-Agent'] = searx_useragent()
|
||||||
params['raise_for_httperror'] = False
|
params['raise_for_httperror'] = False
|
||||||
params['soft_max_redirects'] = 2
|
params['soft_max_redirects'] = 2
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
|
||||||
import typing
|
import typing
|
||||||
|
import babel
|
||||||
|
|
||||||
|
|
||||||
class EngineRef:
|
class EngineRef:
|
||||||
@ -29,6 +30,7 @@ class SearchQuery:
|
|||||||
'query',
|
'query',
|
||||||
'engineref_list',
|
'engineref_list',
|
||||||
'lang',
|
'lang',
|
||||||
|
'locale',
|
||||||
'safesearch',
|
'safesearch',
|
||||||
'pageno',
|
'pageno',
|
||||||
'time_range',
|
'time_range',
|
||||||
@ -59,6 +61,13 @@ class SearchQuery:
|
|||||||
self.external_bang = external_bang
|
self.external_bang = external_bang
|
||||||
self.engine_data = engine_data or {}
|
self.engine_data = engine_data or {}
|
||||||
|
|
||||||
|
self.locale = None
|
||||||
|
if self.lang:
|
||||||
|
try:
|
||||||
|
self.locale = babel.Locale.parse(self.lang, sep='-')
|
||||||
|
except babel.core.UnknownLocaleError:
|
||||||
|
pass
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def categories(self):
|
def categories(self):
|
||||||
return list(set(map(lambda engineref: engineref.category, self.engineref_list)))
|
return list(set(map(lambda engineref: engineref.category, self.engineref_list)))
|
||||||
|
@ -138,6 +138,13 @@ class EngineProcessor(ABC):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def get_params(self, search_query, engine_category):
|
def get_params(self, search_query, engine_category):
|
||||||
|
"""Returns a set of *request params* or ``None`` if request is not supported.
|
||||||
|
|
||||||
|
Not supported conditions (``None`` is returned):
|
||||||
|
|
||||||
|
- A page-number > 1 when engine does not support paging.
|
||||||
|
- A time range when the engine does not support time range.
|
||||||
|
"""
|
||||||
# if paging is not supported, skip
|
# if paging is not supported, skip
|
||||||
if search_query.pageno > 1 and not self.engine.paging:
|
if search_query.pageno > 1 and not self.engine.paging:
|
||||||
return None
|
return None
|
||||||
|
@ -60,6 +60,17 @@ class OnlineProcessor(EngineProcessor):
|
|||||||
# add a user agent
|
# add a user agent
|
||||||
params['headers']['User-Agent'] = gen_useragent()
|
params['headers']['User-Agent'] = gen_useragent()
|
||||||
|
|
||||||
|
# add Accept-Language header
|
||||||
|
if self.engine.send_accept_language_header and search_query.locale:
|
||||||
|
ac_lang = search_query.locale.language
|
||||||
|
if search_query.locale.territory:
|
||||||
|
ac_lang = "%s-%s,%s;q=0.9,*;q=0.5" % (
|
||||||
|
search_query.locale.language,
|
||||||
|
search_query.locale.territory,
|
||||||
|
search_query.locale.language,
|
||||||
|
)
|
||||||
|
params['headers']['Accept-Language'] = ac_lang
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
def _send_http_request(self, params):
|
def _send_http_request(self, params):
|
||||||
|
@ -38,6 +38,9 @@ class OnlineCurrencyProcessor(OnlineProcessor):
|
|||||||
engine_type = 'online_currency'
|
engine_type = 'online_currency'
|
||||||
|
|
||||||
def get_params(self, search_query, engine_category):
|
def get_params(self, search_query, engine_category):
|
||||||
|
"""Returns a set of *request params* or ``None`` if search query does not match
|
||||||
|
to :py:obj:`parser_re`."""
|
||||||
|
|
||||||
params = super().get_params(search_query, engine_category)
|
params = super().get_params(search_query, engine_category)
|
||||||
if params is None:
|
if params is None:
|
||||||
return None
|
return None
|
||||||
|
@ -18,6 +18,8 @@ class OnlineDictionaryProcessor(OnlineProcessor):
|
|||||||
engine_type = 'online_dictionary'
|
engine_type = 'online_dictionary'
|
||||||
|
|
||||||
def get_params(self, search_query, engine_category):
|
def get_params(self, search_query, engine_category):
|
||||||
|
"""Returns a set of *request params* or ``None`` if search query does not match
|
||||||
|
to :py:obj:`parser_re`."""
|
||||||
params = super().get_params(search_query, engine_category)
|
params = super().get_params(search_query, engine_category)
|
||||||
if params is None:
|
if params is None:
|
||||||
return None
|
return None
|
||||||
|
@ -20,6 +20,9 @@ class OnlineUrlSearchProcessor(OnlineProcessor):
|
|||||||
engine_type = 'online_url_search'
|
engine_type = 'online_url_search'
|
||||||
|
|
||||||
def get_params(self, search_query, engine_category):
|
def get_params(self, search_query, engine_category):
|
||||||
|
"""Returns a set of *request params* or ``None`` if search query does not match
|
||||||
|
to at least one of :py:obj:`re_search_urls`.
|
||||||
|
"""
|
||||||
params = super().get_params(search_query, engine_category)
|
params = super().get_params(search_query, engine_category)
|
||||||
if params is None:
|
if params is None:
|
||||||
return None
|
return None
|
||||||
|
@ -748,6 +748,7 @@ engines:
|
|||||||
|
|
||||||
- name: google play movies
|
- name: google play movies
|
||||||
engine: xpath
|
engine: xpath
|
||||||
|
send_accept_language_header: true
|
||||||
search_url: https://play.google.com/store/search?q={query}&c=movies
|
search_url: https://play.google.com/store/search?q={query}&c=movies
|
||||||
results_xpath: '//div[@class="ImZGtf mpg5gc"]'
|
results_xpath: '//div[@class="ImZGtf mpg5gc"]'
|
||||||
title_xpath: './/div[@class="RZEgze"]//div[@class="kCSSQe"]//a'
|
title_xpath: './/div[@class="RZEgze"]//div[@class="kCSSQe"]//a'
|
||||||
|
Loading…
Reference in New Issue
Block a user