forked from zaclys/searxng
make search language handling less strict
languages.py can change, so users may query in a language that is no longer on the list, even if it is still recognized by a few engines. Also made `no` and `nb` equivalent because they seem to return the same results, though most engines will only support one or the other.
This commit is contained in:
parent
805fb02ed1
commit
fd65c12921
File diff suppressed because one or more lines are too long
|
@ -94,6 +94,8 @@ def _fetch_supported_languages(resp):
|
|||
options = dom.xpath('//div[@id="limit-languages"]//input')
|
||||
for option in options:
|
||||
code = option.xpath('./@id')[0].replace('_', '-')
|
||||
if code == 'nb':
|
||||
code = 'no'
|
||||
supported_languages.append(code)
|
||||
|
||||
return supported_languages
|
||||
|
|
|
@ -47,6 +47,8 @@ def request(query, params):
|
|||
|
||||
# add language tag if specified
|
||||
if params['language'] != 'all':
|
||||
if params['language'] == 'no' or params['language'].startswith('no-'):
|
||||
params['language'] = params['language'].replace('no', 'nb', 1)
|
||||
if params['language'].find('-') < 0:
|
||||
# tries to get a country code from language
|
||||
for lang in supported_languages:
|
||||
|
@ -118,6 +120,8 @@ def _fetch_supported_languages(resp):
|
|||
|
||||
supported_languages = []
|
||||
for lang in regions_json['languages'].values():
|
||||
if lang['code'] == 'nb':
|
||||
lang['code'] = 'no'
|
||||
for country in lang['countries']:
|
||||
supported_languages.append(lang['code'] + '-' + country)
|
||||
|
||||
|
|
|
@ -120,6 +120,8 @@ def _fetch_supported_languages(resp):
|
|||
options = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
|
||||
for option in options:
|
||||
code = option.xpath('./@data-val')[0]
|
||||
if code.startswith('nb-'):
|
||||
code = code.replace('nb', 'no', 1)
|
||||
supported_languages.append(code)
|
||||
|
||||
return supported_languages
|
||||
|
|
|
@ -57,6 +57,7 @@ language_codes = (
|
|||
(u"nl", u"Nederlands", u"", u"Dutch"),
|
||||
(u"nl-BE", u"Nederlands", u"België", u"Dutch"),
|
||||
(u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
|
||||
(u"no-NO", u"Norsk", u"", u"Norwegian"),
|
||||
(u"pl-PL", u"Polski", u"", u"Polish"),
|
||||
(u"pt", u"Português", u"", u"Portuguese"),
|
||||
(u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
|
||||
|
|
|
@ -107,6 +107,8 @@ class SearchLanguageSetting(EnumStringSetting):
|
|||
pass
|
||||
elif lang in self.choices:
|
||||
data = lang
|
||||
elif data == 'nb-NO':
|
||||
data = 'no-NO'
|
||||
elif data == 'ar-XA':
|
||||
data = 'ar-SA'
|
||||
else:
|
||||
|
|
|
@ -24,7 +24,7 @@ from searx.engines import (
|
|||
import string
|
||||
import re
|
||||
|
||||
VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(\-[A-Z]{2})?$')
|
||||
VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')
|
||||
|
||||
|
||||
class RawTextQuery(object):
|
||||
|
@ -68,7 +68,7 @@ class RawTextQuery(object):
|
|||
|
||||
# this force a language
|
||||
if query_part[0] == ':':
|
||||
lang = query_part[1:].lower()
|
||||
lang = query_part[1:].lower().replace('_', '-')
|
||||
|
||||
# user may set a valid, yet not selectable language
|
||||
if VALID_LANGUAGE_CODE.match(lang):
|
||||
|
@ -86,7 +86,7 @@ class RawTextQuery(object):
|
|||
or lang_id.startswith(lang)\
|
||||
or lang == lang_name\
|
||||
or lang == english_name\
|
||||
or lang.replace('_', ' ') == country:
|
||||
or lang.replace('-', ' ') == country:
|
||||
parse_next = True
|
||||
self.languages.append(lang_id)
|
||||
# to ensure best match (first match is not necessarily the best one)
|
||||
|
|
|
@ -27,20 +27,16 @@ from searx.engines import (
|
|||
)
|
||||
from searx.answerers import ask
|
||||
from searx.utils import gen_useragent
|
||||
from searx.query import RawTextQuery, SearchQuery
|
||||
from searx.query import RawTextQuery, SearchQuery, VALID_LANGUAGE_CODE
|
||||
from searx.results import ResultContainer
|
||||
from searx import logger
|
||||
from searx.plugins import plugins
|
||||
from searx.languages import language_codes
|
||||
from searx.exceptions import SearxParameterException
|
||||
|
||||
logger = logger.getChild('search')
|
||||
|
||||
number_of_searches = 0
|
||||
|
||||
language_code_set = set(l[0].lower() for l in language_codes)
|
||||
language_code_set.add('all')
|
||||
|
||||
|
||||
def send_http_request(engine, request_params, start_time, timeout_limit):
|
||||
# for page_load_time stats
|
||||
|
@ -219,7 +215,7 @@ def get_search_query_from_webapp(preferences, form):
|
|||
query_lang = preferences.get_value('language')
|
||||
|
||||
# check language
|
||||
if query_lang.lower() not in language_code_set:
|
||||
if not VALID_LANGUAGE_CODE.match(query_lang):
|
||||
raise SearxParameterException('language', query_lang)
|
||||
|
||||
# get safesearch
|
||||
|
@ -371,11 +367,6 @@ class Search(object):
|
|||
if search_query.pageno > 1 and not engine.paging:
|
||||
continue
|
||||
|
||||
# if search-language is set and engine does not
|
||||
# provide language-support, skip
|
||||
if search_query.lang != 'all' and not engine.language_support:
|
||||
continue
|
||||
|
||||
# if time_range is not supported, skip
|
||||
if search_query.time_range and not engine.time_range_support:
|
||||
continue
|
||||
|
|
Loading…
Reference in New Issue