mirror of https://github.com/searxng/searxng.git
[mod] move language recognition to get_search_query_from_webapp
To set the language from language recognition and hold the value selected by the client, the previous implementation creates a copy of the SearchQuery object and manipulates that copy by calling the function replace_auto_language(). This patch tries to implement similar functionality in a more central place: in the function get_search_query_from_webapp(), where the SearchQuery object is built up. Additionally, this patch uses the language preferred by the client if language recognition does not have a match; the existing implementation does not care about client preferences and uses 'all' in case of no match. Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
c03b0ea650
commit
d5ecda9930
|
@ -22,7 +22,6 @@ from searx.network import initialize as initialize_network, check_network_config
|
|||
from searx.metrics import initialize as initialize_metrics, counter_inc, histogram_observe_time
|
||||
from searx.search.processors import PROCESSORS, initialize as initialize_processors
|
||||
from searx.search.checker import initialize as initialize_checker
|
||||
from searx.utils import detect_language
|
||||
|
||||
|
||||
logger = logger.getChild('search')
|
||||
|
@ -40,57 +39,19 @@ def initialize(settings_engines=None, enable_checker=False, check_network=False,
|
|||
initialize_checker()
|
||||
|
||||
|
||||
def replace_auto_language(search_query: SearchQuery):
|
||||
"""
|
||||
Do nothing except if `search_query.lang` is "auto".
|
||||
In this case:
|
||||
* the value "auto" is replaced by the detected language of the query.
|
||||
The default value is "all" when no language is detected.
|
||||
* `search_query.locale` is updated accordingly
|
||||
|
||||
Use :py:obj:`searx.utils.detect_language` with `only_search_languages=True` to keep
|
||||
only languages supported by the engines.
|
||||
"""
|
||||
if search_query.lang != 'auto':
|
||||
return
|
||||
|
||||
detected_lang = detect_language(search_query.query, threshold=0.3, only_search_languages=True)
|
||||
if detected_lang is None:
|
||||
# fallback to 'all' if no language has been detected
|
||||
search_query.lang = 'all'
|
||||
search_query.locale = None
|
||||
return
|
||||
search_query.lang = detected_lang
|
||||
try:
|
||||
search_query.locale = babel.Locale.parse(search_query.lang)
|
||||
except babel.core.UnknownLocaleError:
|
||||
search_query.locale = None
|
||||
|
||||
|
||||
class Search:
|
||||
"""Search information container"""
|
||||
|
||||
__slots__ = "search_query", "result_container", "start_time", "actual_timeout"
|
||||
|
||||
def __init__(self, search_query: SearchQuery):
|
||||
"""Initialize the Search
|
||||
|
||||
search_query is copied
|
||||
"""
|
||||
"""Initialize the Search"""
|
||||
# init vars
|
||||
super().__init__()
|
||||
self.search_query = search_query
|
||||
self.result_container = ResultContainer()
|
||||
self.start_time = None
|
||||
self.actual_timeout = None
|
||||
self.search_query = copy(search_query)
|
||||
self.update_search_query(self.search_query)
|
||||
|
||||
def update_search_query(self, search_query: SearchQuery):
|
||||
"""Update search_query.
|
||||
|
||||
call replace_auto_language to replace the "auto" language
|
||||
"""
|
||||
replace_auto_language(search_query)
|
||||
|
||||
def search_external_bang(self):
|
||||
"""
|
||||
|
|
|
@ -6,6 +6,7 @@ from searx.query import RawTextQuery
|
|||
from searx.engines import categories, engines
|
||||
from searx.search import SearchQuery, EngineRef
|
||||
from searx.preferences import Preferences, is_locked
|
||||
from searx.utils import detect_language
|
||||
|
||||
|
||||
# remove duplicate queries.
|
||||
|
@ -214,7 +215,27 @@ def parse_engine_data(form):
|
|||
|
||||
def get_search_query_from_webapp(
|
||||
preferences: Preferences, form: Dict[str, str]
|
||||
) -> Tuple[SearchQuery, RawTextQuery, List[EngineRef], List[EngineRef]]:
|
||||
) -> Tuple[SearchQuery, RawTextQuery, List[EngineRef], List[EngineRef], str]:
|
||||
"""Assemble data from preferences and request.form (from the HTML form) needed
|
||||
in a search query.
|
||||
|
||||
The returned tuple consists of:
|
||||
|
||||
1. instance of :py:obj:`searx.search.SearchQuery`
|
||||
2. instance of :py:obj:`searx.query.RawTextQuery`
|
||||
3. two lists of :py:obj:`searx.search.EngineRef` instances
|
||||
4. string with the *selected locale* of the query
|
||||
|
||||
About language/locale: if the client selects the alias ``auto`` the
|
||||
``SearchQuery`` object is built up by the :py:obj:`detected language
|
||||
<searx.utils.detect_language>`. If language recognition does not have a
|
||||
match the language preferred by the :py:obj:`Preferences.client` is used.
|
||||
If client does not have a preference, the default ``all`` is used.
|
||||
|
||||
The *selected locale* in the tuple always represents the selected
|
||||
language/locale and might differ from the language recognition.
|
||||
|
||||
"""
|
||||
# no text for the query ?
|
||||
if not form.get('q'):
|
||||
raise SearxParameterException('q', '')
|
||||
|
@ -229,13 +250,19 @@ def get_search_query_from_webapp(
|
|||
# set query
|
||||
query = raw_text_query.getQuery()
|
||||
query_pageno = parse_pageno(form)
|
||||
query_lang = parse_lang(preferences, form, raw_text_query)
|
||||
query_safesearch = parse_safesearch(preferences, form)
|
||||
query_time_range = parse_time_range(form)
|
||||
query_timeout = parse_timeout(form, raw_text_query)
|
||||
external_bang = raw_text_query.external_bang
|
||||
engine_data = parse_engine_data(form)
|
||||
|
||||
query_lang = parse_lang(preferences, form, raw_text_query)
|
||||
selected_locale = query_lang
|
||||
|
||||
if query_lang == 'auto':
|
||||
query_lang = detect_language(query, threshold=0.8, only_search_languages=True)
|
||||
query_lang = query_lang or preferences.client.locale_tag or 'all'
|
||||
|
||||
if not is_locked('categories') and raw_text_query.specific:
|
||||
# if engines are calculated from query,
|
||||
# set categories by using that information
|
||||
|
@ -265,4 +292,5 @@ def get_search_query_from_webapp(
|
|||
raw_text_query,
|
||||
query_engineref_list_unknown,
|
||||
query_engineref_list_notoken,
|
||||
selected_locale,
|
||||
)
|
||||
|
|
|
@ -84,6 +84,7 @@ from searx.webutils import (
|
|||
from searx.webadapter import (
|
||||
get_search_query_from_webapp,
|
||||
get_selected_categories,
|
||||
parse_lang,
|
||||
)
|
||||
from searx.utils import (
|
||||
html_to_text,
|
||||
|
@ -440,11 +441,7 @@ def render(template_name: str, **kwargs):
|
|||
kwargs['rtl'] = True
|
||||
|
||||
if 'current_language' not in kwargs:
|
||||
_locale = request.preferences.get_value('language')
|
||||
if _locale in ('auto', 'all'):
|
||||
kwargs['current_language'] = _locale
|
||||
else:
|
||||
kwargs['current_language'] = match_locale(_locale, settings['search']['languages'])
|
||||
kwargs['current_language'] = parse_lang(request.preferences, {}, RawTextQuery('', []))
|
||||
|
||||
# values from settings
|
||||
kwargs['search_formats'] = [x for x in settings['search']['formats'] if x != 'html']
|
||||
|
@ -678,7 +675,9 @@ def search():
|
|||
raw_text_query = None
|
||||
result_container = None
|
||||
try:
|
||||
search_query, raw_text_query, _, _ = get_search_query_from_webapp(request.preferences, request.form)
|
||||
search_query, raw_text_query, _, _, selected_locale = get_search_query_from_webapp(
|
||||
request.preferences, request.form
|
||||
)
|
||||
# search = Search(search_query) # without plugins
|
||||
search = SearchWithPlugins(search_query, request.user_plugins, request) # pylint: disable=redefined-outer-name
|
||||
|
||||
|
@ -809,13 +808,6 @@ def search():
|
|||
)
|
||||
)
|
||||
|
||||
if search_query.lang in ('auto', 'all'):
|
||||
current_language = search_query.lang
|
||||
else:
|
||||
current_language = match_locale(
|
||||
search_query.lang, settings['search']['languages'], fallback=request.preferences.get_value("language")
|
||||
)
|
||||
|
||||
# search_query.lang contains the user choice (all, auto, en, ...)
|
||||
# when the user choice is "auto", search.search_query.lang contains the detected language
|
||||
# otherwise it is equal to search_query.lang
|
||||
|
@ -838,7 +830,7 @@ def search():
|
|||
result_container.unresponsive_engines
|
||||
),
|
||||
current_locale = request.preferences.get_value("locale"),
|
||||
current_language = current_language,
|
||||
current_language = selected_locale,
|
||||
search_language = match_locale(
|
||||
search.search_query.lang,
|
||||
settings['search']['languages'],
|
||||
|
|
Loading…
Reference in New Issue