From 3df7d50df0bc6513168931711ac563c347c051fb Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Tue, 19 Apr 2022 10:49:49 +0200 Subject: [PATCH] [fix] replace language_support by a language/region view Many WEB-search engines (e.g. startpage) return the best results if a **region** is selected, most often a language filter is also in the properties of the WEB-search engine. hint:: The **search language** should not be mixed: sometimes the language argument is just the language of the UI with no effect on the result list. To summarize: Some WEB-search engines have language codes (e.g. `ca`) in their properties, others have region codes (e.g. `ca-ES`), some have regions and languages in their properties (e.g. startpage) and other engines do not have any language or region support. In the past we generalized *language* over all kinds of engines without keeping in mind that several WEB-search engines have best results when there is a region selected. This *language-centric* view in SearXNG is misleading when we need region-codes to parameterize an engine request! This patch replaces the *language-centric* view by a "language / region" view. Conclusions: With regions we can't say any longer that an engine supports *this or that* language. For example: when the user selects 'zh' and an engine supports only region codes like 'zh-TW' or 'zh-CN' we do not know what results the user expects / similar with 'en' or 'fr' when the engine needs a region tag. - Since it is unclear what the user expects by his language selection, we can't assert a property that says: "supports_selected_language" The feature is replaced in the UI by the wider sense of "language_support", which stands for: The engine has some kind of language support, either by a region tag or by a language tag. - A list of "supported_languages" does not make sense when there are regions responsible for the result of an engine. The "supported_languages" has been removed from the /config URL. 
- The `has_language` test in the `searx/search/checker/impl.py` has been removed since it does not cover engines with region support. If there is a need for such a test we can implement new tests after all engines with language (region) support have been moved to the *supported properties* scheme (`'type': 'engine_properties'`) / see previous commit: [mod] engines_languages.json: add new type EngineProperties Signed-off-by: Markus Heiser --- searx/search/checker/impl.py | 7 ------- searx/search/processors/online.py | 10 ---------- searx/templates/simple/preferences.html | 4 ++-- searx/webapp.py | 23 ++--------------------- 4 files changed, 4 insertions(+), 40 deletions(-) diff --git a/searx/search/checker/impl.py b/searx/search/checker/impl.py index bc5cdf968..c11e00e66 100644 --- a/searx/search/checker/impl.py +++ b/searx/search/checker/impl.py @@ -293,13 +293,6 @@ class ResultContainerTests: if len(self.result_container.answers) == 0: self._record_error('No answer') - def has_language(self, lang): - """Check at least one title or content of the results is written in the `lang`. 
- - Detected using pycld3, may be not accurate""" - if lang not in self.languages: - self._record_error(lang + ' not found') - def not_empty(self): """Check the ResultContainer has at least one answer or infobox or result""" result_types = set() diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py index 17e9b6a96..ef1fb8cb3 100644 --- a/searx/search/processors/online.py +++ b/searx/search/processors/online.py @@ -212,16 +212,6 @@ class OnlineProcessor(EngineProcessor): 'test': ['unique_results'], } - if getattr(self.engine, 'supported_languages', []): - tests['lang_fr'] = { - 'matrix': {'query': 'paris', 'lang': 'fr'}, - 'result_container': ['not_empty', ('has_language', 'fr')], - } - tests['lang_en'] = { - 'matrix': {'query': 'paris', 'lang': 'en'}, - 'result_container': ['not_empty', ('has_language', 'en')], - } - if getattr(self.engine, 'safesearch', False): tests['safesearch'] = {'matrix': {'query': 'porn', 'safesearch': (0, 2)}, 'test': ['unique_results']} diff --git a/searx/templates/simple/preferences.html b/searx/templates/simple/preferences.html index 7f9be5693..214fef3dd 100644 --- a/searx/templates/simple/preferences.html +++ b/searx/templates/simple/preferences.html @@ -307,7 +307,7 @@ {{ _("Allow") }}{{- "" -}} {{ _("Engine name") }}{{- "" -}} {{ _("Shortcut") }}{{- "" -}} - {{ _("Supports selected language") }}{{- "" -}} + {{ _("Language / Region") }}{{- "" -}} {{ _("SafeSearch") }}{{- "" -}} {{ _("Time range") }}{{- "" -}} {{ _("Response time") }}{{- "" -}} @@ -333,7 +333,7 @@ {{- engine_about(search_engine) -}} {{- "" -}} {{ shortcuts[search_engine.name] }}{{- "" -}} - {{ checkbox(None, supports[search_engine.name]['supports_selected_language'], true) }}{{- "" -}} + {{ checkbox(None, supports[search_engine.name]['language_support'], true) }}{{- "" -}} {{ checkbox(None, supports[search_engine.name]['safesearch'], true) }}{{- "" -}} {{ checkbox(None, supports[search_engine.name]['time_range_support'], true) }}{{- "" -}} 
{{- engine_time(search_engine.name) -}} diff --git a/searx/webapp.py b/searx/webapp.py index eb2b19d44..613d30f52 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -997,7 +997,6 @@ def preferences(): 'rate80': rate80, 'rate95': rate95, 'warn_timeout': e.timeout > settings['outgoing']['request_timeout'], - 'supports_selected_language': _is_selected_language_supported(e, request.preferences), 'result_count': result_count, } # end of stats @@ -1048,20 +1047,17 @@ def preferences(): # supports supports = {} for _, e in filtered_engines.items(): - supports_selected_language = _is_selected_language_supported(e, request.preferences) safesearch = e.safesearch time_range_support = e.time_range_support for checker_test_name in checker_results.get(e.name, {}).get('errors', {}): - if supports_selected_language and checker_test_name.startswith('lang_'): - supports_selected_language = '?' - elif safesearch and checker_test_name == 'safesearch': + if safesearch and checker_test_name == 'safesearch': safesearch = '?' elif time_range_support and checker_test_name == 'time_range': time_range_support = '?' 
supports[e.name] = { - 'supports_selected_language': supports_selected_language, 'safesearch': safesearch, 'time_range_support': time_range_support, + 'language_support': e.language_support, } return render( @@ -1095,16 +1091,6 @@ def preferences(): ) -def _is_selected_language_supported(engine, preferences: Preferences): # pylint: disable=redefined-outer-name - language = preferences.get_value('language') - if language == 'all': - return True - x = match_language( - language, getattr(engine, 'supported_languages', []), getattr(engine, 'language_aliases', {}), None - ) - return bool(x) - - @app.route('/image_proxy', methods=['GET']) def image_proxy(): # pylint: disable=too-many-return-statements, too-many-branches @@ -1323,10 +1309,6 @@ def config(): if not request.preferences.validate_token(engine): continue - supported_languages = engine.supported_languages - if isinstance(engine.supported_languages, dict): - supported_languages = list(engine.supported_languages.keys()) - _engines.append( { 'name': name, @@ -1335,7 +1317,6 @@ def config(): 'enabled': not engine.disabled, 'paging': engine.paging, 'language_support': engine.language_support, - 'supported_languages': supported_languages, 'safesearch': engine.safesearch, 'time_range_support': engine.time_range_support, 'timeout': engine.timeout,