From 70a9208972cc619b53b15b21ed590c3544b8b8f3 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Sat, 29 May 2021 09:35:10 +0200 Subject: [PATCH 1/5] [mod] searx.engines.__init__: refactoring --- searx/engines/__init__.py | 212 +++++++++++++++++++------------------- 1 file changed, 108 insertions(+), 104 deletions(-) diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 49990c325..1ce90d5ad 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -1,59 +1,46 @@ - -''' -searx is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -searx is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with searx. If not, see < http://www.gnu.org/licenses/ >. 
- -(C) 2013- by Adam Tauber, -''' +# SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +# pylint: disable=missing-function-docstring +""" +Engine loader: +call load_engines(settings['engines]) +to initialiaze categories, engines, engine_shortcuts +""" import sys -import threading +import copy + from os.path import realpath, dirname from babel.localedata import locale_identifiers -from urllib.parse import urlparse -from operator import itemgetter -from searx import settings -from searx import logger +from searx import logger, settings from searx.data import ENGINES_LANGUAGES -from searx.exceptions import SearxEngineResponseException -from searx.network import get, initialize as initialize_network, set_context_network_name -from searx.utils import load_module, match_language, get_engine_from_settings, gen_useragent +from searx.network import get +from searx.utils import load_module, match_language, gen_useragent logger = logger.getChild('engines') - -engine_dir = dirname(realpath(__file__)) - -engines = {} - -categories = {'general': []} - -babel_langs = [lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0] +ENGINE_DIR = dirname(realpath(__file__)) +BABEL_LANGS = [lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0] for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())] - +ENGINE_DEFAULT_ARGS = { + "engine_type": "online", + "inactive": False, + "disabled": False, + "timeout": settings["outgoing"]["request_timeout"], + "shortcut": "-", + "categories": ["general"], + "supported_languages": [], + "language_aliases": {}, + "paging": False, + "safesearch": False, + "time_range_support": False, + "enable_http": False, + "display_error_messages": True, + "tokens": [], +} +categories = {'general': []} +engines = {} engine_shortcuts = {} -engine_default_args = {'paging': False, - 'categories': ['general'], - 'supported_languages': [], - 'safesearch': False, - 'timeout': 
settings['outgoing']['request_timeout'], - 'shortcut': '-', - 'disabled': False, - 'enable_http': False, - 'time_range_support': False, - 'engine_type': 'online', - 'display_error_messages': True, - 'tokens': []} def load_engine(engine_data): @@ -67,64 +54,59 @@ def load_engine(engine_data): engine_name = engine_name.lower() engine_data['name'] = engine_name + # load_module engine_module = engine_data['engine'] - try: - engine = load_module(engine_module + '.py', engine_dir) + engine = load_module(engine_module + '.py', ENGINE_DIR) except (SyntaxError, KeyboardInterrupt, SystemExit, SystemError, ImportError, RuntimeError): logger.exception('Fatal exception in engine "{}"'.format(engine_module)) sys.exit(1) - except: + except BaseException: logger.exception('Cannot load engine "{}"'.format(engine_module)) return None + update_engine_attributes(engine, engine_data) + set_language_attributes(engine) + update_attributes_for_tor(engine) + + if is_missing_required_attributes(engine): + sys.exit(1) + if not is_engine_active(engine): + return None + return engine + + +def update_engine_attributes(engine, engine_data): + # set engine attributes from engine_data for param_name, param_value in engine_data.items(): - if param_name == 'engine': - pass - elif param_name == 'categories': - if param_value == 'none': - engine.categories = [] - else: - engine.categories = list(map(str.strip, param_value.split(','))) - else: + if param_name == 'categories': + if isinstance(param_value, str): + param_value = list(map(str.strip, param_value.split(','))) + engine.categories = param_value + elif param_name != 'engine': setattr(engine, param_name, param_value) - for arg_name, arg_value in engine_default_args.items(): + # set default attributes + for arg_name, arg_value in ENGINE_DEFAULT_ARGS.items(): if not hasattr(engine, arg_name): - setattr(engine, arg_name, arg_value) + setattr(engine, arg_name, copy.deepcopy(arg_value)) - # checking required variables - for engine_attr in 
dir(engine): - if engine_attr.startswith('_'): - continue - if engine_attr == 'inactive' and getattr(engine, engine_attr) is True: - return None - if getattr(engine, engine_attr) is None: - logger.error('Missing engine config attribute: "{0}.{1}"' - .format(engine.name, engine_attr)) - sys.exit(1) +def set_language_attributes(engine): + # pylint: disable=protected-access # assign supported languages from json file - if engine_data['name'] in ENGINES_LANGUAGES: - setattr(engine, 'supported_languages', ENGINES_LANGUAGES[engine_data['name']]) + if engine.name in ENGINES_LANGUAGES: + engine.supported_languages = ENGINES_LANGUAGES[engine.name] # find custom aliases for non standard language codes - if hasattr(engine, 'supported_languages'): - if hasattr(engine, 'language_aliases'): - language_aliases = getattr(engine, 'language_aliases') - else: - language_aliases = {} - - for engine_lang in getattr(engine, 'supported_languages'): - iso_lang = match_language(engine_lang, babel_langs, fallback=None) - if iso_lang and iso_lang != engine_lang and not engine_lang.startswith(iso_lang) and \ - iso_lang not in getattr(engine, 'supported_languages'): - language_aliases[iso_lang] = engine_lang - - setattr(engine, 'language_aliases', language_aliases) + for engine_lang in engine.supported_languages: + iso_lang = match_language(engine_lang, BABEL_LANGS, fallback=None) + if iso_lang and iso_lang != engine_lang and not engine_lang.startswith(iso_lang) and \ + iso_lang not in engine.supported_languages: + engine.language_aliases[iso_lang] = engine_lang # language_support - setattr(engine, 'language_support', len(getattr(engine, 'supported_languages', [])) > 0) + engine.language_support = len(engine.supported_languages) > 0 # assign language fetching method if auxiliary method exists if hasattr(engine, '_fetch_supported_languages'): @@ -132,38 +114,60 @@ def load_engine(engine_data): 'User-Agent': gen_useragent(), 'Accept-Language': 'ja-JP,ja;q=0.8,en-US;q=0.5,en;q=0.3', # bing needs 
a non-English language } - setattr(engine, 'fetch_supported_languages', - lambda: engine._fetch_supported_languages(get(engine.supported_languages_url, headers=headers))) + engine.fetch_supported_languages =\ + lambda: engine._fetch_supported_languages(get(engine.supported_languages_url, headers=headers)) - # tor related settings - if settings['outgoing'].get('using_tor_proxy'): - # use onion url if using tor. - if hasattr(engine, 'onion_url'): - engine.search_url = engine.onion_url + getattr(engine, 'search_path', '') - elif 'onions' in engine.categories: - # exclude onion engines if not using tor. - return None - engine.timeout += settings['outgoing']['extra_proxy_timeout'] +def update_attributes_for_tor(engine): + if settings['outgoing'].get('using_tor_proxy') and hasattr(engine, 'onion_url'): + engine.search_url = engine.onion_url + getattr(engine, 'search_path', '') + engine.timeout += settings['outgoing'].get('extra_proxy_timeout', 0) - for category_name in engine.categories: - categories.setdefault(category_name, []).append(engine) +def is_missing_required_attributes(engine): + """an attribute is required when its name doesn't start with '_'. 
+ Required attributes must not be None + """ + missing = False + for engine_attr in dir(engine): + if not engine_attr.startswith('_') and getattr(engine, engine_attr) is None: + logger.error('Missing engine config attribute: "{0}.{1}"' + .format(engine.name, engine_attr)) + missing = True + return missing + + +def is_engine_active(engine): + # check if engine is inactive + if engine.inactive is True: + return False + + # exclude onion engines if not using tor + if 'onions' in engine.categories and not settings['outgoing'].get('using_tor_proxy'): + return False + + return True + + +def register_engine(engine): + engines[engine.name] = engine if engine.shortcut in engine_shortcuts: logger.error('Engine config error: ambigious shortcut: {0}'.format(engine.shortcut)) sys.exit(1) - engine_shortcuts[engine.shortcut] = engine.name - - return engine + for category_name in engine.categories: + categories.setdefault(category_name, []).append(engine) def load_engines(engine_list): - global engines, engine_shortcuts + """Use case: engine_list = settings['engines'] + """ engines.clear() engine_shortcuts.clear() + categories.clear() + categories['general'] = [] for engine_data in engine_list: engine = load_engine(engine_data) - if engine is not None: - engines[engine.name] = engine + if engine: + register_engine(engine) return engines From 89089370463b8d4c8b766d9ac9b59757e9328296 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Tue, 1 Jun 2021 13:34:41 +0200 Subject: [PATCH 2/5] [mod] searx.engines.load_engine return None instead of sys.exit(1) Loading an engine should not exit the application (*). Instead of exit, return None. (*) RuntimeError still exit the application: syntax error, etc... 
BTW: add documentation and normalize indentation (no functional change) Suggested-by: @dalf https://github.com/searxng/searxng/pull/116#issuecomment-851865627 Signed-off-by: Markus Heiser --- searx/engines/__init__.py | 87 ++++++++++++++++++++++++++++++--------- 1 file changed, 67 insertions(+), 20 deletions(-) diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 1ce90d5ad..70a8ab025 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -1,10 +1,15 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint # pylint: disable=missing-function-docstring -""" -Engine loader: -call load_engines(settings['engines]) -to initialiaze categories, engines, engine_shortcuts +"""This module implements the engine loader. + +Load and initialize the ``engines``, see :py:func:`load_engines` and register +:py:obj:`engine_shortcuts`. + +usage:: + + load_engines( settings['engines'] ) + """ import sys @@ -20,8 +25,10 @@ from searx.utils import load_module, match_language, gen_useragent logger = logger.getChild('engines') ENGINE_DIR = dirname(realpath(__file__)) -BABEL_LANGS = [lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0] - for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())] +BABEL_LANGS = [ + lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0] + for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers()) +] ENGINE_DEFAULT_ARGS = { "engine_type": "online", "inactive": False, @@ -38,16 +45,45 @@ ENGINE_DEFAULT_ARGS = { "display_error_messages": True, "tokens": [], } +"""Defaults for the namespace of an engine module, see :py:func:`load_engine`""" + categories = {'general': []} engines = {} engine_shortcuts = {} +"""Simple map of registered *shortcuts* to name of the engine (or ``None``). +:: + + engine_shortcuts[engine.shortcut] = engine.name + +""" def load_engine(engine_data): + """Load engine from ``engine_data``. 
+ + :param dict engine_data: Attributes from YAML ``settings:engines/<engine>`` + :return: initialized namespace of the ``<engine>``. + + 1. create a namespace and load module of the ``<engine>`` + 2. update namespace with the defaults from :py:obj:`ENGINE_DEFAULT_ARGS` + 3. update namespace with values from ``engine_data`` + + If engine *is active*, return namespace of the engine, otherwise return + ``None``. + + This function also returns ``None`` if initialization of the namespace fails + for one of the following reasons: + + - engine name contains underscore + - engine name is not lowercase + - required attribute is not set :py:func:`is_missing_required_attributes` + + """ + engine_name = engine_data['name'] if '_' in engine_name: logger.error('Engine name contains underscore: "{}"'.format(engine_name)) - sys.exit(1) + return None if engine_name.lower() != engine_name: logger.warn('Engine name is not lowercase: "{}", converting to lowercase'.format(engine_name)) @@ -69,10 +105,12 @@ def load_engine(engine_data): set_language_attributes(engine) update_attributes_for_tor(engine) - if is_missing_required_attributes(engine): - sys.exit(1) if not is_engine_active(engine): return None + + if is_missing_required_attributes(engine): + return None + return engine @@ -101,8 +139,11 @@ def set_language_attributes(engine): # find custom aliases for non standard language codes for engine_lang in engine.supported_languages: iso_lang = match_language(engine_lang, BABEL_LANGS, fallback=None) - if iso_lang and iso_lang != engine_lang and not engine_lang.startswith(iso_lang) and \ - iso_lang not in engine.supported_languages: + if (iso_lang + and iso_lang != engine_lang + and not engine_lang.startswith(iso_lang) + and iso_lang not in engine.supported_languages + ): engine.language_aliases[iso_lang] = engine_lang # language_support @@ -114,25 +155,30 @@ def set_language_attributes(engine): 'User-Agent': gen_useragent(), 'Accept-Language': 'ja-JP,ja;q=0.8,en-US;q=0.5,en;q=0.3', # bing needs a non-English 
language } - engine.fetch_supported_languages =\ - lambda: engine._fetch_supported_languages(get(engine.supported_languages_url, headers=headers)) + engine.fetch_supported_languages = ( + lambda: engine._fetch_supported_languages( + get(engine.supported_languages_url, headers=headers)) + ) def update_attributes_for_tor(engine): - if settings['outgoing'].get('using_tor_proxy') and hasattr(engine, 'onion_url'): + if (settings['outgoing'].get('using_tor_proxy') + and hasattr(engine, 'onion_url') ): engine.search_url = engine.onion_url + getattr(engine, 'search_path', '') engine.timeout += settings['outgoing'].get('extra_proxy_timeout', 0) def is_missing_required_attributes(engine): - """an attribute is required when its name doesn't start with '_'. - Required attributes must not be None + """An attribute is required when its name doesn't start with ``_`` (underline). + Required attributes must not be ``None``. + """ missing = False for engine_attr in dir(engine): if not engine_attr.startswith('_') and getattr(engine, engine_attr) is None: - logger.error('Missing engine config attribute: "{0}.{1}"' - .format(engine.name, engine_attr)) + logger.error( + 'Missing engine config attribute: "{0}.{1}"' + .format(engine.name, engine_attr)) missing = True return missing @@ -143,7 +189,8 @@ def is_engine_active(engine): return False # exclude onion engines if not using tor - if 'onions' in engine.categories and not settings['outgoing'].get('using_tor_proxy'): + if ('onions' in engine.categories + and not settings['outgoing'].get('using_tor_proxy') ): return False return True @@ -160,7 +207,7 @@ def register_engine(engine): def load_engines(engine_list): - """Use case: engine_list = settings['engines'] + """usage: ``engine_list = settings['engines']`` """ engines.clear() engine_shortcuts.clear() From 8375974dff24623d8cb2790ca66ffb067ea34484 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Tue, 1 Jun 2021 16:24:31 +0200 Subject: [PATCH 3/5] [fix] sys.exit(1) when there is 
duplicate engine name --- searx/engines/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 70a8ab025..d7defe0b4 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -197,11 +197,16 @@ def is_engine_active(engine): def register_engine(engine): + if engine.name in engines: + logger.error('Engine config error: ambigious name: {0}'.format(engine.name)) + sys.exit(1) engines[engine.name] = engine + if engine.shortcut in engine_shortcuts: logger.error('Engine config error: ambigious shortcut: {0}'.format(engine.shortcut)) sys.exit(1) engine_shortcuts[engine.shortcut] = engine.name + for category_name in engine.categories: categories.setdefault(category_name, []).append(engine) From ebbd5f73666c1e6c73f0e4f0a575461ccb3a7a42 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Tue, 1 Jun 2021 14:36:02 +0200 Subject: [PATCH 4/5] [docs] add new rubric 'Source-Code' - document namespace: searx.engines - move docs/dev/xpath_engine.rst -> docs/src/searx.engines.xpath.rst Signed-off-by: Markus Heiser --- docs/dev/index.rst | 1 - docs/index.rst | 1 + docs/src/index.rst | 14 ++++++++++++++ docs/src/searx.engines.rst | 8 ++++++++ .../searx.engines.xpath.rst} | 0 5 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 docs/src/index.rst create mode 100644 docs/src/searx.engines.rst rename docs/{dev/xpath_engine.rst => src/searx.engines.xpath.rst} (100%) diff --git a/docs/dev/index.rst b/docs/dev/index.rst index 82e5d3e84..ba0a25a9c 100644 --- a/docs/dev/index.rst +++ b/docs/dev/index.rst @@ -9,7 +9,6 @@ Developer documentation quickstart contribution_guide engine_overview - xpath_engine search_api plugins translation diff --git a/docs/index.rst b/docs/index.rst index 40105129f..71f0d8855 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -35,5 +35,6 @@ anyone, you can set up your own, see :ref:`installation`. searx_extra/index utils/index blog/index + src/index .. 
_Searx-instances: https://searx.space diff --git a/docs/src/index.rst b/docs/src/index.rst new file mode 100644 index 000000000..f88b943d5 --- /dev/null +++ b/docs/src/index.rst @@ -0,0 +1,14 @@ +=========== +Source-Code +=========== + +This is a partial documentation of our source code. We do not aim to document +every item from the source code, but we will add documentation when requested. + + +.. toctree:: + :maxdepth: 2 + :caption: Contents + :glob: + + searx.* diff --git a/docs/src/searx.engines.rst b/docs/src/searx.engines.rst new file mode 100644 index 000000000..687fdb0b2 --- /dev/null +++ b/docs/src/searx.engines.rst @@ -0,0 +1,8 @@ +.. _load_engines: + +============ +Load Engines +============ + +.. automodule:: searx.engines + :members: diff --git a/docs/dev/xpath_engine.rst b/docs/src/searx.engines.xpath.rst similarity index 100% rename from docs/dev/xpath_engine.rst rename to docs/src/searx.engines.xpath.rst From 15de8708b706d5fe7e77febffd580449d827b938 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Tue, 1 Jun 2021 15:09:24 +0200 Subject: [PATCH 5/5] [docs] highlight source code linked from *automodule* content sphinx.ext.viewcode won't highlight when 'highlight_language' [1] is set to string 'none' [2] [1] https://www.sphinx-doc.org/en/master/usage/extensions/viewcode.html [2] https://www.sphinx-doc.org/en/master/usage/configuration.html#confval-highlight_language Signed-off-by: Markus Heiser --- docs/conf.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index 998b07ff1..11e980909 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -13,7 +13,14 @@ project = u'searx' copyright = u'2015-2020, Adam Tauber, Noémi Ványi' author = u'Adam Tauber' release, version = VERSION_STRING, VERSION_STRING -highlight_language = 'none' + +# hint: sphinx.ext.viewcode won't highlight when 'highlight_language' [1] is set +# to string 'none' [2] +# +# [1] 
https://www.sphinx-doc.org/en/master/usage/extensions/viewcode.html +# [2] https://www.sphinx-doc.org/en/master/usage/configuration.html#confval-highlight_language + +highlight_language = 'default' # General --------------------------------------------------------------