From 1a3c73cf6f4a5bf88302fe200f1039185f1a4d9a Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Fri, 13 Aug 2021 19:02:01 +0200 Subject: [PATCH] Replace Flask by Starlette (2/n) --- dockerfiles/docker-entrypoint.sh | 3 +- requirements.txt | 5 +- searx/__main__.py | 1415 -------------------- searx/answerers/random/answerer.py | 2 +- searx/answerers/statistics/answerer.py | 2 +- searx/engines/openstreetmap.py | 2 +- searx/engines/pdbe.py | 2 +- searx/engines/pubmed.py | 2 +- searx/engines/qwant.py | 2 +- searx/flaskfix.py | 77 -- searx/i18n.py | 50 + searx/network/client.py | 147 +- searx/network/network.py | 199 +-- searx/plugins/__init__.py | 14 +- searx/plugins/hash_plugin.py | 2 +- searx/plugins/infinite_scroll.py | 2 +- searx/plugins/oa_doi_rewrite.py | 4 +- searx/plugins/search_on_category_select.py | 2 +- searx/plugins/self_info.py | 2 +- searx/plugins/tracker_url_remover.py | 4 +- searx/plugins/vim_hotkeys.py | 2 +- searx/preferences.py | 6 +- searx/run.py | 48 +- searx/search/__init__.py | 54 +- searx/search/threadnopoolexecutor.py | 43 + searx/templates.py | 192 +++ searx/testing.py | 8 +- searx/webapp.py | 846 +++++------- searx/webutils.py | 16 - tests/unit/test_webapp.py | 76 +- 30 files changed, 994 insertions(+), 2235 deletions(-) delete mode 100644 searx/__main__.py delete mode 100644 searx/flaskfix.py create mode 100644 searx/i18n.py create mode 100644 searx/search/threadnopoolexecutor.py create mode 100644 searx/templates.py diff --git a/dockerfiles/docker-entrypoint.sh b/dockerfiles/docker-entrypoint.sh index d5c4d5fea..b3decbc1c 100755 --- a/dockerfiles/docker-entrypoint.sh +++ b/dockerfiles/docker-entrypoint.sh @@ -146,4 +146,5 @@ unset MORTY_KEY # Start uwsgi printf 'Listen on %s\n' "${BIND_ADDRESS}" -exec su-exec searx:searx uwsgi --master --http-socket "${BIND_ADDRESS}" "${UWSGI_SETTINGS_PATH}" +export SEARX_BIND_ADDRESS="${BIND_ADDRESS}" +exec su-exec searx:searx python3 -m searx.webapp diff --git a/requirements.txt b/requirements.txt 
index e0d61f405..c45901fb8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,11 @@ certifi==2021.5.30 babel==2.9.1 -flask-babel==2.0.0 -flask==2.0.1 jinja2==3.0.1 lxml==4.6.3 pygments==2.9.0 python-dateutil==2.8.2 pyyaml==5.4.1 +aiohttp[speedup]==3.7.4.post0 httpx[http2]==0.17.1 Brotli==1.0.9 uvloop==0.16.0 @@ -16,6 +15,6 @@ setproctitle==1.2.2 starlette==0.16.0 starlette-i18n==1.0.0 starlette-context==0.3.3 +python-multipart==0.0.5 uvicorn[standard]==0.14.0 gunicorn==20.1.0 -python-multipart==0.0.5 diff --git a/searx/__main__.py b/searx/__main__.py deleted file mode 100644 index 9943cc8a3..000000000 --- a/searx/__main__.py +++ /dev/null @@ -1,1415 +0,0 @@ -#!/usr/bin/env python -# SPDX-License-Identifier: AGPL-3.0-or-later -# lint: pylint -# pylint: disable=missing-function-docstring -"""WebbApp - -""" -import hashlib -import hmac -import json -import os -import sys -import asyncio - -from typing import Optional, List -from functools import partial -from datetime import datetime, timedelta -from timeit import default_timer -from html import escape -from io import StringIO - -import logging - -from urllib.parse import parse_qs, urlencode, urlsplit - -import httpx - -import babel -import babel.numbers -import babel.dates - -import jinja2 - -from pygments import highlight -from pygments.lexers import get_lexer_by_name -from pygments.formatters import HtmlFormatter # pylint: disable=no-name-in-module - -from starlette.applications import Starlette -from starlette.requests import Request -from starlette.responses import ( - FileResponse, - JSONResponse, - PlainTextResponse, - RedirectResponse, - Response, - StreamingResponse -) -from starlette.routing import Mount, NoMatchFound -from starlette.templating import Jinja2Templates -from starlette.staticfiles import StaticFiles -from starlette_context import context, plugins -from starlette_context.middleware import RawContextMiddleware -from starlette_i18n import ( - i18n, - load_gettext_translations, -) 
-from starlette_i18n import gettext_lazy as gettext - -from searx import ( - logger, - get_setting, - settings, - searx_debug, - searx_dir, -) -from searx.settings_defaults import OUTPUT_FORMATS -from searx.exceptions import SearxParameterException -from searx.engines import ( - categories, - engines, - engine_shortcuts, -) -from searx.webutils import ( - UnicodeWriter, - highlight_content, - get_static_files, - get_result_templates, - get_themes, - prettify_url, - new_hmac, -) -from searx.webadapter import ( - get_search_query_from_webapp, - get_selected_categories, -) -from searx.utils import ( - html_to_text, - gen_useragent, - dict_subset, - match_language, -) -from searx.version import VERSION_STRING, GIT_URL -from searx.query import RawTextQuery -from searx.plugins import plugins -from searx.plugins.oa_doi_rewrite import get_doi_resolver -from searx.preferences import ( - Preferences, - ValidationException, - LANGUAGE_CODES, -) -from searx.answerers import answerers, ask -from searx.metrics import ( - get_engines_stats, - get_engine_errors, - get_reliabilities, - histogram, - counter, -) - -# renaming names from searx imports ... 
-from searx.autocomplete import search_autocomplete, backends as autocomplete_backends -from searx.languages import language_codes as languages -from searx.locales import LOCALE_NAMES, UI_LOCALE_CODES, RTL_LOCALES -from searx.network import stream as http_stream -from searx.search import SearchWithPlugins, initialize as search_initialize -from searx.search.checker import get_result as checker_get_result - - -logger = logger.getChild('webapp') - -# used when translating category names -_category_names = ( - gettext('files'), - gettext('general'), - gettext('music'), - gettext('social media'), - gettext('images'), - gettext('videos'), - gettext('it'), - gettext('news'), - gettext('map'), - gettext('onions'), - gettext('science') -) - -# -timeout_text = gettext('timeout') -parsing_error_text = gettext('parsing error') -http_protocol_error_text = gettext('HTTP protocol error') -network_error_text = gettext('network error') -exception_classname_to_text = { - None: gettext('unexpected crash'), - 'timeout': timeout_text, - 'asyncio.TimeoutError': timeout_text, - 'httpx.TimeoutException': timeout_text, - 'httpx.ConnectTimeout': timeout_text, - 'httpx.ReadTimeout': timeout_text, - 'httpx.WriteTimeout': timeout_text, - 'httpx.HTTPStatusError': gettext('HTTP error'), - 'httpx.ConnectError': gettext("HTTP connection error"), - 'httpx.RemoteProtocolError': http_protocol_error_text, - 'httpx.LocalProtocolError': http_protocol_error_text, - 'httpx.ProtocolError': http_protocol_error_text, - 'httpx.ReadError': network_error_text, - 'httpx.WriteError': network_error_text, - 'httpx.ProxyError': gettext("proxy error"), - 'searx.exceptions.SearxEngineCaptchaException': gettext("CAPTCHA"), - 'searx.exceptions.SearxEngineTooManyRequestsException': gettext("too many requests"), - 'searx.exceptions.SearxEngineAccessDeniedException': gettext("access denied"), - 'searx.exceptions.SearxEngineAPIException': gettext("server API error"), - 'searx.exceptions.SearxEngineXPathException': 
parsing_error_text, - 'KeyError': parsing_error_text, - 'json.decoder.JSONDecodeError': parsing_error_text, - 'lxml.etree.ParserError': parsing_error_text, -} - -# about static -logger.debug('static directory is %s', settings['ui']['static_path']) -static_files = get_static_files(settings['ui']['static_path']) - -# about templates -logger.debug('templates directory is %s', settings['ui']['templates_path']) -default_theme = settings['ui']['default_theme'] -templates_path = settings['ui']['templates_path'] -themes = get_themes(templates_path) -result_templates = get_result_templates(templates_path) -global_favicons = [] -for indice, theme in enumerate(themes): - global_favicons.append([]) - theme_img_path = os.path.join(settings['ui']['static_path'], 'themes', theme, 'img', 'icons') - for (dirpath, dirnames, filenames) in os.walk(theme_img_path): - global_favicons[indice].extend(filenames) - -STATS_SORT_PARAMETERS = { - 'name': (False, 'name', ''), - 'score': (True, 'score', 0), - 'result_count': (True, 'result_count', 0), - 'time': (False, 'total', 0), - 'reliability': (False, 'reliability', 100), -} - - -class CustomTemplates(Jinja2Templates): - - @staticmethod - def url_for_theme(endpoint: str, override_theme=None, **values): - request = context.request - - # starlette migration - if '_external' in values: - del values['_external'] - if 'filename' in values: - values['path'] = values['filename'] - del values['filename'] - - # - if endpoint == 'static' and values.get('path'): - theme_name = get_current_theme_name(request, override=override_theme) - filename_with_theme = "themes/{}/{}".format(theme_name, values['path']) - if filename_with_theme in static_files: - values['path'] = filename_with_theme - return request.url_for(endpoint, **values) - try: - url_for_args = {} - for k in ('path', 'filename'): - if k in values: - v = values.pop(k) - url_for_args[k] = v - url = request.url_for(endpoint, **url_for_args) - _url = urlsplit(url) - _query = parse_qs(_url.query) - 
_query.update(values) - querystr = urlencode(_query, doseq=True) - return _url._replace(query=querystr).geturl() - # if anchor is not None: - # rv += f"#{url_quote(anchor)}" - except NoMatchFound: - error_message = "url_for, endpoint='%s' not found (values=%s)" % (endpoint, str(values)) - logger.error(error_message) - context.errors.append(error_message) - return '' - - - @staticmethod - def ugettext(*args): - translations = i18n.get_locale().translations - return translations.ugettext(*args) - - @staticmethod - def ungettext(*args): - translations = i18n.get_locale().translations - return translations.ungettext(*args) - - def _create_env(self, directory: str) -> "jinja2.Environment": - loader = jinja2.FileSystemLoader(directory) - env = jinja2.Environment( - loader=loader, - autoescape=True, - trim_blocks=True, - lstrip_blocks=True, - extensions=[ - 'jinja2.ext.loopcontrols', - 'jinja2.ext.i18n' - ] - ) - env.globals["url_for"] = CustomTemplates.url_for_theme - env.install_gettext_callables( - CustomTemplates.ugettext, - CustomTemplates.ungettext, - newstyle=True - ) - return env - - -old_i18n_Locale_parse = i18n.Locale.parse -def i18n_Locale_parse(identifier, sep='_', resolve_likely_subtags=True): - if identifier == 'oc': - identifier = 'fr' - return old_i18n_Locale_parse(identifier, sep, resolve_likely_subtags) -setattr(i18n.Locale, 'parse', i18n_Locale_parse) - - -load_gettext_translations(directory="searx/translations", domain="messages") -templates = CustomTemplates(directory=settings['ui']['templates_path']) -routes = [ - Mount('/static', app=StaticFiles(directory=settings['ui']['static_path']), name="static"), -] - -def on_startup(): - # check secret_key - if not searx_debug and settings['server']['secret_key'] == 'ultrasecretkey': - logger.error('server.secret_key is not changed. 
Please use something else instead of ultrasecretkey.') - sys.exit(1) - search_initialize(enable_checker=True) - -app = Starlette(routes=routes, debug=searx_debug, on_startup=[on_startup]) - - -@app.middleware("http") -async def pre_post_request(request: Request, call_next): - # pre-request - context.clear() - context.request = request - context.start_time = default_timer() - context.render_time = 0 - context.timings = [] - i18n.set_locale('en') - # call endpoint - response = await call_next(request) - # set default http headers - for header, value in settings['server']['default_http_headers'].items(): - if header not in response.headers: - response.headers[header] = value - # set timing Server-Timing header - total_time = default_timer() - context.start_time - timings_all = ['total;dur=' + str(round(total_time * 1000, 3)), - 'render;dur=' + str(round(context.render_time * 1000, 3))] - if len(context.timings) > 0: - timings = sorted(context.timings, key=lambda v: v['total']) - timings_total = [ - 'total_' + str(i) + '_' + v['engine'] + ';dur=' + str(round(v['total'] * 1000, 3)) - for i, v in enumerate(timings) - ] - timings_load = [ - 'load_' + str(i) + '_' + v['engine'] + ';dur=' + str(round(v['load'] * 1000, 3)) - for i, v in enumerate(timings) if v.get('load') - ] - timings_all = timings_all + timings_total + timings_load - response.headers['Server-Timing'] = ', '.join(timings_all) - return response - - -app.add_middleware(RawContextMiddleware) - - -def format_decimal(number, format=None): - locale = i18n.get_locale() - return babel.numbers.format_decimal(number, format=format, locale=locale) - - -def format_date(date=None, format='medium', rebase=False): - if rebase: - raise ValueError('rebase=True not implemented') - locale = i18n.get_locale() - if format in ('full', 'long', 'medium', 'short'): - format = locale.date_formats[format] - pattern = babel.dates.parse_pattern(format) - return pattern.apply(date, locale) - - -def _get_browser_or_settings_language(req 
: Request, lang_list: List[str]): - for lang in req.headers.get("Accept-Language", "en").split(","): - if ';' in lang: - lang = lang.split(';')[0] - if '-' in lang: - lang_parts = lang.split('-') - lang = "{}-{}".format(lang_parts[0], lang_parts[-1].upper()) - locale = match_language(lang, lang_list, fallback=None) - if locale is not None: - return locale - return settings['search']['default_lang'] or 'en' - - -# code-highlighter -def code_highlighter(codelines, language=None): - if not language: - language = 'text' - - try: - # find lexer by programing language - lexer = get_lexer_by_name(language, stripall=True) - - except Exception as e: # pylint: disable=broad-except - logger.exception(e, exc_info=True) - # if lexer is not found, using default one - lexer = get_lexer_by_name('text', stripall=True) - - html_code = '' - tmp_code = '' - last_line = None - - # parse lines - for line, code in codelines: - if not last_line: - line_code_start = line - - # new codeblock is detected - if last_line is not None and\ - last_line + 1 != line: - - # highlight last codepart - formatter = HtmlFormatter( - linenos='inline', linenostart=line_code_start, cssclass="code-highlight" - ) - html_code = html_code + highlight(tmp_code, lexer, formatter) - - # reset conditions for next codepart - tmp_code = '' - line_code_start = line - - # add codepart - tmp_code += code + '\n' - - # update line - last_line = line - - # highlight last codepart - formatter = HtmlFormatter(linenos='inline', linenostart=line_code_start, cssclass="code-highlight") - html_code = html_code + highlight(tmp_code, lexer, formatter) - - return html_code -templates.env.filters['code_highlighter'] = code_highlighter - - -def get_current_theme_name(request: Request, override: Optional[str] =None) -> str: - """Returns theme name. - - Checks in this order: - 1. override - 2. cookies - 3. 
settings""" - - if override and (override in themes or override == '__common__'): - return override - theme_name = request.query_params.get('theme', context.preferences.get_value('theme')) - if theme_name not in themes: - theme_name = default_theme - return theme_name - - -def get_result_template(theme_name: str, template_name: str) -> str: - themed_path = theme_name + '/result_templates/' + template_name - if themed_path in result_templates: - return themed_path - return 'result_templates/' + template_name - - -def proxify(url: str) -> str: - if url.startswith('//'): - url = 'https:' + url - - if not settings.get('result_proxy'): - return url - - url_params = dict(mortyurl=url.encode()) - - if settings['result_proxy'].get('key'): - url_params['mortyhash'] = hmac.new( - settings['result_proxy']['key'], - url.encode(), - hashlib.sha256 - ).hexdigest() - - return '{0}?{1}'.format( - settings['result_proxy']['url'], - urlencode(url_params) - ) - - -def image_proxify(request: Request, url: str): - - if url.startswith('//'): - url = 'https:' + url - - if not context.preferences.get_value('image_proxy'): - return url - - if url.startswith('data:image/'): - # 50 is an arbitrary number to get only the beginning of the image. 
- partial_base64 = url[len('data:image/'):50].split(';') - if len(partial_base64) == 2 \ - and partial_base64[0] in ['gif', 'png', 'jpeg', 'pjpeg', 'webp', 'tiff', 'bmp']\ - and partial_base64[1].startswith('base64,'): - return url - return None - - if settings.get('result_proxy'): - return proxify(url) - - h = new_hmac(settings['server']['secret_key'], url.encode()) - - return '{0}?{1}'.format(request.url_for('image_proxy'), - urlencode(dict(url=url.encode(), h=h))) - - -def _get_ordered_categories(): - ordered_categories = list(settings['ui']['categories_order']) - ordered_categories.extend(x for x in sorted(categories.keys()) if x not in ordered_categories) - return ordered_categories - - -def _get_enable_categories(all_categories): - disabled_engines = context.preferences.engines.get_disabled() - enabled_categories = set( - # pylint: disable=consider-using-dict-items - category for engine_name in engines - for category in engines[engine_name].categories - if (engine_name, category) not in disabled_engines - ) - return [x for x in all_categories if x in enabled_categories] - - -def get_translations(): - return { - # when there is autocompletion - 'no_item_found': str(gettext('No item found')) - } - - -def render(request: Request, - template_name: str, - override_theme: bool = None, - status_code: int = 200, - headers: dict = None, - media_type: str = None, - **kwargs) -> Response: - # values from the HTTP requests - kwargs['request'] = request - kwargs['endpoint'] = 'results' if 'q' in kwargs else request.scope['path'] - kwargs['cookies'] = request.cookies - kwargs['errors'] = context.errors - - # values from the preferences - kwargs['preferences'] = context.preferences - kwargs['method'] = context.preferences.get_value('method') - kwargs['autocomplete'] = context.preferences.get_value('autocomplete') - kwargs['results_on_new_tab'] = context.preferences.get_value('results_on_new_tab') - kwargs['advanced_search'] = context.preferences.get_value('advanced_search') 
- kwargs['safesearch'] = str(context.preferences.get_value('safesearch')) - kwargs['theme'] = get_current_theme_name(request, override=override_theme) - kwargs['all_categories'] = _get_ordered_categories() - kwargs['categories'] = _get_enable_categories(kwargs['all_categories']) - - # i18n - kwargs['language_codes'] = languages # from searx.languages - kwargs['translations'] = json.dumps(get_translations(), separators=(',', ':')) - - locale = context.preferences.get_value('locale') - if locale in RTL_LOCALES and 'rtl' not in kwargs: - kwargs['rtl'] = True - if 'current_language' not in kwargs: - kwargs['current_language'] = match_language( - context.preferences.get_value('language'), LANGUAGE_CODES ) - - # values from settings - kwargs['search_formats'] = [ - x for x in settings['search']['formats'] if x != 'html' - ] - kwargs['instance_name'] = settings['general']['instance_name'] - kwargs['searx_version'] = VERSION_STRING - kwargs['searx_git_url'] = GIT_URL - kwargs['get_setting'] = get_setting - - # helpers to create links to other pages - kwargs['image_proxify'] = partial(image_proxify, request) - kwargs['proxify'] = proxify if settings.get('result_proxy', {}).get('url') else None - kwargs['proxify_results'] = settings.get('result_proxy', {}).get('proxify_results', True) - kwargs['get_result_template'] = get_result_template - kwargs['opensearch_url'] = ( - request.url_for('opensearch') - + '?' 
- + urlencode({'method': kwargs['method'], 'autocomplete': kwargs['autocomplete']}) - ) - - # scripts from plugins - kwargs['scripts'] = set() - for plugin in context.user_plugins: - for script in plugin.js_dependencies: - kwargs['scripts'].add(script) - - # styles from plugins - kwargs['styles'] = set() - for plugin in context.user_plugins: - for css in plugin.css_dependencies: - kwargs['styles'].add(css) - - start_time = default_timer() - result = templates.TemplateResponse( - '{}/{}'.format(kwargs['theme'], template_name), - kwargs, - status_code=status_code, - headers=headers, - media_type=media_type - ) - context.render_time += default_timer() - start_time # pylint: disable=assigning-non-slot - - return result - - -async def set_context(request: Request): - context.errors = [] # pylint: disable=assigning-non-slot - - preferences = Preferences(themes, list(categories.keys()), engines, plugins) # pylint: disable=redefined-outer-name - user_agent = request.headers.get('User-Agent', '').lower() - if 'webkit' in user_agent and 'android' in user_agent: - preferences.key_value_settings['method'].value = 'GET' - context.preferences = preferences # pylint: disable=assigning-non-slot - - try: - preferences.parse_dict(request.cookies) - except Exception as e: # pylint: disable=broad-except - logger.exception(e, exc_info=True) - context.errors.append(gettext('Invalid settings, please edit your preferences')) - - # merge GET, POST vars - # context.form - context.form = dict(await request.form()) # pylint: disable=assigning-non-slot - for k, v in request.query_params.items(): - if k not in context.form: - context.form[k] = v - if context.form.get('preferences'): - preferences.parse_encoded_data(context.form['preferences']) - else: - try: - preferences.parse_dict(context.form) - except Exception as e: # pylint: disable=broad-except - logger.exception(e, exc_info=True) - context.errors.append(gettext('Invalid settings')) - - # set search language - if not 
preferences.get_value("language"): - preferences.parse_dict({"language": _get_browser_or_settings_language(request, LANGUAGE_CODES)}) - - # set UI locale - locale_source = 'preferences or query' - if not preferences.get_value("locale"): - locale = _get_browser_or_settings_language(request, UI_LOCALE_CODES) - locale = locale.replace('-', '_') - preferences.parse_dict({"locale": locale}) - locale_source = 'browser' - - logger.debug( - "%s uses locale `%s` from %s", - str(request.scope['path']), - preferences.get_value("locale"), - locale_source - ) - - # set starlette.i18n locale (get_text) - i18n.set_locale(code=preferences.get_value("locale")) - - # context.user_plugins - context.user_plugins = [] # pylint: disable=assigning-non-slot - allowed_plugins = preferences.plugins.get_enabled() - disabled_plugins = preferences.plugins.get_disabled() - for plugin in plugins: - if ((plugin.default_on and plugin.id not in disabled_plugins) - or plugin.id in allowed_plugins): - context.user_plugins.append(plugin) - - -def search_error(request, output_format, error_message): - if output_format == 'json': - return JSONResponse({'error': error_message}) - if output_format == 'csv': - cont_disp = 'attachment;Filename=searx.csv' - return Response('', media_type='application/csv', headers= {'Content-Disposition': cont_disp}) - if output_format == 'rss': - response_rss = render( - request, - 'opensearch_response_rss.xml', - results=[], - q=context.form['q'] if 'q' in context.form else '', - number_of_results=0, - error_message=error_message, - override_theme='__common__', - ) - return Response(response_rss, media_type='text/xml') - - # html - context.errors.append(gettext('search error')) - return render( - request, - 'index.html', - selected_categories=get_selected_categories(context.preferences, context.form), - ) - - -@app.route("/", methods=['GET', 'POST']) -async def index(request: Request): - await set_context(request) - return render( - request, - 'index.html', - 
selected_categories=get_selected_categories(context.preferences, context.form) - ) - - -@app.route('/search', methods=['GET', 'POST']) -async def search(request: Request): - """Search query in q and return results. - - Supported outputs: html, json, csv, rss. - """ - # pylint: disable=too-many-locals, too-many-return-statements, too-many-branches - # pylint: disable=too-many-statements - - await set_context(request) - - # output_format - output_format = context.form.get('format', 'html') - if output_format not in OUTPUT_FORMATS: - output_format = 'html' - - if output_format not in settings['search']['formats']: - return PlainTextResponse('', status_code=403) - - # check if there is query (not None and not an empty string) - if not context.form.get('q'): - if output_format == 'html': - return render( - request, - 'index.html', - selected_categories=get_selected_categories(context.preferences, context.form), - ) - return search_error(request, output_format, 'No query'), 400 - - # search - search_query = None - raw_text_query = None - result_container = None - try: - search_query, raw_text_query, _, _ = get_search_query_from_webapp( - context.preferences, context.form - ) - # search = Search(search_query) # without plugins - search = SearchWithPlugins(search_query, context.user_plugins, request) # pylint: disable=redefined-outer-name - - loop = asyncio.get_running_loop() - result_container = await loop.run_in_executor(None, search.search) - - except SearxParameterException as e: - logger.exception('search error: SearxParameterException') - return search_error(request, output_format, e.message), 400 - except Exception as e: # pylint: disable=broad-except - logger.exception(e, exc_info=True) - return search_error(request, output_format, gettext('search error')), 500 - - # results - results = result_container.get_ordered_results() - number_of_results = result_container.results_number() - if number_of_results < result_container.results_length(): - number_of_results = 0 - 
- # checkin for a external bang - if result_container.redirect_url: - return RedirectResponse(result_container.redirect_url) - - # Server-Timing header - context.timings = result_container.get_timings() # pylint: disable=assigning-non-slot - - # output - for result in results: - if output_format == 'html': - if 'content' in result and result['content']: - result['content'] = highlight_content(escape(result['content'][:1024]), search_query.query) - if 'title' in result and result['title']: - result['title'] = highlight_content(escape(result['title'] or ''), search_query.query) - else: - if result.get('content'): - result['content'] = html_to_text(result['content']).strip() - # removing html content and whitespace duplications - result['title'] = ' '.join(html_to_text(result['title']).strip().split()) - - if 'url' in result: - result['pretty_url'] = prettify_url(result['url']) - - # TODO, check if timezone is calculated right # pylint: disable=fixme - if result.get('publishedDate'): # do not try to get a date from an empty string or a None type - try: # test if publishedDate >= 1900 (datetime module bug) - result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z') - except ValueError: - result['publishedDate'] = None - else: - if result['publishedDate'].replace(tzinfo=None) >= datetime.now() - timedelta(days=1): - timedifference = datetime.now() - result['publishedDate'].replace(tzinfo=None) - minutes = int((timedifference.seconds / 60) % 60) - hours = int(timedifference.seconds / 60 / 60) - if hours == 0: - result['publishedDate'] = gettext('{minutes} minute(s) ago').format(minutes=minutes) - else: - result['publishedDate'] = gettext( - '{hours} hour(s), {minutes} minute(s) ago').format( - hours=hours, minutes=minutes - ) - else: - result['publishedDate'] = format_date(result['publishedDate']) - - if output_format == 'json': - x = { - 'query': search_query.query, - 'number_of_results': number_of_results, - 'results': results, - 'answers': 
list(result_container.answers), - 'corrections': list(result_container.corrections), - 'infoboxes': result_container.infoboxes, - 'suggestions': list(result_container.suggestions), - 'unresponsive_engines': __get_translated_errors(result_container.unresponsive_engines) - } - response = json.dumps( - x, default = lambda item: list(item) if isinstance(item, set) else item - ) - return JSONResponse(response) - - if output_format == 'csv': - csv = UnicodeWriter(StringIO()) - keys = ('title', 'url', 'content', 'host', 'engine', 'score', 'type') - csv.writerow(keys) - for row in results: - row['host'] = row['parsed_url'].netloc - row['type'] = 'result' - csv.writerow([row.get(key, '') for key in keys]) - for a in result_container.answers: - row = {'title': a, 'type': 'answer'} - csv.writerow([row.get(key, '') for key in keys]) - for a in result_container.suggestions: - row = {'title': a, 'type': 'suggestion'} - csv.writerow([row.get(key, '') for key in keys]) - for a in result_container.corrections: - row = {'title': a, 'type': 'correction'} - csv.writerow([row.get(key, '') for key in keys]) - csv.stream.seek(0) - response = Response(csv.stream.read(), media_type='application/csv') - cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query) - response.headers['Content-Disposition'] = cont_disp - return response - - if output_format == 'rss': - response_rss = render( - request, - 'opensearch_response_rss.xml', - results=results, - answers=result_container.answers, - corrections=result_container.corrections, - suggestions=result_container.suggestions, - q=context.form['q'], - number_of_results=number_of_results, - override_theme='__common__', - ) - return Response(response_rss, media_type='text/xml') - - # HTML output format - - # suggestions: use RawTextQuery to get the suggestion URLs with the same bang - suggestion_urls = list( - map( - lambda suggestion: { - 'url': raw_text_query.changeQuery(suggestion).getFullQuery(), - 'title': suggestion - }, - 
result_container.suggestions - )) - - correction_urls = list( - map( - lambda correction: { - 'url': raw_text_query.changeQuery(correction).getFullQuery(), - 'title': correction - }, - result_container.corrections - )) - - return render( - request, - 'results.html', - results = results, - q=context.form['q'], - selected_categories = search_query.categories, - pageno = search_query.pageno, - time_range = search_query.time_range, - number_of_results = format_decimal(number_of_results), - suggestions = suggestion_urls, - answers = result_container.answers, - corrections = correction_urls, - infoboxes = result_container.infoboxes, - engine_data = result_container.engine_data, - paging = result_container.paging, - unresponsive_engines = __get_translated_errors( - result_container.unresponsive_engines - ), - current_language = match_language( - search_query.lang, - LANGUAGE_CODES, - fallback=context.preferences.get_value("language") - ), - theme = get_current_theme_name(request), - favicons = global_favicons[themes.index(get_current_theme_name(request))], - timeout_limit = context.form.get('timeout_limit', None) - ) - - -def __get_translated_errors(unresponsive_engines): - translated_errors = [] - - # make a copy unresponsive_engines to avoid "RuntimeError: Set changed size - # during iteration" it happens when an engine modifies the ResultContainer - # after the search_multiple_requests method has stopped waiting - - for unresponsive_engine in list(unresponsive_engines): - error_user_text = exception_classname_to_text.get(unresponsive_engine[1]) - if not error_user_text: - error_user_text = exception_classname_to_text[None] - error_msg = gettext(error_user_text) - if unresponsive_engine[2]: - error_msg = "{} {}".format(error_msg, unresponsive_engine[2]) - if unresponsive_engine[3]: - error_msg = gettext('Suspended') + ': ' + error_msg - translated_errors.append((unresponsive_engine[0], error_msg)) - - return sorted(translated_errors, key=lambda e: e[0]) - - 
-@app.route('/about', methods=['GET']) -async def about(request): - """Render about page""" - await set_context(request) - return render(request, 'about.html') - - -@app.route('/autocompleter', methods=['GET', 'POST']) -async def autocompleter(request): - """Return autocompleter results""" - - await set_context(request) - - # run autocompleter - results = [] - - # set blocked engines - disabled_engines = context.preferences.engines.get_disabled() - - # parse query - raw_text_query = RawTextQuery(context.form.get('q', ''), disabled_engines) - sug_prefix = raw_text_query.getQuery() - - # normal autocompletion results only appear if no inner results returned - # and there is a query part - if len(raw_text_query.autocomplete_list) == 0 and len(sug_prefix) > 0: - - # get language from cookie - language = context.preferences.get_value('language') - if not language or language == 'all': - language = 'en' - else: - language = language.split('-')[0] - - # run autocompletion - raw_results = search_autocomplete( - context.preferences.get_value('autocomplete'), sug_prefix, language - ) - for result in raw_results: - # attention: this loop will change raw_text_query object and this is - # the reason why the sug_prefix was stored before (see above) - results.append(raw_text_query.changeQuery(result).getFullQuery()) - - if len(raw_text_query.autocomplete_list) > 0: - for autocomplete_text in raw_text_query.autocomplete_list: - results.append(raw_text_query.get_autocomplete_full_query(autocomplete_text)) - - for answers in ask(raw_text_query): - for answer in answers: - results.append(str(answer['answer'])) - - if request.headers.get('X-Requested-With') == 'XMLHttpRequest': - # the suggestion request comes from the searx search form - suggestions = json.dumps(results) - mimetype = 'application/json' - else: - # the suggestion request comes from browser's URL bar - suggestions = json.dumps([sug_prefix, results]) - mimetype = 'application/x-suggestions+json' - - return 
Response(suggestions, media_type=mimetype) - - -@app.route('/preferences', methods=['GET', 'POST']) -async def preferences(request: Request): - """Render preferences page && save user preferences""" - - # pylint: disable=too-many-locals, too-many-return-statements, too-many-branches - # pylint: disable=too-many-statements - - await set_context(request) - - # save preferences - if request.method == 'POST': - resp = RedirectResponse(url=request.url_for('index')) - try: - context.preferences.parse_form(context.form) - except ValidationException: - context.errors.append(gettext('Invalid settings, please edit your preferences')) - return resp - for cookie_name in request.cookies: - resp.delete_cookie(cookie_name) - context.preferences.save(resp) - return resp - - # render preferences - image_proxy = context.preferences.get_value('image_proxy') # pylint: disable=redefined-outer-name - disabled_engines = context.preferences.engines.get_disabled() - allowed_plugins = context.preferences.plugins.get_enabled() - - # stats for preferences page - filtered_engines = dict( - filter( - lambda kv: (kv[0], context.preferences.validate_token(kv[1])), - engines.items() - ) - ) - - engines_by_category = {} - - for c in categories: # pylint: disable=consider-using-dict-items - engines_by_category[c] = [e for e in categories[c] if e.name in filtered_engines] - # sort the engines alphabetically since the order in settings.yml is meaningless. 
- list.sort(engines_by_category[c], key=lambda e: e.name) - - # get first element [0], the engine time, - # and then the second element [1] : the time (the first one is the label) - stats = {} # pylint: disable=redefined-outer-name - max_rate95 = 0 - for _, e in filtered_engines.items(): - h = histogram('engine', e.name, 'time', 'total') - median = round(h.percentage(50), 1) if h.count > 0 else None - rate80 = round(h.percentage(80), 1) if h.count > 0 else None - rate95 = round(h.percentage(95), 1) if h.count > 0 else None - - max_rate95 = max(max_rate95, rate95 or 0) - - result_count_sum = histogram('engine', e.name, 'result', 'count').sum - successful_count = counter('engine', e.name, 'search', 'count', 'successful') - result_count = int(result_count_sum / float(successful_count)) if successful_count else 0 - - stats[e.name] = { - 'time': median, - 'rate80': rate80, - 'rate95': rate95, - 'warn_timeout': e.timeout > settings['outgoing']['request_timeout'], - 'supports_selected_language': _is_selected_language_supported(e, context.preferences), - 'result_count': result_count, - } - # end of stats - - # reliabilities - reliabilities = {} - engine_errors = get_engine_errors(filtered_engines) - checker_results = checker_get_result() - checker_results = checker_results['engines'] \ - if checker_results['status'] == 'ok' and 'engines' in checker_results else {} - for _, e in filtered_engines.items(): - checker_result = checker_results.get(e.name, {}) - checker_success = checker_result.get('success', True) - errors = engine_errors.get(e.name) or [] - if counter('engine', e.name, 'search', 'count', 'sent') == 0: - # no request - reliablity = None - elif checker_success and not errors: - reliablity = 100 - elif 'simple' in checker_result.get('errors', {}): - # the basic (simple) test doesn't work: the engine is broken accoding to the checker - # even if there is no exception - reliablity = 0 - else: - reliablity = 100 - sum([error['percentage'] for error in errors if not 
error.get('secondary')]) - - reliabilities[e.name] = { - 'reliablity': reliablity, - 'errors': [], - 'checker': checker_results.get(e.name, {}).get('errors', {}).keys(), - } - # keep the order of the list checker_results[e.name]['errors'] and deduplicate. - # the first element has the highest percentage rate. - reliabilities_errors = [] - for error in errors: - error_user_text = None - if error.get('secondary') or 'exception_classname' not in error: - continue - error_user_text = exception_classname_to_text.get(error.get('exception_classname')) - if not error: - error_user_text = exception_classname_to_text[None] - if error_user_text not in reliabilities_errors: - reliabilities_errors.append(error_user_text) - reliabilities[e.name]['errors'] = reliabilities_errors - - # supports - supports = {} - for _, e in filtered_engines.items(): - supports_selected_language = _is_selected_language_supported(e, context.preferences) - safesearch = e.safesearch - time_range_support = e.time_range_support - for checker_test_name in checker_results.get(e.name, {}).get('errors', {}): - if supports_selected_language and checker_test_name.startswith('lang_'): - supports_selected_language = '?' - elif safesearch and checker_test_name == 'safesearch': - safesearch = '?' - elif time_range_support and checker_test_name == 'time_range': - time_range_support = '?' 
- supports[e.name] = { - 'supports_selected_language': supports_selected_language, - 'safesearch': safesearch, - 'time_range_support': time_range_support, - } - - return render( - request, - 'preferences.html', - selected_categories = get_selected_categories(context.preferences, context.form), - locales = LOCALE_NAMES, - current_locale = context.preferences.get_value("locale"), - image_proxy = image_proxy, - engines_by_category = engines_by_category, - stats = stats, - max_rate95 = max_rate95, - reliabilities = reliabilities, - supports = supports, - answerers = [ - {'info': a.self_info(), 'keywords': a.keywords} - for a in answerers - ], - disabled_engines = disabled_engines, - autocomplete_backends = autocomplete_backends, - shortcuts = {y: x for x, y in engine_shortcuts.items()}, - themes = themes, - plugins = plugins, - doi_resolvers = settings['doi_resolvers'], - current_doi_resolver = get_doi_resolver( - context.form, context.preferences.get_value('doi_resolver') - ), - allowed_plugins = allowed_plugins, - theme = get_current_theme_name(request), - preferences_url_params = context.preferences.get_as_url_params(), - locked_preferences = settings['preferences']['lock'], - preferences = True - ) - - -def _is_selected_language_supported(engine, preferences): # pylint: disable=redefined-outer-name - language = preferences.get_value('language') - if language == 'all': - return True - x = match_language( - language, - getattr(engine, 'supported_languages', []), - getattr(engine, 'language_aliases', {}), - None - ) - return bool(x) - - -@app.route('/image_proxy', methods=['GET']) -async def image_proxy(request: Request): - # pylint: disable=too-many-return-statements - - url = request.query_params.get('url') - if not url: - return PlainTextResponse('No URL', status_code=400) - - h = new_hmac(settings['server']['secret_key'], url.encode()) - if h != request.query_params.get('h'): - return PlainTextResponse('Wrong k', status_code=400) - - maximum_size = 5 * 1024 * 1024 
- - try: - headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'}) - headers['User-Agent'] = gen_useragent() - stream = http_stream( - method = 'GET', - url = url, - headers = headers, - timeout = settings['outgoing']['request_timeout'], - allow_redirects = True, - max_redirects = 20 - ) - resp = next(stream) - content_length = resp.headers.get('Content-Length') - if (content_length - and content_length.isdigit() - and int(content_length) > maximum_size ): - return PlainTextResponse('Max size', status_code=400) - - if resp.status_code == 304: - return PlainTextResponse('', status_code=resp.status_code) - - if resp.status_code != 200: - logger.debug( - 'image-proxy: wrong response code: {0}'.format( - resp.status_code)) - if resp.status_code >= 400: - return PlainTextResponse('Status code', status_code=resp.status_code) - return PlainTextResponse('Status code', status_code=400) - - if not resp.headers.get('content-type', '').startswith('image/'): - logger.debug( - 'image-proxy: wrong content-type: {0}'.format( - resp.headers.get('content-type'))) - return PlainTextResponse('Wrong content type', status_code=400) - - headers = dict_subset( - resp.headers, - {'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'} - ) - - total_length = 0 - - async def forward_chunk(): - nonlocal total_length - for chunk in stream: - total_length += len(chunk) - if total_length > maximum_size: - break - yield chunk - - return StreamingResponse(forward_chunk(), - media_type=resp.headers['content-type'], - headers=headers) - except httpx.HTTPError: - logger.exception('HTTP error') - return PlainTextResponse('HTTP Error', status_code=400) - - -@app.route('/stats', methods=['GET']) -async def stats(request: Request): - """Render engine statistics page.""" - await set_context(request) - - sort_order = request.query_params.get('sort', 'name') - selected_engine_name = request.query_params.get('engine', None) - - filtered_engines = dict( - filter( - 
lambda kv: (kv[0], context.preferences.validate_token(kv[1])), - engines.items() - )) - if selected_engine_name: - if selected_engine_name not in filtered_engines: - selected_engine_name = None - else: - filtered_engines = [selected_engine_name] - - checker_results = checker_get_result() - checker_results = ( - checker_results['engines'] - if checker_results['status'] == 'ok' and 'engines' in checker_results else {} - ) - - engine_stats = get_engines_stats(filtered_engines) - engine_reliabilities = get_reliabilities(filtered_engines, checker_results) - - if sort_order not in STATS_SORT_PARAMETERS: - sort_order = 'name' - - reverse, key_name, default_value = STATS_SORT_PARAMETERS[sort_order] - - def get_key(engine_stat): - reliability = engine_reliabilities.get(engine_stat['name']).get('reliablity', 0) - reliability_order = 0 if reliability else 1 - if key_name == 'reliability': - key = reliability - reliability_order = 0 - else: - key = engine_stat.get(key_name) or default_value - if reverse: - reliability_order = 1 - reliability_order - return (reliability_order, key, engine_stat['name']) - - engine_stats['time'] = sorted(engine_stats['time'], reverse=reverse, key=get_key) - return render( - request, - 'stats.html', - sort_order = sort_order, - engine_stats = engine_stats, - engine_reliabilities = engine_reliabilities, - selected_engine_name = selected_engine_name, - ) - - -@app.route('/stats/errors', methods=['GET']) -async def stats_errors(request: Request): - await set_context(request) - filtered_engines = dict( - filter( - lambda kv: (kv[0], context.preferences.validate_token(kv[1])), - engines.items() - )) - result = get_engine_errors(filtered_engines) - return JSONResponse(result) - - -@app.route('/stats/checker', methods=['GET']) -async def stats_checker(request: Request): - result = checker_get_result() - return JSONResponse(result) - - -@app.route('/robots.txt', methods=['GET']) -async def robots(request: Request): - return 
PlainTextResponse("""User-agent: * -Allow: / -Allow: /about -Disallow: /stats -Disallow: /preferences -Disallow: /*?*q=* -""", media_type='text/plain') - - -@app.route('/opensearch.xml', methods=['GET']) -async def opensearch(request: Request): - await set_context(request) - method = 'post' - - if context.preferences.get_value('method') == 'GET': - method = 'get' - - # chrome/chromium only supports HTTP GET.... - if request.headers.get('User-Agent', '').lower().find('webkit') >= 0: - method = 'get' - - return render( - request, - 'opensearch.xml', - status = 200, - media_type = "application/opensearchdescription+xml", - opensearch_method=method, - override_theme='__common__' - ) - - -@app.route('/favicon.ico') -async def favicon(request: Request): - await set_context(request) - return FileResponse( - os.path.join( - searx_dir, - settings['ui']['static_path'], - 'themes', - get_current_theme_name(request), - 'img', - 'favicon.png' - ), - media_type = 'image/vnd.microsoft.icon' - ) - - -@app.route('/clear_cookies') -def clear_cookies(request: Request): - resp = RedirectResponse(request.url_for('index')) - for cookie_name in request.cookies: - resp.delete_cookie(cookie_name) - return resp - - -@app.route('/config') -async def config(request: Request): - """Return configuration in JSON format.""" - await set_context(request) - - _engines = [] - for name, engine in engines.items(): - if not context.preferences.validate_token(engine): - continue - - supported_languages = engine.supported_languages - if isinstance(engine.supported_languages, dict): - supported_languages = list(engine.supported_languages.keys()) - - _engines.append({ - 'name': name, - 'categories': engine.categories, - 'shortcut': engine.shortcut, - 'enabled': not engine.disabled, - 'paging': engine.paging, - 'language_support': engine.language_support, - 'supported_languages': supported_languages, - 'safesearch': engine.safesearch, - 'time_range_support': engine.time_range_support, - 'timeout': 
engine.timeout - }) - - _plugins = [] - for _ in plugins: - _plugins.append({'name': _.name, 'enabled': _.default_on}) - - return JSONResponse({ - 'categories': list(categories.keys()), - 'engines': _engines, - 'plugins': _plugins, - 'instance_name': settings['general']['instance_name'], - 'locales': LOCALE_NAMES, - 'default_locale': settings['ui']['default_locale'], - 'autocomplete': settings['search']['autocomplete'], - 'safe_search': settings['search']['safe_search'], - 'default_theme': settings['ui']['default_theme'], - 'version': VERSION_STRING, - 'brand': { - 'CONTACT_URL': get_setting('general.contact_url'), - 'GIT_URL': GIT_URL, - 'DOCS_URL': get_setting('brand.docs_url'), - }, - 'doi_resolvers': list(settings['doi_resolvers'].keys()), - 'default_doi_resolver': settings['default_doi_resolver'], - }) - - -@app.exception_handler(404) -async def page_not_found(request: Request, exc): - await set_context(request) - return render( - request, - '404.html', - status_code=exc.status_code - ) - - -def main(): - if searx_debug: - from searx.run import run_debug - run_debug() - else: - from searx.run import run_production - run_production(app) - - -if __name__ == '__main__': - main() diff --git a/searx/answerers/random/answerer.py b/searx/answerers/random/answerer.py index d5223e517..11e26e0a5 100644 --- a/searx/answerers/random/answerer.py +++ b/searx/answerers/random/answerer.py @@ -2,7 +2,7 @@ import hashlib import random import string import uuid -from flask_babel import gettext +from searx.i18n import gettext # required answerer attribute # specifies which search query keywords triggers this answerer diff --git a/searx/answerers/statistics/answerer.py b/searx/answerers/statistics/answerer.py index abd4be7f5..349b23554 100644 --- a/searx/answerers/statistics/answerer.py +++ b/searx/answerers/statistics/answerer.py @@ -1,7 +1,7 @@ from functools import reduce from operator import mul -from flask_babel import gettext +from searx.i18n import gettext keywords = 
('min', diff --git a/searx/engines/openstreetmap.py b/searx/engines/openstreetmap.py index 6920356c3..2d041293f 100644 --- a/searx/engines/openstreetmap.py +++ b/searx/engines/openstreetmap.py @@ -10,7 +10,7 @@ from json import loads from urllib.parse import urlencode from functools import partial -from flask_babel import gettext +from searx.i18n import gettext from searx.data import OSM_KEYS_TAGS, CURRENCIES from searx.utils import searx_useragent diff --git a/searx/engines/pdbe.py b/searx/engines/pdbe.py index b9bbfaf1b..2f502de20 100644 --- a/searx/engines/pdbe.py +++ b/searx/engines/pdbe.py @@ -4,7 +4,7 @@ """ from json import loads -from flask_babel import gettext +from searx.i18n import gettext # about about = { diff --git a/searx/engines/pubmed.py b/searx/engines/pubmed.py index 5d88d398e..a902b49b9 100644 --- a/searx/engines/pubmed.py +++ b/searx/engines/pubmed.py @@ -3,10 +3,10 @@ PubMed (Scholar publications) """ -from flask_babel import gettext from lxml import etree from datetime import datetime from urllib.parse import urlencode +from searx.i18n import gettext from searx.network import get # about diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py index 8d03d8324..1503ce4fb 100644 --- a/searx/engines/qwant.py +++ b/searx/engines/qwant.py @@ -29,8 +29,8 @@ from datetime import ( ) from json import loads from urllib.parse import urlencode -from flask_babel import gettext +from searx.i18n import gettext from searx.utils import match_language from searx.exceptions import SearxEngineAPIException from searx.network import raise_for_httperror diff --git a/searx/flaskfix.py b/searx/flaskfix.py deleted file mode 100644 index c069df453..000000000 --- a/searx/flaskfix.py +++ /dev/null @@ -1,77 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-or-later -# lint: pylint -# pylint: disable=missing-module-docstring,missing-function-docstring - -from urllib.parse import urlparse - -from werkzeug.middleware.proxy_fix import ProxyFix -from werkzeug.serving import 
WSGIRequestHandler - -from searx import settings - - -class ReverseProxyPathFix: - '''Wrap the application in this middleware and configure the - front-end server to add these headers, to let you quietly bind - this to a URL other than / and to an HTTP scheme that is - different than what is used locally. - - http://flask.pocoo.org/snippets/35/ - - In nginx: - location /myprefix { - proxy_pass http://127.0.0.1:8000; - proxy_set_header Host $host; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Scheme $scheme; - proxy_set_header X-Script-Name /myprefix; - } - - :param wsgi_app: the WSGI application - ''' - # pylint: disable=too-few-public-methods - - def __init__(self, wsgi_app): - - self.wsgi_app = wsgi_app - self.script_name = None - self.scheme = None - self.server = None - - if settings['server']['base_url']: - - # If base_url is specified, then these values from are given - # preference over any Flask's generics. - - base_url = urlparse(settings['server']['base_url']) - self.script_name = base_url.path - if self.script_name.endswith('/'): - # remove trailing slash to avoid infinite redirect on the index - # see https://github.com/searx/searx/issues/2729 - self.script_name = self.script_name[:-1] - self.scheme = base_url.scheme - self.server = base_url.netloc - - def __call__(self, environ, start_response): - script_name = self.script_name or environ.get('HTTP_X_SCRIPT_NAME', '') - if script_name: - environ['SCRIPT_NAME'] = script_name - path_info = environ['PATH_INFO'] - if path_info.startswith(script_name): - environ['PATH_INFO'] = path_info[len(script_name):] - - scheme = self.scheme or environ.get('HTTP_X_SCHEME', '') - if scheme: - environ['wsgi.url_scheme'] = scheme - - server = self.server or environ.get('HTTP_X_FORWARDED_HOST', '') - if server: - environ['HTTP_HOST'] = server - return self.wsgi_app(environ, start_response) - - -def patch_application(app): - # serve pages with HTTP/1.1 - 
WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server']['http_protocol_version']) - # patch app to handle non root url-s behind proxy & wsgi - app.wsgi_app = ReverseProxyPathFix(ProxyFix(app.wsgi_app)) diff --git a/searx/i18n.py b/searx/i18n.py new file mode 100644 index 000000000..425e8e103 --- /dev/null +++ b/searx/i18n.py @@ -0,0 +1,50 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +# pylint: disable=missing-module-docstring,missing-function-docstring + +import babel +import babel.numbers +import babel.dates +import babel.support + +from starlette_i18n import ( + i18n, + load_gettext_translations, +) +from starlette_i18n import gettext_lazy as gettext + +__all__ = ( + 'gettext', + 'format_decimal', + 'format_date', + 'initialize_i18n' +) + + +def format_decimal(number, format=None): # pylint: disable=redefined-builtin + locale = i18n.get_locale() + return babel.numbers.format_decimal(number, format=format, locale=locale) + + +def format_date(date=None, format='medium', rebase=False): # pylint: disable=redefined-builtin + if rebase: + raise ValueError('rebase=True not implemented') + locale = i18n.get_locale() + if format in ('full', 'long', 'medium', 'short'): + format = locale.date_formats[format] + pattern = babel.dates.parse_pattern(format) + return pattern.apply(date, locale) + + +def monkeypatch(): + old_i18n_Locale_parse = i18n.Locale.parse + def i18n_Locale_parse(identifier, sep='_', resolve_likely_subtags=True): + if identifier == 'oc': + identifier = 'fr' + return old_i18n_Locale_parse(identifier, sep, resolve_likely_subtags) + setattr(i18n.Locale, 'parse', i18n_Locale_parse) + + +def initialize_i18n(translations_path): + monkeypatch() + load_gettext_translations(directory=translations_path, domain="messages") diff --git a/searx/network/client.py b/searx/network/client.py index ef895232a..0185655c9 100644 --- a/searx/network/client.py +++ b/searx/network/client.py @@ -3,7 +3,6 @@ # pylint: 
disable=missing-module-docstring, missing-function-docstring, global-statement import asyncio -import logging import threading import httpcore import httpx @@ -12,7 +11,7 @@ from python_socks import ( parse_proxy_url, ProxyConnectionError, ProxyTimeoutError, - ProxyError + ProxyError, ) from searx import logger @@ -26,33 +25,38 @@ else: uvloop.install() -logger = logger.getChild('searx.http.client') +logger = logger.getChild("searx.http.client") LOOP = None +LOOP_LOCK = threading.Lock() SSLCONTEXTS = {} TRANSPORT_KWARGS = { - 'backend': 'asyncio', - 'trust_env': False, + "backend": "asyncio", + "trust_env": False, } # pylint: disable=protected-access async def close_connections_for_url( - connection_pool: httpcore.AsyncConnectionPool, - url: httpcore._utils.URL ): + connection_pool: httpcore.AsyncConnectionPool, url: httpcore._utils.URL +): origin = httpcore._utils.url_to_origin(url) - logger.debug('Drop connections for %r', origin) + logger.debug("Drop connections for %r", origin) connections_to_close = connection_pool._connections_for_origin(origin) for connection in connections_to_close: await connection_pool._remove_from_pool(connection) try: await connection.aclose() except httpcore.NetworkError as e: - logger.warning('Error closing an existing connection', exc_info=e) + logger.warning("Error closing an existing connection", exc_info=e) + + # pylint: enable=protected-access -def get_sslcontexts(proxy_url=None, cert=None, verify=True, trust_env=True, http2=False): +def get_sslcontexts( + proxy_url=None, cert=None, verify=True, trust_env=True, http2=False +): global SSLCONTEXTS key = (proxy_url, cert, verify, trust_env, http2) if key not in SSLCONTEXTS: @@ -98,7 +102,7 @@ class AsyncProxyTransportFixed(AsyncProxyTransport): except httpcore.RemoteProtocolError as e: # in case of httpcore.RemoteProtocolError: Server disconnected await close_connections_for_url(self, url) - logger.warning('httpcore.RemoteProtocolError: retry', exc_info=e) + 
logger.warning("httpcore.RemoteProtocolError: retry", exc_info=e) # retry except (httpcore.NetworkError, httpcore.ProtocolError) as e: # httpcore.WriteError on HTTP/2 connection leaves a new opened stream @@ -124,35 +128,50 @@ class AsyncHTTPTransportFixed(httpx.AsyncHTTPTransport): # raised by _keepalive_sweep() # from https://github.com/encode/httpcore/blob/4b662b5c42378a61e54d673b4c949420102379f5/httpcore/_backends/asyncio.py#L198 # pylint: disable=line-too-long await close_connections_for_url(self._pool, url) - logger.warning('httpcore.CloseError: retry', exc_info=e) + logger.warning("httpcore.CloseError: retry", exc_info=e) # retry except httpcore.RemoteProtocolError as e: # in case of httpcore.RemoteProtocolError: Server disconnected await close_connections_for_url(self._pool, url) - logger.warning('httpcore.RemoteProtocolError: retry', exc_info=e) + logger.warning("httpcore.RemoteProtocolError: retry", exc_info=e) # retry except (httpcore.ProtocolError, httpcore.NetworkError) as e: await close_connections_for_url(self._pool, url) raise e -def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit, retries): +def get_transport_for_socks_proxy( + verify, http2, local_address, proxy_url, limit, retries +): global TRANSPORT_KWARGS # support socks5h (requests compatibility): # https://requests.readthedocs.io/en/master/user/advanced/#socks # socks5:// hostname is resolved on client side # socks5h:// hostname is resolved on proxy side rdns = False - socks5h = 'socks5h://' + socks5h = "socks5h://" if proxy_url.startswith(socks5h): - proxy_url = 'socks5://' + proxy_url[len(socks5h):] + proxy_url = "socks5://" + proxy_url[len(socks5h) :] rdns = True - proxy_type, proxy_host, proxy_port, proxy_username, proxy_password = parse_proxy_url(proxy_url) - verify = get_sslcontexts(proxy_url, None, True, False, http2) if verify is True else verify + ( + proxy_type, + proxy_host, + proxy_port, + proxy_username, + proxy_password, + ) = 
parse_proxy_url(proxy_url) + verify = ( + get_sslcontexts(proxy_url, None, True, False, http2) + if verify is True + else verify + ) return AsyncProxyTransportFixed( - proxy_type=proxy_type, proxy_host=proxy_host, proxy_port=proxy_port, - username=proxy_username, password=proxy_password, + proxy_type=proxy_type, + proxy_host=proxy_host, + proxy_port=proxy_port, + username=proxy_username, + password=proxy_password, rdns=rdns, loop=get_loop(), verify=verify, @@ -168,7 +187,9 @@ def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit def get_transport(verify, http2, local_address, proxy_url, limit, retries): global TRANSPORT_KWARGS - verify = get_sslcontexts(None, None, True, False, http2) if verify is True else verify + verify = ( + get_sslcontexts(None, None, True, False, http2) if verify is True else verify + ) return AsyncHTTPTransportFixed( # pylint: disable=protected-access verify=verify, @@ -184,30 +205,39 @@ def get_transport(verify, http2, local_address, proxy_url, limit, retries): def iter_proxies(proxies): # https://www.python-httpx.org/compatibility/#proxy-keys if isinstance(proxies, str): - yield 'all://', proxies + yield "all://", proxies elif isinstance(proxies, dict): for pattern, proxy_url in proxies.items(): yield pattern, proxy_url def new_client( - # pylint: disable=too-many-arguments - enable_http, verify, enable_http2, - max_connections, max_keepalive_connections, keepalive_expiry, - proxies, local_address, retries, max_redirects ): + # pylint: disable=too-many-arguments + enable_http, + verify, + enable_http2, + max_connections, + max_keepalive_connections, + keepalive_expiry, + proxies, + local_address, + retries, + max_redirects, +): limit = httpx.Limits( max_connections=max_connections, max_keepalive_connections=max_keepalive_connections, - keepalive_expiry=keepalive_expiry + keepalive_expiry=keepalive_expiry, ) # See https://www.python-httpx.org/advanced/#routing mounts = {} for pattern, proxy_url in 
iter_proxies(proxies): - if not enable_http and (pattern == 'http' or pattern.startswith('http://')): + if not enable_http and (pattern == "http" or pattern.startswith("http://")): continue - if (proxy_url.startswith('socks4://') - or proxy_url.startswith('socks5://') - or proxy_url.startswith('socks5h://') + if ( + proxy_url.startswith("socks4://") + or proxy_url.startswith("socks5://") + or proxy_url.startswith("socks5h://") ): mounts[pattern] = get_transport_for_socks_proxy( verify, enable_http2, local_address, proxy_url, limit, retries @@ -218,10 +248,39 @@ def new_client( ) if not enable_http: - mounts['http://'] = AsyncHTTPTransportNoHttp() + mounts["http://"] = AsyncHTTPTransportNoHttp() transport = get_transport(verify, enable_http2, local_address, None, limit, retries) - return httpx.AsyncClient(transport=transport, mounts=mounts, max_redirects=max_redirects) + return httpx.AsyncClient( + transport=transport, mounts=mounts, max_redirects=max_redirects + ) + + +def create_loop(): + # pylint: disable=consider-using-with + global LOOP_LOCK + LOOP_LOCK.acquire() + if LOOP: + return + + def loop_thread(): + global LOOP + try: + LOOP = asyncio.new_event_loop() + except: # pylint: disable=bare-except + logger.exception('Error on asyncio.new_event_loop()') + finally: + LOOP_LOCK.release() + if LOOP: + LOOP.run_forever() + + thread = threading.Thread( + target=loop_thread, + name="asyncio_loop", + daemon=True, + ) + thread.start() + LOOP_LOCK.acquire() def get_loop(): @@ -229,20 +288,10 @@ def get_loop(): if LOOP: return LOOP - loop_ready = threading.Lock() - loop_ready.acquire() - - def loop_thread(): - global LOOP - LOOP = asyncio.new_event_loop() - loop_ready.release() - LOOP.run_forever() - - thread = threading.Thread( - target=loop_thread, - name='asyncio_loop', - daemon=True, - ) - thread.start() - loop_ready.acquire() + create_loop() return LOOP + + +def set_loop(loop): + global LOOP + LOOP = loop diff --git a/searx/network/network.py 
b/searx/network/network.py index e13b5fd0f..2b67d5ef3 100644 --- a/searx/network/network.py +++ b/searx/network/network.py @@ -3,7 +3,6 @@ # pylint: disable=global-statement # pylint: disable=missing-module-docstring, missing-class-docstring, missing-function-docstring -import atexit import asyncio import ipaddress from itertools import cycle @@ -11,54 +10,61 @@ from itertools import cycle import httpx from .client import new_client, get_loop -from searx import logger -DEFAULT_NAME = '__DEFAULT__' +DEFAULT_NAME = "__DEFAULT__" NETWORKS = {} # requests compatibility when reading proxy settings from settings.yml PROXY_PATTERN_MAPPING = { - 'http': 'http://', - 'https': 'https://', - 'socks4': 'socks4://', - 'socks5': 'socks5://', - 'socks5h': 'socks5h://', - 'http:': 'http://', - 'https:': 'https://', - 'socks4:': 'socks4://', - 'socks5:': 'socks5://', - 'socks5h:': 'socks5h://', + "http": "http://", + "https": "https://", + "socks4": "socks4://", + "socks5": "socks5://", + "socks5h": "socks5h://", + "http:": "http://", + "https:": "https://", + "socks4:": "socks4://", + "socks5:": "socks5://", + "socks5h:": "socks5h://", } -ADDRESS_MAPPING = { - 'ipv4': '0.0.0.0', - 'ipv6': '::' -} +ADDRESS_MAPPING = {"ipv4": "0.0.0.0", "ipv6": "::"} class Network: __slots__ = ( - 'enable_http', 'verify', 'enable_http2', - 'max_connections', 'max_keepalive_connections', 'keepalive_expiry', - 'local_addresses', 'proxies', 'max_redirects', 'retries', 'retry_on_http_error', - '_local_addresses_cycle', '_proxies_cycle', '_clients' + "enable_http", + "verify", + "enable_http2", + "max_connections", + "max_keepalive_connections", + "keepalive_expiry", + "local_addresses", + "proxies", + "max_redirects", + "retries", + "retry_on_http_error", + "_local_addresses_cycle", + "_proxies_cycle", + "_clients", ) def __init__( - # pylint: disable=too-many-arguments - self, - enable_http=True, - verify=True, - enable_http2=False, - max_connections=None, - max_keepalive_connections=None, - 
keepalive_expiry=None, - proxies=None, - local_addresses=None, - retries=0, - retry_on_http_error=None, - max_redirects=30 ): + # pylint: disable=too-many-arguments + self, + enable_http=True, + verify=True, + enable_http2=False, + max_connections=None, + max_keepalive_connections=None, + keepalive_expiry=None, + proxies=None, + local_addresses=None, + retries=0, + retry_on_http_error=None, + max_redirects=30, + ): self.enable_http = enable_http self.verify = verify @@ -78,13 +84,13 @@ class Network: def check_parameters(self): for address in self.iter_ipaddresses(): - if '/' in address: + if "/" in address: ipaddress.ip_network(address, False) else: ipaddress.ip_address(address) if self.proxies is not None and not isinstance(self.proxies, (str, dict)): - raise ValueError('proxies type has to be str, dict or None') + raise ValueError("proxies type has to be str, dict or None") def iter_ipaddresses(self): local_addresses = self.local_addresses @@ -99,7 +105,7 @@ class Network: while True: count = 0 for address in self.iter_ipaddresses(): - if '/' in address: + if "/" in address: for a in ipaddress.ip_network(address, False).hosts(): yield str(a) count += 1 @@ -115,7 +121,7 @@ class Network: return # https://www.python-httpx.org/compatibility/#proxy-keys if isinstance(self.proxies, str): - yield 'all://', [self.proxies] + yield "all://", [self.proxies] else: for pattern, proxy_url in self.proxies.items(): pattern = PROXY_PATTERN_MAPPING.get(pattern, pattern) @@ -129,7 +135,10 @@ class Network: proxy_settings[pattern] = cycle(proxy_urls) while True: # pylint: disable=stop-iteration-return - yield tuple((pattern, next(proxy_url_cycle)) for pattern, proxy_url_cycle in proxy_settings.items()) + yield tuple( + (pattern, next(proxy_url_cycle)) + for pattern, proxy_url_cycle in proxy_settings.items() + ) def get_client(self, verify=None, max_redirects=None): verify = self.verify if verify is None else verify @@ -148,32 +157,43 @@ class Network: dict(proxies), local_address, 
0, - max_redirects + max_redirects, ) return self._clients[key] async def aclose(self): - async def close_client(client): + async def close_client(client: httpx.AsyncClient): try: await client.aclose() except httpx.HTTPError: pass - await asyncio.gather(*[close_client(client) for client in self._clients.values()], return_exceptions=False) + + await asyncio.gather( + *[close_client(client) for client in self._clients.values()], + return_exceptions=False + ) @staticmethod def get_kwargs_clients(kwargs): kwargs_clients = {} - if 'verify' in kwargs: - kwargs_clients['verify'] = kwargs.pop('verify') - if 'max_redirects' in kwargs: - kwargs_clients['max_redirects'] = kwargs.pop('max_redirects') + if "verify" in kwargs: + kwargs_clients["verify"] = kwargs.pop("verify") + if "max_redirects" in kwargs: + kwargs_clients["max_redirects"] = kwargs.pop("max_redirects") return kwargs_clients def is_valid_respones(self, response): # pylint: disable=too-many-boolean-expressions - if ((self.retry_on_http_error is True and 400 <= response.status_code <= 599) - or (isinstance(self.retry_on_http_error, list) and response.status_code in self.retry_on_http_error) - or (isinstance(self.retry_on_http_error, int) and response.status_code == self.retry_on_http_error) + if ( + (self.retry_on_http_error is True and 400 <= response.status_code <= 599) + or ( + isinstance(self.retry_on_http_error, list) + and response.status_code in self.retry_on_http_error + ) + or ( + isinstance(self.retry_on_http_error, int) + and response.status_code == self.retry_on_http_error + ) ): return False return True @@ -209,39 +229,52 @@ class Network: @classmethod async def aclose_all(cls): global NETWORKS - await asyncio.gather(*[network.aclose() for network in NETWORKS.values()], return_exceptions=False) + await asyncio.gather( + *[network.aclose() for network in NETWORKS.values()], + return_exceptions=False + ) + + @classmethod + def close_all(cls): + future = 
asyncio.run_coroutine_threadsafe(Network.aclose_all(), get_loop()) + future.result() def get_network(name=None): global NETWORKS - return NETWORKS.get(name or DEFAULT_NAME) + if name: + return NETWORKS.get(name) + if DEFAULT_NAME not in NETWORKS: + NETWORKS[DEFAULT_NAME] = Network({}) + return NETWORKS[DEFAULT_NAME] def initialize(settings_engines=None, settings_outgoing=None): # pylint: disable=import-outside-toplevel) from searx.engines import engines from searx import settings + # pylint: enable=import-outside-toplevel) global NETWORKS - settings_engines = settings_engines or settings['engines'] - settings_outgoing = settings_outgoing or settings['outgoing'] + settings_engines = settings_engines or settings["engines"] + settings_outgoing = settings_outgoing or settings["outgoing"] # default parameters for AsyncHTTPTransport # see https://github.com/encode/httpx/blob/e05a5372eb6172287458b37447c30f650047e1b8/httpx/_transports/default.py#L108-L121 # pylint: disable=line-too-long default_params = { - 'enable_http': False, - 'verify': True, - 'enable_http2': settings_outgoing['enable_http2'], - 'max_connections': settings_outgoing['pool_connections'], - 'max_keepalive_connections': settings_outgoing['pool_maxsize'], - 'keepalive_expiry': settings_outgoing['keepalive_expiry'], - 'local_addresses': settings_outgoing['source_ips'], - 'proxies': settings_outgoing['proxies'], - 'max_redirects': settings_outgoing['max_redirects'], - 'retries': settings_outgoing['retries'], - 'retry_on_http_error': None, + "enable_http": False, + "verify": True, + "enable_http2": settings_outgoing["enable_http2"], + "max_connections": settings_outgoing["pool_connections"], + "max_keepalive_connections": settings_outgoing["pool_maxsize"], + "keepalive_expiry": settings_outgoing["keepalive_expiry"], + "local_addresses": settings_outgoing["source_ips"], + "proxies": settings_outgoing["proxies"], + "max_redirects": settings_outgoing["max_redirects"], + "retries": settings_outgoing["retries"], + 
"retry_on_http_error": None, } def new_network(params): @@ -254,22 +287,22 @@ def initialize(settings_engines=None, settings_outgoing=None): def iter_networks(): nonlocal settings_engines for engine_spec in settings_engines: - engine_name = engine_spec['name'] + engine_name = engine_spec["name"] engine = engines.get(engine_name) if engine is None: continue - network = getattr(engine, 'network', None) + network = getattr(engine, "network", None) yield engine_name, engine, network if NETWORKS: - done() + Network.close_all() NETWORKS.clear() NETWORKS[DEFAULT_NAME] = new_network({}) - NETWORKS['ipv4'] = new_network({'local_addresses': '0.0.0.0'}) - NETWORKS['ipv6'] = new_network({'local_addresses': '::'}) + NETWORKS["ipv4"] = new_network({"local_addresses": "0.0.0.0"}) + NETWORKS["ipv6"] = new_network({"local_addresses": "::"}) # define networks from outgoing.networks - for network_name, network in settings_outgoing['networks'].items(): + for network_name, network in settings_outgoing["networks"].items(): NETWORKS[network_name] = new_network(network) # define networks from engines.[i].network (except references) @@ -289,29 +322,3 @@ def initialize(settings_engines=None, settings_outgoing=None): for engine_name, engine, network in iter_networks(): if isinstance(network, str): NETWORKS[engine_name] = NETWORKS[network] - - -@atexit.register -def done(): - """Close all HTTP client - - Avoid a warning at exit - see https://github.com/encode/httpx/blob/1a6e254f72d9fd5694a1c10a28927e193ab4f76b/httpx/_client.py#L1785 - - Note: since Network.aclose has to be async, it is not possible to call this method on Network.__del__ - So Network.aclose is called here using atexit.register - """ - global NETWORKS - try: - loop = get_loop() - if loop: - future = asyncio.run_coroutine_threadsafe(Network.aclose_all(), loop) - # wait 3 seconds to close the HTTP clients - future.result(3) - except: - logger.exception('Exception while closing clients') - finally: - NETWORKS.clear() - - 
-NETWORKS[DEFAULT_NAME] = Network() diff --git a/searx/plugins/__init__.py b/searx/plugins/__init__.py index 45b210662..6f5ea04a4 100644 --- a/searx/plugins/__init__.py +++ b/searx/plugins/__init__.py @@ -21,6 +21,8 @@ from os import listdir, makedirs, remove, stat, utime from os.path import abspath, basename, dirname, exists, join from shutil import copyfile +import babel.support + from searx import logger, settings @@ -63,9 +65,19 @@ class PluginStore(): plugins = load_external_plugins(plugins) for plugin in plugins: for plugin_attr, plugin_attr_type in required_attrs: - if not hasattr(plugin, plugin_attr) or not isinstance(getattr(plugin, plugin_attr), plugin_attr_type): + if not hasattr(plugin, plugin_attr): logger.critical('missing attribute "{0}", cannot load plugin: {1}'.format(plugin_attr, plugin)) exit(3) + attr = getattr(plugin, plugin_attr) + if isinstance(attr, babel.support.LazyProxy): + attr = attr.value + if not isinstance(attr, plugin_attr_type): + type_attr = str(type(attr)) + logger.critical( + 'attribute "{0}" is of type {2}, must be {3}, cannot load plugin: {1}' + .format(plugin_attr, plugin, type_attr, plugin_attr_type) + ) + exit(3) for plugin_attr, plugin_attr_type in optional_attrs: if not hasattr(plugin, plugin_attr) or not isinstance(getattr(plugin, plugin_attr), plugin_attr_type): setattr(plugin, plugin_attr, plugin_attr_type()) diff --git a/searx/plugins/hash_plugin.py b/searx/plugins/hash_plugin.py index edb91dd8e..43a1094e6 100644 --- a/searx/plugins/hash_plugin.py +++ b/searx/plugins/hash_plugin.py @@ -16,7 +16,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. 
(C) 2018, 2020 by Vaclav Zouzalik ''' -from flask_babel import gettext +from searx.i18n import gettext import hashlib import re diff --git a/searx/plugins/infinite_scroll.py b/searx/plugins/infinite_scroll.py index e3726671a..f498a19a0 100644 --- a/searx/plugins/infinite_scroll.py +++ b/searx/plugins/infinite_scroll.py @@ -1,4 +1,4 @@ -from flask_babel import gettext +from searx.i18n import gettext name = gettext('Infinite scroll') description = gettext('Automatically load next page when scrolling to bottom of current page') diff --git a/searx/plugins/oa_doi_rewrite.py b/searx/plugins/oa_doi_rewrite.py index 02a712942..13da35c3e 100644 --- a/searx/plugins/oa_doi_rewrite.py +++ b/searx/plugins/oa_doi_rewrite.py @@ -1,8 +1,8 @@ from urllib.parse import urlparse, parse_qsl -from flask_babel import gettext import re -from searx import settings +from searx import settings +from searx.i18n import gettext regex = re.compile(r'10\.\d{4,9}/[^\s]+') diff --git a/searx/plugins/search_on_category_select.py b/searx/plugins/search_on_category_select.py index 2a38cac78..8588a6469 100644 --- a/searx/plugins/search_on_category_select.py +++ b/searx/plugins/search_on_category_select.py @@ -14,7 +14,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. (C) 2015 by Adam Tauber, ''' -from flask_babel import gettext +from searx.i18n import gettext name = gettext('Search on category select') description = gettext('Perform search immediately if a category selected. ' 'Disable to select multiple categories. (JavaScript required)') diff --git a/searx/plugins/self_info.py b/searx/plugins/self_info.py index 053899483..478a024a4 100644 --- a/searx/plugins/self_info.py +++ b/searx/plugins/self_info.py @@ -14,7 +14,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. 
(C) 2015 by Adam Tauber, ''' -from flask_babel import gettext +from searx.i18n import gettext import re name = gettext('Self Informations') description = gettext('Displays your IP if the query is "ip" and your user agent if the query contains "user agent".') diff --git a/searx/plugins/tracker_url_remover.py b/searx/plugins/tracker_url_remover.py index 98ddddbcd..811d66157 100644 --- a/searx/plugins/tracker_url_remover.py +++ b/searx/plugins/tracker_url_remover.py @@ -14,11 +14,11 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. (C) 2015 by Adam Tauber, ''' - -from flask_babel import gettext import re from urllib.parse import urlunparse, parse_qsl, urlencode +from searx.i18n import gettext + regexes = {re.compile(r'utm_[^&]+'), re.compile(r'(wkey|wemail)[^&]*'), re.compile(r'(_hsenc|_hsmi|hsCtaTracking|__hssc|__hstc|__hsfp)[^&]*'), diff --git a/searx/plugins/vim_hotkeys.py b/searx/plugins/vim_hotkeys.py index 47b830c79..23360715f 100644 --- a/searx/plugins/vim_hotkeys.py +++ b/searx/plugins/vim_hotkeys.py @@ -1,4 +1,4 @@ -from flask_babel import gettext +from searx.i18n import gettext name = gettext('Vim-like hotkeys') description = gettext('Navigate search results with Vim-like hotkeys ' diff --git a/searx/preferences.py b/searx/preferences.py index 69832c052..29fb4b274 100644 --- a/searx/preferences.py +++ b/searx/preferences.py @@ -437,7 +437,7 @@ class Preferences: return urlsafe_b64encode(compress(urlencode(settings_kv).encode())).decode() def parse_encoded_data(self, input_data): - """parse (base64) preferences from request (``flask.request.form['preferences']``)""" + """parse (base64) preferences from request (``context.form['preferences']``)""" bin_data = decompress(urlsafe_b64decode(input_data)) dict_data = {} for x, y in parse_qs(bin_data.decode('ascii')).items(): @@ -445,7 +445,7 @@ class Preferences: self.parse_dict(dict_data) def parse_dict(self, input_data): - """parse preferences from request (``flask.request.form``)""" + """parse 
preferences from request (``context.form``)""" for user_setting_name, user_setting in input_data.items(): if user_setting_name in self.key_value_settings: if self.key_value_settings[user_setting_name].locked: @@ -468,7 +468,7 @@ class Preferences: self.unknown_params[user_setting_name] = user_setting def parse_form(self, input_data): - """Parse formular (````) data from a ``flask.request.form``""" + """Parse formular (````) data from a ``context.form``""" disabled_engines = [] enabled_categories = [] disabled_plugins = [] diff --git a/searx/run.py b/searx/run.py index 55d267cb9..36bcfd60d 100644 --- a/searx/run.py +++ b/searx/run.py @@ -11,6 +11,7 @@ class CustomUvicornWorker(uvicorn.workers.UvicornWorker): class StandaloneApplication(gunicorn.app.base.BaseApplication): + # pylint: disable=abstract-method def __init__(self, app, options=None): self.options = options or {} @@ -18,8 +19,11 @@ class StandaloneApplication(gunicorn.app.base.BaseApplication): super().__init__() def load_config(self): - config = {key: value for key, value in self.options.items() - if key in self.cfg.settings and value is not None} + config = { + key: value + for key, value in self.options.items() + if key in self.cfg.settings and value is not None + } for key, value in config.items(): self.cfg.set(key.lower(), value) @@ -28,44 +32,46 @@ class StandaloneApplication(gunicorn.app.base.BaseApplication): def number_of_workers(): - return 1 # (multiprocessing.cpu_count() * 2) + 1 + return multiprocessing.cpu_count() + 1 def run_production(app): config_kwargs = { "loop": "uvloop", "http": "httptools", + "proxy_headers": True, } - base_url = settings['server']['base_url'] or None + base_url = settings["server"]["base_url"] or None if base_url: # ? 
config_kwargs['proxy_headers'] = True - config_kwargs['root_path'] = settings['server']['base_url'] + config_kwargs["root_path"] = settings["server"]["base_url"] CustomUvicornWorker.CONFIG_KWARGS.update(config_kwargs) options = { - 'proc_name': 'searxng', - 'bind': '%s:%s' % (settings['server']['bind_address'], settings['server']['port']), - 'workers': number_of_workers(), - 'worker_class': 'searx.run.CustomUvicornWorker', - 'loglevel': 'debug', - 'capture_output': True, + "proc_name": "searxng", + "bind": "%s:%s" + % (settings["server"]["bind_address"], settings["server"]["port"]), + "workers": number_of_workers(), + "worker_class": "searx.run.CustomUvicornWorker", + "loglevel": "debug", + "capture_output": True, } StandaloneApplication(app, options).run() def run_debug(): kwargs = { - 'reload': True, - 'loop': 'auto', - 'http': 'auto', - 'ws': 'none', - 'host': settings['server']['bind_address'], - 'port': settings['server']['port'], + "reload": True, + "loop": "auto", + "http": "auto", + "ws": "none", + "host": settings["server"]["bind_address"], + "port": settings["server"]["port"], + "proxy_headers": True, } - base_url = settings['server']['base_url'] + base_url = settings["server"]["base_url"] if base_url: - kwargs['proxy_headers'] = True - kwargs['root_path'] = settings['server']['base_url'] + kwargs["root_path"] = settings["server"]["base_url"] - uvicorn.run('searx.__main__:app', **kwargs) + uvicorn.run("searx.webapp:app", **kwargs) diff --git a/searx/search/__init__.py b/searx/search/__init__.py index 041a54c4b..ee5a02aed 100644 --- a/searx/search/__init__.py +++ b/searx/search/__init__.py @@ -3,9 +3,9 @@ # pylint: disable=missing-module-docstring, missing-function-docstring import typing -import threading -from timeit import default_timer +import asyncio from uuid import uuid4 +from timeit import default_timer from searx import settings from searx.answerers import ask @@ -19,6 +19,7 @@ from searx.network import initialize as initialize_network from 
searx.metrics import initialize as initialize_metrics, counter_inc, histogram_observe_time from searx.search.processors import PROCESSORS, initialize as initialize_processors from searx.search.checker import initialize as initialize_checker +from searx.search.threadnopoolexecutor import ThreadNoPoolExecutor logger = logger.getChild('search') @@ -126,30 +127,33 @@ class Search: return requests, actual_timeout - def search_multiple_requests(self, requests): + async def search_multiple_requests(self, requests): # pylint: disable=protected-access - search_id = uuid4().__str__() + futures = [] + loop = asyncio.get_running_loop() + executor = ThreadNoPoolExecutor(thread_name_prefix=str(uuid4())) for engine_name, query, request_params in requests: - th = threading.Thread( # pylint: disable=invalid-name - target=PROCESSORS[engine_name].search, - args=(query, request_params, self.result_container, self.start_time, self.actual_timeout), - name=search_id, + future = loop.run_in_executor( + executor, + PROCESSORS[engine_name].search, + query, + request_params, + self.result_container, + self.start_time, + self.actual_timeout, ) - th._timeout = False - th._engine_name = engine_name - th.start() + future._engine_name = engine_name + futures.append(future) - for th in threading.enumerate(): # pylint: disable=invalid-name - if th.name == search_id: - remaining_time = max(0.0, self.actual_timeout - (default_timer() - self.start_time)) - th.join(remaining_time) - if th.is_alive(): - th._timeout = True - self.result_container.add_unresponsive_engine(th._engine_name, 'timeout') - logger.warning('engine timeout: {0}'.format(th._engine_name)) + remaining_time = max(0.0, self.actual_timeout - (default_timer() - self.start_time)) + _, pending = await asyncio.wait(futures, return_when=asyncio.ALL_COMPLETED, timeout=remaining_time) + for future in pending: + # th._timeout = True + self.result_container.add_unresponsive_engine(future._engine_name, 'timeout') + logger.warning('engine timeout: 
{0}'.format(future._engine_name)) - def search_standard(self): + async def search_standard(self): """ Update self.result_container, self.actual_timeout """ @@ -157,17 +161,17 @@ class Search: # send all search-request if requests: - self.search_multiple_requests(requests) + await self.search_multiple_requests(requests) # return results, suggestions, answers and infoboxes return True # do search-request - def search(self): + async def search(self): self.start_time = default_timer() if not self.search_external_bang(): if not self.search_answerers(): - self.search_standard() + await self.search_standard() return self.result_container @@ -181,9 +185,9 @@ class SearchWithPlugins(Search): self.ordered_plugin_list = ordered_plugin_list self.request = request - def search(self): + async def search(self): if plugins.call(self.ordered_plugin_list, 'pre_search', self.request, self): - super().search() + await super().search() plugins.call(self.ordered_plugin_list, 'post_search', self.request, self) diff --git a/searx/search/threadnopoolexecutor.py b/searx/search/threadnopoolexecutor.py new file mode 100644 index 000000000..1f22e08b6 --- /dev/null +++ b/searx/search/threadnopoolexecutor.py @@ -0,0 +1,43 @@ +import threading +from concurrent.futures import ThreadPoolExecutor +from concurrent.futures._base import Future + + +class ThreadNoPoolExecutor(ThreadPoolExecutor): + + def __init__(self, max_workers=None, thread_name_prefix='', + initializer=None, initargs=()): + # pylint: disable=super-init-not-called + if max_workers: + raise NotImplementedError('max_workers not supported') + if initializer: + raise NotImplementedError('initializer not supported') + if initargs: + raise NotImplementedError('initargs not supported') + self.thread_name_prefix = thread_name_prefix + + def submit(self, fn, *args, **kwargs): # pylint: disable=arguments-differ + f = Future() + + def worker(): + if not f.set_running_or_notify_cancel(): + return + try: + result = fn(*args, **kwargs) + except 
BaseException as exc: + f.set_exception(exc) + else: + f.set_result(result) + + t = threading.Thread( + target=worker, + name=self.thread_name_prefix + '_engine', + daemon=True + ) + t.start() + return f + # submit.__text_signature__ = ThreadPoolExecutor.submit.__text_signature__ + # submit.__doc__ = ThreadPoolExecutor.submit.__doc__ + + def shutdown(self, wait=True): + pass diff --git a/searx/templates.py b/searx/templates.py new file mode 100644 index 000000000..8233e63f5 --- /dev/null +++ b/searx/templates.py @@ -0,0 +1,192 @@ +import os +from typing import Optional +from urllib.parse import parse_qs, urlencode, urlsplit + + +import jinja2 +import babel.support + +from pygments import highlight +from pygments.lexers import get_lexer_by_name +from pygments.formatters import HtmlFormatter # pylint: disable=no-name-in-module + +from starlette.requests import Request +from starlette.templating import Jinja2Templates +from starlette_context import context +from starlette.routing import NoMatchFound +from starlette_i18n import i18n + +from searx import logger, settings +from searx.webutils import ( + get_static_files, + get_result_templates, + get_themes, +) + + +# about static +logger.debug('static directory is %s', settings['ui']['static_path']) +static_files = get_static_files(settings['ui']['static_path']) + +# about templates +logger.debug('templates directory is %s', settings['ui']['templates_path']) +default_theme = settings['ui']['default_theme'] +templates_path = settings['ui']['templates_path'] +themes = get_themes(templates_path) +result_templates = get_result_templates(templates_path) +global_favicons = [] +for indice, theme in enumerate(themes): + global_favicons.append([]) + theme_img_path = os.path.join(settings['ui']['static_path'], 'themes', theme, 'img', 'icons') + for (dirpath, dirnames, filenames) in os.walk(theme_img_path): + global_favicons[indice].extend(filenames) + + +def get_current_theme_name(request: Request, override: Optional[str] =None) 
-> str: + """Returns theme name. + + Checks in this order: + 1. override + 2. cookies + 3. settings""" + + if override and (override in themes or override == '__common__'): + return override + theme_name = request.query_params.get('theme', context.preferences.get_value('theme')) # pylint: disable=no-member + if theme_name not in themes: + theme_name = default_theme + return theme_name + + +def get_result_template(theme_name: str, template_name: str) -> str: + themed_path = theme_name + '/result_templates/' + template_name + if themed_path in result_templates: + return themed_path + return 'result_templates/' + template_name + + +# code-highlighter +def code_highlighter(codelines, language=None): + if not language: + language = 'text' + + try: + # find lexer by programing language + lexer = get_lexer_by_name(language, stripall=True) + + except Exception as e: # pylint: disable=broad-except + logger.exception(e, exc_info=True) + # if lexer is not found, using default one + lexer = get_lexer_by_name('text', stripall=True) + + html_code = '' + tmp_code = '' + last_line = None + + # parse lines + for line, code in codelines: + if not last_line: + line_code_start = line + + # new codeblock is detected + if last_line is not None and\ + last_line + 1 != line: + + # highlight last codepart + formatter = HtmlFormatter( + linenos='inline', linenostart=line_code_start, cssclass="code-highlight" + ) + html_code = html_code + highlight(tmp_code, lexer, formatter) + + # reset conditions for next codepart + tmp_code = '' + line_code_start = line + + # add codepart + tmp_code += code + '\n' + + # update line + last_line = line + + # highlight last codepart + formatter = HtmlFormatter(linenos='inline', linenostart=line_code_start, cssclass="code-highlight") + html_code = html_code + highlight(tmp_code, lexer, formatter) + + return html_code + + +class I18NTemplates(Jinja2Templates): + """Custom Jinja2Templates with i18n support + """ + + @staticmethod + def url_for_theme(endpoint: 
str, override_theme=None, **values): + request = context.request # pylint: disable=no-member + + # starlette migration + if '_external' in values: + del values['_external'] + if 'filename' in values: + values['path'] = values['filename'] + del values['filename'] + + # + if endpoint == 'static' and values.get('path'): + theme_name = get_current_theme_name(request, override=override_theme) + filename_with_theme = "themes/{}/{}".format(theme_name, values['path']) + if filename_with_theme in static_files: + values['path'] = filename_with_theme + return request.url_for(endpoint, **values) + try: + url_for_args = {} + for k in ('path', 'filename'): + if k in values: + v = values.pop(k) + url_for_args[k] = v + url = request.url_for(endpoint, **url_for_args) + _url = urlsplit(url) + _query = parse_qs(_url.query) + _query.update(values) + querystr = urlencode(_query, doseq=True) + return _url._replace(query=querystr).geturl() + # if anchor is not None: + # rv += f"#{url_quote(anchor)}" + except NoMatchFound as e: + error_message = "url_for, endpoint='%s' not found (values=%s)" % (endpoint, str(values)) + logger.error(error_message) + context.errors.append(error_message) # pylint: disable=no-member + raise e + + @staticmethod + def ugettext(message): + translations = i18n.get_locale().translations + if isinstance(message, babel.support.LazyProxy): + message = message.value + return translations.ugettext(message) + + @staticmethod + def ungettext(*args): + translations = i18n.get_locale().translations + return translations.ungettext(*args) + + def _create_env(self, directory: str) -> "jinja2.Environment": + loader = jinja2.FileSystemLoader(directory) + env = jinja2.Environment( + loader=loader, + autoescape=True, + trim_blocks=True, + lstrip_blocks=True, + auto_reload=False, + extensions=[ + 'jinja2.ext.loopcontrols', + 'jinja2.ext.i18n' + ], + ) + env.filters["code_highlighter"] = code_highlighter + env.globals["url_for"] = I18NTemplates.url_for_theme + 
env.install_gettext_callables( # pylint: disable=no-member + I18NTemplates.ugettext, + I18NTemplates.ungettext, + newstyle=True + ) + return env diff --git a/searx/testing.py b/searx/testing.py index b31ba8997..357e6f071 100644 --- a/searx/testing.py +++ b/searx/testing.py @@ -47,13 +47,7 @@ class SearxRobotLayer(): webapp = join(abspath(dirname(realpath(__file__))), 'webapp.py') exe = 'python' - # The Flask app is started by Flask.run(...), don't enable Flask's debug - # mode, the debugger from Flask will cause wired process model, where - # the server never dies. Further read: - # - # - debug mode: https://flask.palletsprojects.com/quickstart/#debug-mode - # - Flask.run(..): https://flask.palletsprojects.com/api/#flask.Flask.run - + # Disable debug mode os.environ['SEARX_DEBUG'] = '0' # set robot settings path diff --git a/searx/webapp.py b/searx/webapp.py index 0bf1a37f5..0c0078298 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -10,50 +10,53 @@ import hmac import json import os import sys +import asyncio +from typing import Optional, List +from functools import partial from datetime import datetime, timedelta from timeit import default_timer from html import escape from io import StringIO - -import urllib from urllib.parse import urlencode -import httpx +import aiohttp -from pygments import highlight -from pygments.lexers import get_lexer_by_name -from pygments.formatters import HtmlFormatter # pylint: disable=no-name-in-module - -import flask - -from flask import ( - Flask, - request, - render_template, - url_for, +from starlette.applications import Starlette +from starlette.requests import Request +from starlette.responses import ( + FileResponse, + JSONResponse, + PlainTextResponse, + RedirectResponse, Response, - make_response, - redirect, - send_from_directory, + StreamingResponse ) -from flask.ctx import has_request_context -from flask.json import jsonify +from starlette.routing import Mount +from starlette.staticfiles import StaticFiles +from 
starlette_context import context +from starlette_context.middleware import RawContextMiddleware +from starlette_i18n import i18n -from babel.support import Translations -import flask_babel -from flask_babel import ( - Babel, - gettext, - format_date, - format_decimal, -) - -from searx import logger -from searx import get_setting from searx import ( + logger, + get_setting, settings, searx_debug, + searx_dir, +) +from searx.i18n import ( + initialize_i18n, + gettext, + format_date, + format_decimal +) +from searx.templates import ( + I18NTemplates, + get_current_theme_name, + get_result_template, + global_favicons, + themes ) from searx.settings_defaults import OUTPUT_FORMATS from searx.exceptions import SearxParameterException @@ -65,12 +68,8 @@ from searx.engines import ( from searx.webutils import ( UnicodeWriter, highlight_content, - get_static_files, - get_result_templates, - get_themes, prettify_url, new_hmac, - is_flask_run_cmdline, ) from searx.webadapter import ( get_search_query_from_webapp, @@ -91,8 +90,7 @@ from searx.preferences import ( ValidationException, LANGUAGE_CODES, ) -from searx.answerers import answerers -from searx.answerers import ask +from searx.answerers import answerers, ask from searx.metrics import ( get_engines_stats, get_engine_errors, @@ -100,83 +98,20 @@ from searx.metrics import ( histogram, counter, ) -from searx.flaskfix import patch_application + +import searx.network.client +import searx.network.network # renaming names from searx imports ... 
- from searx.autocomplete import search_autocomplete, backends as autocomplete_backends from searx.languages import language_codes as languages from searx.locales import LOCALE_NAMES, UI_LOCALE_CODES, RTL_LOCALES from searx.search import SearchWithPlugins, initialize as search_initialize -from searx.network import stream as http_stream from searx.search.checker import get_result as checker_get_result -from searx.settings_loader import get_default_settings_path + logger = logger.getChild('webapp') -# check secret_key -if not searx_debug and settings['server']['secret_key'] == 'ultrasecretkey': - logger.error('server.secret_key is not changed. Please use something else instead of ultrasecretkey.') - sys.exit(1) - -# about static -logger.debug('static directory is %s', settings['ui']['static_path']) -static_files = get_static_files(settings['ui']['static_path']) - -# about templates -logger.debug('templates directory is %s', settings['ui']['templates_path']) -default_theme = settings['ui']['default_theme'] -templates_path = settings['ui']['templates_path'] -themes = get_themes(templates_path) -result_templates = get_result_templates(templates_path) -global_favicons = [] -for indice, theme in enumerate(themes): - global_favicons.append([]) - theme_img_path = os.path.join(settings['ui']['static_path'], 'themes', theme, 'img', 'icons') - for (dirpath, dirnames, filenames) in os.walk(theme_img_path): - global_favicons[indice].extend(filenames) - -STATS_SORT_PARAMETERS = { - 'name': (False, 'name', ''), - 'score': (True, 'score', 0), - 'result_count': (True, 'result_count', 0), - 'time': (False, 'total', 0), - 'reliability': (False, 'reliability', 100), -} - -# Flask app -app = Flask( - __name__, - static_folder=settings['ui']['static_path'], - template_folder=templates_path -) - -app.jinja_env.trim_blocks = True -app.jinja_env.lstrip_blocks = True -app.jinja_env.add_extension('jinja2.ext.loopcontrols') # pylint: disable=no-member -app.secret_key = 
settings['server']['secret_key'] - -# see https://flask.palletsprojects.com/en/1.1.x/cli/ -# True if "FLASK_APP=searx/webapp.py FLASK_ENV=development flask run" -flask_run_development = ( - os.environ.get("FLASK_APP") is not None - and os.environ.get("FLASK_ENV") == 'development' - and is_flask_run_cmdline() -) - -# True if reload feature is activated of werkzeug, False otherwise (including uwsgi, etc..) -# __name__ != "__main__" if searx.webapp is imported (make test, make docs, uwsgi...) -# see run() at the end of this file : searx_debug activates the reload feature. -werkzeug_reloader = flask_run_development or (searx_debug and __name__ == "__main__") - -# initialize the engines except on the first run of the werkzeug server. -if (not werkzeug_reloader - or (werkzeug_reloader - and os.environ.get("WERKZEUG_RUN_MAIN") == "true") ): - search_initialize(enable_checker=True) - -babel = Babel(app) - # used when translating category names _category_names = ( gettext('files'), @@ -223,21 +158,79 @@ exception_classname_to_text = { 'lxml.etree.ParserError': parsing_error_text, } -_flask_babel_get_translations = flask_babel.get_translations +STATS_SORT_PARAMETERS = { + 'name': (False, 'name', ''), + 'score': (True, 'score', 0), + 'result_count': (True, 'result_count', 0), + 'time': (False, 'total', 0), + 'reliability': (False, 'reliability', 100), +} +AIOHTTP_SESSION: Optional[aiohttp.ClientSession] = None + +templates = I18NTemplates(directory=settings['ui']['templates_path']) + +routes = [ + Mount('/static', app=StaticFiles(directory=settings['ui']['static_path']), name="static"), +] + +def on_startup(): + global AIOHTTP_SESSION # pylint: disable=global-statement + # check secret_key + if not searx_debug and settings['server']['secret_key'] == 'ultrasecretkey': + logger.error('server.secret_key is not changed. 
Please use something else instead of ultrasecretkey.') + sys.exit(1) + searx.network.client.set_loop(asyncio.get_event_loop()) + initialize_i18n(os.path.join(searx_dir, 'translations')) + search_initialize(enable_checker=True) + # + AIOHTTP_SESSION = aiohttp.ClientSession(auto_decompress=False) -# monkey patch for flask_babel.get_translations -def _get_translations(): - if has_request_context() and request.form.get('use-translation') == 'oc': - babel_ext = flask_babel.current_app.extensions['babel'] - return Translations.load(next(babel_ext.translation_directories), 'oc') - return _flask_babel_get_translations() +async def on_shutdown(): + await searx.network.network.Network.aclose_all() -flask_babel.get_translations = _get_translations +app = Starlette(routes=routes, debug=searx_debug, on_startup=[on_startup], on_shutdown=[on_shutdown]) -def _get_browser_or_settings_language(req, lang_list): +@app.middleware("http") +async def pre_post_request(request: Request, call_next): + # pre-request + context.clear() + context.request = request + context.start_time = default_timer() + context.render_time = 0 + context.timings = [] + i18n.set_locale('en') + # call endpoint + response = await call_next(request) + # set default http headers + for header, value in settings['server']['default_http_headers'].items(): + if header not in response.headers: + response.headers[header] = value + # set timing Server-Timing header + total_time = default_timer() - context.start_time + timings_all = ['total;dur=' + str(round(total_time * 1000, 3)), + 'render;dur=' + str(round(context.render_time * 1000, 3))] + if len(context.timings) > 0: + timings = sorted(context.timings, key=lambda v: v['total']) + timings_total = [ + 'total_' + str(i) + '_' + v['engine'] + ';dur=' + str(round(v['total'] * 1000, 3)) + for i, v in enumerate(timings) + ] + timings_load = [ + 'load_' + str(i) + '_' + v['engine'] + ';dur=' + str(round(v['load'] * 1000, 3)) + for i, v in enumerate(timings) if v.get('load') + 
] + timings_all = timings_all + timings_total + timings_load + response.headers['Server-Timing'] = ', '.join(timings_all) + return response + + +app.add_middleware(RawContextMiddleware) + + +def _get_browser_or_settings_language(req : Request, lang_list: List[str]): for lang in req.headers.get("Accept-Language", "en").split(","): if ';' in lang: lang = lang.split(';')[0] @@ -250,121 +243,7 @@ def _get_browser_or_settings_language(req, lang_list): return settings['search']['default_lang'] or 'en' -@babel.localeselector -def get_locale(): - if 'locale' in request.form\ - and request.form['locale'] in LOCALE_NAMES: - # use locale from the form - locale = request.form['locale'] - locale_source = 'form' - elif request.preferences.get_value('locale') != '': - # use locale from the preferences - locale = request.preferences.get_value('locale') - locale_source = 'preferences' - else: - # use local from the browser - locale = _get_browser_or_settings_language(request, UI_LOCALE_CODES) - locale = locale.replace('-', '_') - locale_source = 'browser' - - # see _get_translations function - # and https://github.com/searx/searx/pull/1863 - if locale == 'oc': - request.form['use-translation'] = 'oc' - locale = 'fr_FR' - - logger.debug( - "%s uses locale `%s` from %s", urllib.parse.quote(request.url), locale, locale_source - ) - - return locale - - -# code-highlighter -@app.template_filter('code_highlighter') -def code_highlighter(codelines, language=None): - if not language: - language = 'text' - - try: - # find lexer by programing language - lexer = get_lexer_by_name(language, stripall=True) - - except Exception as e: # pylint: disable=broad-except - logger.exception(e, exc_info=True) - # if lexer is not found, using default one - lexer = get_lexer_by_name('text', stripall=True) - - html_code = '' - tmp_code = '' - last_line = None - - # parse lines - for line, code in codelines: - if not last_line: - line_code_start = line - - # new codeblock is detected - if last_line is not 
None and\ - last_line + 1 != line: - - # highlight last codepart - formatter = HtmlFormatter( - linenos='inline', linenostart=line_code_start, cssclass="code-highlight" - ) - html_code = html_code + highlight(tmp_code, lexer, formatter) - - # reset conditions for next codepart - tmp_code = '' - line_code_start = line - - # add codepart - tmp_code += code + '\n' - - # update line - last_line = line - - # highlight last codepart - formatter = HtmlFormatter(linenos='inline', linenostart=line_code_start, cssclass="code-highlight") - html_code = html_code + highlight(tmp_code, lexer, formatter) - - return html_code - - -def get_current_theme_name(override=None): - """Returns theme name. - - Checks in this order: - 1. override - 2. cookies - 3. settings""" - - if override and (override in themes or override == '__common__'): - return override - theme_name = request.args.get('theme', request.preferences.get_value('theme')) - if theme_name not in themes: - theme_name = default_theme - return theme_name - - -def get_result_template(theme_name, template_name): - themed_path = theme_name + '/result_templates/' + template_name - if themed_path in result_templates: - return themed_path - return 'result_templates/' + template_name - - -def url_for_theme(endpoint, override_theme=None, **values): - if endpoint == 'static' and values.get('filename'): - theme_name = get_current_theme_name(override=override_theme) - filename_with_theme = "themes/{}/{}".format(theme_name, values['filename']) - if filename_with_theme in static_files: - values['filename'] = filename_with_theme - url = url_for(endpoint, **values) - return url - - -def proxify(url): +def proxify(url: str) -> str: if url.startswith('//'): url = 'https:' + url @@ -386,12 +265,12 @@ def proxify(url): ) -def image_proxify(url): +def image_proxify(request: Request, url: str): if url.startswith('//'): url = 'https:' + url - if not request.preferences.get_value('image_proxy'): + if not 
context.preferences.get_value('image_proxy'): return url if url.startswith('data:image/'): @@ -408,14 +287,14 @@ def image_proxify(url): h = new_hmac(settings['server']['secret_key'], url.encode()) - return '{0}?{1}'.format(url_for('image_proxy'), + return '{0}?{1}'.format(request.url_for('image_proxy'), urlencode(dict(url=url.encode(), h=h))) def get_translations(): return { # when there is autocompletion - 'no_item_found': gettext('No item found') + 'no_item_found': str(gettext('No item found')) } @@ -426,7 +305,7 @@ def _get_ordered_categories(): def _get_enable_categories(all_categories): - disabled_engines = request.preferences.engines.get_disabled() + disabled_engines = context.preferences.engines.get_disabled() enabled_categories = set( # pylint: disable=consider-using-dict-items category for engine_name in engines @@ -436,20 +315,27 @@ def _get_enable_categories(all_categories): return [x for x in all_categories if x in enabled_categories] -def render(template_name, override_theme=None, **kwargs): +def render(request: Request, + template_name: str, + override_theme: bool = None, + status_code: int = 200, + headers: dict = None, + media_type: str = None, + **kwargs) -> Response: # values from the HTTP requests - kwargs['endpoint'] = 'results' if 'q' in kwargs else request.endpoint + kwargs['request'] = request + kwargs['endpoint'] = 'results' if 'q' in kwargs else request.scope['path'] kwargs['cookies'] = request.cookies - kwargs['errors'] = request.errors + kwargs['errors'] = context.errors # values from the preferences - kwargs['preferences'] = request.preferences - kwargs['method'] = request.preferences.get_value('method') - kwargs['autocomplete'] = request.preferences.get_value('autocomplete') - kwargs['results_on_new_tab'] = request.preferences.get_value('results_on_new_tab') - kwargs['advanced_search'] = request.preferences.get_value('advanced_search') - kwargs['safesearch'] = str(request.preferences.get_value('safesearch')) - kwargs['theme'] = 
get_current_theme_name(override=override_theme) + kwargs['preferences'] = context.preferences + kwargs['method'] = context.preferences.get_value('method') + kwargs['autocomplete'] = context.preferences.get_value('autocomplete') + kwargs['results_on_new_tab'] = context.preferences.get_value('results_on_new_tab') + kwargs['advanced_search'] = context.preferences.get_value('advanced_search') + kwargs['safesearch'] = str(context.preferences.get_value('safesearch')) + kwargs['theme'] = get_current_theme_name(request, override=override_theme) kwargs['all_categories'] = _get_ordered_categories() kwargs['categories'] = _get_enable_categories(kwargs['all_categories']) @@ -457,184 +343,159 @@ def render(template_name, override_theme=None, **kwargs): kwargs['language_codes'] = languages # from searx.languages kwargs['translations'] = json.dumps(get_translations(), separators=(',', ':')) - locale = request.preferences.get_value('locale') + locale = context.preferences.get_value('locale') if locale in RTL_LOCALES and 'rtl' not in kwargs: kwargs['rtl'] = True if 'current_language' not in kwargs: kwargs['current_language'] = match_language( - request.preferences.get_value('language'), LANGUAGE_CODES ) + context.preferences.get_value('language'), LANGUAGE_CODES ) # values from settings kwargs['search_formats'] = [ x for x in settings['search']['formats'] if x != 'html' ] - kwargs['instance_name'] = get_setting('general.instance_name') + kwargs['instance_name'] = settings['general']['instance_name'] kwargs['searx_version'] = VERSION_STRING kwargs['searx_git_url'] = GIT_URL kwargs['get_setting'] = get_setting - # helpers to create links to other pages - kwargs['url_for'] = url_for_theme # override url_for function in templates - kwargs['image_proxify'] = image_proxify + # helpers to create links to other pages + kwargs['image_proxify'] = partial(image_proxify, request) kwargs['proxify'] = proxify if settings.get('result_proxy', {}).get('url') else None kwargs['proxify_results'] = 
settings.get('result_proxy', {}).get('proxify_results', True) kwargs['get_result_template'] = get_result_template kwargs['opensearch_url'] = ( - url_for('opensearch') + request.url_for('opensearch') + '?' + urlencode({'method': kwargs['method'], 'autocomplete': kwargs['autocomplete']}) ) # scripts from plugins kwargs['scripts'] = set() - for plugin in request.user_plugins: + for plugin in context.user_plugins: for script in plugin.js_dependencies: kwargs['scripts'].add(script) # styles from plugins kwargs['styles'] = set() - for plugin in request.user_plugins: + for plugin in context.user_plugins: for css in plugin.css_dependencies: kwargs['styles'].add(css) start_time = default_timer() - result = render_template( - '{}/{}'.format(kwargs['theme'], template_name), **kwargs) - request.render_time += default_timer() - start_time # pylint: disable=assigning-non-slot + result = templates.TemplateResponse( + '{}/{}'.format(kwargs['theme'], template_name), + kwargs, + status_code=status_code, + headers=headers, + media_type=media_type + ) + context.render_time += default_timer() - start_time # pylint: disable=assigning-non-slot return result -@app.before_request -def pre_request(): - request.start_time = default_timer() # pylint: disable=assigning-non-slot - request.render_time = 0 # pylint: disable=assigning-non-slot - request.timings = [] # pylint: disable=assigning-non-slot - request.errors = [] # pylint: disable=assigning-non-slot +async def set_context(request: Request): + context.errors = [] # pylint: disable=assigning-non-slot preferences = Preferences(themes, list(categories.keys()), engines, plugins) # pylint: disable=redefined-outer-name user_agent = request.headers.get('User-Agent', '').lower() if 'webkit' in user_agent and 'android' in user_agent: preferences.key_value_settings['method'].value = 'GET' - request.preferences = preferences # pylint: disable=assigning-non-slot + context.preferences = preferences # pylint: disable=assigning-non-slot try: 
preferences.parse_dict(request.cookies) - except Exception as e: # pylint: disable=broad-except logger.exception(e, exc_info=True) - request.errors.append(gettext('Invalid settings, please edit your preferences')) + context.errors.append(gettext('Invalid settings, please edit your preferences')) # merge GET, POST vars - # request.form - request.form = dict(request.form.items()) # pylint: disable=assigning-non-slot - for k, v in request.args.items(): - if k not in request.form: - request.form[k] = v - - if request.form.get('preferences'): - preferences.parse_encoded_data(request.form['preferences']) + # context.form + context.form = dict(await request.form()) # pylint: disable=assigning-non-slot + for k, v in request.query_params.items(): + if k not in context.form: + context.form[k] = v + if context.form.get('preferences'): + preferences.parse_encoded_data(context.form['preferences']) else: try: - preferences.parse_dict(request.form) + preferences.parse_dict(context.form) except Exception as e: # pylint: disable=broad-except logger.exception(e, exc_info=True) - request.errors.append(gettext('Invalid settings')) + context.errors.append(gettext('Invalid settings')) - # init search language and locale + # set search language if not preferences.get_value("language"): preferences.parse_dict({"language": _get_browser_or_settings_language(request, LANGUAGE_CODES)}) - if not preferences.get_value("locale"): - preferences.parse_dict({"locale": get_locale()}) - # request.user_plugins - request.user_plugins = [] # pylint: disable=assigning-non-slot + # set UI locale + locale_source = 'preferences or query' + if not preferences.get_value("locale"): + locale = _get_browser_or_settings_language(request, UI_LOCALE_CODES) + locale = locale.replace('-', '_') + preferences.parse_dict({"locale": locale}) + locale_source = 'browser' + + logger.debug( + "%s uses locale `%s` from %s", + str(request.scope['path']), + preferences.get_value("locale"), + locale_source + ) + + # set 
starlette.i18n locale (get_text) + i18n.set_locale(code=preferences.get_value("locale")) + + # context.user_plugins + context.user_plugins = [] # pylint: disable=assigning-non-slot allowed_plugins = preferences.plugins.get_enabled() disabled_plugins = preferences.plugins.get_disabled() for plugin in plugins: if ((plugin.default_on and plugin.id not in disabled_plugins) or plugin.id in allowed_plugins): - request.user_plugins.append(plugin) + context.user_plugins.append(plugin) -@app.after_request -def add_default_headers(response): - # set default http headers - for header, value in settings['server']['default_http_headers'].items(): - if header in response.headers: - continue - response.headers[header] = value - return response - - -@app.after_request -def post_request(response): - total_time = default_timer() - request.start_time - timings_all = ['total;dur=' + str(round(total_time * 1000, 3)), - 'render;dur=' + str(round(request.render_time * 1000, 3))] - if len(request.timings) > 0: - timings = sorted(request.timings, key=lambda v: v['total']) - timings_total = [ - 'total_' + str(i) + '_' + v['engine'] + ';dur=' + str(round(v['total'] * 1000, 3)) - for i, v in enumerate(timings) - ] - timings_load = [ - 'load_' + str(i) + '_' + v['engine'] + ';dur=' + str(round(v['load'] * 1000, 3)) - for i, v in enumerate(timings) if v.get('load') - ] - timings_all = timings_all + timings_total + timings_load - response.headers.add('Server-Timing', ', '.join(timings_all)) - return response - - -def index_error(output_format, error_message): +def search_error(request, output_format, error_message): if output_format == 'json': - return Response( - json.dumps({'error': error_message}), - mimetype='application/json' - ) + return JSONResponse({'error': error_message}) if output_format == 'csv': - response = Response('', mimetype='application/csv') cont_disp = 'attachment;Filename=searx.csv' - response.headers.add('Content-Disposition', cont_disp) - return response - + return 
Response('', media_type='application/csv', headers= {'Content-Disposition': cont_disp}) if output_format == 'rss': response_rss = render( + request, 'opensearch_response_rss.xml', results=[], - q=request.form['q'] if 'q' in request.form else '', + media_type='text/xml', q=context.form['q'] if 'q' in context.form else '', number_of_results=0, error_message=error_message, override_theme='__common__', ) - return Response(response_rss, mimetype='text/xml') + return response_rss # html - request.errors.append(gettext('search error')) + context.errors.append(gettext('search error')) return render( + request, 'index.html', - selected_categories=get_selected_categories(request.preferences, request.form), + selected_categories=get_selected_categories(context.preferences, context.form), ) @app.route('/', methods=['GET', 'POST']) -def index(): - """Render index page.""" - - # redirect to search if there's a query in the request - if request.form.get('q'): - query = ('?' + request.query_string.decode()) if request.query_string else '' - return redirect(url_for('search') + query, 308) - +async def index(request: Request): + await set_context(request) return render( + request, 'index.html', - selected_categories=get_selected_categories(request.preferences, request.form), + selected_categories=get_selected_categories(context.preferences, context.form) ) @app.route('/search', methods=['GET', 'POST']) -def search(): +async def search(request: Request): """Search query in q and return results. Supported outputs: html, json, csv, rss.
@@ -642,22 +503,25 @@ def search(): # pylint: disable=too-many-locals, too-many-return-statements, too-many-branches # pylint: disable=too-many-statements + await set_context(request) + # output_format - output_format = request.form.get('format', 'html') + output_format = context.form.get('format', 'html') if output_format not in OUTPUT_FORMATS: output_format = 'html' if output_format not in settings['search']['formats']: - flask.abort(403) + return PlainTextResponse('', status_code=403) # check if there is query (not None and not an empty string) - if not request.form.get('q'): + if not context.form.get('q'): if output_format == 'html': return render( + request, 'index.html', - selected_categories=get_selected_categories(request.preferences, request.form), + selected_categories=get_selected_categories(context.preferences, context.form), ) - return index_error(output_format, 'No query'), 400 + resp = search_error(request, output_format, 'No query'); resp.status_code = 400; return resp # search search_query = None @@ -665,19 +529,19 @@ def search(): result_container = None try: search_query, raw_text_query, _, _ = get_search_query_from_webapp( - request.preferences, request.form + context.preferences, context.form ) # search = Search(search_query) # without plugins - search = SearchWithPlugins(search_query, request.user_plugins, request) # pylint: disable=redefined-outer-name + search = SearchWithPlugins(search_query, context.user_plugins, request) # pylint: disable=redefined-outer-name - result_container = search.search() + result_container = await search.search() except SearxParameterException as e: logger.exception('search error: SearxParameterException') - return index_error(output_format, e.message), 400 + resp = search_error(request, output_format, e.message); resp.status_code = 400; return resp except Exception as e: # pylint: disable=broad-except logger.exception(e, exc_info=True) - return index_error(output_format, gettext('search error')), 500 + resp = search_error(request, output_format, gettext('search error')); resp.status_code = 500; return resp
# results results = result_container.get_ordered_results() number_of_results = result_container.results_number() if number_of_results < result_container.results_length(): number_of_results = 0 # checkin for a external bang if result_container.redirect_url: - return redirect(result_container.redirect_url) + return RedirectResponse(result_container.redirect_url) # Server-Timing header - request.timings = result_container.get_timings() # pylint: disable=assigning-non-slot + context.timings = result_container.get_timings() # pylint: disable=assigning-non-slot # output for result in results: @@ -743,7 +607,7 @@ def search(): response = json.dumps( x, default = lambda item: list(item) if isinstance(item, set) else item ) - return Response(response, mimetype='application/json') + return Response(response, media_type='application/json') if output_format == 'csv': csv = UnicodeWriter(StringIO()) @@ -763,23 +627,24 @@ def search(): row = {'title': a, 'type': 'correction'} csv.writerow([row.get(key, '') for key in keys]) csv.stream.seek(0) - response = Response(csv.stream.read(), mimetype='application/csv') + response = Response(csv.stream.read(), media_type='application/csv') cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query) - response.headers.add('Content-Disposition', cont_disp) + response.headers['Content-Disposition'] = cont_disp return response if output_format == 'rss': - response_rss = render( + return render( + request, 'opensearch_response_rss.xml', + media_type='text/xml', results=results, answers=result_container.answers, corrections=result_container.corrections, suggestions=result_container.suggestions, - q=request.form['q'], + q=context.form['q'], number_of_results=number_of_results, override_theme='__common__', ) - return Response(response_rss, mimetype='text/xml') # HTML output format @@ -803,9 +668,10 @@ def search(): )) return render( + request, 'results.html', results = results, - q=request.form['q'], + q=context.form['q'], selected_categories = search_query.categories, pageno = search_query.pageno, time_range = search_query.time_range, @@ -822,11
+688,11 @@ def search(): current_language = match_language( search_query.lang, LANGUAGE_CODES, - fallback=request.preferences.get_value("language") + fallback=context.preferences.get_value("language") ), - theme = get_current_theme_name(), - favicons = global_favicons[themes.index(get_current_theme_name())], - timeout_limit = request.form.get('timeout_limit', None) + theme = get_current_theme_name(request), + favicons = global_favicons[themes.index(get_current_theme_name(request))], + timeout_limit = context.form.get('timeout_limit', None) ) @@ -852,23 +718,26 @@ def __get_translated_errors(unresponsive_engines): @app.route('/about', methods=['GET']) -def about(): +async def about(request): """Render about page""" - return render('about.html') + await set_context(request) + return render(request, 'about.html') @app.route('/autocompleter', methods=['GET', 'POST']) -def autocompleter(): +async def autocompleter(request): """Return autocompleter results""" + await set_context(request) + # run autocompleter results = [] # set blocked engines - disabled_engines = request.preferences.engines.get_disabled() + disabled_engines = context.preferences.engines.get_disabled() # parse query - raw_text_query = RawTextQuery(request.form.get('q', ''), disabled_engines) + raw_text_query = RawTextQuery(context.form.get('q', ''), disabled_engines) sug_prefix = raw_text_query.getQuery() # normal autocompletion results only appear if no inner results returned @@ -876,7 +745,7 @@ def autocompleter(): if len(raw_text_query.autocomplete_list) == 0 and len(sug_prefix) > 0: # get language from cookie - language = request.preferences.get_value('language') + language = context.preferences.get_value('language') if not language or language == 'all': language = 'en' else: @@ -884,7 +753,7 @@ def autocompleter(): # run autocompletion raw_results = search_autocomplete( - request.preferences.get_value('autocomplete'), sug_prefix, language + context.preferences.get_value('autocomplete'), sug_prefix, 
language ) for result in raw_results: # attention: this loop will change raw_text_query object and this is @@ -908,35 +777,40 @@ def autocompleter(): suggestions = json.dumps([sug_prefix, results]) mimetype = 'application/x-suggestions+json' - return Response(suggestions, mimetype=mimetype) + return Response(suggestions, media_type=mimetype) @app.route('/preferences', methods=['GET', 'POST']) -def preferences(): +async def preferences(request: Request): """Render preferences page && save user preferences""" # pylint: disable=too-many-locals, too-many-return-statements, too-many-branches # pylint: disable=too-many-statements + await set_context(request) + # save preferences if request.method == 'POST': - resp = make_response(redirect(url_for('index', _external=True))) + resp = RedirectResponse(url=request.url_for('index')) try: - request.preferences.parse_form(request.form) + context.preferences.parse_form(context.form) except ValidationException: - request.errors.append(gettext('Invalid settings, please edit your preferences')) + context.errors.append(gettext('Invalid settings, please edit your preferences')) return resp - return request.preferences.save(resp) + for cookie_name in request.cookies: + resp.delete_cookie(cookie_name) + context.preferences.save(resp) + return resp # render preferences - image_proxy = request.preferences.get_value('image_proxy') # pylint: disable=redefined-outer-name - disabled_engines = request.preferences.engines.get_disabled() - allowed_plugins = request.preferences.plugins.get_enabled() + image_proxy = context.preferences.get_value('image_proxy') # pylint: disable=redefined-outer-name + disabled_engines = context.preferences.engines.get_disabled() + allowed_plugins = context.preferences.plugins.get_enabled() # stats for preferences page filtered_engines = dict( filter( - lambda kv: (kv[0], request.preferences.validate_token(kv[1])), + lambda kv: (kv[0], context.preferences.validate_token(kv[1])), engines.items() ) ) @@ -969,7 +843,7 
@@ def preferences(): 'rate80': rate80, 'rate95': rate95, 'warn_timeout': e.timeout > settings['outgoing']['request_timeout'], - 'supports_selected_language': _is_selected_language_supported(e, request.preferences), + 'supports_selected_language': _is_selected_language_supported(e, context.preferences), 'result_count': result_count, } # end of stats @@ -977,9 +851,10 @@ def preferences(): # reliabilities reliabilities = {} engine_errors = get_engine_errors(filtered_engines) - checker_results = checker_get_result() - checker_results = checker_results['engines'] \ - if checker_results['status'] == 'ok' and 'engines' in checker_results else {} + checker_full_results = checker_get_result() + checker_results = {} + if checker_full_results and checker_full_results['status'] == 'ok' and 'engines' in checker_full_results: + checker_results = checker_full_results['engines'] for _, e in filtered_engines.items(): checker_result = checker_results.get(e.name, {}) checker_success = checker_result.get('success', True) @@ -1018,7 +893,7 @@ def preferences(): # supports supports = {} for _, e in filtered_engines.items(): - supports_selected_language = _is_selected_language_supported(e, request.preferences) + supports_selected_language = _is_selected_language_supported(e, context.preferences) safesearch = e.safesearch time_range_support = e.time_range_support for checker_test_name in checker_results.get(e.name, {}).get('errors', {}): @@ -1035,10 +910,11 @@ def preferences(): } return render( + request, 'preferences.html', - selected_categories = get_selected_categories(request.preferences, request.form), + selected_categories = get_selected_categories(context.preferences, context.form), locales = LOCALE_NAMES, - current_locale = request.preferences.get_value("locale"), + current_locale = context.preferences.get_value("locale"), image_proxy = image_proxy, engines_by_category = engines_by_category, stats = stats, @@ -1056,11 +932,11 @@ def preferences(): plugins = plugins, doi_resolvers = 
settings['doi_resolvers'], current_doi_resolver = get_doi_resolver( - request.args, request.preferences.get_value('doi_resolver') + context.form, context.preferences.get_value('doi_resolver') ), allowed_plugins = allowed_plugins, - theme = get_current_theme_name(), - preferences_url_params = request.preferences.get_as_url_params(), + theme = get_current_theme_name(request), + preferences_url_params = context.preferences.get_as_url_params(), locked_preferences = settings['preferences']['lock'], preferences = True ) @@ -1080,83 +956,101 @@ def _is_selected_language_supported(engine, preferences): # pylint: disable=red @app.route('/image_proxy', methods=['GET']) -def image_proxy(): - # pylint: disable=too-many-return-statements +async def image_proxy(request: Request): + # pylint: disable=too-many-return-statements, too-many-branches - url = request.args.get('url') + url = request.query_params.get('url') if not url: - return '', 400 + return PlainTextResponse('No URL', status_code=400) h = new_hmac(settings['server']['secret_key'], url.encode()) - if h != request.args.get('h'): - return '', 400 + if h != request.query_params.get('h'): + return PlainTextResponse('Wrong k', status_code=400) maximum_size = 5 * 1024 * 1024 - + do_forward, resp = False, None try: - headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'}) - headers['User-Agent'] = gen_useragent() - stream = http_stream( - method = 'GET', - url = url, - headers = headers, - timeout = settings['outgoing']['request_timeout'], - allow_redirects = True, - max_redirects = 20 - ) - resp = next(stream) + request_headers = { + 'User-Agent': gen_useragent(), + 'Accept': 'image/webp,*/*', + 'Accept-Encoding': 'gzip, deflate', + 'Sec-Fetch-Dest': 'image', + 'Sec-Fetch-Mode': 'no-cors', + 'Sec-GPC': '1', + 'DNT': '1', + } + resp = await AIOHTTP_SESSION.get(url, headers=request_headers).__aenter__() content_length = resp.headers.get('Content-Length') if (content_length and content_length.isdigit() - and 
int(content_length) > maximum_size ): - return 'Max size', 400 + and int(content_length) > maximum_size): + return PlainTextResponse('Max size', status_code=400) - if resp.status_code == 304: - return '', resp.status_code + if resp.status == 304: + return Response(None, status_code=resp.status, media_type=resp.content_type) - if resp.status_code != 200: - logger.debug( - 'image-proxy: wrong response code: {0}'.format( - resp.status_code)) - if resp.status_code >= 400: - return '', resp.status_code - return '', 400 + if resp.status != 200: + logger.debug('image-proxy: wrong response code: {0}'.format(resp.status)) + if resp.status >= 400: + return PlainTextResponse('Status code', status_code=resp.status) + return PlainTextResponse('Status code', status_code=400) if not resp.headers.get('content-type', '').startswith('image/'): logger.debug( 'image-proxy: wrong content-type: {0}'.format( resp.headers.get('content-type'))) - return '', 400 + return PlainTextResponse('Wrong content type', status_code=400) + + do_forward = True + except aiohttp.ClientError: + logger.exception('HTTP error') + return PlainTextResponse('HTTP Error', status_code=400) + finally: + if not do_forward and resp: + try: + resp.close() + except aiohttp.ClientError: + logger.exception('HTTP error on closing') + + # forward image + try: + async def forward_chunk(resp): + total_length = 0 + try: + chunk = await resp.content.readany() + while chunk: + yield chunk + total_length += len(chunk) + if total_length > maximum_size: + break + chunk = await resp.content.readany() + except aiohttp.client.ClientError: + logger.exception('Error reading URL') + finally: + resp.close() headers = dict_subset( resp.headers, - {'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'} + {'Content-Type', 'Content-Encoding', 'Content-Length', 'Length'} ) - total_length = 0 - - def forward_chunk(): - nonlocal total_length - for chunk in stream: - total_length += len(chunk) - if total_length > maximum_size: 
- break - yield chunk - - return Response(forward_chunk(), mimetype=resp.headers['Content-Type'], headers=headers) - except httpx.HTTPError: - return '', 400 + return StreamingResponse(forward_chunk(resp), headers=headers) + except aiohttp.ClientError: + logger.exception('HTTP error') + return PlainTextResponse('HTTP Error', status_code=400) @app.route('/stats', methods=['GET']) -def stats(): +async def stats(request: Request): """Render engine statistics page.""" - sort_order = request.args.get('sort', default='name', type=str) - selected_engine_name = request.args.get('engine', default=None, type=str) + await set_context(request) + + sort_order = request.query_params.get('sort', 'name') + selected_engine_name = request.query_params.get('engine', None) filtered_engines = dict( filter( - lambda kv: (kv[0], request.preferences.validate_token(kv[1])), + lambda kv: (kv[0], context.preferences.validate_token(kv[1])), engines.items() )) if selected_engine_name: @@ -1193,6 +1087,7 @@ def stats(): engine_stats['time'] = sorted(engine_stats['time'], reverse=reverse, key=get_key) return render( + request, 'stats.html', sort_order = sort_order, engine_stats = engine_stats, @@ -1202,86 +1097,88 @@ def stats(): @app.route('/stats/errors', methods=['GET']) -def stats_errors(): +async def stats_errors(request: Request): + await set_context(request) filtered_engines = dict( filter( - lambda kv: (kv[0], request.preferences.validate_token(kv[1])), + lambda kv: (kv[0], context.preferences.validate_token(kv[1])), engines.items() )) result = get_engine_errors(filtered_engines) - return jsonify(result) + return JSONResponse(result) @app.route('/stats/checker', methods=['GET']) -def stats_checker(): +async def stats_checker(request: Request): # pylint: disable=unused-argument result = checker_get_result() - return jsonify(result) + return JSONResponse(result) @app.route('/robots.txt', methods=['GET']) -def robots(): - return Response("""User-agent: * +async def robots(request: Request): 
# pylint: disable=unused-argument + return PlainTextResponse("""User-agent: * Allow: / Allow: /about Disallow: /stats Disallow: /preferences Disallow: /*?*q=* -""", mimetype='text/plain') +""", media_type='text/plain') @app.route('/opensearch.xml', methods=['GET']) -def opensearch(): +async def opensearch(request: Request): + await set_context(request) method = 'post' - if request.preferences.get_value('method') == 'GET': + if context.preferences.get_value('method') == 'GET': method = 'get' # chrome/chromium only supports HTTP GET.... if request.headers.get('User-Agent', '').lower().find('webkit') >= 0: method = 'get' - ret = render( + return render( + request, 'opensearch.xml', + status = 200, + media_type = "application/opensearchdescription+xml", opensearch_method=method, override_theme='__common__' ) - resp = Response( - response = ret, - status = 200, - mimetype = "application/opensearchdescription+xml" - ) - return resp - @app.route('/favicon.ico') -def favicon(): - return send_from_directory( +async def favicon(request: Request): + await set_context(request) + return FileResponse( os.path.join( - app.root_path, + searx_dir, settings['ui']['static_path'], 'themes', - get_current_theme_name(), - 'img' + get_current_theme_name(request), + 'img', + 'favicon.png' ), - 'favicon.png', - mimetype = 'image/vnd.microsoft.icon' + media_type = 'image/vnd.microsoft.icon' ) + @app.route('/clear_cookies') -def clear_cookies(): - resp = make_response(redirect(url_for('index', _external=True))) +def clear_cookies(request: Request): + resp = RedirectResponse(request.url_for('index')) for cookie_name in request.cookies: resp.delete_cookie(cookie_name) return resp @app.route('/config') -def config(): +async def config(request: Request): """Return configuration in JSON format.""" + await set_context(request) + _engines = [] for name, engine in engines.items(): - if not request.preferences.validate_token(engine): + if not context.preferences.validate_token(engine): continue 
supported_languages = engine.supported_languages @@ -1305,7 +1202,7 @@ def config(): for _ in plugins: _plugins.append({'name': _.name, 'enabled': _.default_on}) - return jsonify({ + return JSONResponse({ 'categories': list(categories.keys()), 'engines': _engines, 'plugins': _plugins, @@ -1326,30 +1223,31 @@ def config(): }) -@app.errorhandler(404) -def page_not_found(_e): - return render('404.html'), 404 +@app.exception_handler(404) +async def page_not_found(request: Request, exc): + await set_context(request) + return render( + request, + '404.html', + status_code=exc.status_code + ) def run(): + # pylint: disable=import-outside-toplevel logger.debug( 'starting webserver on %s:%s', settings['server']['bind_address'], settings['server']['port'] ) - app.run( - debug = searx_debug, - use_debugger = searx_debug, - port = settings['server']['port'], - host = settings['server']['bind_address'], - threaded = True, - extra_files = [ - get_default_settings_path() - ], - ) + if searx_debug: + from searx.run import run_debug + run_debug() + else: + from searx.run import run_production + run_production(app) application = app -patch_application(app) if __name__ == "__main__": run() diff --git a/searx/webutils.py b/searx/webutils.py index c27324908..cdda59d4f 100644 --- a/searx/webutils.py +++ b/searx/webutils.py @@ -4,7 +4,6 @@ import csv import hashlib import hmac import re -import inspect from io import StringIO from codecs import getincrementalencoder @@ -123,18 +122,3 @@ def highlight_content(content, query): content, flags=re.I | re.U) return content - - -def is_flask_run_cmdline(): - """Check if the application was started using "flask run" command line - - Inspect the callstack. - See https://github.com/pallets/flask/blob/master/src/flask/__main__.py - - Returns: - bool: True if the application was started using "flask run". 
- """ - frames = inspect.stack() - if len(frames) < 2: - return False - return frames[-2].filename.endswith('flask/cli.py') diff --git a/tests/unit/test_webapp.py b/tests/unit/test_webapp.py index 2cbdc83d6..5a288ded4 100644 --- a/tests/unit/test_webapp.py +++ b/tests/unit/test_webapp.py @@ -1,14 +1,28 @@ # -*- coding: utf-8 -*- -import json +import unittest from urllib.parse import ParseResult from mock import Mock -from searx.testing import SearxTestCase +# from searx.testing import SearxTestCase from searx.search import Search import searx.search.processors +from starlette.testclient import TestClient -class ViewsTestCase(SearxTestCase): + +class ViewsTestCase(unittest.TestCase): + + def setattr4test(self, obj, attr, value): + """ + setattr(obj, attr, value) + but reset to the previous value in the cleanup. + """ + previous_value = getattr(obj, attr) + + def cleanup_patch(): + setattr(obj, attr, previous_value) + self.addCleanup(cleanup_patch) + setattr(obj, attr, value) def setUp(self): # skip init function (no external HTTP request) @@ -16,10 +30,8 @@ class ViewsTestCase(SearxTestCase): pass self.setattr4test(searx.search.processors, 'initialize_processor', dummy) - from searx import webapp # pylint disable=import-outside-toplevel - - webapp.app.config['TESTING'] = True # to get better error messages - self.app = webapp.app.test_client() + from searx import webapp, templates # pylint disable=import-outside-toplevel + self.client = TestClient(webapp.app) # set some defaults test_results = [ @@ -69,51 +81,51 @@ class ViewsTestCase(SearxTestCase): self.setattr4test(Search, 'search', search_mock) - def get_current_theme_name_mock(override=None): + def get_current_theme_name_mock(request, override=None): if override: return override return 'oscar' - self.setattr4test(webapp, 'get_current_theme_name', get_current_theme_name_mock) + self.setattr4test(templates, 'get_current_theme_name', get_current_theme_name_mock) self.maxDiff = None # to see full diffs def 
test_index_empty(self): - result = self.app.post('/') + result = self.client.post('/') self.assertEqual(result.status_code, 200) self.assertIn(b'', result.data) def test_index_html_post(self): - result = self.app.post('/', data={'q': 'test'}) + result = self.client.post('/', data={'q': 'test'}) self.assertEqual(result.status_code, 308) self.assertEqual(result.location, 'http://localhost/search') def test_index_html_get(self): - result = self.app.post('/?q=test') + result = self.client.post('/?q=test') self.assertEqual(result.status_code, 308) self.assertEqual(result.location, 'http://localhost/search?q=test') def test_search_empty_html(self): - result = self.app.post('/search', data={'q': ''}) + result = self.client.post('/search', data={'q': ''}) self.assertEqual(result.status_code, 200) self.assertIn(b'searxng', result.data) def test_search_empty_json(self): - result = self.app.post('/search', data={'q': '', 'format': 'json'}) + result = self.client.post('/search', data={'q': '', 'format': 'json'}) self.assertEqual(result.status_code, 400) def test_search_empty_csv(self): - result = self.app.post('/search', data={'q': '', 'format': 'csv'}) + result = self.client.post('/search', data={'q': '', 'format': 'csv'}) self.assertEqual(result.status_code, 400) def test_search_empty_rss(self): - result = self.app.post('/search', data={'q': '', 'format': 'rss'}) + result = self.client.post('/search', data={'q': '', 'format': 'rss'}) self.assertEqual(result.status_code, 400) def test_search_html(self): - result = self.app.post('/search', data={'q': 'test'}) + result = self.client.post('/search', data={'q': 'test'}) self.assertIn( b'

Search results for "test" - searx', @@ -186,12 +198,12 @@ class ViewsTestCase(SearxTestCase): ) def test_about(self): - result = self.app.get('/about') + result = self.client.get('/about') self.assertEqual(result.status_code, 200) self.assertIn(b'

About searxng

', result.data) def test_preferences(self): - result = self.app.get('/preferences') + result = self.client.get('/preferences') self.assertEqual(result.status_code, 200) self.assertIn( b'
', @@ -207,7 +219,7 @@ class ViewsTestCase(SearxTestCase): ) def test_browser_locale(self): - result = self.app.get('/preferences', headers={'Accept-Language': 'zh-tw;q=0.8'}) + result = self.client.get('/preferences', headers={'Accept-Language': 'zh-tw;q=0.8'}) self.assertEqual(result.status_code, 200) self.assertIn( b'