From 67e11c42b973932c8f568d80a0f25bfd7fc150ab Mon Sep 17 00:00:00 2001 From: dalf Date: Sat, 22 Oct 2016 13:10:31 +0200 Subject: [PATCH 1/3] Clean up the architecture Purposes : - isolate the plugins calls - distinction between parsing the web request and running the search (Search class). To be able to test code easily, to run searx code outside a web server, to filter the search query parameters with plugins more easily, etc... Details : - request.request_data contains request.form or request.args (initialized inside the pre_request() function) - Query class is renamed RawTextQuery - SearchQuery class defines all search parameters - get_search_query_from_webapp creates a SearchQuery instance (basically the previous Search.__init__ code) - Search class and SearchWithPlugins class take a SearchQuery instance as class constructor parameter - SearchWithPlugins class inherits from Search class, and runs plugins - A dedicated function search_with_plugins executes plugins to have a well-defined locals() (which is used by the plugins code). 
- All plugins code is executed inside the try...except block (webapp.py, index function) - advanced_search HTTP parameter value stays in webapp.py (it is only part of UI) - multiple calls to result_container.get_ordered_results() don't compute the order multiple times (note : this method was called only once before) - paging value is stored in the result_container class (computed in the extend method) - test about engine.suspend_end_time is done during search method call (instead of __init__) - check that the format parameter value is one of these : html, csv, json, rss (before the html value was assumed but some text formatting wasn't done) --- searx/plugins/self_info.py | 8 +- searx/query.py | 20 ++- searx/results.py | 17 ++- searx/search.py | 302 +++++++++++++++++++++---------- searx/webapp.py | 106 +++++++------ tests/unit/test_plugins.py | 21 ++- 6 files changed, 272 insertions(+), 202 deletions(-) diff --git a/searx/plugins/self_info.py b/searx/plugins/self_info.py index 438274c41..2f19ad9c7 100644 --- a/searx/plugins/self_info.py +++ b/searx/plugins/self_info.py @@ -37,10 +37,10 @@ def post_search(request, ctx): ip = x_forwarded_for[0] else: ip = request.remote_addr - ctx['search'].result_container.answers.clear() - ctx['search'].result_container.answers.add(ip) + ctx['result_container'].answers.clear() + ctx['result_container'].answers.add(ip) elif p.match(ctx['search'].query): ua = request.user_agent - ctx['search'].result_container.answers.clear() - ctx['search'].result_container.answers.add(ua) + ctx['result_container'].answers.clear() + ctx['result_container'].answers.add(ua) return True diff --git a/searx/query.py b/searx/query.py index 3d617ab05..b8afba6ed 100644 --- a/searx/query.py +++ b/searx/query.py @@ -25,8 +25,8 @@ import string import re -class Query(object): - """parse query""" +class RawTextQuery(object): + """parse raw text query (the value from the html input)""" def __init__(self, query, disabled_engines): self.query = query @@ 
-130,3 +130,19 @@ class Query(object): def getFullQuery(self): # get full querry including whitespaces return string.join(self.query_parts, '') + + +class SearchQuery(object): + """container for all the search parameters (query, language, etc...)""" + + def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range): + self.query = query + self.engines = engines + self.categories = categories + self.lang = lang + self.safesearch = safesearch + self.pageno = pageno + self.time_range = time_range + + def __str__(self): + return str(self.query) + ";" + str(self.engines) diff --git a/searx/results.py b/searx/results.py index 32832f199..634f71acd 100644 --- a/searx/results.py +++ b/searx/results.py @@ -128,6 +128,8 @@ class ResultContainer(object): self.suggestions = set() self.answers = set() self._number_of_results = [] + self._ordered = False + self.paging = False def extend(self, engine_name, results): for result in list(results): @@ -153,6 +155,9 @@ class ResultContainer(object): self.results[engine_name].extend(results) + if not self.paging and engines[engine_name].paging: + self.paging = True + for i, result in enumerate(results): try: result['url'] = result['url'].decode('utf-8') @@ -219,7 +224,7 @@ class ResultContainer(object): with RLock(): self._merged_results.append(result) - def get_ordered_results(self): + def order_results(self): for result in self._merged_results: score = result_score(result) result['score'] = score @@ -269,8 +274,14 @@ class ResultContainer(object): # update categoryIndex categoryPositions[category] = {'index': len(gresults), 'count': 8} - # return gresults - return gresults + # update _merged_results + self._ordered = True + self._merged_results = gresults + + def get_ordered_results(self): + if not self._ordered: + self.order_results() + return self._merged_results def results_length(self): return len(self._merged_results) diff --git a/searx/search.py b/searx/search.py index c6d17eba9..a41c4b09a 100644 --- 
a/searx/search.py +++ b/searx/search.py @@ -25,9 +25,10 @@ from searx.engines import ( categories, engines ) from searx.utils import gen_useragent -from searx.query import Query +from searx.query import RawTextQuery, SearchQuery from searx.results import ResultContainer from searx import logger +from searx.plugins import plugins logger = logger.getChild('search') @@ -127,135 +128,130 @@ def make_callback(engine_name, callback, params, result_container): return process_callback +def get_search_query_from_webapp(preferences, request_data): + query = None + query_engines = [] + query_categories = [] + query_paging = False + query_pageno = 1 + query_lang = 'all' + query_time_range = None + + # set blocked engines + disabled_engines = preferences.engines.get_disabled() + + # set specific language if set + query_lang = preferences.get_value('language') + + # safesearch + query_safesearch = preferences.get_value('safesearch') + + # TODO better exceptions + if not request_data.get('q'): + raise Exception('noquery') + + # set pagenumber + pageno_param = request_data.get('pageno', '1') + if not pageno_param.isdigit() or int(pageno_param) < 1: + pageno_param = 1 + + query_pageno = int(pageno_param) + + # parse query, if tags are set, which change + # the serch engine or search-language + raw_text_query = RawTextQuery(request_data['q'], disabled_engines) + raw_text_query.parse_query() + + # set query + query = raw_text_query.getSearchQuery() + + # get last selected language in query, if possible + # TODO support search with multible languages + if len(raw_text_query.languages): + query_lang = raw_text_query.languages[-1] + + query_time_range = request_data.get('time_range') + + query_engines = raw_text_query.engines + + # if engines are calculated from query, + # set categories by using that informations + if query_engines and raw_text_query.specific: + query_categories = list(set(engine['category'] + for engine in query_engines)) + + # otherwise, using defined categories to + 
# calculate which engines should be used + else: + # set categories/engines + load_default_categories = True + for pd_name, pd in request_data.items(): + if pd_name == 'categories': + query_categories.extend(categ for categ in map(unicode.strip, pd.split(',')) if categ in categories) + elif pd_name == 'engines': + pd_engines = [{'category': engines[engine].categories[0], + 'name': engine} + for engine in map(unicode.strip, pd.split(',')) if engine in engines] + if pd_engines: + query_engines.extend(pd_engines) + load_default_categories = False + elif pd_name.startswith('category_'): + category = pd_name[9:] + + # if category is not found in list, skip + if category not in categories: + continue + + if pd != 'off': + # add category to list + query_categories.append(category) + elif category in query_categories: + # remove category from list if property is set to 'off' + query_categories.remove(category) + + if not load_default_categories: + if not query_categories: + query_categories = list(set(engine['category'] + for engine in engines)) + else: + # if no category is specified for this search, + # using user-defined default-configuration which + # (is stored in cookie) + if not query_categories: + cookie_categories = preferences.get_value('categories') + for ccateg in cookie_categories: + if ccateg in categories: + query_categories.append(ccateg) + + # if still no category is specified, using general + # as default-category + if not query_categories: + query_categories = ['general'] + + # using all engines for that search, which are + # declared under the specific categories + for categ in query_categories: + query_engines.extend({'category': categ, + 'name': engine.name} + for engine in categories[categ] + if (engine.name, categ) not in disabled_engines) + + return SearchQuery(query, query_engines, query_categories, + query_lang, query_safesearch, query_pageno, query_time_range) + + class Search(object): """Search information container""" - def __init__(self, 
request): + def __init__(self, search_query): # init vars super(Search, self).__init__() - self.query = None - self.engines = [] - self.categories = [] - self.paging = False - self.pageno = 1 - self.lang = 'all' - self.time_range = None - self.is_advanced = None - - # set blocked engines - self.disabled_engines = request.preferences.engines.get_disabled() - + self.search_query = search_query self.result_container = ResultContainer() - self.request_data = {} - - # set specific language if set - self.lang = request.preferences.get_value('language') - - # set request method - if request.method == 'POST': - self.request_data = request.form - else: - self.request_data = request.args - - # TODO better exceptions - if not self.request_data.get('q'): - raise Exception('noquery') - - # set pagenumber - pageno_param = self.request_data.get('pageno', '1') - if not pageno_param.isdigit() or int(pageno_param) < 1: - pageno_param = 1 - - self.pageno = int(pageno_param) - - # parse query, if tags are set, which change - # the serch engine or search-language - query_obj = Query(self.request_data['q'], self.disabled_engines) - query_obj.parse_query() - - # set query - self.query = query_obj.getSearchQuery() - - # get last selected language in query, if possible - # TODO support search with multible languages - if len(query_obj.languages): - self.lang = query_obj.languages[-1] - - self.time_range = self.request_data.get('time_range') - self.is_advanced = self.request_data.get('advanced_search') - - self.engines = query_obj.engines - - # if engines are calculated from query, - # set categories by using that informations - if self.engines and query_obj.specific: - self.categories = list(set(engine['category'] - for engine in self.engines)) - - # otherwise, using defined categories to - # calculate which engines should be used - else: - # set categories/engines - load_default_categories = True - for pd_name, pd in self.request_data.items(): - if pd_name == 'categories': - 
self.categories.extend(categ for categ in map(unicode.strip, pd.split(',')) if categ in categories) - elif pd_name == 'engines': - pd_engines = [{'category': engines[engine].categories[0], - 'name': engine} - for engine in map(unicode.strip, pd.split(',')) if engine in engines] - if pd_engines: - self.engines.extend(pd_engines) - load_default_categories = False - elif pd_name.startswith('category_'): - category = pd_name[9:] - - # if category is not found in list, skip - if category not in categories: - continue - - if pd != 'off': - # add category to list - self.categories.append(category) - elif category in self.categories: - # remove category from list if property is set to 'off' - self.categories.remove(category) - - if not load_default_categories: - if not self.categories: - self.categories = list(set(engine['category'] - for engine in self.engines)) - return - - # if no category is specified for this search, - # using user-defined default-configuration which - # (is stored in cookie) - if not self.categories: - cookie_categories = request.preferences.get_value('categories') - for ccateg in cookie_categories: - if ccateg in categories: - self.categories.append(ccateg) - - # if still no category is specified, using general - # as default-category - if not self.categories: - self.categories = ['general'] - - # using all engines for that search, which are - # declared under the specific categories - for categ in self.categories: - self.engines.extend({'category': categ, - 'name': engine.name} - for engine in categories[categ] - if (engine.name, categ) not in self.disabled_engines) - - # remove suspended engines - self.engines = [e for e in self.engines - if engines[e['name']].suspend_end_time <= time()] # do search-request - def search(self, request): + def search(self): global number_of_searches # init vars @@ -268,23 +264,30 @@ class Search(object): # user_agent = request.headers.get('User-Agent', '') user_agent = gen_useragent() + search_query = 
self.search_query + # start search-reqest for all selected engines - for selected_engine in self.engines: + for selected_engine in search_query.engines: if selected_engine['name'] not in engines: continue engine = engines[selected_engine['name']] + # skip suspended engines + if engine.suspend_end_time and engine.suspend_end_time > time(): + continue + # if paging is not supported, skip - if self.pageno > 1 and not engine.paging: + if search_query.pageno > 1 and not engine.paging: continue # if search-language is set and engine does not # provide language-support, skip - if self.lang != 'all' and not engine.language_support: + if search_query.lang != 'all' and not engine.language_support: continue - if self.time_range and not engine.time_range_support: + # if time_range is not supported, skip + if search_query.time_range and not engine.time_range_support: continue # set default request parameters @@ -292,21 +295,20 @@ class Search(object): request_params['headers']['User-Agent'] = user_agent request_params['category'] = selected_engine['category'] request_params['started'] = time() - request_params['pageno'] = self.pageno + request_params['pageno'] = search_query.pageno if hasattr(engine, 'language') and engine.language: request_params['language'] = engine.language else: - request_params['language'] = self.lang + request_params['language'] = search_query.lang # 0 = None, 1 = Moderate, 2 = Strict - request_params['safesearch'] = request.preferences.get_value('safesearch') - request_params['time_range'] = self.time_range - request_params['advanced_search'] = self.is_advanced + request_params['safesearch'] = search_query.safesearch + request_params['time_range'] = search_query.time_range # update request parameters dependent on # search-engine (contained in engines folder) - engine.request(self.query.encode('utf-8'), request_params) + engine.request(search_query.query.encode('utf-8'), request_params) if request_params['url'] is None: # TODO add support of offline 
engines @@ -346,10 +348,44 @@ class Search(object): selected_engine['name'])) if not requests: - return self + return self.result_container # send all search-request threaded_requests(requests) start_new_thread(gc.collect, tuple()) # return results, suggestions, answers and infoboxes - return self + return self.result_container + + +def search_with_plugins(do_search, search_query, request, request_data, result_container): + """Search using the do_search function and with plugins filtering. + Standalone function to have a well define locals(). + result_container contains the results after the function call. + """ + search = search_query + + if plugins.call('pre_search', request, locals()): + do_search() + + plugins.call('post_search', request, locals()) + + results = result_container.get_ordered_results() + + for result in results: + plugins.call('on_result', request, locals()) + + +class SearchWithPlugins(Search): + + def __init__(self, search_query, request): + super(SearchWithPlugins, self).__init__(search_query) + self.request = request + self.request_data = request.request_data + + def search(self): + + def do_search(): + super(SearchWithPlugins, self).search() + + search_with_plugins(do_search, self.search_query, self.request, self.request_data, self.result_container) + return self.result_container diff --git a/searx/webapp.py b/searx/webapp.py index 3fd127eca..7c3cb7e9b 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -62,8 +62,8 @@ from searx.utils import ( ) from searx.version import VERSION_STRING from searx.languages import language_codes -from searx.search import Search -from searx.query import Query +from searx.search import Search, SearchWithPlugins, get_search_query_from_webapp +from searx.query import RawTextQuery, SearchQuery from searx.autocomplete import searx_bang, backends as autocomplete_backends from searx.plugins import plugins from searx.preferences import Preferences, ValidationException @@ -364,6 +364,16 @@ def render(template_name, 
override_theme=None, **kwargs): @app.before_request def pre_request(): + # request.request_data + if request.method == 'POST': + request_data = request.form + elif request.method == 'GET': + request_data = request.args + else: + request_data = {} + + request.request_data = request_data + # merge GET, POST vars preferences = Preferences(themes, categories.keys(), engines, plugins) try: @@ -373,11 +383,13 @@ def pre_request(): logger.warning('Invalid config') request.preferences = preferences + # request.form request.form = dict(request.form.items()) for k, v in request.args.items(): if k not in request.form: request.form[k] = v + # request.user_plugins request.user_plugins = [] allowed_plugins = preferences.plugins.get_enabled() disabled_plugins = preferences.plugins.get_disabled() @@ -400,30 +412,33 @@ def index(): 'index.html', ) + # search + search_query = None + result_container = None try: - search = Search(request) + search_query = get_search_query_from_webapp(request.preferences, request.request_data) + # search = Search(search_query) # without plugins + search = SearchWithPlugins(search_query, request) + result_container = search.search() except: return render( 'index.html', ) - if plugins.call('pre_search', request, locals()): - search.search(request) + results = result_container.get_ordered_results() - plugins.call('post_search', request, locals()) - - results = search.result_container.get_ordered_results() + # UI + advanced_search = request.request_data.get('advanced_search', None) + output_format = request.request_data.get('format', 'html') + if output_format not in ['html', 'csv', 'json', 'rss']: + output_format = 'html' + # output for result in results: - - plugins.call('on_result', request, locals()) - if not search.paging and engines[result['engine']].paging: - search.paging = True - - if search.request_data.get('format', 'html') == 'html': + if output_format == 'html': if 'content' in result and result['content']: - result['content'] = 
highlight_content(result['content'][:1024], search.query.encode('utf-8')) - result['title'] = highlight_content(result['title'], search.query.encode('utf-8')) + result['content'] = highlight_content(result['content'][:1024], search_query.query.encode('utf-8')) + result['title'] = highlight_content(result['title'], search_query.query.encode('utf-8')) else: if result.get('content'): result['content'] = html_to_text(result['content']).strip() @@ -450,16 +465,16 @@ def index(): else: result['publishedDate'] = format_date(result['publishedDate']) - number_of_results = search.result_container.results_number() - if number_of_results < search.result_container.results_length(): + number_of_results = result_container.results_number() + if number_of_results < result_container.results_length(): number_of_results = 0 - if search.request_data.get('format') == 'json': - return Response(json.dumps({'query': search.query, + if output_format == 'json': + return Response(json.dumps({'query': search_query.query, 'number_of_results': number_of_results, 'results': results}), mimetype='application/json') - elif search.request_data.get('format') == 'csv': + elif output_format == 'csv': csv = UnicodeWriter(cStringIO.StringIO()) keys = ('title', 'url', 'content', 'host', 'engine', 'score') csv.writerow(keys) @@ -468,14 +483,14 @@ def index(): csv.writerow([row.get(key, '') for key in keys]) csv.stream.seek(0) response = Response(csv.stream.read(), mimetype='application/csv') - cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search.query.encode('utf-8')) + cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query.encode('utf-8')) response.headers.add('Content-Disposition', cont_disp) return response - elif search.request_data.get('format') == 'rss': + elif output_format == 'rss': response_rss = render( 'opensearch_response_rss.xml', results=results, - q=search.request_data['q'], + q=request.request_data['q'], number_of_results=number_of_results, 
base_url=get_base_url() ) @@ -484,17 +499,17 @@ def index(): return render( 'results.html', results=results, - q=search.request_data['q'], - selected_categories=search.categories, - paging=search.paging, + q=request.request_data['q'], + selected_categories=search_query.categories, + pageno=search_query.pageno, + time_range=search_query.time_range, number_of_results=format_decimal(number_of_results), - pageno=search.pageno, - advanced_search=search.is_advanced, - time_range=search.time_range, + advanced_search=advanced_search, + suggestions=result_container.suggestions, + answers=result_container.answers, + infoboxes=result_container.infoboxes, + paging=result_container.paging, base_url=get_base_url(), - suggestions=search.result_container.suggestions, - answers=search.result_container.answers, - infoboxes=search.result_container.infoboxes, theme=get_current_theme_name(), favicons=global_favicons[themes.index(get_current_theme_name())] ) @@ -511,30 +526,23 @@ def about(): @app.route('/autocompleter', methods=['GET', 'POST']) def autocompleter(): """Return autocompleter results""" - request_data = {} - - # select request method - if request.method == 'POST': - request_data = request.form - else: - request_data = request.args # set blocked engines disabled_engines = request.preferences.engines.get_disabled() # parse query - query = Query(request_data.get('q', '').encode('utf-8'), disabled_engines) - query.parse_query() + raw_text_query = RawTextQuery(request.request_data.get('q', '').encode('utf-8'), disabled_engines) + raw_text_query.parse_query() # check if search query is set - if not query.getSearchQuery(): + if not raw_text_query.getSearchQuery(): return '', 400 # run autocompleter completer = autocomplete_backends.get(request.preferences.get_value('autocomplete')) # parse searx specific autocompleter results like !bang - raw_results = searx_bang(query) + raw_results = searx_bang(raw_text_query) # normal autocompletion results only appear if max 3 inner results 
returned if len(raw_results) <= 3 and completer: @@ -545,19 +553,19 @@ def autocompleter(): else: language = language.split('_')[0] # run autocompletion - raw_results.extend(completer(query.getSearchQuery(), language)) + raw_results.extend(completer(raw_text_query.getSearchQuery(), language)) # parse results (write :language and !engine back to result string) results = [] for result in raw_results: - query.changeSearchQuery(result) + raw_text_query.changeSearchQuery(result) # add parsed result - results.append(query.getFullQuery()) + results.append(raw_text_query.getFullQuery()) # return autocompleter results - if request_data.get('format') == 'x-suggestions': - return Response(json.dumps([query.query, results]), + if request.request_data.get('format') == 'x-suggestions': + return Response(json.dumps([raw_text_query.query, results]), mimetype='application/json') return Response(json.dumps(results), diff --git a/tests/unit/test_plugins.py b/tests/unit/test_plugins.py index b8a8980cf..9ba6fcdd2 100644 --- a/tests/unit/test_plugins.py +++ b/tests/unit/test_plugins.py @@ -6,9 +6,8 @@ from mock import Mock def get_search_mock(query, **kwargs): - return {'search': Mock(query=query, - result_container=Mock(answers=set()), - **kwargs)} + return {'search': Mock(query=query, **kwargs), + 'result_container': Mock(answers=set())} class PluginStoreTest(SearxTestCase): @@ -54,11 +53,11 @@ class SelfIPTest(SearxTestCase): request.headers.getlist.return_value = [] ctx = get_search_mock(query='ip', pageno=1) store.call('post_search', request, ctx) - self.assertTrue('127.0.0.1' in ctx['search'].result_container.answers) + self.assertTrue('127.0.0.1' in ctx['result_container'].answers) ctx = get_search_mock(query='ip', pageno=2) store.call('post_search', request, ctx) - self.assertFalse('127.0.0.1' in ctx['search'].result_container.answers) + self.assertFalse('127.0.0.1' in ctx['result_container'].answers) # User agent test request = Mock(user_plugins=store.plugins, @@ -67,24 +66,24 
@@ class SelfIPTest(SearxTestCase): ctx = get_search_mock(query='user-agent', pageno=1) store.call('post_search', request, ctx) - self.assertTrue('Mock' in ctx['search'].result_container.answers) + self.assertTrue('Mock' in ctx['result_container'].answers) ctx = get_search_mock(query='user-agent', pageno=2) store.call('post_search', request, ctx) - self.assertFalse('Mock' in ctx['search'].result_container.answers) + self.assertFalse('Mock' in ctx['result_container'].answers) ctx = get_search_mock(query='user-agent', pageno=1) store.call('post_search', request, ctx) - self.assertTrue('Mock' in ctx['search'].result_container.answers) + self.assertTrue('Mock' in ctx['result_container'].answers) ctx = get_search_mock(query='user-agent', pageno=2) store.call('post_search', request, ctx) - self.assertFalse('Mock' in ctx['search'].result_container.answers) + self.assertFalse('Mock' in ctx['result_container'].answers) ctx = get_search_mock(query='What is my User-Agent?', pageno=1) store.call('post_search', request, ctx) - self.assertTrue('Mock' in ctx['search'].result_container.answers) + self.assertTrue('Mock' in ctx['result_container'].answers) ctx = get_search_mock(query='What is my User-Agent?', pageno=2) store.call('post_search', request, ctx) - self.assertFalse('Mock' in ctx['search'].result_container.answers) + self.assertFalse('Mock' in ctx['result_container'].answers) From fbb080f3588ad43bf896a569257f3a95e1181d7b Mon Sep 17 00:00:00 2001 From: dalf Date: Sat, 22 Oct 2016 14:01:53 +0200 Subject: [PATCH 2/3] Change plugin API : - pre_search(request, search) - post_search(request, search) - on_result(request, search, result) with - request is the Flask request - search a searx.Search instance - result a searx result as usual --- searx/plugins/doai_rewrite.py | 8 ++--- searx/plugins/https_rewrite.py | 3 +- searx/plugins/self_info.py | 16 ++++----- searx/plugins/tracker_url_remover.py | 10 +++--- searx/search.py | 32 ++++++----------- tests/unit/test_plugins.py | 52 
++++++++++++++-------------- 6 files changed, 54 insertions(+), 67 deletions(-) diff --git a/searx/plugins/doai_rewrite.py b/searx/plugins/doai_rewrite.py index fc5998b14..0142af672 100644 --- a/searx/plugins/doai_rewrite.py +++ b/searx/plugins/doai_rewrite.py @@ -20,12 +20,12 @@ def extract_doi(url): return None -def on_result(request, ctx): - doi = extract_doi(ctx['result']['parsed_url']) +def on_result(request, search, result): + doi = extract_doi(result['parsed_url']) if doi and len(doi) < 50: for suffix in ('/', '.pdf', '/full', '/meta', '/abstract'): if doi.endswith(suffix): doi = doi[:-len(suffix)] - ctx['result']['url'] = 'http://doai.io/' + doi - ctx['result']['parsed_url'] = urlparse(ctx['result']['url']) + result['url'] = 'http://doai.io/' + doi + result['parsed_url'] = urlparse(result['url']) return True diff --git a/searx/plugins/https_rewrite.py b/searx/plugins/https_rewrite.py index 8a9fcd4ad..8b4c9784e 100644 --- a/searx/plugins/https_rewrite.py +++ b/searx/plugins/https_rewrite.py @@ -220,8 +220,7 @@ def https_url_rewrite(result): return result -def on_result(request, ctx): - result = ctx['result'] +def on_result(request, search, result): if result['parsed_url'].scheme == 'http': https_url_rewrite(result) return True diff --git a/searx/plugins/self_info.py b/searx/plugins/self_info.py index 2f19ad9c7..a2aeda98e 100644 --- a/searx/plugins/self_info.py +++ b/searx/plugins/self_info.py @@ -28,19 +28,19 @@ p = re.compile('.*user[ -]agent.*', re.IGNORECASE) # attach callback to the post search hook # request: flask request object # ctx: the whole local context of the pre search hook -def post_search(request, ctx): - if ctx['search'].pageno > 1: +def post_search(request, search): + if search.search_query.pageno > 1: return True - if ctx['search'].query == 'ip': + if search.search_query.query == 'ip': x_forwarded_for = request.headers.getlist("X-Forwarded-For") if x_forwarded_for: ip = x_forwarded_for[0] else: ip = request.remote_addr - 
ctx['result_container'].answers.clear() - ctx['result_container'].answers.add(ip) - elif p.match(ctx['search'].query): + search.result_container.answers.clear() + search.result_container.answers.add(ip) + elif p.match(search.search_query.query): ua = request.user_agent - ctx['result_container'].answers.clear() - ctx['result_container'].answers.add(ua) + search.result_container.answers.clear() + search.result_container.answers.add(ua) return True diff --git a/searx/plugins/tracker_url_remover.py b/searx/plugins/tracker_url_remover.py index b909e3fae..68a004e33 100644 --- a/searx/plugins/tracker_url_remover.py +++ b/searx/plugins/tracker_url_remover.py @@ -28,8 +28,8 @@ description = gettext('Remove trackers arguments from the returned URL') default_on = True -def on_result(request, ctx): - query = ctx['result']['parsed_url'].query +def on_result(request, search, result): + query = result['parsed_url'].query if query == "": return True @@ -37,8 +37,8 @@ def on_result(request, ctx): for reg in regexes: query = reg.sub('', query) - if query != ctx['result']['parsed_url'].query: - ctx['result']['parsed_url'] = ctx['result']['parsed_url']._replace(query=query) - ctx['result']['url'] = urlunparse(ctx['result']['parsed_url']) + if query != result['parsed_url'].query: + result['parsed_url'] = result['parsed_url']._replace(query=query) + result['url'] = urlunparse(result['parsed_url']) return True diff --git a/searx/search.py b/searx/search.py index a41c4b09a..a8de143b0 100644 --- a/searx/search.py +++ b/searx/search.py @@ -357,35 +357,23 @@ class Search(object): return self.result_container -def search_with_plugins(do_search, search_query, request, request_data, result_container): - """Search using the do_search function and with plugins filtering. - Standalone function to have a well define locals(). - result_container contains the results after the function call. 
- """ - search = search_query - - if plugins.call('pre_search', request, locals()): - do_search() - - plugins.call('post_search', request, locals()) - - results = result_container.get_ordered_results() - - for result in results: - plugins.call('on_result', request, locals()) - - class SearchWithPlugins(Search): + """Similar to the Search class but call the plugins.""" + def __init__(self, search_query, request): super(SearchWithPlugins, self).__init__(search_query) self.request = request - self.request_data = request.request_data def search(self): - - def do_search(): + if plugins.call('pre_search', self.request, self): super(SearchWithPlugins, self).search() - search_with_plugins(do_search, self.search_query, self.request, self.request_data, self.result_container) + plugins.call('post_search', self.request, self) + + results = self.result_container.get_ordered_results() + + for result in results: + plugins.call('on_result', self.request, self, result) + return self.result_container diff --git a/tests/unit/test_plugins.py b/tests/unit/test_plugins.py index 9ba6fcdd2..c9e65dfcb 100644 --- a/tests/unit/test_plugins.py +++ b/tests/unit/test_plugins.py @@ -6,8 +6,8 @@ from mock import Mock def get_search_mock(query, **kwargs): - return {'search': Mock(query=query, **kwargs), - 'result_container': Mock(answers=set())} + return Mock(search_query=Mock(query=query, **kwargs), + result_container=Mock(answers=set())) class PluginStoreTest(SearxTestCase): @@ -51,39 +51,39 @@ class SelfIPTest(SearxTestCase): request = Mock(user_plugins=store.plugins, remote_addr='127.0.0.1') request.headers.getlist.return_value = [] - ctx = get_search_mock(query='ip', pageno=1) - store.call('post_search', request, ctx) - self.assertTrue('127.0.0.1' in ctx['result_container'].answers) + search = get_search_mock(query='ip', pageno=1) + store.call('post_search', request, search) + self.assertTrue('127.0.0.1' in search.result_container.answers) - ctx = get_search_mock(query='ip', pageno=2) - 
store.call('post_search', request, ctx) - self.assertFalse('127.0.0.1' in ctx['result_container'].answers) + search = get_search_mock(query='ip', pageno=2) + store.call('post_search', request, search) + self.assertFalse('127.0.0.1' in search.result_container.answers) # User agent test request = Mock(user_plugins=store.plugins, user_agent='Mock') request.headers.getlist.return_value = [] - ctx = get_search_mock(query='user-agent', pageno=1) - store.call('post_search', request, ctx) - self.assertTrue('Mock' in ctx['result_container'].answers) + search = get_search_mock(query='user-agent', pageno=1) + store.call('post_search', request, search) + self.assertTrue('Mock' in search.result_container.answers) - ctx = get_search_mock(query='user-agent', pageno=2) - store.call('post_search', request, ctx) - self.assertFalse('Mock' in ctx['result_container'].answers) + search = get_search_mock(query='user-agent', pageno=2) + store.call('post_search', request, search) + self.assertFalse('Mock' in search.result_container.answers) - ctx = get_search_mock(query='user-agent', pageno=1) - store.call('post_search', request, ctx) - self.assertTrue('Mock' in ctx['result_container'].answers) + search = get_search_mock(query='user-agent', pageno=1) + store.call('post_search', request, search) + self.assertTrue('Mock' in search.result_container.answers) - ctx = get_search_mock(query='user-agent', pageno=2) - store.call('post_search', request, ctx) - self.assertFalse('Mock' in ctx['result_container'].answers) + search = get_search_mock(query='user-agent', pageno=2) + store.call('post_search', request, search) + self.assertFalse('Mock' in search.result_container.answers) - ctx = get_search_mock(query='What is my User-Agent?', pageno=1) - store.call('post_search', request, ctx) - self.assertTrue('Mock' in ctx['result_container'].answers) + search = get_search_mock(query='What is my User-Agent?', pageno=1) + store.call('post_search', request, search) + self.assertTrue('Mock' in 
search.result_container.answers) - ctx = get_search_mock(query='What is my User-Agent?', pageno=2) - store.call('post_search', request, ctx) - self.assertFalse('Mock' in ctx['result_container'].answers) + search = get_search_mock(query='What is my User-Agent?', pageno=2) + store.call('post_search', request, search) + self.assertFalse('Mock' in search.result_container.answers) From 58a6c045c81a84abfb8a29368e7d5453efc5f2fa Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Wed, 2 Nov 2016 14:52:22 +0100 Subject: [PATCH 3/3] [mod] replace references request.request_data (GET or POST parameters) by request.form (based on merge of POST and GET parameters) --- searx/search.py | 12 ++++++------ searx/webapp.py | 24 +++++++----------------- 2 files changed, 13 insertions(+), 23 deletions(-) diff --git a/searx/search.py b/searx/search.py index a8de143b0..4c4a6a2f0 100644 --- a/searx/search.py +++ b/searx/search.py @@ -128,7 +128,7 @@ def make_callback(engine_name, callback, params, result_container): return process_callback -def get_search_query_from_webapp(preferences, request_data): +def get_search_query_from_webapp(preferences, form): query = None query_engines = [] query_categories = [] @@ -147,11 +147,11 @@ def get_search_query_from_webapp(preferences, request_data): query_safesearch = preferences.get_value('safesearch') # TODO better exceptions - if not request_data.get('q'): + if not form.get('q'): raise Exception('noquery') # set pagenumber - pageno_param = request_data.get('pageno', '1') + pageno_param = form.get('pageno', '1') if not pageno_param.isdigit() or int(pageno_param) < 1: pageno_param = 1 @@ -159,7 +159,7 @@ def get_search_query_from_webapp(preferences, request_data): # parse query, if tags are set, which change # the serch engine or search-language - raw_text_query = RawTextQuery(request_data['q'], disabled_engines) + raw_text_query = RawTextQuery(form['q'], disabled_engines) raw_text_query.parse_query() # set query @@ -170,7 +170,7 @@ def 
get_search_query_from_webapp(preferences, request_data): if len(raw_text_query.languages): query_lang = raw_text_query.languages[-1] - query_time_range = request_data.get('time_range') + query_time_range = form.get('time_range') query_engines = raw_text_query.engines @@ -185,7 +185,7 @@ def get_search_query_from_webapp(preferences, request_data): else: # set categories/engines load_default_categories = True - for pd_name, pd in request_data.items(): + for pd_name, pd in form.items(): if pd_name == 'categories': query_categories.extend(categ for categ in map(unicode.strip, pd.split(',')) if categ in categories) elif pd_name == 'engines': diff --git a/searx/webapp.py b/searx/webapp.py index 7c3cb7e9b..d3d5bb51e 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -364,16 +364,6 @@ def render(template_name, override_theme=None, **kwargs): @app.before_request def pre_request(): - # request.request_data - if request.method == 'POST': - request_data = request.form - elif request.method == 'GET': - request_data = request.args - else: - request_data = {} - - request.request_data = request_data - # merge GET, POST vars preferences = Preferences(themes, categories.keys(), engines, plugins) try: @@ -416,7 +406,7 @@ def index(): search_query = None result_container = None try: - search_query = get_search_query_from_webapp(request.preferences, request.request_data) + search_query = get_search_query_from_webapp(request.preferences, request.form) # search = Search(search_query) # without plugins search = SearchWithPlugins(search_query, request) result_container = search.search() @@ -428,8 +418,8 @@ def index(): results = result_container.get_ordered_results() # UI - advanced_search = request.request_data.get('advanced_search', None) - output_format = request.request_data.get('format', 'html') + advanced_search = request.form.get('advanced_search', None) + output_format = request.form.get('format', 'html') if output_format not in ['html', 'csv', 'json', 'rss']: output_format = 
'html' @@ -490,7 +480,7 @@ def index(): response_rss = render( 'opensearch_response_rss.xml', results=results, - q=request.request_data['q'], + q=request.form['q'], number_of_results=number_of_results, base_url=get_base_url() ) @@ -499,7 +489,7 @@ def index(): return render( 'results.html', results=results, - q=request.request_data['q'], + q=request.form['q'], selected_categories=search_query.categories, pageno=search_query.pageno, time_range=search_query.time_range, @@ -531,7 +521,7 @@ def autocompleter(): disabled_engines = request.preferences.engines.get_disabled() # parse query - raw_text_query = RawTextQuery(request.request_data.get('q', '').encode('utf-8'), disabled_engines) + raw_text_query = RawTextQuery(request.form.get('q', '').encode('utf-8'), disabled_engines) raw_text_query.parse_query() # check if search query is set @@ -564,7 +554,7 @@ def autocompleter(): results.append(raw_text_query.getFullQuery()) # return autocompleter results - if request.request_data.get('format') == 'x-suggestions': + if request.form.get('format') == 'x-suggestions': return Response(json.dumps([raw_text_query.query, results]), mimetype='application/json')