From 995ba2f40643bd01134902eca91cacf0bab1ebdc Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Thu, 17 Dec 2020 13:51:57 +0100 Subject: [PATCH 1/2] [mod] searx.search.SearchQuery: remove categories parameter The categories parameter is useless in the constructor: it is always the categories from the EngineRef. The categories becomes a property. --- searx/search/__init__.py | 21 +++++++++++---------- searx/webadapter.py | 20 +++++++------------- tests/unit/test_search.py | 22 +++++++++++----------- tests/unit/test_standalone_searx.py | 2 +- 4 files changed, 30 insertions(+), 35 deletions(-) diff --git a/searx/search/__init__.py b/searx/search/__init__.py index 99459c7a3..e9e4f5640 100644 --- a/searx/search/__init__.py +++ b/searx/search/__init__.py @@ -69,22 +69,20 @@ class EngineRef: class SearchQuery: """container for all the search parameters (query, language, etc...)""" - __slots__ = 'query', 'engineref_list', 'categories', 'lang', 'safesearch', 'pageno', 'time_range',\ + __slots__ = 'query', 'engineref_list', 'lang', 'safesearch', 'pageno', 'time_range',\ 'timeout_limit', 'external_bang' def __init__(self, query: str, engineref_list: typing.List[EngineRef], - categories: typing.List[str], - lang: str, - safesearch: int, - pageno: int, - time_range: typing.Optional[str], + lang: str='all', + safesearch: int=0, + pageno: int=1, + time_range: typing.Optional[str]=None, timeout_limit: typing.Optional[float]=None, external_bang: typing.Optional[str]=None): self.query = query self.engineref_list = engineref_list - self.categories = categories self.lang = lang self.safesearch = safesearch self.pageno = pageno @@ -92,15 +90,18 @@ class SearchQuery: self.timeout_limit = timeout_limit self.external_bang = external_bang + @property + def categories(self): + return list(set(map(lambda engineref: engineref.category, self.engineref_list))) + def __repr__(self): - return "SearchQuery({!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r})".\ - format(self.query, 
self.engineref_list, self.categories, self.lang, self.safesearch, + return "SearchQuery({!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r})".\ + format(self.query, self.engineref_list, self.lang, self.safesearch, self.pageno, self.time_range, self.timeout_limit, self.external_bang) def __eq__(self, other): return self.query == other.query\ and self.engineref_list == other.engineref_list\ - and self.categories == self.categories\ and self.lang == other.lang\ and self.safesearch == other.safesearch\ and self.pageno == other.pageno\ diff --git a/searx/webadapter.py b/searx/webadapter.py index 7c71b7262..7bec42651 100644 --- a/searx/webadapter.py +++ b/searx/webadapter.py @@ -117,8 +117,7 @@ def parse_specific(raw_text_query: RawTextQuery) -> Tuple[List[EngineRef], List[ additional_categories.add('none') else: additional_categories.add(engineref.category) - query_categories = list(additional_categories) - return query_engineref_list, query_categories + return query_engineref_list def parse_category_form(query_categories: List[str], name: str, value: str) -> None: @@ -171,8 +170,7 @@ def get_engineref_from_category_list(category_list: List[str], disabled_engines: return result -def parse_generic(preferences: Preferences, form: Dict[str, str], disabled_engines: List[str])\ - -> Tuple[List[EngineRef], List[str]]: +def parse_generic(preferences: Preferences, form: Dict[str, str], disabled_engines: List[str]) -> List[EngineRef]: query_engineref_list = [] query_categories = [] @@ -195,8 +193,6 @@ def parse_generic(preferences: Preferences, form: Dict[str, str], disabled_engin if query_categories: # add engines from referenced by the "categories" parameter and the "category_*"" parameters query_engineref_list.extend(get_engineref_from_category_list(query_categories, disabled_engines)) - # get categories from the query_engineref_list - query_categories = list(set(engine.category for engine in query_engineref_list)) else: # no "engines" parameters in the form if not 
query_categories: @@ -208,7 +204,7 @@ def parse_generic(preferences: Preferences, form: Dict[str, str], disabled_engin # declared under the specific categories query_engineref_list.extend(get_engineref_from_category_list(query_categories, disabled_engines)) - return query_engineref_list, query_categories + return query_engineref_list def get_search_query_from_webapp(preferences: Preferences, form: Dict[str, str])\ @@ -236,20 +232,18 @@ def get_search_query_from_webapp(preferences: Preferences, form: Dict[str, str]) if not is_locked('categories') and raw_text_query.enginerefs and raw_text_query.specific: # if engines are calculated from query, # set categories by using that informations - query_engineref_list, query_categories = parse_specific(raw_text_query) + query_engineref_list = parse_specific(raw_text_query) else: # otherwise, using defined categories to # calculate which engines should be used - query_engineref_list, query_categories = parse_generic(preferences, form, disabled_engines) + query_engineref_list = parse_generic(preferences, form, disabled_engines) query_engineref_list = deduplicate_engineref_list(query_engineref_list) query_engineref_list, query_engineref_list_unknown, query_engineref_list_notoken =\ validate_engineref_list(query_engineref_list, preferences) - return (SearchQuery(query, query_engineref_list, query_categories, - query_lang, query_safesearch, query_pageno, - query_time_range, query_timeout, - external_bang=external_bang), + return (SearchQuery(query, query_engineref_list, query_lang, query_safesearch, query_pageno, + query_time_range, query_timeout, external_bang=external_bang), raw_text_query, query_engineref_list_unknown, query_engineref_list_notoken) diff --git a/tests/unit/test_search.py b/tests/unit/test_search.py index 9938a43a5..08b41e062 100644 --- a/tests/unit/test_search.py +++ b/tests/unit/test_search.py @@ -23,13 +23,13 @@ TEST_ENGINES = [ class SearchQueryTestCase(SearxTestCase): def test_repr(self): - s = 
SearchQuery('test', [EngineRef('bing', 'general', False)], ['general'], 'all', 0, 1, '1', 5.0, 'g') + s = SearchQuery('test', [EngineRef('bing', 'general', False)], 'all', 0, 1, '1', 5.0, 'g') self.assertEqual(repr(s), - "SearchQuery('test', [EngineRef('bing', 'general', False)], ['general'], 'all', 0, 1, '1', 5.0, 'g')") # noqa + "SearchQuery('test', [EngineRef('bing', 'general', False)], 'all', 0, 1, '1', 5.0, 'g')") # noqa def test_eq(self): - s = SearchQuery('test', [EngineRef('bing', 'general', False)], ['general'], 'all', 0, 1, None, None, None) - t = SearchQuery('test', [EngineRef('google', 'general', False)], ['general'], 'all', 0, 1, None, None, None) + s = SearchQuery('test', [EngineRef('bing', 'general', False)], 'all', 0, 1, None, None, None) + t = SearchQuery('test', [EngineRef('google', 'general', False)], 'all', 0, 1, None, None, None) self.assertEqual(s, s) self.assertNotEqual(s, t) @@ -43,7 +43,7 @@ class SearchTestCase(SearxTestCase): def test_timeout_simple(self): searx.search.max_request_timeout = None search_query = SearchQuery('test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], - ['general'], 'en-US', SAFESEARCH, PAGENO, None, None) + 'en-US', SAFESEARCH, PAGENO, None, None) search = searx.search.Search(search_query) search.search() self.assertEqual(search.actual_timeout, 3.0) @@ -51,7 +51,7 @@ class SearchTestCase(SearxTestCase): def test_timeout_query_above_default_nomax(self): searx.search.max_request_timeout = None search_query = SearchQuery('test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], - ['general'], 'en-US', SAFESEARCH, PAGENO, None, 5.0) + 'en-US', SAFESEARCH, PAGENO, None, 5.0) search = searx.search.Search(search_query) search.search() self.assertEqual(search.actual_timeout, 3.0) @@ -59,7 +59,7 @@ class SearchTestCase(SearxTestCase): def test_timeout_query_below_default_nomax(self): searx.search.max_request_timeout = None search_query = SearchQuery('test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], - ['general'], 'en-US', 
SAFESEARCH, PAGENO, None, 1.0) + 'en-US', SAFESEARCH, PAGENO, None, 1.0) search = searx.search.Search(search_query) search.search() self.assertEqual(search.actual_timeout, 1.0) @@ -67,7 +67,7 @@ class SearchTestCase(SearxTestCase): def test_timeout_query_below_max(self): searx.search.max_request_timeout = 10.0 search_query = SearchQuery('test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], - ['general'], 'en-US', SAFESEARCH, PAGENO, None, 5.0) + 'en-US', SAFESEARCH, PAGENO, None, 5.0) search = searx.search.Search(search_query) search.search() self.assertEqual(search.actual_timeout, 5.0) @@ -75,7 +75,7 @@ class SearchTestCase(SearxTestCase): def test_timeout_query_above_max(self): searx.search.max_request_timeout = 10.0 search_query = SearchQuery('test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], - ['general'], 'en-US', SAFESEARCH, PAGENO, None, 15.0) + 'en-US', SAFESEARCH, PAGENO, None, 15.0) search = searx.search.Search(search_query) search.search() self.assertEqual(search.actual_timeout, 10.0) @@ -83,7 +83,7 @@ class SearchTestCase(SearxTestCase): def test_external_bang(self): search_query = SearchQuery('yes yes', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], - ['general'], 'en-US', SAFESEARCH, PAGENO, None, None, + 'en-US', SAFESEARCH, PAGENO, None, None, external_bang="yt") search = searx.search.Search(search_query) results = search.search() @@ -92,7 +92,7 @@ class SearchTestCase(SearxTestCase): search_query = SearchQuery('youtube never gonna give you up', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], - ['general'], 'en-US', SAFESEARCH, PAGENO, None, None) + 'en-US', SAFESEARCH, PAGENO, None, None) search = searx.search.Search(search_query) results = search.search() diff --git a/tests/unit/test_standalone_searx.py b/tests/unit/test_standalone_searx.py index 06d70246a..d16bfe809 100644 --- a/tests/unit/test_standalone_searx.py +++ b/tests/unit/test_standalone_searx.py @@ -102,7 +102,7 @@ class StandaloneSearx(SearxTestCase): search_q = sas.get_search_query(args) 
self.assertTrue(search_q) self.assertEqual(search_q, SearchQuery('rain', [EngineRef('engine1', 'general', False)], - ['general'], 'all', 0, 1, None, None, None)) + 'all', 0, 1, None, None, None)) def test_no_parsed_url(self): """test no_parsed_url func""" From eda8934f1548ed228d00a0196b61d35585ea11bc Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Fri, 18 Dec 2020 12:19:14 +0100 Subject: [PATCH 2/2] [mod] searx.search.EngineRef: remove from_bang parameter from_bang is True when the user query contains a bang. In this case the category is also set to 'none'. The only usage of from_bang was in searx.webadapter.parse_specific: if from_bang is True, then the EngineRef category is ignored and forced to 'none'. This commit also removes the searx.webadapter.parse_specific function. --- searx/query.py | 4 ++-- searx/search/__init__.py | 9 ++++----- searx/webadapter.py | 13 +------------ tests/unit/test_search.py | 8 ++++---- tests/unit/test_standalone_searx.py | 2 +- 5 files changed, 12 insertions(+), 24 deletions(-) diff --git a/searx/query.py b/searx/query.py index 422cd57b5..e61e24f2c 100644 --- a/searx/query.py +++ b/searx/query.py @@ -125,12 +125,12 @@ class RawTextQuery: searx_query_part = True engine_name = engine_shortcuts[prefix] if engine_name in engines: - self.enginerefs.append(EngineRef(engine_name, 'none', True)) + self.enginerefs.append(EngineRef(engine_name, 'none')) # check if prefix is equal with engine name elif prefix in engines: searx_query_part = True - self.enginerefs.append(EngineRef(prefix, 'none', True)) + self.enginerefs.append(EngineRef(prefix, 'none')) # check if prefix is equal with categorie name elif prefix in categories: diff --git a/searx/search/__init__.py b/searx/search/__init__.py index e9e4f5640..0d45f0b7c 100644 --- a/searx/search/__init__.py +++ b/searx/search/__init__.py @@ -52,18 +52,17 @@ def initialize(settings_engines=None): class EngineRef: - __slots__ = 'name', 'category', 'from_bang' + __slots__ = 'name', 'category' - def 
__init__(self, name: str, category: str, from_bang: bool=False): + def __init__(self, name: str, category: str): self.name = name self.category = category - self.from_bang = from_bang def __repr__(self): - return "EngineRef({!r}, {!r}, {!r})".format(self.name, self.category, self.from_bang) + return "EngineRef({!r}, {!r})".format(self.name, self.category) def __eq__(self, other): - return self.name == other.name and self.category == other.category and self.from_bang == other.from_bang + return self.name == other.name and self.category == other.category class SearchQuery: diff --git a/searx/webadapter.py b/searx/webadapter.py index 7bec42651..0ad82c62c 100644 --- a/searx/webadapter.py +++ b/searx/webadapter.py @@ -109,17 +109,6 @@ def parse_timeout(form: Dict[str, str], raw_text_query: RawTextQuery) -> Optiona raise SearxParameterException('timeout_limit', timeout_limit) -def parse_specific(raw_text_query: RawTextQuery) -> Tuple[List[EngineRef], List[str]]: - query_engineref_list = raw_text_query.enginerefs - additional_categories = set() - for engineref in raw_text_query.enginerefs: - if engineref.from_bang: - additional_categories.add('none') - else: - additional_categories.add(engineref.category) - return query_engineref_list - - def parse_category_form(query_categories: List[str], name: str, value: str) -> None: if name == 'categories': query_categories.extend(categ for categ in map(str.strip, value.split(',')) if categ in categories) @@ -232,7 +221,7 @@ def get_search_query_from_webapp(preferences: Preferences, form: Dict[str, str]) if not is_locked('categories') and raw_text_query.enginerefs and raw_text_query.specific: # if engines are calculated from query, # set categories by using that informations - query_engineref_list = parse_specific(raw_text_query) + query_engineref_list = raw_text_query.enginerefs else: # otherwise, using defined categories to # calculate which engines should be used diff --git a/tests/unit/test_search.py b/tests/unit/test_search.py 
index 08b41e062..72732032e 100644 --- a/tests/unit/test_search.py +++ b/tests/unit/test_search.py @@ -23,13 +23,13 @@ TEST_ENGINES = [ class SearchQueryTestCase(SearxTestCase): def test_repr(self): - s = SearchQuery('test', [EngineRef('bing', 'general', False)], 'all', 0, 1, '1', 5.0, 'g') + s = SearchQuery('test', [EngineRef('bing', 'general')], 'all', 0, 1, '1', 5.0, 'g') self.assertEqual(repr(s), - "SearchQuery('test', [EngineRef('bing', 'general', False)], 'all', 0, 1, '1', 5.0, 'g')") # noqa + "SearchQuery('test', [EngineRef('bing', 'general')], 'all', 0, 1, '1', 5.0, 'g')") # noqa def test_eq(self): - s = SearchQuery('test', [EngineRef('bing', 'general', False)], 'all', 0, 1, None, None, None) - t = SearchQuery('test', [EngineRef('google', 'general', False)], 'all', 0, 1, None, None, None) + s = SearchQuery('test', [EngineRef('bing', 'general')], 'all', 0, 1, None, None, None) + t = SearchQuery('test', [EngineRef('google', 'general')], 'all', 0, 1, None, None, None) self.assertEqual(s, s) self.assertNotEqual(s, t) diff --git a/tests/unit/test_standalone_searx.py b/tests/unit/test_standalone_searx.py index d16bfe809..6cc230e6c 100644 --- a/tests/unit/test_standalone_searx.py +++ b/tests/unit/test_standalone_searx.py @@ -101,7 +101,7 @@ class StandaloneSearx(SearxTestCase): args = sas.parse_argument(['rain', ]) search_q = sas.get_search_query(args) self.assertTrue(search_q) - self.assertEqual(search_q, SearchQuery('rain', [EngineRef('engine1', 'general', False)], + self.assertEqual(search_q, SearchQuery('rain', [EngineRef('engine1', 'general')], 'all', 0, 1, None, None, None)) def test_no_parsed_url(self):