diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index 558531880..2f64ddf27 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -72,6 +72,7 @@ from urllib.parse import urlencode from lxml import html from searx.utils import extract_text, extract_url, eval_xpath, eval_xpath_list from searx.network import raise_for_httperror +from searx import logger search_url = None """ @@ -208,11 +209,14 @@ safe_search_map = {0: '&filter=none', 1: '&filter=moderate', 2: '&filter=strict' ''' +categories = [] +'''engine dependent config''' + def request(query, params): '''Build request parameters (see :ref:`engine request`).''' lang = lang_all - if params['language'] != 'all': + if params.get('language', 'all') != 'all': lang = params['language'][:2] time_range = '' @@ -221,13 +225,13 @@ def request(query, params): time_range = time_range_url.format(time_range_val=time_range_val) safe_search = '' - if params['safesearch']: + if 'safesearch' in params: safe_search = safe_search_map[params['safesearch']] fargs = { 'query': urlencode({'q': query})[2:], 'lang': lang, - 'pageno': (params['pageno'] - 1) * page_size + first_page_num, + 'pageno': (params.get('pageno', 1) - 1) * page_size + first_page_num, 'time_range': time_range, 'safe_search': safe_search, } diff --git a/tests/unit/engines/__init__.py b/tests/unit/engines/__init__.py new file mode 100644 index 000000000..9ed59c825 --- /dev/null +++ b/tests/unit/engines/__init__.py @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# pylint: disable=missing-module-docstring diff --git a/tests/unit/engines/test_command.py b/tests/unit/engines/test_command.py index a7d2d2d56..2123ab168 100644 --- a/tests/unit/engines/test_command.py +++ b/tests/unit/engines/test_command.py @@ -21,7 +21,7 @@ from searx.engines import command as command_engine from tests import SearxTestCase -class TestCommandEngine(SearxTestCase): +class TestCommandEngine(SearxTestCase): # pylint: disable=missing-class-docstring def test_basic_seq_command_engine(self): ls_engine = command_engine ls_engine.command = ['seq', '{{QUERY}}'] @@ -33,10 +33,10 @@ class TestCommandEngine(SearxTestCase): {'number': '4', 'template': 'key-value.html'}, {'number': '5', 'template': 'key-value.html'}, ] - results = ls_engine.search('5'.encode('utf-8'), {'pageno': 1}) + results = ls_engine.search('5', {'pageno': 1}) self.assertEqual(results, expected_results) - def test_delimiter_parsing_command_engine(self): + def test_delimiter_parsing(self): searx_logs = '''DEBUG:searx.webapp:static directory is /home/n/p/searx/searx/static DEBUG:searx.webapp:templates directory is /home/n/p/searx/searx/templates DEBUG:searx.engines:soundcloud engine: Starting background initialization @@ -140,10 +140,10 @@ INFO:werkzeug: * Debugger PIN: 299-578-362''' ] for i in [0, 1]: - results = echo_engine.search(''.encode('utf-8'), {'pageno': i + 1}) + results = echo_engine.search('', {'pageno': i + 1}) self.assertEqual(results, expected_results_by_page[i]) - def test_regex_parsing_command_engine(self): + def test_regex_parsing(self): txt = '''commit 35f9a8c81d162a361b826bbcd4a1081a4fbe76a7 Author: Noémi Ványi Date: Tue Oct 15 11:31:33 2019 +0200 @@ -168,11 +168,12 @@ commit ''' git_log_engine.result_separator = '\n\ncommit ' git_log_engine.delimiter = {} git_log_engine.parse_regex = { - 'commit': '\w{40}', - 'author': '[\w* ]* <\w*@?\w*\.?\w*>', - 'date': 'Date: .*', - 'message': '\n\n.*$', + 'commit': r'\w{40}', + 'author': r'[\w* ]* <\w*@?\w*\.?\w*>', + 'date': r'Date: .*', + 'message': r'\n\n.*$', } + git_log_engine.init({"command": git_log_engine.command, "parse_regex": git_log_engine.parse_regex}) expected_results = [ { 'commit': '35f9a8c81d162a361b826bbcd4a1081a4fbe76a7', @@ -197,7 +198,7 @@ commit ''' }, ] - results = git_log_engine.search(''.encode('utf-8'), {'pageno': 1}) + results = git_log_engine.search('', {'pageno': 1}) self.assertEqual(results, expected_results) def test_working_dir_path_query(self): @@ -207,7 +208,7 @@ commit ''' ls_engine.delimiter = {'chars': ' ', 'keys': ['file']} ls_engine.query_type = 'path' - results = ls_engine.search('.'.encode(), {'pageno': 1}) + results = ls_engine.search('.', {'pageno': 1}) self.assertTrue(len(results) != 0) forbidden_paths = [ @@ -218,7 +219,7 @@ commit ''' '/var', ] for forbidden_path in forbidden_paths: - self.assertRaises(ValueError, ls_engine.search, '..'.encode(), {'pageno': 1}) + self.assertRaises(ValueError, ls_engine.search, forbidden_path, {'pageno': 1}) def test_enum_queries(self): echo_engine = command_engine @@ -227,7 +228,7 @@ commit ''' echo_engine.query_enum = ['i-am-allowed-to-say-this', 'and-that'] for allowed in echo_engine.query_enum: - results = echo_engine.search(allowed.encode(), {'pageno': 1}) + results = echo_engine.search(allowed, {'pageno': 1}) self.assertTrue(len(results) != 0) forbidden_queries = [ @@ -236,4 +237,4 @@ commit ''' 'prohibited', ] for forbidden in forbidden_queries: - self.assertRaises(ValueError, echo_engine.search, forbidden.encode(), {'pageno': 1}) + self.assertRaises(ValueError, echo_engine.search, forbidden, {'pageno': 1}) diff --git a/tests/unit/engines/test_xpath.py b/tests/unit/engines/test_xpath.py index 24f14127b..6c9a350ae 100644 --- a/tests/unit/engines/test_xpath.py +++ b/tests/unit/engines/test_xpath.py @@ -7,7 +7,22 @@ from searx.engines import xpath from tests import SearxTestCase -class TestXpathEngine(SearxTestCase): +class TestXpathEngine(SearxTestCase): # pylint: disable=missing-class-docstring + html = """ +
+
+ Result 1 +

Content 1

+ Cache +
+
+ Result 2 +

Content 2

+ Cache +
+
+ """ + def test_request(self): xpath.search_url = 'https://url.com/{query}' xpath.categories = [] @@ -16,7 +31,7 @@ class TestXpathEngine(SearxTestCase): dicto = defaultdict(dict) params = xpath.request(query, dicto) self.assertIn('url', params) - self.assertEquals('https://url.com/test_query', params['url']) + self.assertEqual('https://url.com/test_query', params['url']) xpath.search_url = 'https://url.com/q={query}&p={pageno}' xpath.paging = True @@ -25,7 +40,7 @@ class TestXpathEngine(SearxTestCase): dicto['pageno'] = 1 params = xpath.request(query, dicto) self.assertIn('url', params) - self.assertEquals('https://url.com/q=test_query&p=1', params['url']) + self.assertEqual('https://url.com/q=test_query&p=1', params['url']) def test_response(self): # without results_xpath @@ -38,24 +53,10 @@ class TestXpathEngine(SearxTestCase): self.assertRaises(AttributeError, xpath.response, '') self.assertRaises(AttributeError, xpath.response, '[]') - response = mock.Mock(text='') + response = mock.Mock(text='', status_code=200) self.assertEqual(xpath.response(response), []) - html = u""" -
-
- Result 1 -

Content 1

- Cache -
-
- Result 2 -

Content 2

- Cache -
-
- """ - response = mock.Mock(text=html) + response = mock.Mock(text=self.html, status_code=200) results = xpath.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 2) @@ -80,6 +81,7 @@ class TestXpathEngine(SearxTestCase): results = xpath.response(response) self.assertTrue(results[0]['is_onion']) + def test_response_results_xpath(self): # with results_xpath xpath.results_xpath = '//div[@class="search_result"]' xpath.url_xpath = './/a[@class="result"]/@href' @@ -93,10 +95,10 @@ class TestXpathEngine(SearxTestCase): self.assertRaises(AttributeError, xpath.response, '') self.assertRaises(AttributeError, xpath.response, '[]') - response = mock.Mock(text='') + response = mock.Mock(text='', status_code=200) self.assertEqual(xpath.response(response), []) - response = mock.Mock(text=html) + response = mock.Mock(text=self.html, status_code=200) results = xpath.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 2) diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index b4f5f8a0d..4c609760e 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -247,6 +247,7 @@ class TestBang(SearxTestCase): # pylint:disable=missing-class-docstring self.assertEqual(query.user_query_parts, TestBang.THE_QUERY.split(' ')) def test_specific(self): + load_engines(TEST_ENGINES) for bang in TestBang.SPECIFIC_BANGS: with self.subTest(msg="Check bang is specific", bang=bang): query_text = TestBang.THE_QUERY + ' ' + bang