forked from zaclys/searxng
		
	[fix] external bangs: don't overwrite Bangs in data trie
Bangs with a `*` suffix (e.g. `!!d*`) overwrite Bangs with the same prefix (e.g. `!!d`) [1]. This can be avoided when a non-printable character is used to tag a LEAF_KEY. [1] https://github.com/searxng/searxng/pull/740#issuecomment-1010411888 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
		
							parent
							
								
									6d7e86eece
								
							
						
					
					
						commit
						7cdd31440e
					
				
					 3 changed files with 37 additions and 25 deletions
				
			
		|  | @ -2,6 +2,8 @@ | |||
| 
 | ||||
| from searx.data import EXTERNAL_BANGS | ||||
| 
 | ||||
| LEAF_KEY = chr(16) | ||||
| 
 | ||||
| 
 | ||||
| def get_node(external_bangs_db, bang): | ||||
|     node = external_bangs_db['trie'] | ||||
|  | @ -26,8 +28,8 @@ def get_bang_definition_and_ac(external_bangs_db, bang): | |||
|             if k.startswith(after): | ||||
|                 bang_ac_list.append(before + k) | ||||
|     elif isinstance(node, dict): | ||||
|         bang_definition = node.get('*') | ||||
|         bang_ac_list = [before + k for k in node.keys() if k != '*'] | ||||
|         bang_definition = node.get(LEAF_KEY) | ||||
|         bang_ac_list = [before + k for k in node.keys() if k != LEAF_KEY] | ||||
|     elif isinstance(node, str): | ||||
|         bang_definition = node | ||||
|         bang_ac_list = [] | ||||
|  |  | |||
|  | @ -25,7 +25,7 @@ from os.path import join | |||
| import httpx | ||||
| 
 | ||||
| from searx import searx_dir  # pylint: disable=E0401 C0413 | ||||
| 
 | ||||
| from searx.external_bang import LEAF_KEY | ||||
| 
 | ||||
| # from https://duckduckgo.com/newbang | ||||
| URL_BV1 = 'https://duckduckgo.com/bv1.js' | ||||
|  | @ -51,18 +51,22 @@ def fetch_ddg_bangs(url): | |||
| def merge_when_no_leaf(node): | ||||
|     """Minimize the number of nodes | ||||
| 
 | ||||
|     A -> B -> C | ||||
|     B is child of A | ||||
|     C is child of B | ||||
|     ``A -> B -> C`` | ||||
| 
 | ||||
|     If there are no C equals to '*', then each C are merged into A | ||||
|     - ``B`` is child of ``A`` | ||||
|     - ``C`` is child of ``B`` | ||||
| 
 | ||||
|     If no ``C`` is equal to ``<LEAF_KEY>``, then every ``C`` is merged | ||||
|     into ``A``.  For example (5 nodes):: | ||||
| 
 | ||||
|       d -> d -> g -> <LEAF_KEY> (ddg) | ||||
|         -> i -> g -> <LEAF_KEY> (dig) | ||||
| 
 | ||||
|     becomes (3 nodes):: | ||||
| 
 | ||||
|       d -> dg -> <LEAF_KEY> | ||||
|         -> ig -> <LEAF_KEY> | ||||
| 
 | ||||
|     For example: | ||||
|       d -> d -> g -> * (ddg*) | ||||
|         -> i -> g -> * (dig*) | ||||
|     becomes | ||||
|       d -> dg -> * | ||||
|         -> ig -> * | ||||
|     """ | ||||
|     restart = False | ||||
|     if not isinstance(node, dict): | ||||
|  | @ -72,12 +76,12 @@ def merge_when_no_leaf(node): | |||
|     keys = list(node.keys()) | ||||
| 
 | ||||
|     for key in keys: | ||||
|         if key == '*': | ||||
|         if key == LEAF_KEY: | ||||
|             continue | ||||
| 
 | ||||
|         value = node[key] | ||||
|         value_keys = list(value.keys()) | ||||
|         if '*' not in value_keys: | ||||
|         if LEAF_KEY not in value_keys: | ||||
|             for value_key in value_keys: | ||||
|                 node[key + value_key] = value[value_key] | ||||
|                 merge_when_no_leaf(node[key + value_key]) | ||||
|  | @ -94,8 +98,8 @@ def optimize_leaf(parent, parent_key, node): | |||
|     if not isinstance(node, dict): | ||||
|         return | ||||
| 
 | ||||
|     if len(node) == 1 and '*' in node and parent is not None: | ||||
|         parent[parent_key] = node['*'] | ||||
|     if len(node) == 1 and LEAF_KEY in node and parent is not None: | ||||
|         parent[parent_key] = node[LEAF_KEY] | ||||
|     else: | ||||
|         for key, value in node.items(): | ||||
|             optimize_leaf(node, key, value) | ||||
|  | @ -138,7 +142,7 @@ def parse_ddg_bangs(ddg_bangs): | |||
|         t = bang_trie | ||||
|         for bang_letter in bang: | ||||
|             t = t.setdefault(bang_letter, {}) | ||||
|         t = t.setdefault('*', bang_def_output) | ||||
|         t = t.setdefault(LEAF_KEY, bang_def_output) | ||||
| 
 | ||||
|     # optimize the trie | ||||
|     merge_when_no_leaf(bang_trie) | ||||
|  |  | |||
|  | @ -1,4 +1,10 @@ | |||
| from searx.external_bang import get_node, resolve_bang_definition, get_bang_url, get_bang_definition_and_autocomplete | ||||
| from searx.external_bang import ( | ||||
|     get_node, | ||||
|     resolve_bang_definition, | ||||
|     get_bang_url, | ||||
|     get_bang_definition_and_autocomplete, | ||||
|     LEAF_KEY, | ||||
| ) | ||||
| from searx.search import SearchQuery, EngineRef | ||||
| from tests import SearxTestCase | ||||
| 
 | ||||
|  | @ -7,12 +13,12 @@ TEST_DB = { | |||
|     'trie': { | ||||
|         'exam': { | ||||
|             'ple': '//example.com/' + chr(2) + chr(1) + '0', | ||||
|             '*': '//wikipedia.org/wiki/' + chr(2) + chr(1) + '0', | ||||
|             LEAF_KEY: '//wikipedia.org/wiki/' + chr(2) + chr(1) + '0', | ||||
|         }, | ||||
|         'sea': { | ||||
|             '*': 'sea' + chr(2) + chr(1) + '0', | ||||
|             LEAF_KEY: 'sea' + chr(2) + chr(1) + '0', | ||||
|             'rch': { | ||||
|                 '*': 'search' + chr(2) + chr(1) + '0', | ||||
|                 LEAF_KEY: 'search' + chr(2) + chr(1) + '0', | ||||
|                 'ing': 'searching' + chr(2) + chr(1) + '0', | ||||
|             }, | ||||
|             's': { | ||||
|  | @ -31,7 +37,7 @@ class TestGetNode(SearxTestCase): | |||
|         'trie': { | ||||
|             'exam': { | ||||
|                 'ple': 'test', | ||||
|                 '*': 'not used', | ||||
|                 LEAF_KEY: 'not used', | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | @ -71,7 +77,7 @@ class TestResolveBangDefinition(SearxTestCase): | |||
| class TestGetBangDefinitionAndAutocomplete(SearxTestCase): | ||||
|     def test_found(self): | ||||
|         bang_definition, new_autocomplete = get_bang_definition_and_autocomplete('exam', external_bangs_db=TEST_DB) | ||||
|         self.assertEqual(bang_definition, TEST_DB['trie']['exam']['*']) | ||||
|         self.assertEqual(bang_definition, TEST_DB['trie']['exam'][LEAF_KEY]) | ||||
|         self.assertEqual(new_autocomplete, ['example']) | ||||
| 
 | ||||
|     def test_found_optimized(self): | ||||
|  | @ -86,7 +92,7 @@ class TestGetBangDefinitionAndAutocomplete(SearxTestCase): | |||
| 
 | ||||
|     def test_partial2(self): | ||||
|         bang_definition, new_autocomplete = get_bang_definition_and_autocomplete('sea', external_bangs_db=TEST_DB) | ||||
|         self.assertEqual(bang_definition, TEST_DB['trie']['sea']['*']) | ||||
|         self.assertEqual(bang_definition, TEST_DB['trie']['sea'][LEAF_KEY]) | ||||
|         self.assertEqual(new_autocomplete, ['search', 'searching', 'seascapes', 'season']) | ||||
| 
 | ||||
|     def test_error(self): | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Markus Heiser
						Markus Heiser