forked from zaclys/searxng
[fix] external bangs: don't overwrite Bangs in data trie
Bangs with a `*` suffix (e.g. `!!d*`) overwrite Bangs with the same prefix (e.g. `!!d`) [1]. This can be avoided when a non-printable character is used to tag a LEAF_KEY. [1] https://github.com/searxng/searxng/pull/740#issuecomment-1010411888 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
6d7e86eece
commit
7cdd31440e
|
@ -2,6 +2,8 @@
|
||||||
|
|
||||||
from searx.data import EXTERNAL_BANGS
|
from searx.data import EXTERNAL_BANGS
|
||||||
|
|
||||||
|
LEAF_KEY = chr(16)
|
||||||
|
|
||||||
|
|
||||||
def get_node(external_bangs_db, bang):
|
def get_node(external_bangs_db, bang):
|
||||||
node = external_bangs_db['trie']
|
node = external_bangs_db['trie']
|
||||||
|
@ -26,8 +28,8 @@ def get_bang_definition_and_ac(external_bangs_db, bang):
|
||||||
if k.startswith(after):
|
if k.startswith(after):
|
||||||
bang_ac_list.append(before + k)
|
bang_ac_list.append(before + k)
|
||||||
elif isinstance(node, dict):
|
elif isinstance(node, dict):
|
||||||
bang_definition = node.get('*')
|
bang_definition = node.get(LEAF_KEY)
|
||||||
bang_ac_list = [before + k for k in node.keys() if k != '*']
|
bang_ac_list = [before + k for k in node.keys() if k != LEAF_KEY]
|
||||||
elif isinstance(node, str):
|
elif isinstance(node, str):
|
||||||
bang_definition = node
|
bang_definition = node
|
||||||
bang_ac_list = []
|
bang_ac_list = []
|
||||||
|
|
|
@ -25,7 +25,7 @@ from os.path import join
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
from searx import searx_dir # pylint: disable=E0401 C0413
|
from searx import searx_dir # pylint: disable=E0401 C0413
|
||||||
|
from searx.external_bang import LEAF_KEY
|
||||||
|
|
||||||
# from https://duckduckgo.com/newbang
|
# from https://duckduckgo.com/newbang
|
||||||
URL_BV1 = 'https://duckduckgo.com/bv1.js'
|
URL_BV1 = 'https://duckduckgo.com/bv1.js'
|
||||||
|
@ -51,18 +51,22 @@ def fetch_ddg_bangs(url):
|
||||||
def merge_when_no_leaf(node):
|
def merge_when_no_leaf(node):
|
||||||
"""Minimize the number of nodes
|
"""Minimize the number of nodes
|
||||||
|
|
||||||
A -> B -> C
|
``A -> B -> C``
|
||||||
B is child of A
|
|
||||||
C is child of B
|
|
||||||
|
|
||||||
If there are no C equals to '*', then each C are merged into A
|
- ``B`` is child of ``A``
|
||||||
|
- ``C`` is child of ``B``
|
||||||
|
|
||||||
|
If no ``C`` is equal to ``<LEAF_KEY>``, then each ``C`` is merged
|
||||||
|
into ``A``. For example (5 nodes)::
|
||||||
|
|
||||||
|
d -> d -> g -> <LEAF_KEY> (ddg)
|
||||||
|
-> i -> g -> <LEAF_KEY> (dig)
|
||||||
|
|
||||||
|
becomes (3 nodes)::
|
||||||
|
|
||||||
|
d -> dg -> <LEAF_KEY>
|
||||||
|
-> ig -> <LEAF_KEY>
|
||||||
|
|
||||||
For example:
|
|
||||||
d -> d -> g -> * (ddg*)
|
|
||||||
-> i -> g -> * (dig*)
|
|
||||||
becomes
|
|
||||||
d -> dg -> *
|
|
||||||
-> ig -> *
|
|
||||||
"""
|
"""
|
||||||
restart = False
|
restart = False
|
||||||
if not isinstance(node, dict):
|
if not isinstance(node, dict):
|
||||||
|
@ -72,12 +76,12 @@ def merge_when_no_leaf(node):
|
||||||
keys = list(node.keys())
|
keys = list(node.keys())
|
||||||
|
|
||||||
for key in keys:
|
for key in keys:
|
||||||
if key == '*':
|
if key == LEAF_KEY:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
value = node[key]
|
value = node[key]
|
||||||
value_keys = list(value.keys())
|
value_keys = list(value.keys())
|
||||||
if '*' not in value_keys:
|
if LEAF_KEY not in value_keys:
|
||||||
for value_key in value_keys:
|
for value_key in value_keys:
|
||||||
node[key + value_key] = value[value_key]
|
node[key + value_key] = value[value_key]
|
||||||
merge_when_no_leaf(node[key + value_key])
|
merge_when_no_leaf(node[key + value_key])
|
||||||
|
@ -94,8 +98,8 @@ def optimize_leaf(parent, parent_key, node):
|
||||||
if not isinstance(node, dict):
|
if not isinstance(node, dict):
|
||||||
return
|
return
|
||||||
|
|
||||||
if len(node) == 1 and '*' in node and parent is not None:
|
if len(node) == 1 and LEAF_KEY in node and parent is not None:
|
||||||
parent[parent_key] = node['*']
|
parent[parent_key] = node[LEAF_KEY]
|
||||||
else:
|
else:
|
||||||
for key, value in node.items():
|
for key, value in node.items():
|
||||||
optimize_leaf(node, key, value)
|
optimize_leaf(node, key, value)
|
||||||
|
@ -138,7 +142,7 @@ def parse_ddg_bangs(ddg_bangs):
|
||||||
t = bang_trie
|
t = bang_trie
|
||||||
for bang_letter in bang:
|
for bang_letter in bang:
|
||||||
t = t.setdefault(bang_letter, {})
|
t = t.setdefault(bang_letter, {})
|
||||||
t = t.setdefault('*', bang_def_output)
|
t = t.setdefault(LEAF_KEY, bang_def_output)
|
||||||
|
|
||||||
# optimize the trie
|
# optimize the trie
|
||||||
merge_when_no_leaf(bang_trie)
|
merge_when_no_leaf(bang_trie)
|
||||||
|
|
|
@ -1,4 +1,10 @@
|
||||||
from searx.external_bang import get_node, resolve_bang_definition, get_bang_url, get_bang_definition_and_autocomplete
|
from searx.external_bang import (
|
||||||
|
get_node,
|
||||||
|
resolve_bang_definition,
|
||||||
|
get_bang_url,
|
||||||
|
get_bang_definition_and_autocomplete,
|
||||||
|
LEAF_KEY,
|
||||||
|
)
|
||||||
from searx.search import SearchQuery, EngineRef
|
from searx.search import SearchQuery, EngineRef
|
||||||
from tests import SearxTestCase
|
from tests import SearxTestCase
|
||||||
|
|
||||||
|
@ -7,12 +13,12 @@ TEST_DB = {
|
||||||
'trie': {
|
'trie': {
|
||||||
'exam': {
|
'exam': {
|
||||||
'ple': '//example.com/' + chr(2) + chr(1) + '0',
|
'ple': '//example.com/' + chr(2) + chr(1) + '0',
|
||||||
'*': '//wikipedia.org/wiki/' + chr(2) + chr(1) + '0',
|
LEAF_KEY: '//wikipedia.org/wiki/' + chr(2) + chr(1) + '0',
|
||||||
},
|
},
|
||||||
'sea': {
|
'sea': {
|
||||||
'*': 'sea' + chr(2) + chr(1) + '0',
|
LEAF_KEY: 'sea' + chr(2) + chr(1) + '0',
|
||||||
'rch': {
|
'rch': {
|
||||||
'*': 'search' + chr(2) + chr(1) + '0',
|
LEAF_KEY: 'search' + chr(2) + chr(1) + '0',
|
||||||
'ing': 'searching' + chr(2) + chr(1) + '0',
|
'ing': 'searching' + chr(2) + chr(1) + '0',
|
||||||
},
|
},
|
||||||
's': {
|
's': {
|
||||||
|
@ -31,7 +37,7 @@ class TestGetNode(SearxTestCase):
|
||||||
'trie': {
|
'trie': {
|
||||||
'exam': {
|
'exam': {
|
||||||
'ple': 'test',
|
'ple': 'test',
|
||||||
'*': 'not used',
|
LEAF_KEY: 'not used',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -71,7 +77,7 @@ class TestResolveBangDefinition(SearxTestCase):
|
||||||
class TestGetBangDefinitionAndAutocomplete(SearxTestCase):
|
class TestGetBangDefinitionAndAutocomplete(SearxTestCase):
|
||||||
def test_found(self):
|
def test_found(self):
|
||||||
bang_definition, new_autocomplete = get_bang_definition_and_autocomplete('exam', external_bangs_db=TEST_DB)
|
bang_definition, new_autocomplete = get_bang_definition_and_autocomplete('exam', external_bangs_db=TEST_DB)
|
||||||
self.assertEqual(bang_definition, TEST_DB['trie']['exam']['*'])
|
self.assertEqual(bang_definition, TEST_DB['trie']['exam'][LEAF_KEY])
|
||||||
self.assertEqual(new_autocomplete, ['example'])
|
self.assertEqual(new_autocomplete, ['example'])
|
||||||
|
|
||||||
def test_found_optimized(self):
|
def test_found_optimized(self):
|
||||||
|
@ -86,7 +92,7 @@ class TestGetBangDefinitionAndAutocomplete(SearxTestCase):
|
||||||
|
|
||||||
def test_partial2(self):
|
def test_partial2(self):
|
||||||
bang_definition, new_autocomplete = get_bang_definition_and_autocomplete('sea', external_bangs_db=TEST_DB)
|
bang_definition, new_autocomplete = get_bang_definition_and_autocomplete('sea', external_bangs_db=TEST_DB)
|
||||||
self.assertEqual(bang_definition, TEST_DB['trie']['sea']['*'])
|
self.assertEqual(bang_definition, TEST_DB['trie']['sea'][LEAF_KEY])
|
||||||
self.assertEqual(new_autocomplete, ['search', 'searching', 'seascapes', 'season'])
|
self.assertEqual(new_autocomplete, ['search', 'searching', 'seascapes', 'season'])
|
||||||
|
|
||||||
def test_error(self):
|
def test_error(self):
|
||||||
|
|
Loading…
Reference in New Issue