mirror of
				https://github.com/searxng/searxng
				synced 2024-01-01 19:24:07 +01:00 
			
		
		
		
	Merge pull request #740 from return42/fix-bang
[fix] get_bang_url: handle ambiguous !!bangs without error
This commit is contained in:
		
						commit
						687bdef410
					
				
					 4 changed files with 2794 additions and 2778 deletions
				
			
		
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							| 
						 | 
					@ -2,6 +2,8 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from searx.data import EXTERNAL_BANGS
 | 
					from searx.data import EXTERNAL_BANGS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					LEAF_KEY = chr(16)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_node(external_bangs_db, bang):
 | 
					def get_node(external_bangs_db, bang):
 | 
				
			||||||
    node = external_bangs_db['trie']
 | 
					    node = external_bangs_db['trie']
 | 
				
			||||||
| 
						 | 
					@ -26,8 +28,8 @@ def get_bang_definition_and_ac(external_bangs_db, bang):
 | 
				
			||||||
            if k.startswith(after):
 | 
					            if k.startswith(after):
 | 
				
			||||||
                bang_ac_list.append(before + k)
 | 
					                bang_ac_list.append(before + k)
 | 
				
			||||||
    elif isinstance(node, dict):
 | 
					    elif isinstance(node, dict):
 | 
				
			||||||
        bang_definition = node.get('*')
 | 
					        bang_definition = node.get(LEAF_KEY)
 | 
				
			||||||
        bang_ac_list = [before + k for k in node.keys() if k != '*']
 | 
					        bang_ac_list = [before + k for k in node.keys() if k != LEAF_KEY]
 | 
				
			||||||
    elif isinstance(node, str):
 | 
					    elif isinstance(node, str):
 | 
				
			||||||
        bang_definition = node
 | 
					        bang_definition = node
 | 
				
			||||||
        bang_ac_list = []
 | 
					        bang_ac_list = []
 | 
				
			||||||
| 
						 | 
					@ -77,11 +79,14 @@ def get_bang_url(search_query, external_bangs_db=None):
 | 
				
			||||||
    :param search_query: This is a search_query object which contains preferences and the submitted queries.
 | 
					    :param search_query: This is a search_query object which contains preferences and the submitted queries.
 | 
				
			||||||
    :return: None if the bang was invalid, else a string of the redirect url.
 | 
					    :return: None if the bang was invalid, else a string of the redirect url.
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
 | 
					    ret_val = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if external_bangs_db is None:
 | 
					    if external_bangs_db is None:
 | 
				
			||||||
        external_bangs_db = EXTERNAL_BANGS
 | 
					        external_bangs_db = EXTERNAL_BANGS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if search_query.external_bang:
 | 
					    if search_query.external_bang:
 | 
				
			||||||
        bang_definition, _ = get_bang_definition_and_ac(external_bangs_db, search_query.external_bang)
 | 
					        bang_definition, _ = get_bang_definition_and_ac(external_bangs_db, search_query.external_bang)
 | 
				
			||||||
        return resolve_bang_definition(bang_definition, search_query.query)[0] if bang_definition else None
 | 
					        if bang_definition and isinstance(bang_definition, str):
 | 
				
			||||||
 | 
					            ret_val = resolve_bang_definition(bang_definition, search_query.query)[0]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return None
 | 
					    return ret_val
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -25,7 +25,7 @@ from os.path import join
 | 
				
			||||||
import httpx
 | 
					import httpx
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from searx import searx_dir  # pylint: disable=E0401 C0413
 | 
					from searx import searx_dir  # pylint: disable=E0401 C0413
 | 
				
			||||||
 | 
					from searx.external_bang import LEAF_KEY
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# from https://duckduckgo.com/newbang
 | 
					# from https://duckduckgo.com/newbang
 | 
				
			||||||
URL_BV1 = 'https://duckduckgo.com/bv1.js'
 | 
					URL_BV1 = 'https://duckduckgo.com/bv1.js'
 | 
				
			||||||
| 
						 | 
					@ -51,18 +51,22 @@ def fetch_ddg_bangs(url):
 | 
				
			||||||
def merge_when_no_leaf(node):
 | 
					def merge_when_no_leaf(node):
 | 
				
			||||||
    """Minimize the number of nodes
 | 
					    """Minimize the number of nodes
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    A -> B -> C
 | 
					    ``A -> B -> C``
 | 
				
			||||||
    B is child of A
 | 
					 | 
				
			||||||
    C is child of B
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    If there are no C equals to '*', then each C are merged into A
 | 
					    - ``B`` is child of ``A``
 | 
				
			||||||
 | 
					    - ``C`` is child of ``B``
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    If no ``C`` is equal to ``<LEAF_KEY>``, then every ``C`` is merged
 | 
				
			||||||
 | 
					    into ``A``.  For example (5 nodes)::
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      d -> d -> g -> <LEAF_KEY> (ddg)
 | 
				
			||||||
 | 
					        -> i -> g -> <LEAF_KEY> (dig)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    becomes (3 nodes)::
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      d -> dg -> <LEAF_KEY>
 | 
				
			||||||
 | 
					        -> ig -> <LEAF_KEY>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    For example:
 | 
					 | 
				
			||||||
      d -> d -> g -> * (ddg*)
 | 
					 | 
				
			||||||
        -> i -> g -> * (dig*)
 | 
					 | 
				
			||||||
    becomes
 | 
					 | 
				
			||||||
      d -> dg -> *
 | 
					 | 
				
			||||||
        -> ig -> *
 | 
					 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    restart = False
 | 
					    restart = False
 | 
				
			||||||
    if not isinstance(node, dict):
 | 
					    if not isinstance(node, dict):
 | 
				
			||||||
| 
						 | 
					@ -72,12 +76,12 @@ def merge_when_no_leaf(node):
 | 
				
			||||||
    keys = list(node.keys())
 | 
					    keys = list(node.keys())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for key in keys:
 | 
					    for key in keys:
 | 
				
			||||||
        if key == '*':
 | 
					        if key == LEAF_KEY:
 | 
				
			||||||
            continue
 | 
					            continue
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        value = node[key]
 | 
					        value = node[key]
 | 
				
			||||||
        value_keys = list(value.keys())
 | 
					        value_keys = list(value.keys())
 | 
				
			||||||
        if '*' not in value_keys:
 | 
					        if LEAF_KEY not in value_keys:
 | 
				
			||||||
            for value_key in value_keys:
 | 
					            for value_key in value_keys:
 | 
				
			||||||
                node[key + value_key] = value[value_key]
 | 
					                node[key + value_key] = value[value_key]
 | 
				
			||||||
                merge_when_no_leaf(node[key + value_key])
 | 
					                merge_when_no_leaf(node[key + value_key])
 | 
				
			||||||
| 
						 | 
					@ -94,8 +98,8 @@ def optimize_leaf(parent, parent_key, node):
 | 
				
			||||||
    if not isinstance(node, dict):
 | 
					    if not isinstance(node, dict):
 | 
				
			||||||
        return
 | 
					        return
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if len(node) == 1 and '*' in node and parent is not None:
 | 
					    if len(node) == 1 and LEAF_KEY in node and parent is not None:
 | 
				
			||||||
        parent[parent_key] = node['*']
 | 
					        parent[parent_key] = node[LEAF_KEY]
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        for key, value in node.items():
 | 
					        for key, value in node.items():
 | 
				
			||||||
            optimize_leaf(node, key, value)
 | 
					            optimize_leaf(node, key, value)
 | 
				
			||||||
| 
						 | 
					@ -138,7 +142,7 @@ def parse_ddg_bangs(ddg_bangs):
 | 
				
			||||||
        t = bang_trie
 | 
					        t = bang_trie
 | 
				
			||||||
        for bang_letter in bang:
 | 
					        for bang_letter in bang:
 | 
				
			||||||
            t = t.setdefault(bang_letter, {})
 | 
					            t = t.setdefault(bang_letter, {})
 | 
				
			||||||
        t = t.setdefault('*', bang_def_output)
 | 
					        t = t.setdefault(LEAF_KEY, bang_def_output)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # optimize the trie
 | 
					    # optimize the trie
 | 
				
			||||||
    merge_when_no_leaf(bang_trie)
 | 
					    merge_when_no_leaf(bang_trie)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,4 +1,10 @@
 | 
				
			||||||
from searx.external_bang import get_node, resolve_bang_definition, get_bang_url, get_bang_definition_and_autocomplete
 | 
					from searx.external_bang import (
 | 
				
			||||||
 | 
					    get_node,
 | 
				
			||||||
 | 
					    resolve_bang_definition,
 | 
				
			||||||
 | 
					    get_bang_url,
 | 
				
			||||||
 | 
					    get_bang_definition_and_autocomplete,
 | 
				
			||||||
 | 
					    LEAF_KEY,
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
from searx.search import SearchQuery, EngineRef
 | 
					from searx.search import SearchQuery, EngineRef
 | 
				
			||||||
from tests import SearxTestCase
 | 
					from tests import SearxTestCase
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -7,12 +13,12 @@ TEST_DB = {
 | 
				
			||||||
    'trie': {
 | 
					    'trie': {
 | 
				
			||||||
        'exam': {
 | 
					        'exam': {
 | 
				
			||||||
            'ple': '//example.com/' + chr(2) + chr(1) + '0',
 | 
					            'ple': '//example.com/' + chr(2) + chr(1) + '0',
 | 
				
			||||||
            '*': '//wikipedia.org/wiki/' + chr(2) + chr(1) + '0',
 | 
					            LEAF_KEY: '//wikipedia.org/wiki/' + chr(2) + chr(1) + '0',
 | 
				
			||||||
        },
 | 
					        },
 | 
				
			||||||
        'sea': {
 | 
					        'sea': {
 | 
				
			||||||
            '*': 'sea' + chr(2) + chr(1) + '0',
 | 
					            LEAF_KEY: 'sea' + chr(2) + chr(1) + '0',
 | 
				
			||||||
            'rch': {
 | 
					            'rch': {
 | 
				
			||||||
                '*': 'search' + chr(2) + chr(1) + '0',
 | 
					                LEAF_KEY: 'search' + chr(2) + chr(1) + '0',
 | 
				
			||||||
                'ing': 'searching' + chr(2) + chr(1) + '0',
 | 
					                'ing': 'searching' + chr(2) + chr(1) + '0',
 | 
				
			||||||
            },
 | 
					            },
 | 
				
			||||||
            's': {
 | 
					            's': {
 | 
				
			||||||
| 
						 | 
					@ -31,7 +37,7 @@ class TestGetNode(SearxTestCase):
 | 
				
			||||||
        'trie': {
 | 
					        'trie': {
 | 
				
			||||||
            'exam': {
 | 
					            'exam': {
 | 
				
			||||||
                'ple': 'test',
 | 
					                'ple': 'test',
 | 
				
			||||||
                '*': 'not used',
 | 
					                LEAF_KEY: 'not used',
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
| 
						 | 
					@ -71,7 +77,7 @@ class TestResolveBangDefinition(SearxTestCase):
 | 
				
			||||||
class TestGetBangDefinitionAndAutocomplete(SearxTestCase):
 | 
					class TestGetBangDefinitionAndAutocomplete(SearxTestCase):
 | 
				
			||||||
    def test_found(self):
 | 
					    def test_found(self):
 | 
				
			||||||
        bang_definition, new_autocomplete = get_bang_definition_and_autocomplete('exam', external_bangs_db=TEST_DB)
 | 
					        bang_definition, new_autocomplete = get_bang_definition_and_autocomplete('exam', external_bangs_db=TEST_DB)
 | 
				
			||||||
        self.assertEqual(bang_definition, TEST_DB['trie']['exam']['*'])
 | 
					        self.assertEqual(bang_definition, TEST_DB['trie']['exam'][LEAF_KEY])
 | 
				
			||||||
        self.assertEqual(new_autocomplete, ['example'])
 | 
					        self.assertEqual(new_autocomplete, ['example'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_found_optimized(self):
 | 
					    def test_found_optimized(self):
 | 
				
			||||||
| 
						 | 
					@ -86,7 +92,7 @@ class TestGetBangDefinitionAndAutocomplete(SearxTestCase):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_partial2(self):
 | 
					    def test_partial2(self):
 | 
				
			||||||
        bang_definition, new_autocomplete = get_bang_definition_and_autocomplete('sea', external_bangs_db=TEST_DB)
 | 
					        bang_definition, new_autocomplete = get_bang_definition_and_autocomplete('sea', external_bangs_db=TEST_DB)
 | 
				
			||||||
        self.assertEqual(bang_definition, TEST_DB['trie']['sea']['*'])
 | 
					        self.assertEqual(bang_definition, TEST_DB['trie']['sea'][LEAF_KEY])
 | 
				
			||||||
        self.assertEqual(new_autocomplete, ['search', 'searching', 'seascapes', 'season'])
 | 
					        self.assertEqual(new_autocomplete, ['search', 'searching', 'seascapes', 'season'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_error(self):
 | 
					    def test_error(self):
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		
		Reference in a new issue