mirror of
				https://github.com/searxng/searxng
				synced 2024-01-01 19:24:07 +01:00 
			
		
		
		
	Merge pull request #609 from LuccoJ/betterwolfram
Improving Wolfram Alpha search hit content
This commit is contained in:
		
						commit
						8f48c518aa
					
				
					 4 changed files with 32 additions and 12 deletions
				
			
		| 
						 | 
				
			
			@ -18,10 +18,10 @@ api_key = ''  # defined in settings.yml
 | 
			
		|||
 | 
			
		||||
# xpath variables
 | 
			
		||||
failure_xpath = '/queryresult[attribute::success="false"]'
 | 
			
		||||
answer_xpath = '//pod[attribute::primary="true"]/subpod/plaintext'
 | 
			
		||||
input_xpath = '//pod[starts-with(attribute::id, "Input")]/subpod/plaintext'
 | 
			
		||||
pods_xpath = '//pod'
 | 
			
		||||
subpods_xpath = './subpod'
 | 
			
		||||
pod_primary_xpath = './@primary'
 | 
			
		||||
pod_id_xpath = './@id'
 | 
			
		||||
pod_title_xpath = './@title'
 | 
			
		||||
plaintext_xpath = './plaintext'
 | 
			
		||||
| 
						 | 
				
			
			@ -75,13 +75,15 @@ def response(resp):
 | 
			
		|||
    try:
 | 
			
		||||
        infobox_title = search_results.xpath(input_xpath)[0].text
 | 
			
		||||
    except:
 | 
			
		||||
        infobox_title = None
 | 
			
		||||
        infobox_title = ""
 | 
			
		||||
 | 
			
		||||
    pods = search_results.xpath(pods_xpath)
 | 
			
		||||
    result_chunks = []
 | 
			
		||||
    result_content = ""
 | 
			
		||||
    for pod in pods:
 | 
			
		||||
        pod_id = pod.xpath(pod_id_xpath)[0]
 | 
			
		||||
        pod_title = pod.xpath(pod_title_xpath)[0]
 | 
			
		||||
        pod_is_result = pod.xpath(pod_primary_xpath)
 | 
			
		||||
 | 
			
		||||
        subpods = pod.xpath(subpods_xpath)
 | 
			
		||||
        if not subpods:
 | 
			
		||||
| 
						 | 
				
			
			@ -94,6 +96,10 @@ def response(resp):
 | 
			
		|||
 | 
			
		||||
            if content and pod_id not in image_pods:
 | 
			
		||||
 | 
			
		||||
                if pod_is_result or not result_content:
 | 
			
		||||
                    if pod_id != "Input":
 | 
			
		||||
                        result_content = "%s: %s" % (pod_title, content)
 | 
			
		||||
 | 
			
		||||
                # if no input pod was found, title is first plaintext pod
 | 
			
		||||
                if not infobox_title:
 | 
			
		||||
                    infobox_title = content
 | 
			
		||||
| 
						 | 
				
			
			@ -109,6 +115,8 @@ def response(resp):
 | 
			
		|||
    if not result_chunks:
 | 
			
		||||
        return []
 | 
			
		||||
 | 
			
		||||
    title = "Wolfram|Alpha (%s)" % infobox_title
 | 
			
		||||
 | 
			
		||||
    # append infobox
 | 
			
		||||
    results.append({'infobox': infobox_title,
 | 
			
		||||
                    'attributes': result_chunks,
 | 
			
		||||
| 
						 | 
				
			
			@ -116,7 +124,7 @@ def response(resp):
 | 
			
		|||
 | 
			
		||||
    # append link to site
 | 
			
		||||
    results.append({'url': resp.request.headers['Referer'].decode('utf8'),
 | 
			
		||||
                    'title': 'Wolfram|Alpha',
 | 
			
		||||
                    'content': infobox_title})
 | 
			
		||||
                    'title': title,
 | 
			
		||||
                    'content': result_content})
 | 
			
		||||
 | 
			
		||||
    return results
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -8,9 +8,11 @@
 | 
			
		|||
# @stable      no
 | 
			
		||||
# @parse       url, infobox
 | 
			
		||||
 | 
			
		||||
from cgi import escape
 | 
			
		||||
from json import loads
 | 
			
		||||
from time import time
 | 
			
		||||
from urllib import urlencode
 | 
			
		||||
from lxml.etree import XML
 | 
			
		||||
 | 
			
		||||
from searx.poolrequests import get as http_get
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -34,7 +36,7 @@ search_url = url + 'input/json.jsp'\
 | 
			
		|||
referer_url = url + 'input/?{query}'
 | 
			
		||||
 | 
			
		||||
token = {'value': '',
 | 
			
		||||
         'last_updated': 0}
 | 
			
		||||
         'last_updated': None}
 | 
			
		||||
 | 
			
		||||
# pods to display as image in infobox
 | 
			
		||||
# these pods do return plaintext, but they look better and are more useful as images
 | 
			
		||||
| 
						 | 
				
			
			@ -80,10 +82,12 @@ def response(resp):
 | 
			
		|||
 | 
			
		||||
    # TODO handle resp_json['queryresult']['assumptions']
 | 
			
		||||
    result_chunks = []
 | 
			
		||||
    infobox_title = None
 | 
			
		||||
    infobox_title = ""
 | 
			
		||||
    result_content = ""
 | 
			
		||||
    for pod in resp_json['queryresult']['pods']:
 | 
			
		||||
        pod_id = pod.get('id', '')
 | 
			
		||||
        pod_title = pod.get('title', '')
 | 
			
		||||
        pod_is_result = pod.get('primary', None)
 | 
			
		||||
 | 
			
		||||
        if 'subpods' not in pod:
 | 
			
		||||
            continue
 | 
			
		||||
| 
						 | 
				
			
			@ -97,6 +101,10 @@ def response(resp):
 | 
			
		|||
                if subpod['plaintext'] != '(requires interactivity)':
 | 
			
		||||
                    result_chunks.append({'label': pod_title, 'value': subpod['plaintext']})
 | 
			
		||||
 | 
			
		||||
                if pod_is_result or not result_content:
 | 
			
		||||
                    if pod_id != "Input":
 | 
			
		||||
                        result_content = pod_title + ': ' + subpod['plaintext']
 | 
			
		||||
 | 
			
		||||
            elif 'img' in subpod:
 | 
			
		||||
                result_chunks.append({'label': pod_title, 'image': subpod['img']})
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -108,7 +116,7 @@ def response(resp):
 | 
			
		|||
                    'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})
 | 
			
		||||
 | 
			
		||||
    results.append({'url': resp.request.headers['Referer'].decode('utf8'),
 | 
			
		||||
                    'title': 'Wolfram|Alpha',
 | 
			
		||||
                    'content': infobox_title})
 | 
			
		||||
                    'title': 'Wolfram|Alpha (' + infobox_title + ')',
 | 
			
		||||
                    'content': result_content})
 | 
			
		||||
 | 
			
		||||
    return results
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -103,7 +103,8 @@ class TestWolframAlphaAPIEngine(SearxTestCase):
 | 
			
		|||
        self.assertEqual(referer_url, results[0]['urls'][0]['url'])
 | 
			
		||||
        self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
 | 
			
		||||
        self.assertEqual(referer_url, results[1]['url'])
 | 
			
		||||
        self.assertEqual('Wolfram|Alpha', results[1]['title'])
 | 
			
		||||
        self.assertEqual('Wolfram|Alpha (input_plaintext)', results[1]['title'])
 | 
			
		||||
        self.assertIn('result_plaintext', results[1]['content'])
 | 
			
		||||
 | 
			
		||||
        # test calc
 | 
			
		||||
        xml = """<?xml version='1.0' encoding='UTF-8'?>
 | 
			
		||||
| 
						 | 
				
			
			@ -161,4 +162,5 @@ class TestWolframAlphaAPIEngine(SearxTestCase):
 | 
			
		|||
        self.assertEqual(referer_url, results[0]['urls'][0]['url'])
 | 
			
		||||
        self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
 | 
			
		||||
        self.assertEqual(referer_url, results[1]['url'])
 | 
			
		||||
        self.assertEqual('Wolfram|Alpha', results[1]['title'])
 | 
			
		||||
        self.assertEqual('Wolfram|Alpha (integral_plaintext)', results[1]['title'])
 | 
			
		||||
        self.assertIn('integral_plaintext', results[1]['content'])
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -140,7 +140,8 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase):
 | 
			
		|||
        self.assertEqual(referer_url, results[0]['urls'][0]['url'])
 | 
			
		||||
        self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
 | 
			
		||||
        self.assertEqual(referer_url, results[1]['url'])
 | 
			
		||||
        self.assertEqual('Wolfram|Alpha', results[1]['title'])
 | 
			
		||||
        self.assertEqual('Wolfram|Alpha (input_plaintext)', results[1]['title'])
 | 
			
		||||
        self.assertIn('result_plaintext', results[1]['content'])
 | 
			
		||||
 | 
			
		||||
        # test calc
 | 
			
		||||
        json = r"""
 | 
			
		||||
| 
						 | 
				
			
			@ -219,4 +220,5 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase):
 | 
			
		|||
        self.assertEqual(referer_url, results[0]['urls'][0]['url'])
 | 
			
		||||
        self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
 | 
			
		||||
        self.assertEqual(referer_url, results[1]['url'])
 | 
			
		||||
        self.assertEqual('Wolfram|Alpha', results[1]['title'])
 | 
			
		||||
        self.assertEqual('Wolfram|Alpha (integral_plaintext)', results[1]['title'])
 | 
			
		||||
        self.assertIn('integral_plaintext', results[1]['content'])
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		
		Reference in a new issue