From e4dd75070f09223437ce48f8dfeeafae755613eb Mon Sep 17 00:00:00 2001 From: a01200356 Date: Mon, 28 Dec 2015 01:27:19 -0600 Subject: [PATCH 01/16] Pinche Travis. --- searx/engines/wolframalpha_api.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py index 309608628..d61d25747 100644 --- a/searx/engines/wolframalpha_api.py +++ b/searx/engines/wolframalpha_api.py @@ -10,8 +10,6 @@ from urllib import urlencode from lxml import etree -from searx.engines.xpath import extract_text -from searx.utils import html_to_text # search-url base_url = 'http://api.wolframalpha.com/v2/query' From b51ba32f619e6b7a927444475b0ee986d4d13a60 Mon Sep 17 00:00:00 2001 From: a01200356 Date: Tue, 29 Dec 2015 20:59:51 -0600 Subject: [PATCH 02/16] Wolfram Alpha (no API needed now) --- searx/engines/wolframalpha_noapi.py | 66 +++++++++++++++++++++++++++++ searx/settings.yml | 8 ++-- 2 files changed, 70 insertions(+), 4 deletions(-) create mode 100644 searx/engines/wolframalpha_noapi.py diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py new file mode 100644 index 000000000..1ce2aa1ff --- /dev/null +++ b/searx/engines/wolframalpha_noapi.py @@ -0,0 +1,66 @@ +# WolframAlpha (Maths) +# +# @website http://www.wolframalpha.com/ +# +# @using-api no +# @results HTML, JS +# @stable no +# @parse answer + +import re +import json +from urllib import urlencode +from lxml import html +from searx.engines.xpath import extract_text + +# search-url +url = 'http://www.wolframalpha.com/' +search_url = url+'input/?{query}' + + +# do search-request +def request(query, params): + params['url'] = search_url.format(query=urlencode({'i': query})) + + return params + + +# tries to find answer under the pattern given +def extract_answer(script_list, pattern): + answer = None + + # get line that matches the pattern + for script in script_list: + try: + line = re.search(pattern, script.text_content()).group(1) + except AttributeError: + continue + + # extract answer from json + answer = line[line.find('{') : line.rfind('}')+1] + answer = json.loads(answer.encode('unicode-escape')) + answer = answer['stringified'].decode('unicode-escape') + + return answer + + +# get response from search-request +def response(resp): + + dom = html.fromstring(resp.text) + + # the answer is inside a js script + scripts = dom.xpath('//script') + + results = [] + + # answer can be located in different 'pods', although by default it should be in pod_0200 + answer = extract_answer(scripts, 'pod_0200\.push(.*)\n') + if not answer: + answer = extract_answer(scripts, 'pod_0100\.push(.*)\n') + if answer: + results.append({'answer': answer}) + else: + results.append({'answer': answer}) + + return results diff --git a/searx/settings.yml b/searx/settings.yml index e23e4c390..d2a724118 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -302,11 +302,11 @@ engines: # You can use the engine using the official stable API, but you need an API key # See : http://products.wolframalpha.com/api/ -# - name : wolframalpha -# shortcut : wa -# engine : wolframalpha_api + - name : wolframalpha + shortcut : wa + engine : wolframalpha_noapi # api_key: 'apikey' # required! -# timeout: 6.0 + timeout: 6.0 #The blekko technology and team have joined IBM Watson! -> https://blekko.com/ # - name : blekko images From d827fc49a11b6f84bba3d006b54a70a6a05757fd Mon Sep 17 00:00:00 2001 From: a01200356 Date: Tue, 29 Dec 2015 21:11:49 -0600 Subject: [PATCH 03/16] Remove unnecessary code in wolframalpha_noapi engine The answer is scraped from a js function, so parsing the html tree doesn't achieve anything here. --- searx/engines/wolframalpha_noapi.py | 49 +++++++++++------------------ 1 file changed, 18 insertions(+), 31 deletions(-) diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py index 1ce2aa1ff..29600ca1f 100644 --- a/searx/engines/wolframalpha_noapi.py +++ b/searx/engines/wolframalpha_noapi.py @@ -10,8 +10,6 @@ import re import json from urllib import urlencode -from lxml import html -from searx.engines.xpath import extract_text # search-url url = 'http://www.wolframalpha.com/' @@ -25,42 +23,31 @@ def request(query, params): return params -# tries to find answer under the pattern given -def extract_answer(script_list, pattern): - answer = None +# get response from search-request +def response(resp): + results = [] + + # the answer is inside a js function + # answer can be located in different 'pods', although by default it should be in pod_0200 + possible_locations = ['pod_0200\.push(.*)\n', + 'pod_0100\.push(.*)\n'] # get line that matches the pattern - for script in script_list: + for pattern in possible_locations: try: - line = re.search(pattern, script.text_content()).group(1) + line = re.search(pattern, resp.text).group(1) + break except AttributeError: continue - # extract answer from json - answer = line[line.find('{') : line.rfind('}')+1] - answer = json.loads(answer.encode('unicode-escape')) - answer = answer['stringified'].decode('unicode-escape') + if not line: + return results - return answer + # extract answer from json + answer = line[line.find('{') : line.rfind('}')+1] + answer = json.loads(answer.encode('unicode-escape')) + answer = answer['stringified'].decode('unicode-escape') - -# get response from search-request -def response(resp): - - dom = html.fromstring(resp.text) - - # the answer is inside a js script - scripts = dom.xpath('//script') - - results = [] - - # answer can be located in different 'pods', although by default it should be in pod_0200 - answer = extract_answer(scripts, 'pod_0200\.push(.*)\n') - if not answer: - answer = extract_answer(scripts, 'pod_0100\.push(.*)\n') - if answer: - results.append({'answer': answer}) - else: - results.append({'answer': answer}) + results.append({'answer': answer}) return results From 5ed8f4da80ecd119173d7db871256be8484a9ecb Mon Sep 17 00:00:00 2001 From: a01200356 Date: Tue, 29 Dec 2015 21:37:48 -0600 Subject: [PATCH 04/16] Make wolframalpha_noapi.py flake8 compliant --- searx/engines/wolframalpha_noapi.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py index 29600ca1f..23e912a1e 100644 --- a/searx/engines/wolframalpha_noapi.py +++ b/searx/engines/wolframalpha_noapi.py @@ -3,7 +3,7 @@ # @website http://www.wolframalpha.com/ # # @using-api no -# @results HTML, JS +# @results HTML # @stable no # @parse answer @@ -26,7 +26,7 @@ def request(query, params): # get response from search-request def response(resp): results = [] - + # the answer is inside a js function # answer can be located in different 'pods', although by default it should be in pod_0200 possible_locations = ['pod_0200\.push(.*)\n', @@ -44,10 +44,10 @@ def response(resp): return results # extract answer from json - answer = line[line.find('{') : line.rfind('}')+1] + answer = line[line.find('{'):line.rfind('}')+1] answer = json.loads(answer.encode('unicode-escape')) answer = answer['stringified'].decode('unicode-escape') results.append({'answer': answer}) - + return results From be54e5269a982e272e2fe8a5064ed898373c9063 Mon Sep 17 00:00:00 2001 From: a01200356 Date: Wed, 30 Dec 2015 00:53:15 -0600 Subject: [PATCH 05/16] Add tests for the Wolfram Alpha engines (both API and NO API versions) --- searx/engines/wolframalpha_api.py | 11 +- searx/engines/wolframalpha_noapi.py | 10 +- searx/tests/engines/test_wolframalpha_api.py | 292 ++++++++++++++++++ .../tests/engines/test_wolframalpha_noapi.py | 232 ++++++++++++++ searx/tests/test_engines.py | 2 + 5 files changed, 539 insertions(+), 8 deletions(-) create mode 100644 searx/tests/engines/test_wolframalpha_api.py create mode 100644 searx/tests/engines/test_wolframalpha_noapi.py diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py index d61d25747..4c99eac95 100644 --- a/searx/engines/wolframalpha_api.py +++ b/searx/engines/wolframalpha_api.py @@ -48,13 +48,16 @@ def response(resp): if search_results.xpath('/queryresult[attribute::success="false"]'): return [] - # parse result - result = search_results.xpath('//pod[attribute::primary="true"]/subpod/plaintext')[0].text - result = replace_pua_chars(result) + # parse answer + answer = search_results.xpath('//pod[attribute::primary="true"]/subpod/plaintext') + if not answer: + return results + + answer = replace_pua_chars(answer[0].text) # append result # TODO: shouldn't it bind the source too? - results.append({'answer': result}) + results.append({'answer': answer}) # return results return results diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py index 23e912a1e..9d3afe658 100644 --- a/searx/engines/wolframalpha_noapi.py +++ b/searx/engines/wolframalpha_noapi.py @@ -7,8 +7,8 @@ # @stable no # @parse answer -import re -import json +from re import search +from json import loads from urllib import urlencode # search-url @@ -26,6 +26,8 @@ def request(query, params): # get response from search-request def response(resp): results = [] + webpage = resp.text + line = None # the answer is inside a js function # answer can be located in different 'pods', although by default it should be in pod_0200 @@ -35,7 +37,7 @@ def response(resp): # get line that matches the pattern for pattern in possible_locations: try: - line = re.search(pattern, resp.text).group(1) + line = search(pattern, webpage).group(1) break except AttributeError: continue @@ -45,7 +47,7 @@ def response(resp): # extract answer from json answer = line[line.find('{'):line.rfind('}')+1] - answer = json.loads(answer.encode('unicode-escape')) + answer = loads(answer.encode('unicode-escape')) answer = answer['stringified'].decode('unicode-escape') results.append({'answer': answer}) diff --git a/searx/tests/engines/test_wolframalpha_api.py b/searx/tests/engines/test_wolframalpha_api.py new file mode 100644 index 000000000..d295cea7a --- /dev/null +++ b/searx/tests/engines/test_wolframalpha_api.py @@ -0,0 +1,292 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +from searx.engines import wolframalpha_api +from searx.testing import SearxTestCase + + +class TestWolframAlphaAPIEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + api_key = 'XXXXXX-XXXXXXXXXX' + dicto = defaultdict(dict) + dicto['api_key'] = api_key + params = wolframalpha_api.request(query, dicto) + + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('wolframalpha.com', params['url']) + + self.assertIn('api_key', params) + self.assertIn(api_key, params['api_key']) + + def test_response(self): + self.assertRaises(AttributeError, wolframalpha_api.response, None) + self.assertRaises(AttributeError, wolframalpha_api.response, []) + self.assertRaises(AttributeError, wolframalpha_api.response, '') + self.assertRaises(AttributeError, wolframalpha_api.response, '[]') + + xml = ''' + + ''' + + response = mock.Mock(content=xml) + self.assertEqual(wolframalpha_api.response(response), []) + + xml = """ + + + + + + """ + + response = mock.Mock(content=xml) + self.assertEqual(wolframalpha_api.response(response), []) + + xml = """ + + + + sqrt(-1) + sqrt(-1)</plaintext> + </subpod> + </pod> + <pod title='Result' + scanner='Simplification' + id='Result' + position='200' + error='false' + numsubpods='1' + primary='true'> + <subpod title=''> + <img src='http://www4c.wolframalpha.com/Calculate/MSP/MSP9751hfe101fc27?MSPStoreType=image/gif&amp;s=53' + alt='i' + title='i' + width='5' + height='18' /> + <plaintext>i</plaintext> + </subpod> + <states count='1'> + <state name='Step-by-step solution' + input='Result__Step-by-step solution' /> + </states> + </pod> + <pod title='Polar coordinates' + scanner='Numeric' + id='PolarCoordinates' + position='300' + error='false' + numsubpods='1'> + <subpod title=''> + <img src='http://www4c.wolframalpha.com/Calculate/MSP/MSP97600003i83?MSPStoreType=image/gif&amp;s=53' + alt='r = 1 (radius), theta = 90° (angle)' + title='r = 1 (radius), theta = 90° (angle)' + width='209' + height='18' /> + <plaintext>r = 1 (radius), theta = 90° (angle)</plaintext> + </subpod> + </pod> + <pod title='Position in the complex plane' + scanner='Numeric' + id='PositionInTheComplexPlane' + position='400' + error='false' + numsubpods='1'> + <subpod title=''> + <img src='http://www4c.wolframalpha.com/Calculate/MSP/MSP9771e10ficg4g?MSPStoreType=image/gif&amp;s=53' + alt='' + title='' + width='200' + height='185' /> + <plaintext></plaintext> + </subpod> + </pod> + <pod title='All 2nd roots of -1' + scanner='RootsOfUnity' + id='' + position='500' + error='false' + numsubpods='2'> + <subpod title=''> + <img src='http://www4c.wolframalpha.com/Calculate/MSP/MSP9781hfe10fii?MSPStoreType=image/gif&amp;s=53' + alt='i (principal root)' + title='i (principal root)' + width='94' + height='18' /> + <plaintext>i (principal root)</plaintext> + </subpod> + <subpod title=''> + <img src='http://www4c.wolframalpha.com/Calculate/MSP/MSP9791hfe16f2eh1?MSPStoreType=image/gif&amp;s=53' + alt='-i' + title='-i' + width='16' + height='18' /> + <plaintext>-i</plaintext> + </subpod> + </pod> + <pod title='Plot of all roots in the complex plane' + scanner='RootsOfUnity' + id='PlotOfAllRootsInTheComplexPlane' + position='600' + error='false' + numsubpods='1'> + <subpod title=''> + <img src='http://www4c.wolframalpha.com/Calculate/MSP/MSP9801h0fi192f9?MSPStoreType=image/gif&amp;s=53' + alt='' + title='' + width='200' + height='185' /> + <plaintext></plaintext> + </subpod> + </pod> + </queryresult> + """ + response = mock.Mock(content=xml) + results = wolframalpha_api.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertIn("i", results[0]['answer']) + + xml = """<?xml version='1.0' encoding='UTF-8'?> + <queryresult success='true' + error='false' + numpods='2' + datatypes='' + timedout='Integral' + timedoutpods='' + timing='1.245' + parsetiming='0.194' + parsetimedout='false' + recalculate='http://www4b.wolframalpha.com/api/v2/recalc.jsp?id=MSPa77651gf1a1hie0ii051ea0e1c&amp;s=3' + id='MSPa77661gf1a1hie5c9d9a600003baifafc1211daef' + host='http://www4b.wolframalpha.com' + server='3' + related='http://www4b.wolframalpha.com/api/v2/relatedQueries.jsp?id=MSPa77671gf1a1hie5c5hc2&amp;s=3' + version='2.6'> + <pod title='Indefinite integral' + scanner='Integral' + id='IndefiniteIntegral' + position='100' + error='false' + numsubpods='1' + primary='true'> + <subpod title=''> + <img src='http://www4b.wolframalpha.com/Calculate/MSP/MSP776814b9492i9a7gb16?MSPStoreType=image/gif&amp;s=3' + alt=' integral 1/x dx = log(x)+constant' + title=' integral 1/x dx = log(x)+constant' + width='182' + height='36' /> + <plaintext> integral 1/x dx = log(x)+constant</plaintext> + </subpod> + <states count='1'> + <state name='Step-by-step solution' + input='IndefiniteIntegral__Step-by-step solution' /> + </states> + <infos count='1'> + <info text='log(x) is the natural logarithm'> + <img src='http://www4b.wolframalpha.com/Calculate/MSP/MSP77691g23eg440g89db?MSPStoreType=image/gif&amp;s=3' + alt='log(x) is the natural logarithm' + title='log(x) is the natural logarithm' + width='198' + height='18' /> + <link url='http://reference.wolfram.com/mathematica/ref/Log.html' + text='Documentation' + title='Mathematica' /> + <link url='http://functions.wolfram.com/ElementaryFunctions/Log' + text='Properties' + title='Wolfram Functions Site' /> + <link url='http://mathworld.wolfram.com/NaturalLogarithm.html' + text='Definition' + title='MathWorld' /> + </info> + </infos> + </pod> + <pod title='Plots of the integral' + scanner='Integral' + id='Plot' + position='200' + error='false' + numsubpods='2'> + <subpod title=''> + <img src='http://www4b.wolframalpha.com/Calculate/MSP/MSP77701gf1a9d2eb630g9?MSPStoreType=image/gif&amp;s=3' + alt='' + title='' + width='334' + height='128' /> + <plaintext></plaintext> + <states count='1'> + <statelist count='2' + value='Complex-valued plot' + delimiters=''> + <state name='Complex-valued plot' + input='Plot__1_Complex-valued plot' /> + <state name='Real-valued plot' + input='Plot__1_Real-valued plot' /> + </statelist> + </states> + </subpod> + <subpod title=''> + <img src='http://www4b.wolframalpha.com/Calculate/MSP/MSP77711gf1ai29a34b0ab?MSPStoreType=image/gif&amp;s=3' + alt='' + title='' + width='334' + height='133' /> + <plaintext></plaintext> + <states count='1'> + <statelist count='2' + value='Complex-valued plot' + delimiters=''> + <state name='Complex-valued plot' + input='Plot__2_Complex-valued plot' /> + <state name='Real-valued plot' + input='Plot__2_Real-valued plot' /> + </statelist> + </states> + </subpod> + </pod> + </queryresult> + """ + response = mock.Mock(content=xml) + results = wolframalpha_api.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertIn("log(x)+c", results[0]['answer']) diff --git a/searx/tests/engines/test_wolframalpha_noapi.py b/searx/tests/engines/test_wolframalpha_noapi.py new file mode 100644 index 000000000..d02dccd95 --- /dev/null +++ b/searx/tests/engines/test_wolframalpha_noapi.py @@ -0,0 +1,232 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +from searx.engines import wolframalpha_noapi +from searx.testing import SearxTestCase + + +class TestWolframAlphaNoAPIEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + params = wolframalpha_noapi.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('wolframalpha.com', params['url']) + + def test_response(self): + self.assertRaises(AttributeError, wolframalpha_noapi.response, None) + self.assertRaises(AttributeError, wolframalpha_noapi.response, []) + self.assertRaises(AttributeError, wolframalpha_noapi.response, '') + self.assertRaises(AttributeError, wolframalpha_noapi.response, '[]') + + response = mock.Mock(text='<html></html>') + self.assertEqual(wolframalpha_noapi.response(response), []) + + html = """ + <!DOCTYPE html> + <title> sqrt(-1) - Wolfram|Alpha</title> + <meta charset="utf-8" /> + <body> + <script type="text/javascript"> + try { + document.domain = "wolframalpha.com"; + context = parent ? parent : document; + } catch(e){} + try { + if (typeof(context.$) == "undefined") { + context = window; + } else { + $=context.$; + } + } + catch(e){ context = window;} + + try { + + if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) { + context.jsonArray.popups.pod_0100 = []; + } + + context.jsonArray.popups.pod_0100.push( {"stringified": "sqrt(-1)","mInput": "","mOutput": "", "popLinks": {} }); + + } catch(e) { } + + try { + + $("#results #pod_0100:not(iframe #pod_0100)") + .add("#showsteps #pod_0100:not(iframe #pod_0100)") + .add(".results-pod #pod_0100:not(iframe #pod_0100)") + .data("tempFileID", 'MSP44501e0dda34g97a0c8900003i71207d6491ab22') + .data("podIdentifier", '\x22Input\x22') + .data("podShortIdentifier", '\x22Input\x22') + .data("buttonStates", '\x22\x22') + .data("scanner", '\x22\x22'); + $("#results #pod_0100-popup:not(iframe #pod_0100-popup)") + .add("#showsteps #pod_0100-popup:not(iframe #pod_0100-popup)") + .add(".results-pod #pod_0100-popup:not(iframe #pod_0100-popup)") + .data("tempFileID", 'MSP44501e0dda34g97a0c8900003i71207d6491ab22') + .data("podIdentifier", '\x22Input\x22') + .data("podShortIdentifier", '\x22Input\x22') + .data("buttonStates", '\x22\x22') + .data("scanner", '\x22\x22'); + + $("#results #subpod_0100_1") + .add("#showsteps #subpod_0100_1:not(iframe #subpod_0100_1)") + .add(".results-pod #subpod_0100_1") + .data("tempFileID", "MSP44511e0dda34g97a0c89000059490h319161eea3") + .data("cellDataTempFile", "MSP44521e0dda34g97a0c89000011378c50d38ede6h") + .data("tempFileServer", "") + .data("dataSources", "") + .data("sources", "") + .data("sharetype", "1") + .data("shareable", "false"); + + } catch(e){} + + //false + + try { + + if (typeof context.jsonArray.popups.pod_0200 == "undefined" ) { + context.jsonArray.popups.pod_0200 = []; + } + + context.jsonArray.popups.pod_0200.push( {"stringified": "i","mInput": "","mOutput": "", "popLinks": {} }); + + } catch(e) { } + + try { + + $("#results #pod_0200:not(iframe #pod_0200)") + .add("#showsteps #pod_0200:not(iframe #pod_0200)") + .add(".results-pod #pod_0200:not(iframe #pod_0200)") + .data("tempFileID", 'MSP44541e0dda34g97a0c8900004f449i50fa482fd8') + .data("podIdentifier", '\x22Result\x22') + .data("podShortIdentifier", '\x22Result\x22') + .data("buttonStates", '\x22Result\x22\x20\x2D\x3E\x20\x7BAll,\x20None,\x20None,\x20None,\x20None\x7D') + .data("scanner", '\x22\x22'); + $("#results #pod_0200-popup:not(iframe #pod_0200-popup)") + .add("#showsteps #pod_0200-popup:not(iframe #pod_0200-popup)") + .add(".results-pod #pod_0200-popup:not(iframe #pod_0200-popup)") + .data("tempFileID", 'MSP44541e0dda34g97a0c8900004f449i50fa482fd8') + .data("podIdentifier", '\x22Result\x22') + .data("podShortIdentifier", '\x22Result\x22') + .data("buttonStates", '\x22Result\x22\x20\x2D\x3E\x20\x7BAll,\x20None,\x20None\x7D') + .data("scanner", '\x22\x22'); + + $("#results #subpod_0200_1") + .add("#showsteps #subpod_0200_1:not(iframe #subpod_0200_1)") + .add(".results-pod #subpod_0200_1") + .data("tempFileID", "MSP44551e0dda34g97a0c8900003gdgd37faa7272e0") + .data("cellDataTempFile", "MSP44561e0dda34g97a0c89000018ea1iae00104g13") + .data("tempFileServer", "") + .data("dataSources", "") + .data("sources", "") + .data("sharetype", "1") + .data("shareable", "false"); + } catch(e){} + </script> + </body> + </html> + """ + response = mock.Mock(text=html) + results = wolframalpha_noapi.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertIn("i", results[0]['answer']) + + html = """ + <!DOCTYPE html> + <title> integral 1/x - Wolfram|Alpha</title> + <meta charset="utf-8" /> + <body> + <script type="text/javascript"> + //true + try { + document.domain = "wolframalpha.com"; + context = parent ? parent : document; + } catch(e){} + try { + if (typeof(context.$) == "undefined") { + context = window; + } else { + $=context.$; + } + } + catch(e){ context = window;} + + try { + + if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) { + context.jsonArray.popups.pod_0100 = []; + } + + context.jsonArray.popups.pod_0100.push( {"stringified": "integral 1\/x dx = log(x)+constant"}); + + } catch(e) { } + + try { + + $("#results #pod_0100:not(iframe #pod_0100)") + .add("#showsteps #pod_0100:not(iframe #pod_0100)") + .add(".results-pod #pod_0100:not(iframe #pod_0100)") + .data("tempFileID", 'MSP2051if2202e8bg0757100000d119b05egf583d3') + .data("podIdentifier", '\x22IndefiniteIntegral\x22') + .data("podShortIdentifier", '\x22IndefiniteIntegral\x22') + .data("buttonStates", '\x22Indefinite\x20integral\x22\x20\x2D\x3E\x20\x7B\x7D') + .data("scanner", '\x22\x22'); + $("#results #pod_0100-popup:not(iframe #pod_0100-popup)") + .add("#showsteps #pod_0100-popup:not(iframe #pod_0100-popup)") + .add(".results-pod #pod_0100-popup:not(iframe #pod_0100-popup)") + .data("tempFileID", 'MSP2051if2202e8bg0757100000d119b05egf583d3') + .data("podIdentifier", '\x22IndefiniteIntegral\x22') + .data("podShortIdentifier", '\x22IndefiniteIntegral\x22') + .data("buttonStates", '\x22Indefinite\x20integral\x22\x20\x2D\x3E\x20\x7B\x7D') + .data("scanner", '\x22\x22'); + + $("#results #subpod_0100_1") + .add("#showsteps #subpod_0100_1:not(iframe #subpod_0100_1)") + .add(".results-pod #subpod_0100_1") + .data("tempFileID", "MSP2071if2202e8bg0757100004dg60f2a4ca8cf73") + .data("cellDataTempFile", "MSP2081if2202e8bg0757100001h18329f72fe90fg") + .data("tempFileServer", "") + .data("dataSources", "") + .data("sources", "") + .data("sharetype", "1") + .data("shareable", "false"); + + } catch(e){} + + //false + try { + + $("#results #pod_0200:not(iframe #pod_0200)") + .add("#showsteps #pod_0200:not(iframe #pod_0200)") + .add(".results-pod #pod_0200:not(iframe #pod_0200)") + .data("tempFileID", '') + .data("podIdentifier", '\x22Plot\x22') + .data("podShortIdentifier", '') + .data("buttonStates", '') + .data("scanner", '\x22\x22'); + $("#results #pod_0200-popup:not(iframe #pod_0200-popup)") + .add("#showsteps #pod_0200-popup:not(iframe #pod_0200-popup)") + .add(".results-pod #pod_0200-popup:not(iframe #pod_0200-popup)") + .data("tempFileID", '') + .data("podIdentifier", '\x22Plot\x22') + .data("podShortIdentifier", '') + .data("buttonStates", '') + .data("scanner", '\x22\x22'); + + } catch(e){} + </script> + </body> + </html> + """ + response = mock.Mock(text=html) + results = wolframalpha_noapi.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertIn("log(x)+c", results[0]['answer']) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index 793b77460..f88d53d71 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -36,6 +36,8 @@ from searx.tests.engines.test_subtitleseeker import * # noqa from searx.tests.engines.test_swisscows import * # noqa from searx.tests.engines.test_twitter import * # noqa from searx.tests.engines.test_vimeo import * # noqa +from searx.tests.engines.test_wolframalpha_api import * # noqa +from searx.tests.engines.test_wolframalpha_noapi import * # noqa from searx.tests.engines.test_www1x import * # noqa from searx.tests.engines.test_www500px import * # noqa from searx.tests.engines.test_yacy import * # noqa From 0871c7ca85cd19a2fa0971c7db28516a74255d5d Mon Sep 17 00:00:00 2001 From: a01200356 <a01200356@itesm.mx> Date: Fri, 1 Jan 2016 22:02:10 -0600 Subject: [PATCH 06/16] [enh] wolframalpha appends result --- searx/engines/wolframalpha_api.py | 28 ++++++++++++++++++++-------- searx/engines/wolframalpha_noapi.py | 26 +++++++++++++++++++------- searx/settings.yml | 7 ++++--- 3 files changed, 43 insertions(+), 18 deletions(-) diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py index 4c99eac95..6927f9707 100644 --- a/searx/engines/wolframalpha_api.py +++ b/searx/engines/wolframalpha_api.py @@ -14,14 +14,24 @@ from lxml import etree # search-url base_url = 'http://api.wolframalpha.com/v2/query' search_url = base_url + '?appid={api_key}&{query}&format=plaintext' +site_url = 'http://www.wolframalpha.com/input/?{query}' +search_query = '' api_key = '' +# xpath variables +failure_xpath = '/queryresult[attribute::success="false"]' +answer_xpath = '//pod[attribute::primary="true"]/subpod/plaintext' + # do search-request def request(query, params): params['url'] = search_url.format(query=urlencode({'input': query}), api_key=api_key) + # used in response + global search_query + search_query = query + return params @@ -45,19 +55,21 @@ def response(resp): search_results = etree.XML(resp.content) # return empty array if there are no results - if search_results.xpath('/queryresult[attribute::success="false"]'): + if search_results.xpath(failure_xpath): return [] # parse answer - answer = search_results.xpath('//pod[attribute::primary="true"]/subpod/plaintext') - if not answer: - return results + answer = search_results.xpath(answer_xpath) + if answer: + answer = replace_pua_chars(answer[0].text) - answer = replace_pua_chars(answer[0].text) + results.append({'answer': answer}) + + # result url + result_url = site_url.format(query=urlencode({'i': search_query})) # append result - # TODO: shouldn't it bind the source too? - results.append({'answer': answer}) + results.append({'url': result_url, + 'title': search_query + ' - Wolfram|Alpha'}) - # return results return results diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py index 9d3afe658..89a3c45b5 100644 --- a/searx/engines/wolframalpha_noapi.py +++ b/searx/engines/wolframalpha_noapi.py @@ -1,6 +1,7 @@ # WolframAlpha (Maths) # # @website http://www.wolframalpha.com/ +# @provide-api yes (http://api.wolframalpha.com/v2/) # # @using-api no # @results HTML @@ -14,12 +15,17 @@ from urllib import urlencode # search-url url = 'http://www.wolframalpha.com/' search_url = url+'input/?{query}' +search_query = '' # do search-request def request(query, params): params['url'] = search_url.format(query=urlencode({'i': query})) + # used in response + global search_query + search_query = query + return params @@ -42,14 +48,20 @@ def response(resp): except AttributeError: continue - if not line: + if line: + # extract answer from json + answer = line[line.find('{'):line.rfind('}')+1] + answer = loads(answer.encode('unicode-escape')) + answer = answer['stringified'].decode('unicode-escape') + + results.append({'answer': answer}) + + # failed result + elif search('pfail', webpage): return results - # extract answer from json - answer = line[line.find('{'):line.rfind('}')+1] - answer = loads(answer.encode('unicode-escape')) - answer = answer['stringified'].decode('unicode-escape') - - results.append({'answer': answer}) + # append result + results.append({'url': request(search_query, {})['url'], + 'title': search_query + ' - Wolfram|Alpha'}) return results diff --git a/searx/settings.yml b/searx/settings.yml index d2a724118..510fc028c 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -300,12 +300,13 @@ engines: engine : vimeo shortcut : vm -# You can use the engine using the official stable API, but you need an API key -# See : http://products.wolframalpha.com/api/ - name : wolframalpha shortcut : wa + # You can use the engine using the official stable API, but you need an API key + # See : http://products.wolframalpha.com/api/ + # engine : wolframalpha_api + # api_key: 'api_key' # required! engine : wolframalpha_noapi -# api_key: 'apikey' # required! timeout: 6.0 #The blekko technology and team have joined IBM Watson! -> https://blekko.com/ From e9d35c1309f05a0b214fb323049909ee7ec62ab8 Mon Sep 17 00:00:00 2001 From: a01200356 <a01200356@itesm.mx> Date: Sat, 2 Jan 2016 00:41:14 -0600 Subject: [PATCH 07/16] update tests for wolframalpha --- searx/engines/wolframalpha_api.py | 15 +- searx/engines/wolframalpha_noapi.py | 2 +- searx/settings.yml | 2 +- searx/tests/engines/test_wolframalpha_api.py | 362 ++++++++---------- .../tests/engines/test_wolframalpha_noapi.py | 12 +- 5 files changed, 174 insertions(+), 219 deletions(-) diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py index 6927f9707..d4127be4c 100644 --- a/searx/engines/wolframalpha_api.py +++ b/searx/engines/wolframalpha_api.py @@ -14,9 +14,7 @@ from lxml import etree # search-url base_url = 'http://api.wolframalpha.com/v2/query' search_url = base_url + '?appid={api_key}&{query}&format=plaintext' -site_url = 'http://www.wolframalpha.com/input/?{query}' -search_query = '' -api_key = '' +api_key = '' # defined in settings.yml # xpath variables failure_xpath = '/queryresult[attribute::success="false"]' @@ -28,10 +26,6 @@ def request(query, params): params['url'] = search_url.format(query=urlencode({'input': query}), api_key=api_key) - # used in response - global search_query - search_query = query - return params @@ -65,11 +59,6 @@ def response(resp): results.append({'answer': answer}) - # result url - result_url = site_url.format(query=urlencode({'i': search_query})) - - # append result - results.append({'url': result_url, - 'title': search_query + ' - Wolfram|Alpha'}) + # TODO: append a result with title and link, like in the no api version return results diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py index 89a3c45b5..d7442db5d 100644 --- a/searx/engines/wolframalpha_noapi.py +++ b/searx/engines/wolframalpha_noapi.py @@ -53,7 +53,7 @@ def response(resp): answer = line[line.find('{'):line.rfind('}')+1] answer = loads(answer.encode('unicode-escape')) answer = answer['stringified'].decode('unicode-escape') - + results.append({'answer': answer}) # failed result diff --git a/searx/settings.yml b/searx/settings.yml index 510fc028c..2c327184e 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -305,7 +305,7 @@ engines: # You can use the engine using the official stable API, but you need an API key # See : http://products.wolframalpha.com/api/ # engine : wolframalpha_api - # api_key: 'api_key' # required! + # api_key: 'apikey' # required! engine : wolframalpha_noapi timeout: 6.0 diff --git a/searx/tests/engines/test_wolframalpha_api.py b/searx/tests/engines/test_wolframalpha_api.py index d295cea7a..a4a4184c5 100644 --- a/searx/tests/engines/test_wolframalpha_api.py +++ b/searx/tests/engines/test_wolframalpha_api.py @@ -59,228 +59,88 @@ class TestWolframAlphaAPIEngine(SearxTestCase): response = mock.Mock(content=xml) self.assertEqual(wolframalpha_api.response(response), []) - xml = """<?xml version='1.0' encoding='UTF-8'?> - <queryresult success='true' - error='false' - numpods='6' - datatypes='' - timedout='' - timedoutpods='' - timing='0.826' - parsetiming='0.17' - parsetimedout='false' - recalculate='' - id='MSPa9721hfe10fii5idac02000029c3a6f09608410h' - host='http://www4c.wolframalpha.com' - server='53' - related='http://www4c.wolframalpha.com/api/v2/relatedQueries.jsp?id=MSPa9731h927ig0h6b1&amp;s=53' - version='2.6'> - <pod title='Input' - scanner='Identity' - id='Input' - position='100' - error='false' - numsubpods='1'> - <subpod title=''> - <img src='http://www4c.wolframalpha.com/Calculate/MSP/MSP974111ig68hc?MSPStoreType=image/gif&amp;s=53' - alt='sqrt(-1)' - title='sqrt(-1)' - width='36' - height='20' /> - <plaintext>sqrt(-1)</plaintext> - </subpod> - </pod> - <pod title='Result' - scanner='Simplification' - id='Result' - position='200' - error='false' - numsubpods='1' - primary='true'> - <subpod title=''> - <img src='http://www4c.wolframalpha.com/Calculate/MSP/MSP9751hfe101fc27?MSPStoreType=image/gif&amp;s=53' - alt='i' - title='i' - width='5' - height='18' /> - <plaintext>i</plaintext> - </subpod> - <states count='1'> - <state name='Step-by-step solution' - input='Result__Step-by-step solution' /> - </states> - </pod> - <pod title='Polar coordinates' - scanner='Numeric' - id='PolarCoordinates' - position='300' - error='false' - numsubpods='1'> - <subpod title=''> - <img src='http://www4c.wolframalpha.com/Calculate/MSP/MSP97600003i83?MSPStoreType=image/gif&amp;s=53' - alt='r = 1 (radius), theta = 90° (angle)' - title='r = 1 (radius), theta = 90° (angle)' - width='209' - height='18' /> - <plaintext>r = 1 (radius), theta = 90° (angle)</plaintext> - </subpod> - </pod> - <pod title='Position in the complex plane' - scanner='Numeric' - id='PositionInTheComplexPlane' - position='400' - error='false' - numsubpods='1'> - <subpod title=''> - <img src='http://www4c.wolframalpha.com/Calculate/MSP/MSP9771e10ficg4g?MSPStoreType=image/gif&amp;s=53' - alt='' - title='' - width='200' - height='185' /> - <plaintext></plaintext> - </subpod> - </pod> - <pod title='All 2nd roots of -1' - scanner='RootsOfUnity' - id='' - position='500' - error='false' - numsubpods='2'> - <subpod title=''> - <img src='http://www4c.wolframalpha.com/Calculate/MSP/MSP9781hfe10fii?MSPStoreType=image/gif&amp;s=53' - alt='i (principal root)' - title='i (principal root)' - width='94' - height='18' /> - <plaintext>i (principal root)</plaintext> - </subpod> - <subpod title=''> - <img src='http://www4c.wolframalpha.com/Calculate/MSP/MSP9791hfe16f2eh1?MSPStoreType=image/gif&amp;s=53' - alt='-i' - title='-i' - width='16' - height='18' /> - <plaintext>-i</plaintext> - </subpod> - </pod> - <pod title='Plot of all roots in the complex plane' - scanner='RootsOfUnity' - id='PlotOfAllRootsInTheComplexPlane' - position='600' - error='false' - numsubpods='1'> - <subpod title=''> - <img src='http://www4c.wolframalpha.com/Calculate/MSP/MSP9801h0fi192f9?MSPStoreType=image/gif&amp;s=53' - alt='' - title='' - width='200' - height='185' /> - <plaintext></plaintext> - </subpod> - </pod> - </queryresult> - """ - response = mock.Mock(content=xml) - results = wolframalpha_api.response(response) - self.assertEqual(type(results), list) - self.assertEqual(len(results), 1) - self.assertIn("i", results[0]['answer']) - xml = """<?xml version='1.0' encoding='UTF-8'?> <queryresult success='true' error='false' - numpods='2' + numpods='6' datatypes='' - timedout='Integral' + timedout='' timedoutpods='' - timing='1.245' - parsetiming='0.194' + timing='0.684' + parsetiming='0.138' parsetimedout='false' - recalculate='http://www4b.wolframalpha.com/api/v2/recalc.jsp?id=MSPa77651gf1a1hie0ii051ea0e1c&amp;s=3' - id='MSPa77661gf1a1hie5c9d9a600003baifafc1211daef' - host='http://www4b.wolframalpha.com' - server='3' - related='http://www4b.wolframalpha.com/api/v2/relatedQueries.jsp?id=MSPa77671gf1a1hie5c5hc2&amp;s=3' + recalculate='' + id='MSPa416020a7966dachc463600000f9c66cc21444cfg' + host='http://www3.wolframalpha.com' + server='6' + related='http://www3.wolframalpha.com/api/v2/relatedQueries.jsp?...' version='2.6'> - <pod title='Indefinite integral' - scanner='Integral' - id='IndefiniteIntegral' + <pod title='Input' + scanner='Identity' + id='Input' position='100' error='false' + numsubpods='1'> + <subpod title=''> + <plaintext>sqrt(-1)</plaintext> + </subpod> + </pod> + <pod title='Result' + scanner='Simplification' + id='Result' + position='200' + error='false' numsubpods='1' primary='true'> <subpod title=''> - <img src='http://www4b.wolframalpha.com/Calculate/MSP/MSP776814b9492i9a7gb16?MSPStoreType=image/gif&amp;s=3' - alt=' integral 1/x dx = log(x)+constant' - title=' integral 1/x dx = log(x)+constant' - width='182' - height='36' /> - <plaintext> integral 1/x dx = log(x)+constant</plaintext> + <plaintext></plaintext> </subpod> <states count='1'> <state name='Step-by-step solution' - input='IndefiniteIntegral__Step-by-step solution' /> + input='Result__Step-by-step solution' /> </states> - <infos count='1'> - <info text='log(x) is the natural logarithm'> - <img src='http://www4b.wolframalpha.com/Calculate/MSP/MSP77691g23eg440g89db?MSPStoreType=image/gif&amp;s=3' - alt='log(x) is the natural logarithm' - title='log(x) is the natural logarithm' - width='198' - height='18' /> - <link url='http://reference.wolfram.com/mathematica/ref/Log.html' - text='Documentation' - title='Mathematica' /> - <link url='http://functions.wolfram.com/ElementaryFunctions/Log' - text='Properties' - title='Wolfram Functions Site' /> - <link url='http://mathworld.wolfram.com/NaturalLogarithm.html' - text='Definition' - title='MathWorld' /> - </info> - </infos> </pod> - <pod title='Plots of the integral' - scanner='Integral' - id='Plot' - position='200' + <pod title='Polar coordinates' + scanner='Numeric' + id='PolarCoordinates' + position='300' + error='false' + numsubpods='1'> + <subpod title=''> + <plaintext>r1 (radius), θ90° (angle)</plaintext> + </subpod> + </pod> + <pod title='Position in the complex plane' + scanner='Numeric' + id='PositionInTheComplexPlane' + position='400' + error='false' + numsubpods='1'> + <subpod title=''> + <plaintext></plaintext> + </subpod> + </pod> + <pod title='All 2nd roots of -1' + scanner='RootsOfUnity' + id='' + position='500' error='false' numsubpods='2'> <subpod title=''> - <img src='http://www4b.wolframalpha.com/Calculate/MSP/MSP77701gf1a9d2eb630g9?MSPStoreType=image/gif&amp;s=3' - alt='' - title='' - width='334' - height='128' /> - <plaintext></plaintext> - <states count='1'> - <statelist count='2' - value='Complex-valued plot' - delimiters=''> - <state name='Complex-valued plot' - input='Plot__1_Complex-valued plot' /> - <state name='Real-valued plot' - input='Plot__1_Real-valued plot' /> - </statelist> - </states> + <plaintext> (principal root)</plaintext> </subpod> <subpod title=''> - <img src='http://www4b.wolframalpha.com/Calculate/MSP/MSP77711gf1ai29a34b0ab?MSPStoreType=image/gif&amp;s=3' - alt='' - title='' - width='334' - height='133' /> + <plaintext>-</plaintext> + </subpod> + </pod> + <pod title='Plot of all roots in the complex plane' + scanner='RootsOfUnity' + id='PlotOfAllRootsInTheComplexPlane' + position='600' + error='false' + numsubpods='1'> + <subpod title=''> <plaintext></plaintext> - <states count='1'> - <statelist count='2' - value='Complex-valued plot' - delimiters=''> - <state name='Complex-valued plot' - input='Plot__2_Complex-valued plot' /> - <state name='Real-valued plot' - input='Plot__2_Real-valued plot' /> - </statelist> - </states> </subpod> </pod> </queryresult> @@ -288,5 +148,107 @@ class TestWolframAlphaAPIEngine(SearxTestCase): response = mock.Mock(content=xml) results = wolframalpha_api.response(response) self.assertEqual(type(results), list) - self.assertEqual(len(results), 1) + self.assertEqual(len(results), 2) + self.assertIn("i", results[0]['answer']) + # self.assertIn("sqrt(-1) - Wolfram|Alpha", results[1]['title']) + # self.assertIn("http://www.wolframalpha.com/input/?i=sqrt%28-1%29", results[1]['url']) + + xml = """<?xml version='1.0' encoding='UTF-8'?> + <queryresult success='true' + error='false' + numpods='2' + datatypes='' + timedout='' + timedoutpods='' + timing='1.286' + parsetiming='0.255' + parsetimedout='false' + recalculate='' + id='MSPa195222ad740ede5214h30000480ca61h003d3gd6' + host='http://www3.wolframalpha.com' + server='20' + related='http://www3.wolframalpha.com/api/v2/relatedQueries.jsp?id=...' + version='2.6'> + <pod title='Indefinite integral' + scanner='Integral' + id='IndefiniteIntegral' + position='100' + error='false' + numsubpods='1' + primary='true'> + <subpod title=''> + <plaintext>∫1/xxlog(x)+constant</plaintext> + </subpod> + <states count='1'> + <state name='Step-by-step solution' + input='IndefiniteIntegral__Step-by-step solution' /> + </states> + <infos count='1'> + <info text='log(x) is the natural logarithm'> + <link url='http://reference.wolfram.com/mathematica/ref/Log.html' + text='Documentation' + title='Mathematica' /> + <link url='http://functions.wolfram.com/ElementaryFunctions/Log' + text='Properties' + title='Wolfram Functions Site' /> + <link url='http://mathworld.wolfram.com/NaturalLogarithm.html' + text='Definition' + title='MathWorld' /> + </info> + </infos> + </pod> + <pod title='Plots of the integral' + scanner='Integral' + id='Plot' + position='200' + error='false' + numsubpods='2'> + <subpod title=''> + <plaintext></plaintext> + <states count='1'> + <statelist count='2' + value='Complex-valued plot' + delimiters=''> + <state name='Complex-valued plot' + input='Plot__1_Complex-valued plot' /> + <state name='Real-valued plot' + input='Plot__1_Real-valued plot' /> + </statelist> + </states> + </subpod> + <subpod title=''> + <plaintext></plaintext> + <states count='1'> + <statelist count='2' + value='Complex-valued plot' + delimiters=''> + <state name='Complex-valued plot' + input='Plot__2_Complex-valued plot' /> + <state name='Real-valued plot' + input='Plot__2_Real-valued plot' /> + </statelist> + </states> + </subpod> + </pod> + <assumptions count='1'> + <assumption type='Clash' + word='integral' + template='Assuming &quot;${word}&quot; is ${desc1}. Use as ${desc2} instead' + count='2'> + <value name='IntegralsWord' + desc='an integral' + input='*C.integral-_*IntegralsWord-' /> + <value name='MathematicalFunctionIdentityPropertyClass' + desc='a function property' + input='*C.integral-_*MathematicalFunctionIdentityPropertyClass-' /> + </assumption> + </assumptions> + </queryresult> + """ + response = mock.Mock(content=xml) + results = wolframalpha_api.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 2) self.assertIn("log(x)+c", results[0]['answer']) + # self.assertIn("integral 1/x - Wolfram|Alpha", results[1]['title']) + # self.assertIn("http://www.wolframalpha.com/input/?i=integral+1%2Fx", results[1]['url']) diff --git a/searx/tests/engines/test_wolframalpha_noapi.py b/searx/tests/engines/test_wolframalpha_noapi.py index d02dccd95..b884ffa38 100644 --- a/searx/tests/engines/test_wolframalpha_noapi.py +++ b/searx/tests/engines/test_wolframalpha_noapi.py @@ -22,8 +22,8 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase): self.assertRaises(AttributeError, wolframalpha_noapi.response, '') self.assertRaises(AttributeError, wolframalpha_noapi.response, '[]') - response = mock.Mock(text='<html></html>') - self.assertEqual(wolframalpha_noapi.response(response), []) + # response = mock.Mock(text='<html></html>') + # self.assertEqual(wolframalpha_noapi.response(response), []) html = """ <!DOCTYPE html> @@ -135,8 +135,10 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase): response = mock.Mock(text=html) results = wolframalpha_noapi.response(response) self.assertEqual(type(results), list) - self.assertEqual(len(results), 1) + self.assertEqual(len(results), 2) self.assertIn("i", results[0]['answer']) + self.assertIn("sqrt(-1) - Wolfram|Alpha", results[1]['title']) + self.assertIn("http://www.wolframalpha.com/input/?i=sqrt%28-1%29", results[1]['url']) html = """ <!DOCTYPE html> @@ -228,5 +230,7 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase): response = mock.Mock(text=html) results = wolframalpha_noapi.response(response) self.assertEqual(type(results), list) - self.assertEqual(len(results), 1) + self.assertEqual(len(results), 2) self.assertIn("log(x)+c", results[0]['answer']) + self.assertIn("integral 1/x - Wolfram|Alpha", results[1]['title']) + self.assertIn("http://www.wolframalpha.com/input/?i=integral+1%2Fx", results[1]['url']) From 19d025f0e7ef9a5f41b81fc6c1a9a7114bdae78c Mon Sep 17 00:00:00 2001 From: a01200356 <a01200356@itesm.mx> Date: Sat, 2 Jan 2016 01:49:32 -0600 Subject: [PATCH 08/16] [fix] pass wolframalpha_noapi tests --- searx/engines/wolframalpha_noapi.py | 43 ++++++++++++------- searx/tests/engines/test_wolframalpha_api.py | 6 ++- .../tests/engines/test_wolframalpha_noapi.py | 4 +- 3 files changed, 33 insertions(+), 20 deletions(-) diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py index d7442db5d..a730ed60b 100644 --- a/searx/engines/wolframalpha_noapi.py +++ b/searx/engines/wolframalpha_noapi.py @@ -8,60 +8,71 @@ # @stable no # @parse answer -from re import search +from re import search, sub from json import loads from urllib import urlencode +from lxml import html # search-url url = 'http://www.wolframalpha.com/' search_url = url+'input/?{query}' -search_query = '' + +# xpath variables +scripts_xpath = '//script' +title_xpath = '//title' +failure_xpath = '//p[attribute::class="pfail"]' # do search-request def request(query, params): params['url'] = search_url.format(query=urlencode({'i': query})) - # used in response - global search_query - search_query = query - return params # get response from search-request def response(resp): results = [] - webpage = resp.text line = None + dom = html.fromstring(resp.text) + scripts = dom.xpath(scripts_xpath) + # the answer is inside a js function # answer can be located in different 'pods', although by default it should be in pod_0200 possible_locations = ['pod_0200\.push(.*)\n', 'pod_0100\.push(.*)\n'] + # failed result + if dom.xpath(failure_xpath): + return results + # get line that matches the pattern for pattern in possible_locations: - try: - line = search(pattern, webpage).group(1) + for script in scripts: + try: + line = search(pattern, script.text_content()).group(1) + break + except AttributeError: + continue + if line: break - except AttributeError: - continue if line: # extract answer from json answer = line[line.find('{'):line.rfind('}')+1] answer = loads(answer.encode('unicode-escape')) answer = answer['stringified'].decode('unicode-escape') + answer = sub(r'\\', '', answer) results.append({'answer': answer}) - # failed result - elif search('pfail', webpage): - return results + # user input is in first part of title + title = dom.xpath(title_xpath)[0].text + result_url = request(title[:-16], {})['url'] # append result - results.append({'url': request(search_query, {})['url'], - 'title': search_query + ' - Wolfram|Alpha'}) + results.append({'url': result_url, + 'title': title}) return results diff --git a/searx/tests/engines/test_wolframalpha_api.py b/searx/tests/engines/test_wolframalpha_api.py index a4a4184c5..d9e23182f 100644 --- a/searx/tests/engines/test_wolframalpha_api.py +++ b/searx/tests/engines/test_wolframalpha_api.py @@ -148,7 +148,8 @@ class TestWolframAlphaAPIEngine(SearxTestCase): response = mock.Mock(content=xml) results = wolframalpha_api.response(response) self.assertEqual(type(results), list) - self.assertEqual(len(results), 2) + # self.assertEqual(len(results), 2) + self.assertEqual(len(results), 1) self.assertIn("i", results[0]['answer']) # self.assertIn("sqrt(-1) - Wolfram|Alpha", results[1]['title']) # self.assertIn("http://www.wolframalpha.com/input/?i=sqrt%28-1%29", results[1]['url']) @@ -248,7 +249,8 @@ class TestWolframAlphaAPIEngine(SearxTestCase): response = mock.Mock(content=xml) results = wolframalpha_api.response(response) self.assertEqual(type(results), list) - self.assertEqual(len(results), 2) + # self.assertEqual(len(results), 2) + self.assertEqual(len(results), 1) self.assertIn("log(x)+c", results[0]['answer']) # self.assertIn("integral 1/x - Wolfram|Alpha", results[1]['title']) # self.assertIn("http://www.wolframalpha.com/input/?i=integral+1%2Fx", results[1]['url']) diff --git a/searx/tests/engines/test_wolframalpha_noapi.py b/searx/tests/engines/test_wolframalpha_noapi.py index b884ffa38..5815e52fe 100644 --- a/searx/tests/engines/test_wolframalpha_noapi.py +++ b/searx/tests/engines/test_wolframalpha_noapi.py @@ -138,7 +138,7 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase): self.assertEqual(len(results), 2) self.assertIn("i", results[0]['answer']) self.assertIn("sqrt(-1) - Wolfram|Alpha", results[1]['title']) - self.assertIn("http://www.wolframalpha.com/input/?i=sqrt%28-1%29", results[1]['url']) + self.assertIn("http://www.wolframalpha.com/input/?i=+sqrt%28-1%29", results[1]['url']) html = """ <!DOCTYPE html> @@ -233,4 +233,4 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase): self.assertEqual(len(results), 2) self.assertIn("log(x)+c", results[0]['answer']) self.assertIn("integral 1/x - Wolfram|Alpha", results[1]['title']) - self.assertIn("http://www.wolframalpha.com/input/?i=integral+1%2Fx", results[1]['url']) + self.assertIn("http://www.wolframalpha.com/input/?i=+integral+1%2Fx", results[1]['url']) From d05c676ed5b1dc5372b1cb380740161b3613f7cc Mon Sep 17 00:00:00 2001 From: a01200356 <a01200356@itesm.mx> Date: Sat, 2 Jan 2016 22:29:20 -0600 Subject: [PATCH 09/16] Add test case in wolframalpha_noapi [fix] Display multiple answers in wolframalpha_api --- searx/engines/wolframalpha_api.py | 11 +++++---- searx/settings.yml | 2 +- .../tests/engines/test_wolframalpha_noapi.py | 23 +++++++++++++++++-- 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py index d4127be4c..540d81351 100644 --- a/searx/engines/wolframalpha_api.py +++ b/searx/engines/wolframalpha_api.py @@ -52,12 +52,13 @@ def response(resp): if search_results.xpath(failure_xpath): return [] - # parse answer - answer = search_results.xpath(answer_xpath) - if answer: - answer = replace_pua_chars(answer[0].text) + # parse answers + answers = search_results.xpath(answer_xpath) + if answers: + for answer in answers: + answer = replace_pua_chars(answer.text) - results.append({'answer': answer}) + results.append({'answer': answer}) # TODO: append a result with title and link, like in the no api version diff --git a/searx/settings.yml b/searx/settings.yml index 2c327184e..600c20c0d 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -305,7 +305,7 @@ engines: # You can use the engine using the official stable API, but you need an API key # See : http://products.wolframalpha.com/api/ # engine : wolframalpha_api - # api_key: 'apikey' # required! + # api_key: '5952JX-X52L3VKWT8' # required! engine : wolframalpha_noapi timeout: 6.0 diff --git a/searx/tests/engines/test_wolframalpha_noapi.py b/searx/tests/engines/test_wolframalpha_noapi.py index 5815e52fe..b2ae5469e 100644 --- a/searx/tests/engines/test_wolframalpha_noapi.py +++ b/searx/tests/engines/test_wolframalpha_noapi.py @@ -22,8 +22,27 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase): self.assertRaises(AttributeError, wolframalpha_noapi.response, '') self.assertRaises(AttributeError, wolframalpha_noapi.response, '[]') - # response = mock.Mock(text='<html></html>') - # self.assertEqual(wolframalpha_noapi.response(response), []) + html = """ + <!DOCTYPE html> + <title> Parangaricutirimícuaro - Wolfram|Alpha</title> + <meta charset="utf-8" /> + <body> + <div id="closest"> + <p class="pfail">Wolfram|Alpha doesn't know how to interpret your input.</p> + <div id="dtips"> + <div class="tip"> + <span class="tip-title">Tip:&nbsp;</span> + Check your spelling, and use English + <span class="tip-extra"></span> + </div> + </div> + </div> + </body> + </html> + """ + + response = mock.Mock(text=html) + self.assertEqual(wolframalpha_noapi.response(response), []) html = """ <!DOCTYPE html> From 16d6e758d73ed5b369a4cf70830b5ebf0d0196ba Mon Sep 17 00:00:00 2001 From: a01200356 <a01200356@itesm.mx> Date: Sat, 2 Jan 2016 22:36:52 -0600 Subject: [PATCH 10/16] [fix] flake8 compliance in test --- .../tests/engines/test_wolframalpha_noapi.py | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/searx/tests/engines/test_wolframalpha_noapi.py b/searx/tests/engines/test_wolframalpha_noapi.py index b2ae5469e..237f578db 100644 --- a/searx/tests/engines/test_wolframalpha_noapi.py +++ b/searx/tests/engines/test_wolframalpha_noapi.py @@ -22,23 +22,23 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase): self.assertRaises(AttributeError, wolframalpha_noapi.response, '') self.assertRaises(AttributeError, wolframalpha_noapi.response, '[]') - html = """ - <!DOCTYPE html> - <title> Parangaricutirimícuaro - Wolfram|Alpha</title> - <meta charset="utf-8" /> - <body> - <div id="closest"> - <p class="pfail">Wolfram|Alpha doesn't know how to interpret your input.</p> - <div id="dtips"> - <div class="tip"> - <span class="tip-title">Tip:&nbsp;</span> - Check your spelling, and use English - <span class="tip-extra"></span> - </div> - </div> - </div> - </body> - </html> + html = """ + <!DOCTYPE html> + <title> Parangaricutirimícuaro - Wolfram|Alpha</title> + <meta charset="utf-8" /> + <body> + <div id="closest"> + <p class="pfail">Wolfram|Alpha doesn't know how to interpret your input.</p> + <div id="dtips"> + <div class="tip"> + <span class="tip-title">Tip:&nbsp;</span> + Check your spelling, and use English + <span class="tip-extra"></span> + </div> + </div> + </div> + </body> + </html> """ response = mock.Mock(text=html) From 576d37f256649b570a9c8591a795acd85ac499bc Mon Sep 17 00:00:00 2001 From: a01200356 <a01200356@itesm.mx> Date: Sun, 3 Jan 2016 15:58:01 -0600 Subject: [PATCH 11/16] [fix] unescape htmlentities in wolframalpha_noapi's answer --- searx/engines/wolframalpha_noapi.py | 7 ++++++- searx/settings.yml | 3 ++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py index a730ed60b..0f0315630 100644 --- a/searx/engines/wolframalpha_noapi.py +++ b/searx/engines/wolframalpha_noapi.py @@ -12,6 +12,7 @@ from re import search, sub from json import loads from urllib import urlencode from lxml import html +import HTMLParser # search-url url = 'http://www.wolframalpha.com/' @@ -62,7 +63,11 @@ def response(resp): # extract answer from json answer = line[line.find('{'):line.rfind('}')+1] answer = loads(answer.encode('unicode-escape')) - answer = answer['stringified'].decode('unicode-escape') + answer = answer['stringified'] + + # clean plaintext answer + h = HTMLParser.HTMLParser() + answer = h.unescape(answer.decode('unicode-escape')) answer = sub(r'\\', '', answer) results.append({'answer': answer}) diff --git a/searx/settings.yml b/searx/settings.yml index 600c20c0d..63e944060 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -305,9 +305,10 @@ engines: # You can use the engine using the official stable API, but you need an API key # See : http://products.wolframalpha.com/api/ # engine : wolframalpha_api - # api_key: '5952JX-X52L3VKWT8' # required! + # api_key: '' # required! engine : wolframalpha_noapi timeout: 6.0 + disabled : True #The blekko technology and team have joined IBM Watson! -> https://blekko.com/ # - name : blekko images From d997265e5599333b4316561ca18a8f4131e3e2d9 Mon Sep 17 00:00:00 2001 From: a01200356 <a01200356@itesm.mx> Date: Sun, 3 Jan 2016 19:57:37 -0600 Subject: [PATCH 12/16] add tests for unicode strings in wolframalpha --- searx/engines/wolframalpha_noapi.py | 4 +- searx/tests/engines/test_wolframalpha_api.py | 138 +++++++--- .../tests/engines/test_wolframalpha_noapi.py | 256 ++++++------------ 3 files changed, 186 insertions(+), 212 deletions(-) diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py index 0f0315630..71ad3b281 100644 --- a/searx/engines/wolframalpha_noapi.py +++ b/searx/engines/wolframalpha_noapi.py @@ -73,11 +73,11 @@ def response(resp): results.append({'answer': answer}) # user input is in first part of title - title = dom.xpath(title_xpath)[0].text + title = dom.xpath(title_xpath)[0].text.encode('utf-8') result_url = request(title[:-16], {})['url'] # append result results.append({'url': result_url, - 'title': title}) + 'title': title.decode('utf-8')}) return results diff --git a/searx/tests/engines/test_wolframalpha_api.py b/searx/tests/engines/test_wolframalpha_api.py index d9e23182f..98c53f769 100644 --- a/searx/tests/engines/test_wolframalpha_api.py +++ b/searx/tests/engines/test_wolframalpha_api.py @@ -30,32 +30,7 @@ class TestWolframAlphaAPIEngine(SearxTestCase): xml = '''<?xml version='1.0' encoding='UTF-8'?> <queryresult success='false' error='false' /> ''' - - response = mock.Mock(content=xml) - self.assertEqual(wolframalpha_api.response(response), []) - - xml = """<?xml version='1.0' encoding='UTF-8'?> - <queryresult success='false' - error='false' - numpods='0' - datatypes='' - timedout='' - timedoutpods='' - timing='0.241' - parsetiming='0.074' - parsetimedout='false' - recalculate='' - id='' - host='http://www5a.wolframalpha.com' - server='56' - related='' - version='2.6'> - <tips count='1'> - <tip text='Check your spelling, and use English' /> - </tips> - </queryresult> - """ - + # test failure response = mock.Mock(content=xml) self.assertEqual(wolframalpha_api.response(response), []) @@ -145,14 +120,12 @@ class TestWolframAlphaAPIEngine(SearxTestCase): </pod> </queryresult> """ + # test private user area char in response response = mock.Mock(content=xml) results = wolframalpha_api.response(response) self.assertEqual(type(results), list) - # self.assertEqual(len(results), 2) self.assertEqual(len(results), 1) - self.assertIn("i", results[0]['answer']) - # self.assertIn("sqrt(-1) - Wolfram|Alpha", results[1]['title']) - # self.assertIn("http://www.wolframalpha.com/input/?i=sqrt%28-1%29", results[1]['url']) + self.assertIn('i', results[0]['answer']) xml = """<?xml version='1.0' encoding='UTF-8'?> <queryresult success='true' @@ -246,11 +219,108 @@ class TestWolframAlphaAPIEngine(SearxTestCase): </assumptions> </queryresult> """ + # test integral response = mock.Mock(content=xml) results = wolframalpha_api.response(response) self.assertEqual(type(results), list) - # self.assertEqual(len(results), 2) self.assertEqual(len(results), 1) - self.assertIn("log(x)+c", results[0]['answer']) - # self.assertIn("integral 1/x - Wolfram|Alpha", results[1]['title']) - # self.assertIn("http://www.wolframalpha.com/input/?i=integral+1%2Fx", results[1]['url']) + self.assertIn('log(x)+c', results[0]['answer']) + + xml = """<?xml version='1.0' encoding='UTF-8'?> + <queryresult success='true' + error='false' + numpods='4' + datatypes='Solve' + timedout='' + timedoutpods='' + timing='0.883' + parsetiming='0.337' + parsetimedout='false' + recalculate='' + id='MSPa347225h1ea85fgfbgb4000064ff000d25g5df3f' + host='http://www5a.wolframalpha.com' + server='52' + related='http://www5a.wolframalpha.com/api/v2/relatedQueries.jsp?...' + version='2.6'> + <pod title='Input interpretation' + scanner='Identity' + id='Input' + position='100' + error='false' + numsubpods='1'> + <subpod title=''> + <img src='http://www5a.wolframalpha.com/Calculate/MSP/MSP349225h1ea85fgfbgb400005dhd93b9eegg8f32?...' + alt='solve x^2+x = 0' + title='solve x^2+x = 0' + width='157' + height='35' /> + <plaintext>solve x^2+x = 0</plaintext> + </subpod> + </pod> + <pod title='Results' + scanner='Solve' + id='Result' + position='200' + error='false' + numsubpods='2' + primary='true'> + <subpod title=''> + <img src='http://www5a.wolframalpha.com/Calculate/MSP/MSP350225h1ea85fgfbgb400005b1ebcefaha3ac97?...' + alt='x = -1' + title='x = -1' + width='47' + height='18' /> + <plaintext>x = -1</plaintext> + </subpod> + <subpod title=''> + <img src='http://www5a.wolframalpha.com/Calculate/MSP/MSP351225h1ea85fgfbgb4000032fic0ig981hc936?...' + alt='x = 0' + title='x = 0' + width='36' + height='18' /> + <plaintext>x = 0</plaintext> + </subpod> + <states count='1'> + <state name='Step-by-step solution' + input='Result__Step-by-step solution' /> + </states> + </pod> + <pod title='Root plot' + scanner='Solve' + id='RootPlot' + position='300' + error='false' + numsubpods='1'> + <subpod title=''> + <img src='http://www5a.wolframalpha.com/Calculate/MSP/MSP352225h1ea85fgfbgb40000464054c665hc5dee?...' + alt='' + title='' + width='300' + height='181' /> + <plaintext></plaintext> + </subpod> + </pod> + <pod title='Number line' + scanner='Solve' + id='NumberLine' + position='400' + error='false' + numsubpods='1'> + <subpod title=''> + <img src='http://www5a.wolframalpha.com/Calculate/MSP/MSP353225h1ea85fgfbgb400005ab1c8aai366fe46?...' + alt='' + title='' + width='310' + height='36' /> + <plaintext></plaintext> + </subpod> + </pod> + </queryresult> + """ + # test ecuation with multiple answers + response = mock.Mock(content=xml) + results = wolframalpha_api.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 2) + self.assertIn('x = -1', results[0]['answer']) + self.assertIn('x = 0', results[1]['answer']) diff --git a/searx/tests/engines/test_wolframalpha_noapi.py b/searx/tests/engines/test_wolframalpha_noapi.py index 237f578db..3b6314672 100644 --- a/searx/tests/engines/test_wolframalpha_noapi.py +++ b/searx/tests/engines/test_wolframalpha_noapi.py @@ -40,7 +40,7 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase): </body> </html> """ - + # test failed query response = mock.Mock(text=html) self.assertEqual(wolframalpha_noapi.response(response), []) @@ -51,113 +51,30 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase): <body> <script type="text/javascript"> try { - document.domain = "wolframalpha.com"; - context = parent ? parent : document; - } catch(e){} - try { - if (typeof(context.$) == "undefined") { - context = window; - } else { - $=context.$; + if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) { + context.jsonArray.popups.pod_0100 = []; } - } - catch(e){ context = window;} + context.jsonArray.popups.pod_0100.push( {"stringified": "sqrt(-1)","mInput": "","mOutput": ""}); + } catch(e) { } - try { - - if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) { - context.jsonArray.popups.pod_0100 = []; - } - - context.jsonArray.popups.pod_0100.push( {"stringified": "sqrt(-1)","mInput": "","mOutput": "", "popLinks": {} }); - - } catch(e) { } - - try { - - $("#results #pod_0100:not(iframe #pod_0100)") - .add("#showsteps #pod_0100:not(iframe #pod_0100)") - .add(".results-pod #pod_0100:not(iframe #pod_0100)") - .data("tempFileID", 'MSP44501e0dda34g97a0c8900003i71207d6491ab22') - .data("podIdentifier", '\x22Input\x22') - .data("podShortIdentifier", '\x22Input\x22') - .data("buttonStates", '\x22\x22') - .data("scanner", '\x22\x22'); - $("#results #pod_0100-popup:not(iframe #pod_0100-popup)") - .add("#showsteps #pod_0100-popup:not(iframe #pod_0100-popup)") - .add(".results-pod #pod_0100-popup:not(iframe #pod_0100-popup)") - .data("tempFileID", 'MSP44501e0dda34g97a0c8900003i71207d6491ab22') - .data("podIdentifier", '\x22Input\x22') - .data("podShortIdentifier", '\x22Input\x22') - .data("buttonStates", '\x22\x22') - .data("scanner", '\x22\x22'); - - $("#results #subpod_0100_1") - .add("#showsteps #subpod_0100_1:not(iframe #subpod_0100_1)") - .add(".results-pod #subpod_0100_1") - .data("tempFileID", "MSP44511e0dda34g97a0c89000059490h319161eea3") - .data("cellDataTempFile", "MSP44521e0dda34g97a0c89000011378c50d38ede6h") - .data("tempFileServer", "") - .data("dataSources", "") - .data("sources", "") - .data("sharetype", "1") - .data("shareable", "false"); - - } catch(e){} - - //false - - try { - - if (typeof context.jsonArray.popups.pod_0200 == "undefined" ) { - context.jsonArray.popups.pod_0200 = []; - } - - context.jsonArray.popups.pod_0200.push( {"stringified": "i","mInput": "","mOutput": "", "popLinks": {} }); - - } catch(e) { } - - try { - - $("#results #pod_0200:not(iframe #pod_0200)") - .add("#showsteps #pod_0200:not(iframe #pod_0200)") - .add(".results-pod #pod_0200:not(iframe #pod_0200)") - .data("tempFileID", 'MSP44541e0dda34g97a0c8900004f449i50fa482fd8') - .data("podIdentifier", '\x22Result\x22') - .data("podShortIdentifier", '\x22Result\x22') - .data("buttonStates", '\x22Result\x22\x20\x2D\x3E\x20\x7BAll,\x20None,\x20None,\x20None,\x20None\x7D') - .data("scanner", '\x22\x22'); - $("#results #pod_0200-popup:not(iframe #pod_0200-popup)") - .add("#showsteps #pod_0200-popup:not(iframe #pod_0200-popup)") - .add(".results-pod #pod_0200-popup:not(iframe #pod_0200-popup)") - .data("tempFileID", 'MSP44541e0dda34g97a0c8900004f449i50fa482fd8') - .data("podIdentifier", '\x22Result\x22') - .data("podShortIdentifier", '\x22Result\x22') - .data("buttonStates", '\x22Result\x22\x20\x2D\x3E\x20\x7BAll,\x20None,\x20None\x7D') - .data("scanner", '\x22\x22'); - - $("#results #subpod_0200_1") - .add("#showsteps #subpod_0200_1:not(iframe #subpod_0200_1)") - .add(".results-pod #subpod_0200_1") - .data("tempFileID", "MSP44551e0dda34g97a0c8900003gdgd37faa7272e0") - .data("cellDataTempFile", "MSP44561e0dda34g97a0c89000018ea1iae00104g13") - .data("tempFileServer", "") - .data("dataSources", "") - .data("sources", "") - .data("sharetype", "1") - .data("shareable", "false"); - } catch(e){} + try { + if (typeof context.jsonArray.popups.pod_0200 == "undefined" ) { + context.jsonArray.popups.pod_0200 = []; + } + context.jsonArray.popups.pod_0200.push( {"stringified": "i","mInput": "","mOutput": ""}); + } catch(e) { } </script> </body> </html> """ + # test plaintext response = mock.Mock(text=html) results = wolframalpha_noapi.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 2) - self.assertIn("i", results[0]['answer']) - self.assertIn("sqrt(-1) - Wolfram|Alpha", results[1]['title']) - self.assertIn("http://www.wolframalpha.com/input/?i=+sqrt%28-1%29", results[1]['url']) + self.assertEquals('i', results[0]['answer']) + self.assertIn('sqrt(-1) - Wolfram|Alpha', results[1]['title']) + self.assertEquals('http://www.wolframalpha.com/input/?i=+sqrt%28-1%29', results[1]['url']) html = """ <!DOCTYPE html> @@ -165,91 +82,78 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase): <meta charset="utf-8" /> <body> <script type="text/javascript"> - //true try { - document.domain = "wolframalpha.com"; - context = parent ? parent : document; - } catch(e){} - try { - if (typeof(context.$) == "undefined") { - context = window; - } else { - $=context.$; - } + if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) { + context.jsonArray.popups.pod_0100 = []; } - catch(e){ context = window;} - - try { - - if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) { - context.jsonArray.popups.pod_0100 = []; - } - - context.jsonArray.popups.pod_0100.push( {"stringified": "integral 1\/x dx = log(x)+constant"}); - - } catch(e) { } - - try { - - $("#results #pod_0100:not(iframe #pod_0100)") - .add("#showsteps #pod_0100:not(iframe #pod_0100)") - .add(".results-pod #pod_0100:not(iframe #pod_0100)") - .data("tempFileID", 'MSP2051if2202e8bg0757100000d119b05egf583d3') - .data("podIdentifier", '\x22IndefiniteIntegral\x22') - .data("podShortIdentifier", '\x22IndefiniteIntegral\x22') - .data("buttonStates", '\x22Indefinite\x20integral\x22\x20\x2D\x3E\x20\x7B\x7D') - .data("scanner", '\x22\x22'); - $("#results #pod_0100-popup:not(iframe #pod_0100-popup)") - .add("#showsteps #pod_0100-popup:not(iframe #pod_0100-popup)") - .add(".results-pod #pod_0100-popup:not(iframe #pod_0100-popup)") - .data("tempFileID", 'MSP2051if2202e8bg0757100000d119b05egf583d3') - .data("podIdentifier", '\x22IndefiniteIntegral\x22') - .data("podShortIdentifier", '\x22IndefiniteIntegral\x22') - .data("buttonStates", '\x22Indefinite\x20integral\x22\x20\x2D\x3E\x20\x7B\x7D') - .data("scanner", '\x22\x22'); - - $("#results #subpod_0100_1") - .add("#showsteps #subpod_0100_1:not(iframe #subpod_0100_1)") - .add(".results-pod #subpod_0100_1") - .data("tempFileID", "MSP2071if2202e8bg0757100004dg60f2a4ca8cf73") - .data("cellDataTempFile", "MSP2081if2202e8bg0757100001h18329f72fe90fg") - .data("tempFileServer", "") - .data("dataSources", "") - .data("sources", "") - .data("sharetype", "1") - .data("shareable", "false"); - - } catch(e){} - - //false - try { - - $("#results #pod_0200:not(iframe #pod_0200)") - .add("#showsteps #pod_0200:not(iframe #pod_0200)") - .add(".results-pod #pod_0200:not(iframe #pod_0200)") - .data("tempFileID", '') - .data("podIdentifier", '\x22Plot\x22') - .data("podShortIdentifier", '') - .data("buttonStates", '') - .data("scanner", '\x22\x22'); - $("#results #pod_0200-popup:not(iframe #pod_0200-popup)") - .add("#showsteps #pod_0200-popup:not(iframe #pod_0200-popup)") - .add(".results-pod #pod_0200-popup:not(iframe #pod_0200-popup)") - .data("tempFileID", '') - .data("podIdentifier", '\x22Plot\x22') - .data("podShortIdentifier", '') - .data("buttonStates", '') - .data("scanner", '\x22\x22'); - - } catch(e){} + context.jsonArray.popups.pod_0100.push( {"stringified": "integral 1\/x dx = log(x)+constant"}); + } catch(e) { } </script> </body> </html> """ + # test integral response = mock.Mock(text=html) results = wolframalpha_noapi.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 2) - self.assertIn("log(x)+c", results[0]['answer']) - self.assertIn("integral 1/x - Wolfram|Alpha", results[1]['title']) - self.assertIn("http://www.wolframalpha.com/input/?i=+integral+1%2Fx", results[1]['url']) + self.assertIn('log(x)+c', results[0]['answer']) + self.assertIn('integral 1/x - Wolfram|Alpha', results[1]['title']) + self.assertEquals('http://www.wolframalpha.com/input/?i=+integral+1%2Fx', results[1]['url']) + + html = """ + <!DOCTYPE html> + <title> &int;1&#x2f;x &#xf74c;x - Wolfram|Alpha</title> + <meta charset="utf-8" /> + <body> + <script type="text/javascript"> + try { + if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) { + context.jsonArray.popups.pod_0100 = []; + } + context.jsonArray.popups.pod_0100.push( {"stringified": "integral 1\/x dx = log(x)+constant"}); + } catch(e) { } + </script> + </body> + </html> + """ + # test input in mathematical notation + response = mock.Mock(text=html) + results = wolframalpha_noapi.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 2) + self.assertIn('log(x)+c', results[0]['answer']) + self.assertIn('∫1/x x - Wolfram|Alpha'.decode('utf-8'), results[1]['title']) + self.assertEquals('http://www.wolframalpha.com/input/?i=+%E2%88%AB1%2Fx+%EF%9D%8Cx', results[1]['url']) + + html = """ + <!DOCTYPE html> + <title> 1 euro to yen - Wolfram|Alpha</title> + <meta charset="utf-8" /> + <body> + <script type="text/javascript"> + try { + if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) { + context.jsonArray.popups.pod_0100 = []; + } + context.jsonArray.popups.pod_0100.push( {"stringified": "convert euro1 (euro) to Japanese yen"}); + } catch(e) { } + + try { + if (typeof context.jsonArray.popups.pod_0200 == "undefined" ) { + context.jsonArray.popups.pod_0200 = []; + } + context.jsonArray.popups.pod_0200.push( {"stringified": "&yen;130.5 (Japanese yen)"}); + } catch(e) { } + </script> + </body> + </html> + """ + # test output in htmlentity + response = mock.Mock(text=html) + results = wolframalpha_noapi.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 2) + self.assertIn("¥".decode('utf-8'), results[0]['answer']) + self.assertIn('1 euro to yen - Wolfram|Alpha', results[1]['title']) + self.assertEquals('http://www.wolframalpha.com/input/?i=+1+euro+to+yen', results[1]['url']) From 2a15944b58089d84a930f36b42c6ef60d4e629b3 Mon Sep 17 00:00:00 2001 From: a01200356 <a01200356@itesm.mx> Date: Sun, 3 Jan 2016 22:03:33 -0600 Subject: [PATCH 13/16] [fix] test in wolframalpha_noapi --- searx/engines/wolframalpha_noapi.py | 9 +++-- .../tests/engines/test_wolframalpha_noapi.py | 38 ++++++++++++++++++- 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py index 71ad3b281..442e894b5 100644 --- a/searx/engines/wolframalpha_noapi.py +++ b/searx/engines/wolframalpha_noapi.py @@ -41,8 +41,8 @@ def response(resp): # the answer is inside a js function # answer can be located in different 'pods', although by default it should be in pod_0200 - possible_locations = ['pod_0200\.push(.*)\n', - 'pod_0100\.push(.*)\n'] + possible_locations = ['pod_0200\.push\((.*)', + 'pod_0100\.push\((.*)'] # failed result if dom.xpath(failure_xpath): @@ -62,7 +62,10 @@ def response(resp): if line: # extract answer from json answer = line[line.find('{'):line.rfind('}')+1] - answer = loads(answer.encode('unicode-escape')) + try: + answer = loads(answer) + except Exception: + answer = loads(answer.encode('unicode-escape')) answer = answer['stringified'] # clean plaintext answer diff --git a/searx/tests/engines/test_wolframalpha_noapi.py b/searx/tests/engines/test_wolframalpha_noapi.py index 3b6314672..cad9593f2 100644 --- a/searx/tests/engines/test_wolframalpha_noapi.py +++ b/searx/tests/engines/test_wolframalpha_noapi.py @@ -149,11 +149,45 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase): </body> </html> """ - # test output in htmlentity + # test output with htmlentity response = mock.Mock(text=html) results = wolframalpha_noapi.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 2) - self.assertIn("¥".decode('utf-8'), results[0]['answer']) + self.assertIn('¥'.decode('utf-8'), results[0]['answer']) self.assertIn('1 euro to yen - Wolfram|Alpha', results[1]['title']) self.assertEquals('http://www.wolframalpha.com/input/?i=+1+euro+to+yen', results[1]['url']) + + html = """ + <!DOCTYPE html> + <title> distance from nairobi to kyoto in inches - Wolfram|Alpha</title> + <meta charset="utf-8" /> + <body> + <script type="text/javascript"> + try { + if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) { + context.jsonArray.popups.pod_0100 = []; + } +[...].pod_0100.push( {"stringified": "convert distance | from | Nairobi, Kenya\nto | Kyoto, Japan to inches"}); + } catch(e) { } + + try { + if (typeof context.jsonArray.popups.pod_0200 == "undefined" ) { + context.jsonArray.popups.pod_0200 = []; + } +pod_0200.push({"stringified": "4.295&times;10^8 inches","mOutput": "Quantity[4.295×10^8,&amp;quot;Inches&amp;quot;]"}); + + } catch(e) { } + </script> + </body> + </html> + """ + # test output with utf-8 character + response = mock.Mock(text=html) + results = wolframalpha_noapi.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 2) + self.assertIn('4.295×10^8 inches'.decode('utf-8'), results[0]['answer']) + self.assertIn('distance from nairobi to kyoto in inches - Wolfram|Alpha', results[1]['title']) + self.assertEquals('http://www.wolframalpha.com/input/?i=+distance+from+nairobi+to+kyoto+in+inches', + results[1]['url']) From 8ca574481485847d5e0f47627d20c543c39b7b66 Mon Sep 17 00:00:00 2001 From: a01200356 <a01200356@itesm.mx> Date: Tue, 5 Jan 2016 21:47:31 -0600 Subject: [PATCH 14/16] append link to result in wolframalpha_api (and the tests to validate that) --- searx/engines/wolframalpha_api.py | 14 ++++- searx/settings.yml | 2 +- searx/tests/engines/test_wolframalpha_api.py | 59 +++++++------------- 3 files changed, 34 insertions(+), 41 deletions(-) diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py index 540d81351..303c6c165 100644 --- a/searx/engines/wolframalpha_api.py +++ b/searx/engines/wolframalpha_api.py @@ -10,15 +10,18 @@ from urllib import urlencode from lxml import etree +from re import search # search-url base_url = 'http://api.wolframalpha.com/v2/query' search_url = base_url + '?appid={api_key}&{query}&format=plaintext' +site_url = 'http://www.wolframalpha.com/input/?{query}' api_key = '' # defined in settings.yml # xpath variables failure_xpath = '/queryresult[attribute::success="false"]' answer_xpath = '//pod[attribute::primary="true"]/subpod/plaintext' +input_xpath = '//pod[starts-with(attribute::title, "Input")]/subpod/plaintext' # do search-request @@ -60,6 +63,15 @@ def response(resp): results.append({'answer': answer}) - # TODO: append a result with title and link, like in the no api version + # if there's no input section in search_results, check if answer has the input embedded (before their "=" sign) + try: + query_input = search_results.xpath(input_xpath)[0].text + except IndexError: + query_input = search(u'([^\uf7d9]+)', answers[0].text).group(1) + + # append link to site + result_url = site_url.format(query=urlencode({'i': query_input.encode('utf-8')})) + results.append({'url': result_url, + 'title': query_input + " - Wolfram|Alpha"}) return results diff --git a/searx/settings.yml b/searx/settings.yml index 63e944060..7a6fc6d8a 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -305,7 +305,7 @@ engines: # You can use the engine using the official stable API, but you need an API key # See : http://products.wolframalpha.com/api/ # engine : wolframalpha_api - # api_key: '' # required! + # api_key: 'apikey' # required! engine : wolframalpha_noapi timeout: 6.0 disabled : True diff --git a/searx/tests/engines/test_wolframalpha_api.py b/searx/tests/engines/test_wolframalpha_api.py index 98c53f769..c80775795 100644 --- a/searx/tests/engines/test_wolframalpha_api.py +++ b/searx/tests/engines/test_wolframalpha_api.py @@ -124,8 +124,10 @@ class TestWolframAlphaAPIEngine(SearxTestCase): response = mock.Mock(content=xml) results = wolframalpha_api.response(response) self.assertEqual(type(results), list) - self.assertEqual(len(results), 1) + self.assertEqual(len(results), 2) self.assertIn('i', results[0]['answer']) + self.assertIn('sqrt(-1) - Wolfram|Alpha', results[1]['title']) + self.assertEquals('http://www.wolframalpha.com/input/?i=sqrt%28-1%29', results[1]['url']) xml = """<?xml version='1.0' encoding='UTF-8'?> <queryresult success='true' @@ -223,8 +225,10 @@ class TestWolframAlphaAPIEngine(SearxTestCase): response = mock.Mock(content=xml) results = wolframalpha_api.response(response) self.assertEqual(type(results), list) - self.assertEqual(len(results), 1) + self.assertEqual(len(results), 2) self.assertIn('log(x)+c', results[0]['answer']) + self.assertIn('∫1/xx - Wolfram|Alpha'.decode('utf-8'), results[1]['title']) + self.assertEquals('http://www.wolframalpha.com/input/?i=%E2%88%AB1%2Fx%EF%9D%8Cx', results[1]['url']) xml = """<?xml version='1.0' encoding='UTF-8'?> <queryresult success='true' @@ -233,14 +237,14 @@ class TestWolframAlphaAPIEngine(SearxTestCase): datatypes='Solve' timedout='' timedoutpods='' - timing='0.883' - parsetiming='0.337' + timing='0.79' + parsetiming='0.338' parsetimedout='false' recalculate='' - id='MSPa347225h1ea85fgfbgb4000064ff000d25g5df3f' - host='http://www5a.wolframalpha.com' - server='52' - related='http://www5a.wolframalpha.com/api/v2/relatedQueries.jsp?...' + id='MSPa7481f7i06d25h3deh2900004810i3a78d9b4fdc' + host='http://www5b.wolframalpha.com' + server='23' + related='http://www5b.wolframalpha.com/api/v2/relatedQueries.jsp?id=...' version='2.6'> <pod title='Input interpretation' scanner='Identity' @@ -249,12 +253,7 @@ class TestWolframAlphaAPIEngine(SearxTestCase): error='false' numsubpods='1'> <subpod title=''> - <img src='http://www5a.wolframalpha.com/Calculate/MSP/MSP349225h1ea85fgfbgb400005dhd93b9eegg8f32?...' - alt='solve x^2+x = 0' - title='solve x^2+x = 0' - width='157' - height='35' /> - <plaintext>solve x^2+x = 0</plaintext> + <plaintext>solve x^2+x0</plaintext> </subpod> </pod> <pod title='Results' @@ -265,20 +264,10 @@ class TestWolframAlphaAPIEngine(SearxTestCase): numsubpods='2' primary='true'> <subpod title=''> - <img src='http://www5a.wolframalpha.com/Calculate/MSP/MSP350225h1ea85fgfbgb400005b1ebcefaha3ac97?...' - alt='x = -1' - title='x = -1' - width='47' - height='18' /> - <plaintext>x = -1</plaintext> + <plaintext>x-1</plaintext> </subpod> <subpod title=''> - <img src='http://www5a.wolframalpha.com/Calculate/MSP/MSP351225h1ea85fgfbgb4000032fic0ig981hc936?...' - alt='x = 0' - title='x = 0' - width='36' - height='18' /> - <plaintext>x = 0</plaintext> + <plaintext>x0</plaintext> </subpod> <states count='1'> <state name='Step-by-step solution' @@ -292,11 +281,6 @@ class TestWolframAlphaAPIEngine(SearxTestCase): error='false' numsubpods='1'> <subpod title=''> - <img src='http://www5a.wolframalpha.com/Calculate/MSP/MSP352225h1ea85fgfbgb40000464054c665hc5dee?...' - alt='' - title='' - width='300' - height='181' /> <plaintext></plaintext> </subpod> </pod> @@ -307,11 +291,6 @@ class TestWolframAlphaAPIEngine(SearxTestCase): error='false' numsubpods='1'> <subpod title=''> - <img src='http://www5a.wolframalpha.com/Calculate/MSP/MSP353225h1ea85fgfbgb400005ab1c8aai366fe46?...' - alt='' - title='' - width='310' - height='36' /> <plaintext></plaintext> </subpod> </pod> @@ -321,6 +300,8 @@ class TestWolframAlphaAPIEngine(SearxTestCase): response = mock.Mock(content=xml) results = wolframalpha_api.response(response) self.assertEqual(type(results), list) - self.assertEqual(len(results), 2) - self.assertIn('x = -1', results[0]['answer']) - self.assertIn('x = 0', results[1]['answer']) + self.assertEqual(len(results), 3) + self.assertIn('x=-1', results[0]['answer']) + self.assertIn('x=0', results[1]['answer']) + self.assertIn('solve x^2+x0 - Wolfram|Alpha'.decode('utf-8'), results[2]['title']) + self.assertEquals('http://www.wolframalpha.com/input/?i=solve+x%5E2%2Bx%EF%9F%990', results[2]['url']) From c2e034f52a31d4eb84b01cafb3af70ed55dad792 Mon Sep 17 00:00:00 2001 From: a01200356 <a01200356@itesm.mx> Date: Sun, 10 Jan 2016 19:51:40 -0600 Subject: [PATCH 15/16] move two tests --- {searx/tests => tests/unit}/engines/test_wolframalpha_api.py | 0 {searx/tests => tests/unit}/engines/test_wolframalpha_noapi.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename {searx/tests => tests/unit}/engines/test_wolframalpha_api.py (100%) rename {searx/tests => tests/unit}/engines/test_wolframalpha_noapi.py (100%) diff --git a/searx/tests/engines/test_wolframalpha_api.py b/tests/unit/engines/test_wolframalpha_api.py similarity index 100% rename from searx/tests/engines/test_wolframalpha_api.py rename to tests/unit/engines/test_wolframalpha_api.py diff --git a/searx/tests/engines/test_wolframalpha_noapi.py b/tests/unit/engines/test_wolframalpha_noapi.py similarity index 100% rename from searx/tests/engines/test_wolframalpha_noapi.py rename to tests/unit/engines/test_wolframalpha_noapi.py From 30bfbf2e07def8911d0b293e8032699812f43599 Mon Sep 17 00:00:00 2001 From: a01200356 <a01200356@itesm.mx> Date: Mon, 18 Jan 2016 11:34:38 -0600 Subject: [PATCH 16/16] [fix] pep8 --- searx/engines/wolframalpha_noapi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py index 442e894b5..291fee04d 100644 --- a/searx/engines/wolframalpha_noapi.py +++ b/searx/engines/wolframalpha_noapi.py @@ -16,7 +16,7 @@ import HTMLParser # search-url url = 'http://www.wolframalpha.com/' -search_url = url+'input/?{query}' +search_url = url + 'input/?{query}' # xpath variables scripts_xpath = '//script' @@ -61,7 +61,7 @@ def response(resp): if line: # extract answer from json - answer = line[line.find('{'):line.rfind('}')+1] + answer = line[line.find('{'):line.rfind('}') + 1] try: answer = loads(answer) except Exception: