From b51ba32f619e6b7a927444475b0ee986d4d13a60 Mon Sep 17 00:00:00 2001
From: a01200356
Date: Tue, 29 Dec 2015 20:59:51 -0600
Subject: [PATCH] Wolfram Alpha (no API needed now)

---
 searx/engines/wolframalpha_noapi.py | 70 +++++++++++++++++++++++++++++
 searx/settings.yml                  |  8 ++--
 2 files changed, 74 insertions(+), 4 deletions(-)
 create mode 100644 searx/engines/wolframalpha_noapi.py

diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py
new file mode 100644
index 000000000..1ce2aa1ff
--- /dev/null
+++ b/searx/engines/wolframalpha_noapi.py
@@ -0,0 +1,70 @@
+# WolframAlpha (Maths)
+#
+# @website http://www.wolframalpha.com/
+#
+# @using-api no
+# @results HTML, JS
+# @stable no
+# @parse answer
+
+import re
+import json
+from urllib import urlencode
+from lxml import html
+from searx.engines.xpath import extract_text
+
+# search-url
+url = 'http://www.wolframalpha.com/'
+search_url = url+'input/?{query}'
+
+
+# do search-request
+def request(query, params):
+    # the query goes into the 'i' parameter of the WolframAlpha input page
+    params['url'] = search_url.format(query=urlencode({'i': query}))
+
+    return params
+
+
+# tries to find answer under the pattern given;
+# returns None when no script matches the pattern
+def extract_answer(script_list, pattern):
+    answer = None
+
+    # get line that matches the pattern
+    for script in script_list:
+        try:
+            line = re.search(pattern, script.text_content()).group(1)
+        except AttributeError:
+            # re.search returned None: pattern is not in this script
+            continue
+
+        # extract answer from json
+        answer = line[line.find('{'):line.rfind('}')+1]
+        answer = json.loads(answer.encode('unicode-escape'))
+        answer = answer['stringified'].decode('unicode-escape')
+
+    return answer
+
+
+# get response from search-request
+def response(resp):
+
+    dom = html.fromstring(resp.text)
+
+    # the answer is inside a js script
+    scripts = dom.xpath('//script')
+
+    results = []
+
+    # answer can be located in different 'pods', although by default it
+    # should be in pod_0200
+    answer = extract_answer(scripts, r'pod_0200\.push(.*)\n')
+    if not answer:
+        answer = extract_answer(scripts, r'pod_0100\.push(.*)\n')
+
+    # only report an answer when one was actually found
+    if answer:
+        results.append({'answer': answer})
+
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
index e23e4c390..d2a724118 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -302,11 +302,11 @@ engines:
 
 # You can use the engine using the official stable API, but you need an API key
 # See : http://products.wolframalpha.com/api/
-#  - name : wolframalpha
-#    shortcut : wa
-#    engine : wolframalpha_api
+  - name : wolframalpha
+    shortcut : wa
+    engine : wolframalpha_noapi
 #    api_key: 'apikey' # required!
-#    timeout: 6.0
+    timeout: 6.0
 
 #The blekko technology and team have joined IBM Watson! -> https://blekko.com/
 #  - name : blekko images