From 4578575c284584a58cce0acd85f86bef2f49d77f Mon Sep 17 00:00:00 2001 From: a01200356 Date: Wed, 23 Dec 2015 00:01:00 -0600 Subject: [PATCH] Wolfie kinda works using API --- searx/engines/wolframalpha.py | 60 ++++++++++++++++++++++++++ searx/engines/wolframalpha_api.py | 70 +++++++++++++++++++++++++++++++ searx/search.py | 2 +- searx/settings.yml | 6 +++ 4 files changed, 137 insertions(+), 1 deletion(-) create mode 100644 searx/engines/wolframalpha.py create mode 100644 searx/engines/wolframalpha_api.py diff --git a/searx/engines/wolframalpha.py b/searx/engines/wolframalpha.py new file mode 100644 index 000000000..be467681f --- /dev/null +++ b/searx/engines/wolframalpha.py @@ -0,0 +1,60 @@ +""" + WolframAlpha + + @website http://www.wolframalpha.com/ + + @using-api yes + @results no c + @stable i guess so + @parse result +""" + +import wolframalpha + +# engine dependent config +paging = False + +# search-url +# url = 'http://www.wolframalpha.com/' +# search_url = url+'input/?{query}' + +client_id = '5952JX-X52L3VKWT8' +''' +# do search-request +def request(query, params): + params['url'] = search_url.format(query=urlencode({'i': query})) + print params + + return params + + +# get response from search-request +def response(resp): + print resp + + dom = html.fromstring(resp.text) + #resshit = dom.find_class('output pnt') + #for shit in resshit: + #print shit.text_content() + results = [] + #results.append({'url': 'https://wikipedia.org', 'title': 'Wolfie, lol', 'content': 'es kwatro'}) + #print results + #return results + + # parse results + for result in dom.xpath(results_xpath): + print result + + link = result.xpath(link_xpath)[0] + href = urljoin(url, link.attrib.get('href')) + title = escape(extract_text(link)) + content = escape(extract_text(result.xpath(content_xpath))) + + # append result + results.append({'url': href, + 'title': title, + 'content': content}) + + print results + return results +''' diff --git a/searx/engines/wolframalpha_api.py 
# Git-patch metadata that shared this physical line with the module in the
# flattened diff; carried over verbatim, not part of the engine itself:
#   b/searx/engines/wolframalpha_api.py new file mode 100644
#   index 000000000..1cf908b62
#   --- /dev/null +++ b/searx/engines/wolframalpha_api.py @@ -0,0 +1,70 @@

# Wolfram Alpha (Maths)
#
# @website     http://www.wolframalpha.com
# @provide-api yes (http://api.wolframalpha.com/v2/)
#
# @using-api   yes
# @results     XML
# @stable      yes
# @parse       result

from urllib import urlencode
from lxml import etree
from searx.engines.xpath import extract_text
from searx.utils import html_to_text

# search-url
base_url = 'http://api.wolframalpha.com/v2/query'
search_url = base_url + '?appid={api_key}&{query}&format=plaintext'
site_url = 'http://wolframalpha.com/input/?{query}'

#embedded_url = ''

# xpath expressions evaluated against the XML reply
failure_xpath = '/queryresult[attribute::success="false"]'
answer_xpath = '//pod[attribute::primary="true"]/subpod/plaintext'

# private-use-area code points found in answers -> legible replacements
# (presumably Wolfram's differential d, exponential e, imaginary i and
# long-equal glyphs -- TODO confirm against the Wolfram character tables)
pua_chars = {u'\uf74c': 'd',
             u'\uf74d': u'\u212f',
             u'\uf74e': 'i',
             u'\uf7d9': '='}


def request(query, params):
    """Fill in the API request URL for ``query``.

    ``params['url']`` is set to ``search_url`` with the url-encoded query and
    the module-level ``api_key``. ``api_key`` is not defined in this module;
    presumably it is injected from the engine's settings entry -- confirm.

    Returns the same ``params`` dict that was passed in.
    """
    params['url'] = search_url.format(query=urlencode({'input': query}),
                                      api_key=api_key)

    # response() needs the query to build the link back to the website.
    # NOTE(review): this is a single module-wide slot. If several searches can
    # be in flight at once, response() may read another search's query.
    # Confirm how callbacks are scheduled and move this to per-request state.
    global my_query
    my_query = query

    return params


def replace_pua_chars(text):
    """Return ``text`` with the code points listed in ``pua_chars`` replaced."""
    # .items() instead of .iteritems(): same result, not tied to Python 2.
    for pua_char, replacement in pua_chars.items():
        text = text.replace(pua_char, replacement)

    return text


def response(resp):
    """Turn the XML reply in ``resp.content`` into a list of result dicts.

    Returns an empty list when the reply is flagged ``success="false"``, when
    it contains no primary pod, or when the primary pod's first plaintext
    element is empty. Otherwise returns a one-element list holding a dict
    with ``url`` (link to the query on the website) and ``title`` (the
    answer text).
    """
    search_results = etree.XML(resp.content)

    # return empty array if there are no results
    if search_results.xpath(failure_xpath):
        return []

    # A successful reply is not guaranteed to carry a primary pod, and the
    # plaintext element can be empty (.text is None); neither is an error.
    answers = search_results.xpath(answer_xpath)
    if not answers or not answers[0].text:
        return []

    title = replace_pua_chars(answers[0].text)

    # bind url from site, using the query remembered by request()
    result_url = site_url.format(query=urlencode({'i': my_query}))

    return [{'url': result_url,
             'title': title}]


# Trailing git-patch metadata from the same flattened line, carried over
# verbatim (the hunk header continues on the following line of the patch):
#   diff --git a/searx/search.py b/searx/search.py
#   index 655b7808a..85d88b9d3 100644
#   --- a/searx/search.py +++ b/searx/search.py
#   @@ -98,7 +98,7 @@ def make_callback(engine_name, callback, params,
result_container): with threading.RLock(): engines[engine_name].stats['page_load_time'] += search_duration - timeout_overhead = 0.2 # seconds + timeout_overhead = 0.5 # seconds timeout_limit = engines[engine_name].timeout + timeout_overhead if search_duration > timeout_limit: diff --git a/searx/settings.yml b/searx/settings.yml index c7f659e5f..1c8ba3f7f 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -300,6 +300,12 @@ engines: engine : vimeo shortcut : vm + - name : wolframalpha + shortcut : wa + engine : wolframalpha_api + api_key: '5952JX-X52L3VKWT8' + timeout: 6.0 + #The blekko technology and team have joined IBM Watson! -> https://blekko.com/ # - name : blekko images # engine : blekko_images