From 4578575c284584a58cce0acd85f86bef2f49d77f Mon Sep 17 00:00:00 2001 From: a01200356 Date: Wed, 23 Dec 2015 00:01:00 -0600 Subject: [PATCH 1/7] Wolfie kinda works using API --- searx/engines/wolframalpha.py | 60 ++++++++++++++++++++++++++ searx/engines/wolframalpha_api.py | 70 +++++++++++++++++++++++++++++++ searx/search.py | 2 +- searx/settings.yml | 6 +++ 4 files changed, 137 insertions(+), 1 deletion(-) create mode 100644 searx/engines/wolframalpha.py create mode 100644 searx/engines/wolframalpha_api.py diff --git a/searx/engines/wolframalpha.py b/searx/engines/wolframalpha.py new file mode 100644 index 000000000..be467681f --- /dev/null +++ b/searx/engines/wolframalpha.py @@ -0,0 +1,60 @@ +""" + WolframAlpha + + @website http://www.wolframalpha.com/ + + @using-api yes + @results no c + @stable i guess so + @parse result +""" + +import wolframalpha + +# engine dependent config +paging = False + +# search-url +# url = 'http://www.wolframalpha.com/' +# search_url = url+'input/?{query}' + +client_id = '5952JX-X52L3VKWT8' +''' +# do search-request +def request(query, params): + params['url'] = search_url.format(query=urlencode({'i': query})) + print params + + return params + + +# get response from search-request +def response(resp): + print resp + + dom = html.fromstring(resp.text) + #resshit = dom.find_class('output pnt') + #for shit in resshit: + #print shit.text_content() + results = [] + #results.append({'url': 'https://wikipedia.org', 'title': 'Wolfie, lol', 'content': 'es kwatro'}) + #print results + #return results + + # parse results + for result in dom.xpath(results_xpath): + print result + + link = result.xpath(link_xpath)[0] + href = urljoin(url, link.attrib.get('href')) + title = escape(extract_text(link)) + content = escape(extract_text(result.xpath(content_xpath))) + + # append result + results.append({'url': href, + 'title': title, + 'content': content}) + + print results + return results +''' diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py new file mode 100644 index 000000000..1cf908b62 --- /dev/null +++ b/searx/engines/wolframalpha_api.py @@ -0,0 +1,70 @@ +# Wolfram Alpha (Maths) +# +# @website http://www.wolframalpha.com +# @provide-api yes (http://api.wolframalpha.com/v2/) +# +# @using-api yes +# @results XML +# @stable yes +# @parse result + +from urllib import urlencode +from lxml import etree +from searx.engines.xpath import extract_text +from searx.utils import html_to_text + +# search-url +base_url = 'http://api.wolframalpha.com/v2/query' +search_url = base_url + '?appid={api_key}&{query}&format=plaintext' +site_url = 'http://wolframalpha.com/input/?{query}' + +#embedded_url = '' + +# do search-request +def request(query, params): + params['url'] = search_url.format(query=urlencode({'input': query}), + api_key=api_key) + + # need this for url in response + global my_query + my_query = query + + return params + +# replace private user area characters to make text legible +def replace_pua_chars(text): + pua_chars = { u'\uf74c': 'd', + u'\uf74d': u'\u212f', + u'\uf74e': 'i', + u'\uf7d9': '=' } + + for k, v in pua_chars.iteritems(): + text = text.replace(k, v) + + return text + +# get response from search-request +def response(resp): + results = [] + + search_results = etree.XML(resp.content) + + # return empty array if there are no results + if search_results.xpath('/queryresult[attribute::success="false"]'): + return [] + + # parse result + result = search_results.xpath('//pod[attribute::primary="true"]/subpod/plaintext')[0].text + result = replace_pua_chars(result) + + # bind url from site + result_url = site_url.format(query=urlencode({'i': my_query})) + + # append result + results.append({'url': result_url, + 'title': result}) + + # return results + return results diff --git a/searx/search.py b/searx/search.py index 655b7808a..85d88b9d3 100644 --- a/searx/search.py +++ b/searx/search.py @@ -98,7 +98,7 @@ def make_callback(engine_name, callback, params, result_container): with threading.RLock(): engines[engine_name].stats['page_load_time'] += search_duration - timeout_overhead = 0.2 # seconds + timeout_overhead = 0.5 # seconds timeout_limit = engines[engine_name].timeout + timeout_overhead if search_duration > timeout_limit: diff --git a/searx/settings.yml b/searx/settings.yml index c7f659e5f..1c8ba3f7f 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -300,6 +300,12 @@ engines: engine : vimeo shortcut : vm + - name : wolframalpha + shortcut : wa + engine : wolframalpha_api + api_key: '5952JX-X52L3VKWT8' + timeout: 6.0 + #The blekko technology and team have joined IBM Watson! -> https://blekko.com/ # - name : blekko images # engine : blekko_images From 922565c8a9f1061c8f6bcfa2ea8a11667d111c7c Mon Sep 17 00:00:00 2001 From: a01200356 Date: Sat, 26 Dec 2015 22:26:59 -0600 Subject: [PATCH 2/7] Returns answer rather than results. More appropriate in this case because it will always give just one answer. --- searx/engines/wolframalpha_api.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py index 1cf908b62..8390e91f9 100644 --- a/searx/engines/wolframalpha_api.py +++ b/searx/engines/wolframalpha_api.py @@ -18,16 +18,12 @@ base_url = 'http://api.wolframalpha.com/v2/query' search_url = base_url + '?appid={api_key}&{query}&format=plaintext' site_url = 'http://wolframalpha.com/input/?{query}' -#embedded_url = '' - # do search-request def request(query, params): params['url'] = search_url.format(query=urlencode({'input': query}), api_key=api_key) - # need this for url in response + need this for url in response global my_query my_query = query @@ -63,8 +59,9 @@ def response(resp): result_url = site_url.format(query=urlencode({'i': my_query})) # append result + # TODO: shouldn't it bind the source too? results.append({'url': result_url, - 'title': result}) + 'answer': result}) # return results return results From b3fde19ed59c6e05b1f076ea1f8a7cec21229083 Mon Sep 17 00:00:00 2001 From: a01200356 Date: Sun, 27 Dec 2015 21:09:45 -0600 Subject: [PATCH 3/7] Remove non API version (doesn't work) --- searx/engines/wolframalpha.py | 60 ------------------------------- searx/engines/wolframalpha_api.py | 2 +- 2 files changed, 1 insertion(+), 61 deletions(-) delete mode 100644 searx/engines/wolframalpha.py diff --git a/searx/engines/wolframalpha.py b/searx/engines/wolframalpha.py deleted file mode 100644 index be467681f..000000000 --- a/searx/engines/wolframalpha.py +++ /dev/null @@ -1,60 +0,0 @@ -""" - WolframAlpha - - @website http://www.wolframalpha.com/ - - @using-api yes - @results no c - @stable i guess so - @parse result -""" - -import wolframalpha - -# engine dependent config -paging = False - -# search-url -# url = 'http://www.wolframalpha.com/' -# search_url = url+'input/?{query}' - -client_id = '5952JX-X52L3VKWT8' -''' -# do search-request -def request(query, params): - params['url'] = search_url.format(query=urlencode({'i': query})) - print params - - return params - - -# get response from search-request -def response(resp): - print resp - - dom = html.fromstring(resp.text) - #resshit = dom.find_class('output pnt') - #for shit in resshit: - #print shit.text_content() - results = [] - #results.append({'url': 'https://wikipedia.org', 'title': 'Wolfie, lol', 'content': 'es kwatro'}) - #print results - #return results - - # parse results - for result in dom.xpath(results_xpath): - print result - - link = result.xpath(link_xpath)[0] - href = urljoin(url, link.attrib.get('href')) - title = escape(extract_text(link)) - content = escape(extract_text(result.xpath(content_xpath))) - - # append result - results.append({'url': href, - 'title': title, - 'content': content}) - - print results - return results -''' diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py index 8390e91f9..b9da87c8e 100644 --- a/searx/engines/wolframalpha_api.py +++ b/searx/engines/wolframalpha_api.py @@ -23,7 +23,7 @@ def request(query, params): params['url'] = search_url.format(query=urlencode({'input': query}), api_key=api_key) - need this for url in response + # need this for url in response global my_query my_query = query From 1b7e80db03faef848d0614493e2bf58366db29ea Mon Sep 17 00:00:00 2001 From: a01200356 Date: Sun, 27 Dec 2015 22:11:12 -0600 Subject: [PATCH 4/7] Undo minor change. --- searx/search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/searx/search.py b/searx/search.py index 85d88b9d3..655b7808a 100644 --- a/searx/search.py +++ b/searx/search.py @@ -98,7 +98,7 @@ def make_callback(engine_name, callback, params, result_container): with threading.RLock(): engines[engine_name].stats['page_load_time'] += search_duration - timeout_overhead = 0.5 # seconds + timeout_overhead = 0.2 # seconds timeout_limit = engines[engine_name].timeout + timeout_overhead if search_duration > timeout_limit: From 9cd3017dcb35b69574339b3a7ce3896da656e1a4 Mon Sep 17 00:00:00 2001 From: a01200356 Date: Sun, 27 Dec 2015 22:39:55 -0600 Subject: [PATCH 5/7] Disable engine by default. --- searx/settings.yml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/searx/settings.yml b/searx/settings.yml index 1c8ba3f7f..e23e4c390 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -300,11 +300,13 @@ engines: engine : vimeo shortcut : vm - - name : wolframalpha - shortcut : wa - engine : wolframalpha_api - api_key: '5952JX-X52L3VKWT8' - timeout: 6.0 +# You can use the engine using the official stable API, but you need an API key +# See : http://products.wolframalpha.com/api/ +# - name : wolframalpha +# shortcut : wa +# engine : wolframalpha_api +# api_key: 'apikey' # required! +# timeout: 6.0 #The blekko technology and team have joined IBM Watson! -> https://blekko.com/ # - name : blekko images From bc2420f8d4b51959326f4ee4c44378f1b05abb8c Mon Sep 17 00:00:00 2001 From: a01200356 Date: Mon, 28 Dec 2015 01:17:42 -0600 Subject: [PATCH 6/7] Fix Travis errors. --- searx/engines/wolframalpha_api.py | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py index b9da87c8e..309608628 100644 --- a/searx/engines/wolframalpha_api.py +++ b/searx/engines/wolframalpha_api.py @@ -16,31 +16,30 @@ from searx.utils import html_to_text # search-url base_url = 'http://api.wolframalpha.com/v2/query' search_url = base_url + '?appid={api_key}&{query}&format=plaintext' -site_url = 'http://wolframalpha.com/input/?{query}' +api_key = '' + # do search-request def request(query, params): params['url'] = search_url.format(query=urlencode({'input': query}), api_key=api_key) - # need this for url in response - global my_query - my_query = query - return params + # replace private user area characters to make text legible def replace_pua_chars(text): - pua_chars = { u'\uf74c': 'd', - u'\uf74d': u'\u212f', - u'\uf74e': 'i', - u'\uf7d9': '=' } + pua_chars = {u'\uf74c': 'd', + u'\uf74d': u'\u212f', + u'\uf74e': 'i', + u'\uf7d9': '='} for k, v in pua_chars.iteritems(): text = text.replace(k, v) return text + # get response from search-request def response(resp): results = [] @@ -55,13 +54,9 @@ def response(resp): result = search_results.xpath('//pod[attribute::primary="true"]/subpod/plaintext')[0].text result = replace_pua_chars(result) - # bind url from site - result_url = site_url.format(query=urlencode({'i': my_query})) - # append result # TODO: shouldn't it bind the source too? - results.append({'url': result_url, - 'answer': result}) + results.append({'answer': result}) # return results return results From 0bb403bb4740d20c8a158fef622919dcd30e6e69 Mon Sep 17 00:00:00 2001 From: a01200356 Date: Mon, 28 Dec 2015 01:24:26 -0600 Subject: [PATCH 7/7] More fixes (Travis again). forfuckssaketravisnotagain --- searx/engines/wolframalpha_api.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py index 309608628..d61d25747 100644 --- a/searx/engines/wolframalpha_api.py +++ b/searx/engines/wolframalpha_api.py @@ -10,8 +10,6 @@ from urllib import urlencode from lxml import etree -from searx.engines.xpath import extract_text -from searx.utils import html_to_text # search-url base_url = 'http://api.wolframalpha.com/v2/query'