# From 8af2184a45d28105876f1e5529c829de3f36a428 Mon Sep 17 00:00:00 2001
# From: Thomas Pointhuber
# Date: Fri, 17 Oct 2014 12:34:51 +0200
# Subject: [PATCH] add faroo engine support
# ---
#  searx/engines/faroo.py | 108 +++++++++++++++++++++++++++++++++++++++++
#  searx/settings.yml     |   6 +++
#  searx/utils.py         |   3 ++
#  3 files changed, 117 insertions(+)
#  create mode 100644 searx/engines/faroo.py
#
# diff --git a/searx/engines/faroo.py b/searx/engines/faroo.py
# new file mode 100644
# index 000000000..31104353f
# --- /dev/null
# +++ b/searx/engines/faroo.py
# @@ -0,0 +1,108 @@

## Faroo (Web, News)
#
# @website     http://www.faroo.com
# @provide-api yes (http://www.faroo.com/hp/api/api.html), require API-key
#
# @using-api   yes
# @results     JSON
# @stable      yes
# @parse       url, title, content, publishedDate, img_src

from urllib import urlencode
from json import loads
import datetime
from searx.utils import searx_useragent

# engine dependent config
categories = ['general', 'news']
paging = True
language_support = True
number_of_results = 10
api_key = None

# search-url
url = 'http://www.faroo.com/'
search_url = url + 'api?{query}&start={offset}&length={number_of_results}&l={language}&src={categorie}&i=false&f=json&key={api_key}'

# maps the searx category name to the value sent as the ``src`` parameter
search_category = {'general': 'web',
                   'news': 'news'}


# do search-request
def request(query, params):
    """Fill ``params`` with the Faroo API request for ``query``.

    Reads ``params['pageno']``, ``params['category']`` and
    ``params['language']``; on success sets ``params['url']`` and
    ``params['headers']['User-Agent']``.

    If the language (after stripping a ``_REGION`` suffix) is not one of
    ``en``, ``de`` or ``zh``, ``params`` is returned untouched, i.e.
    without a ``url``.

    Returns the same ``params`` dict that was passed in.
    """
    # NOTE(review): the Faroo API documents ``start`` as 1-based
    # (default 1), so the first page must begin at 1, not 0 -- confirm
    # against the API reference linked in the header above.
    offset = (params['pageno'] - 1) * number_of_results + 1

    # unknown categories fall back to the plain web search
    categorie = search_category.get(params['category'], 'web')

    if params['language'] == 'all':
        language = 'en'
    else:
        # 'de_DE' -> 'de'
        language = params['language'].split('_')[0]

    # skip, if language is not supported
    if language not in ('en', 'de', 'zh'):
        return params

    params['url'] = search_url.format(offset=offset,
                                      number_of_results=number_of_results,
                                      query=urlencode({'q': query}),
                                      language=language,
                                      categorie=categorie,
                                      api_key=api_key)

    # using searx User-Agent
    params['headers']['User-Agent'] = searx_useragent()

    return params


# get response
# from search-request
def response(resp):
    """Convert a Faroo API HTTP response into a list of result dicts.

    ``resp`` must expose ``status_code`` and ``text`` (the JSON body).

    Every entry of the JSON ``results`` array yields one dict with
    ``url``, ``title`` and ``content`` (taken from ``kwic``); news
    entries additionally carry ``publishedDate``.  When the entry has a
    non-empty ``iurl`` a second dict using the ``images.html`` template
    is appended right after it.

    Returns an empty list when ``results`` is missing or empty.

    Raises ``Exception`` on HTTP 401 (API key rejected) and on HTTP 429
    (rate limit exceeded).
    """
    # HTTP-Code 401: api-key is not valid
    if resp.status_code == 401:
        raise Exception("API key is not valide")

    # HTTP-Code 429: rate limit exceeded
    if resp.status_code == 429:
        raise Exception("rate limit has been exceeded!")

    search_res = loads(resp.text)

    results = []

    # parse results (a missing or empty 'results' entry yields no results)
    for result in search_res.get('results') or []:
        # ``loads`` turns a JSON boolean into True/False; the string form
        # 'true' is still accepted for backward compatibility.
        if result.get('news') in (True, 'true'):
            # timestamp (how many milliseconds have passed between now
            # and the beginning of 1970)
            publishedDate = datetime.datetime.fromtimestamp(
                result['date'] / 1000.0)

            # append news result
            results.append({'url': result['url'],
                            'title': result['title'],
                            'publishedDate': publishedDate,
                            'content': result['kwic']})

        else:
            # append general result
            # TODO, publishedDate correct?
            results.append({'url': result['url'],
                            'title': result['title'],
                            'content': result['kwic']})

        # append image result if image url is set
        # TODO, show results with an image like in faroo
        if result.get('iurl'):
            results.append({'template': 'images.html',
                            'url': result['url'],
                            'title': result['title'],
                            'content': result['kwic'],
                            'img_src': result['iurl']})

    # return results
    return results


# diff --git a/searx/settings.yml b/searx/settings.yml
# index da053ce6a..a627c3676 100644
# --- a/searx/settings.yml
# +++ b/searx/settings.yml
# @@ -48,6 +48,12 @@ engines:
#      engine : duckduckgo
#      shortcut : ddg
#
# +# api-key required: http://www.faroo.com/hp/api/api.html#key
# +#  - name : faroo
# +#    engine : faroo
# +#    shortcut : fa
# +#    api_key : 'apikey' # required!
# (searx/settings.yml hunk, continued)
# +
#  # down - website is under criminal investigation by the UK
#  #  - name : filecrop
#  #    engine : filecrop
#
# diff --git a/searx/utils.py b/searx/utils.py
# index a9ece355a..28e42b272 100644
# --- a/searx/utils.py
# +++ b/searx/utils.py
# @@ -20,6 +20,9 @@ def gen_useragent():
#      return ua.format(os=choice(ua_os), version=choice(ua_versions))


def searx_useragent():
    """Return the constant User-Agent string ``'searx'``."""
    user_agent = 'searx'
    return user_agent


# (hunk context, definition continues beyond this excerpt)
# def highlight_content(content, query):
#     if not content: