From 2fc1091b7fbd76189754767e2da8315e04ca1e47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=A9mi=20V=C3=A1nyi?= Date: Sun, 11 Dec 2016 16:36:32 +0100 Subject: [PATCH 1/3] add "Last year" option to time range selection --- searx/templates/oscar/time-range.html | 3 +++ 1 file changed, 3 insertions(+) diff --git a/searx/templates/oscar/time-range.html b/searx/templates/oscar/time-range.html index 4a13c4fdb..6ce1b91cb 100644 --- a/searx/templates/oscar/time-range.html +++ b/searx/templates/oscar/time-range.html @@ -11,4 +11,7 @@ + From c59c76e6ee14f0417ad55ecf0f888f62f34f1f6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=A9mi=20V=C3=A1nyi?= Date: Sun, 11 Dec 2016 16:39:12 +0100 Subject: [PATCH 2/3] add year to time range to engines which support "Last year" Engines: * Bing images * Flickr (noapi) * Google * Google Images * Google News --- searx/engines/bing_images.py | 3 ++- searx/engines/flickr_noapi.py | 3 ++- searx/engines/google.py | 3 ++- searx/engines/google_images.py | 10 +++++++++- searx/engines/google_news.py | 3 ++- tests/unit/engines/test_youtube_noapi.py | 19 +++++++++++++++++++ 6 files changed, 36 insertions(+), 5 deletions(-) diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py index 417871d6f..4dd362cb3 100644 --- a/searx/engines/bing_images.py +++ b/searx/engines/bing_images.py @@ -33,7 +33,8 @@ time_range_string = '&qft=+filterui:age-lt{interval}' thumb_url = "https://www.bing.com/th?id={ihk}" time_range_dict = {'day': '1440', 'week': '10080', - 'month': '43200'} + 'month': '43200', + 'year': '525600'} # safesearch definitions safesearch_types = {2: 'STRICT', diff --git a/searx/engines/flickr_noapi.py b/searx/engines/flickr_noapi.py index 68be139be..3c0ec7b70 100644 --- a/searx/engines/flickr_noapi.py +++ b/searx/engines/flickr_noapi.py @@ -34,7 +34,8 @@ paging = True time_range_support = True time_range_dict = {'day': 60 * 60 * 24, 'week': 60 * 60 * 24 * 7, - 'month': 60 * 60 * 24 * 7 * 4} + 'month': 60 * 60 * 24 * 7 * 4, + 
'year': 60 * 60 * 24 * 7 * 52} def build_flickr_url(user_id, photo_id): diff --git a/searx/engines/google.py b/searx/engines/google.py index 0e2d522f4..a02b6940e 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -95,7 +95,8 @@ search_url = ('https://{hostname}' + time_range_search = "&tbs=qdr:{range}" time_range_dict = {'day': 'd', 'week': 'w', - 'month': 'm'} + 'month': 'm', + 'year': 'y'} # other URLs map_hostname_start = 'maps.google.' diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py index 77bdc13b2..9a3c71c7e 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -10,10 +10,12 @@ @parse url, title, img_src """ +from datetime import date, timedelta from urllib import urlencode from json import loads from lxml import html + # engine dependent config categories = ['images'] paging = True @@ -29,6 +31,7 @@ search_url = 'https://www.google.com/search'\ '&yv=2'\ '&{search_options}' time_range_attr = "qdr:{range}" +time_range_custom_attr = "cdr:1,cd_min:{start},cd_max:{end}" time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm'} @@ -36,7 +39,6 @@ time_range_dict = {'day': 'd', # do search-request def request(query, params): - search_options = { 'ijn': params['pageno'] - 1, 'start': (params['pageno'] - 1) * number_of_results @@ -44,6 +46,12 @@ def request(query, params): if params['time_range'] in time_range_dict: search_options['tbs'] = time_range_attr.format(range=time_range_dict[params['time_range']]) + elif params['time_range'] == 'year': + now = date.today() + then = now - timedelta(days=365) + start = then.strftime('%m/%d/%Y') + end = now.strftime('%m/%d/%Y') + search_options['tbs'] = time_range_custom_attr.format(start=start, end=end) if safesearch and params['safesearch']: search_options['safe'] = 'on' diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py index ede615614..37253c6a7 100644 --- a/searx/engines/google_news.py +++ b/searx/engines/google_news.py @@ 
-29,7 +29,8 @@ search_url = 'https://www.google.com/search'\ time_range_attr = "qdr:{range}" time_range_dict = {'day': 'd', 'week': 'w', - 'month': 'm'} + 'month': 'm', + 'year': 'y'} # do search-request diff --git a/tests/unit/engines/test_youtube_noapi.py b/tests/unit/engines/test_youtube_noapi.py index 986441b51..41dcbb749 100644 --- a/tests/unit/engines/test_youtube_noapi.py +++ b/tests/unit/engines/test_youtube_noapi.py @@ -17,6 +17,25 @@ class TestYoutubeNoAPIEngine(SearxTestCase): self.assertIn(query, params['url']) self.assertIn('youtube.com', params['url']) + def test_time_range_search(self): + dicto = defaultdict(dict) + query = 'test_query' + dicto['time_range'] = 'year' + params = youtube_noapi.request(query, dicto) + self.assertIn('&sp=EgIIBQ%253D%253D', params['url']) + + dicto['time_range'] = 'month' + params = youtube_noapi.request(query, dicto) + self.assertIn('&sp=EgIIBA%253D%253D', params['url']) + + dicto['time_range'] = 'week' + params = youtube_noapi.request(query, dicto) + self.assertIn('&sp=EgIIAw%253D%253D', params['url']) + + dicto['time_range'] = 'day' + params = youtube_noapi.request(query, dicto) + self.assertIn('&sp=EgIIAg%253D%253D', params['url']) + def test_response(self): self.assertRaises(AttributeError, youtube_noapi.response, None) self.assertRaises(AttributeError, youtube_noapi.response, []) From b034356825420507c9fb7ee2dc100676a88cf6c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=A9mi=20V=C3=A1nyi?= Date: Sun, 11 Dec 2016 16:41:14 +0100 Subject: [PATCH 3/3] add year filter to engines with time range support && tests The following engines do not support "Last year": * Bing News * DeviantArt * DuckDuckGo * Yahoo * YouTube (noapi) --- searx/engines/bing_news.py | 3 +++ searx/engines/deviantart.py | 3 +++ searx/engines/duckduckgo.py | 3 +++ searx/engines/yahoo.py | 3 +++ searx/engines/youtube_noapi.py | 3 ++- tests/unit/engines/test_bing_news.py | 7 +++++++ tests/unit/engines/test_deviantart.py | 9 ++++++++- 
tests/unit/engines/test_duckduckgo.py | 7 +++++++ tests/unit/engines/test_yahoo.py | 7 +++++++ 9 files changed, 43 insertions(+), 2 deletions(-) diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py index 906afe662..4e7c33129 100644 --- a/searx/engines/bing_news.py +++ b/searx/engines/bing_news.py @@ -66,6 +66,9 @@ def _get_url(query, language, offset, time_range): # do search-request def request(query, params): + if params['time_range'] and params['time_range'] not in time_range_dict: + return params + offset = (params['pageno'] - 1) * 10 + 1 if params['language'] == 'all': diff --git a/searx/engines/deviantart.py b/searx/engines/deviantart.py index d893fc7fe..a24b75b8a 100644 --- a/searx/engines/deviantart.py +++ b/searx/engines/deviantart.py @@ -34,6 +34,9 @@ time_range_dict = {'day': 11, # do search-request def request(query, params): + if params['time_range'] and params['time_range'] not in time_range_dict: + return params + offset = (params['pageno'] - 1) * 24 params['url'] = search_url.format(offset=offset, diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index 2153492e9..9959a52e6 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -41,6 +41,9 @@ content_xpath = './/a[@class="result__snippet"]' # do search-request def request(query, params): + if params['time_range'] and params['time_range'] not in time_range_dict: + return params + offset = (params['pageno'] - 1) * 30 if params['language'] == 'all': diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py index 8e24a283e..2bb34b83d 100644 --- a/searx/engines/yahoo.py +++ b/searx/engines/yahoo.py @@ -77,6 +77,9 @@ def _get_language(params): # do search-request def request(query, params): + if params['time_range'] and params['time_range'] not in time_range_dict: + return params + offset = (params['pageno'] - 1) * 10 + 1 language = _get_language(params) diff --git a/searx/engines/youtube_noapi.py b/searx/engines/youtube_noapi.py index 
b33594465..9b7ca64c8 100644 --- a/searx/engines/youtube_noapi.py +++ b/searx/engines/youtube_noapi.py @@ -25,7 +25,8 @@ search_url = base_url + '?search_query={query}&page={page}' time_range_url = '&sp=EgII{time_range}%253D%253D' time_range_dict = {'day': 'Ag', 'week': 'Aw', - 'month': 'BA'} + 'month': 'BA', + 'year': 'BQ'} embedded_url = '