From 2434c29dc535b034d2fb7be2b753e3bfd453609f Mon Sep 17 00:00:00 2001 From: woorst Date: Sat, 19 Aug 2017 19:05:17 -0500 Subject: [PATCH 1/4] New engine: Genius (lyrics) --- searx/engines/genius.py | 89 ++++++++++++ searx/settings.yml | 4 + tests/unit/engines/test_genius.py | 231 ++++++++++++++++++++++++++++++ 3 files changed, 324 insertions(+) create mode 100644 searx/engines/genius.py create mode 100644 tests/unit/engines/test_genius.py diff --git a/searx/engines/genius.py b/searx/engines/genius.py new file mode 100644 index 000000000..c7bce1c68 --- /dev/null +++ b/searx/engines/genius.py @@ -0,0 +1,89 @@ +""" +Genius + + @website https://www.genius.com/ + @provide-api yes (https://docs.genius.com/) + + @using-api yes + @results JSON + @stable yes + @parse url, title, content, thumbnail, publishedDate +""" + +from json import loads +from searx.url_utils import urlencode +from datetime import datetime + +# engine dependent config +categories = ['music'] +paging = True +language_support = False +page_size = 5 + +indicies = ['top_hit', 'song', 'lyric', 'artist', 'album', 'tag', 'video', 'article', 'user'] +url = 'https://genius.com/api/' +search_url = url + 'search/{index}?{query}&page={pageno}&per_page={page_size}' + + +def request(query, params): + params['url'] = search_url.format(query=urlencode({'q': query}), + index='multi', + page_size=page_size, + pageno=params['pageno']) + return params + + +def parse_lyric(hit): + try: + content = hit['highlights'][0]['value'] + except: + content = None + timestamp = hit['result']['lyrics_updated_at'] + result = {'url': hit['result']['url'], + 'title': hit['result']['full_title'], + 'content': content, + 'thumbnail': hit['result']['song_art_image_thumbnail_url'], + 'template': 'videos.html'} + if timestamp: + result.update({'publishedDate': datetime.fromtimestamp(timestamp)}) + return result + + +def parse_artist(hit): + result = {'url': hit['result']['url'], + 'title': hit['result']['name'], + 'content': None, + 'thumbnail': hit['result']['image_url'], + 'template': 'videos.html'} + return result + + +def parse_album(hit): + result = {'url': hit['result']['url'], + 'title': hit['result']['full_title'], + 'thumbnail': hit['result']['cover_art_url'], + # 'thumbnail': hit['result']['cover_art_thumbnail_url'], + 'template': 'videos.html'} + try: + year = hit['result']['release_date_components']['year'] + except: + pass + else: + if year: + result.update({'content': 'Released: {}'.format(year)}) + return result + +parse = {'lyric': parse_lyric, 'song': parse_lyric, 'artist': parse_artist, 'album': parse_album} + + +def response(resp): + results = [] + json = loads(resp.text) + hits = [hit for section in json['response']['sections'] for hit in section['hits']] + for hit in hits: + try: + func = parse[hit['type']] + except KeyError: + continue + results.append(func(hit)) + return results diff --git a/searx/settings.yml b/searx/settings.yml index 4da96b5bf..33d1de829 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -236,6 +236,10 @@ engines: shortcut : frk disabled : True + - name : genius + engine : genius + shortcut : gen + - name : gigablast engine : gigablast shortcut : gb diff --git a/tests/unit/engines/test_genius.py b/tests/unit/engines/test_genius.py new file mode 100644 index 000000000..d81b1bdcf --- /dev/null +++ b/tests/unit/engines/test_genius.py @@ -0,0 +1,231 @@ +from collections import defaultdict +import mock +from datetime import datetime +from searx.engines import genius +from searx.testing import SearxTestCase + + +class TestGeniusEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + params = genius.request(query, dicto) + self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + self.assertTrue('genius.com' in params['url']) + + def test_response(self): + + json_empty = """ + { + "meta": { + "status": 200 + }, + "response": { + "sections": [ + { + "type": "top_hit", + "hits": [] + }, + { + "type": "song", + "hits": [] + }, + { + "type": "lyric", + "hits": [] + }, + { + "type": "artist", + "hits": [] + }, + { + "type": "album", + "hits": [] + }, + { + "type": "tag", + "hits": [] + }, + { + "type": "video", + "hits": [] + }, + { + "type": "article", + "hits": [] + }, + { + "type": "user", + "hits": [] + } + ] + } + } + """ + + resp = mock.Mock(text=json_empty) + self.assertEqual(genius.response(resp), []) + + json = """ + { + "meta": { + "status": 200 + }, + "response": { + "sections": [ + { + "type": "lyric", + "hits": [ + { + "highlights": [ + { + "property": "lyrics", + "value": "Sample lyrics", + "snippet": true, + "ranges": [] + } + ], + "index": "lyric", + "type": "song", + "result": { + "_type": "song", + "annotation_count": 45, + "api_path": "/songs/52916", + "full_title": "J't'emmerde by MC Jean Gab'1", + "header_image_thumbnail_url": "https://images.genius.com/ef9f736a86df3c3b1772f3fb7fbdb21c.300x300x1.jpg", + "header_image_url": "https://images.genius.com/ef9f736a86df3c3b1772f3fb7fbdb21c.1000x1000x1.jpg", + "id": 52916, + "instrumental": false, + "lyrics_owner_id": 15586, + "lyrics_state": "complete", + "lyrics_updated_at": 1498744545, + "path": "/Mc-jean-gab1-jtemmerde-lyrics", + "pyongs_count": 4, + "song_art_image_thumbnail_url": "https://images.genius.com/ef9f736a86df3c3b1772f3fb7fbdb21c.300x300x1.jpg", + "stats": { + "hot": false, + "unreviewed_annotations": 0, + "pageviews": 62490 + }, + "title": "J't'emmerde", + "title_with_featured": "J't'emmerde", + "updated_by_human_at": 1498744546, + "url": "https://genius.com/Mc-jean-gab1-jtemmerde-lyrics", + "primary_artist": { + "_type": "artist", + "api_path": "/artists/12691", + "header_image_url": "https://images.genius.com/c7847662a58f8c2b0f02a6e217d60907.960x657x1.jpg", + "id": 12691, + "image_url": "https://s3.amazonaws.com/rapgenius/Mc-jean-gab1.jpg", + "index_character": "m", + "is_meme_verified": false, + "is_verified": false, + "name": "MC Jean Gab'1", + "slug": "Mc-jean-gab1", + "url": "https://genius.com/artists/Mc-jean-gab1" + } + } + } + ] + }, + { + "type": "artist", + "hits": [ + { + "highlights": [], + "index": "artist", + "type": "artist", + "result": { + "_type": "artist", + "api_path": "/artists/191580", + "header_image_url": "https://assets.genius.com/images/default_avatar_300.png?1503090542", + "id": 191580, + "image_url": "https://assets.genius.com/images/default_avatar_300.png?1503090542", + "index_character": "a", + "is_meme_verified": false, + "is_verified": false, + "name": "ASDF Guy", + "slug": "Asdf-guy", + "url": "https://genius.com/artists/Asdf-guy" + } + } + ] + }, + { + "type": "album", + "hits": [ + { + "highlights": [], + "index": "album", + "type": "album", + "result": { + "_type": "album", + "api_path": "/albums/132332", + "cover_art_thumbnail_url": "https://images.genius.com/147d70434ba190b9b1c26b06aee87d17.300x300x1.jpg", + "cover_art_url": "https://images.genius.com/147d70434ba190b9b1c26b06aee87d17.600x600x1.jpg", + "full_title": "ASD by A Skylit Drive", + "id": 132332, + "name": "ASD", + "name_with_artist": "ASD (artist: A Skylit Drive)", + "release_date_components": { + "year": 2015, + "month": null, + "day": null + }, + "url": "https://genius.com/albums/A-skylit-drive/Asd", + "artist": { + "_type": "artist", + "api_path": "/artists/48712", + "header_image_url": "https://images.genius.com/814c1551293172c56306d0e310c6aa89.620x400x1.jpg", + "id": 48712, + "image_url": "https://images.genius.com/814c1551293172c56306d0e310c6aa89.620x400x1.jpg", + "index_character": "s", + "is_meme_verified": false, + "is_verified": false, + "name": "A Skylit Drive", + "slug": "A-skylit-drive", + "url": "https://genius.com/artists/A-skylit-drive" + } + } + } + ] + } + ] + } + } + """ + + resp = mock.Mock(text=json) + results = genius.response(resp) + + self.assertEqual(len(results), 3) + self.assertEqual(type(results), list) + + # check lyric parsing + r = results[0] + self.assertEqual(r['url'], 'https://genius.com/Mc-jean-gab1-jtemmerde-lyrics') + self.assertEqual(r['title'], "J't'emmerde by MC Jean Gab'1") + self.assertEqual(r['content'], "Sample lyrics") + self.assertEqual(r['template'], 'videos.html') + self.assertEqual(r['thumbnail'], 'https://images.genius.com/ef9f736a86df3c3b1772f3fb7fbdb21c.300x300x1.jpg') + created = datetime.fromtimestamp(1498744545) + self.assertEqual(r['publishedDate'], created) + + # check artist parsing + r = results[1] + self.assertEqual(r['url'], 'https://genius.com/artists/Asdf-guy') + self.assertEqual(r['title'], "ASDF Guy") + self.assertEqual(r['content'], None) + self.assertEqual(r['template'], 'videos.html') + self.assertEqual(r['thumbnail'], 'https://assets.genius.com/images/default_avatar_300.png?1503090542') + + # check album parsing + r = results[2] + self.assertEqual(r['url'], 'https://genius.com/albums/A-skylit-drive/Asd') + self.assertEqual(r['title'], "ASD by A Skylit Drive") + self.assertEqual(r['content'], "Released: 2015") + self.assertEqual(r['template'], 'videos.html') + self.assertEqual(r['thumbnail'], 'https://images.genius.com/147d70434ba190b9b1c26b06aee87d17.600x600x1.jpg') From 62b2f79ce7ac6000263dd8c52ec6e4096424180e Mon Sep 17 00:00:00 2001 From: woorst Date: Sun, 20 Aug 2017 21:10:51 -0500 Subject: [PATCH 2/4] fix line lengths for pep8 standards --- tests/unit/engines/test_genius.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/unit/engines/test_genius.py b/tests/unit/engines/test_genius.py index d81b1bdcf..2b563d87b 100644 --- a/tests/unit/engines/test_genius.py +++ b/tests/unit/engines/test_genius.py @@ -95,7 +95,7 @@ class TestGeniusEngine(SearxTestCase): "annotation_count": 45, "api_path": "/songs/52916", "full_title": "J't'emmerde by MC Jean Gab'1", - "header_image_thumbnail_url": "https://images.genius.com/ef9f736a86df3c3b1772f3fb7fbdb21c.300x300x1.jpg", + "header_image_thumbnail_url": "https://images.genius.com/xxx.300x300x1.jpg", "header_image_url": "https://images.genius.com/ef9f736a86df3c3b1772f3fb7fbdb21c.1000x1000x1.jpg", "id": 52916, "instrumental": false, @@ -104,7 +104,7 @@ class TestGeniusEngine(SearxTestCase): "lyrics_updated_at": 1498744545, "path": "/Mc-jean-gab1-jtemmerde-lyrics", "pyongs_count": 4, - "song_art_image_thumbnail_url": "https://images.genius.com/ef9f736a86df3c3b1772f3fb7fbdb21c.300x300x1.jpg", + "song_art_image_thumbnail_url": "https://images.genius.com/xxx.300x300x1.jpg", "stats": { "hot": false, "unreviewed_annotations": 0, @@ -164,8 +164,8 @@ class TestGeniusEngine(SearxTestCase): "result": { "_type": "album", "api_path": "/albums/132332", - "cover_art_thumbnail_url": "https://images.genius.com/147d70434ba190b9b1c26b06aee87d17.300x300x1.jpg", - "cover_art_url": "https://images.genius.com/147d70434ba190b9b1c26b06aee87d17.600x600x1.jpg", + "cover_art_thumbnail_url": "https://images.genius.com/xxx.300x300x1.jpg", + "cover_art_url": "https://images.genius.com/xxx.600x600x1.jpg", "full_title": "ASD by A Skylit Drive", "id": 132332, "name": "ASD", @@ -210,7 +210,7 @@ class TestGeniusEngine(SearxTestCase): self.assertEqual(r['title'], "J't'emmerde by MC Jean Gab'1") self.assertEqual(r['content'], "Sample lyrics") self.assertEqual(r['template'], 'videos.html') - self.assertEqual(r['thumbnail'], 'https://images.genius.com/ef9f736a86df3c3b1772f3fb7fbdb21c.300x300x1.jpg') + self.assertEqual(r['thumbnail'], 'https://images.genius.com/xxx.300x300x1.jpg') created = datetime.fromtimestamp(1498744545) self.assertEqual(r['publishedDate'], created) @@ -228,4 +228,4 @@ class TestGeniusEngine(SearxTestCase): self.assertEqual(r['title'], "ASD by A Skylit Drive") self.assertEqual(r['content'], "Released: 2015") self.assertEqual(r['template'], 'videos.html') - self.assertEqual(r['thumbnail'], 'https://images.genius.com/147d70434ba190b9b1c26b06aee87d17.600x600x1.jpg') + self.assertEqual(r['thumbnail'], 'https://images.genius.com/xxx.600x600x1.jpg') From 636b76019668960417c1253d1aa383b832c18a4d Mon Sep 17 00:00:00 2001 From: woorst Date: Mon, 21 Aug 2017 11:45:23 -0500 Subject: [PATCH 3/4] remove unicode characters --- tests/unit/engines/test_genius.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/engines/test_genius.py b/tests/unit/engines/test_genius.py index 2b563d87b..ea721943a 100644 --- a/tests/unit/engines/test_genius.py +++ b/tests/unit/engines/test_genius.py @@ -94,7 +94,7 @@ class TestGeniusEngine(SearxTestCase): "_type": "song", "annotation_count": 45, "api_path": "/songs/52916", - "full_title": "J't'emmerde by MC Jean Gab'1", + "full_title": "J't'emmerde by MC Jean Gab'1", "header_image_thumbnail_url": "https://images.genius.com/xxx.300x300x1.jpg", "header_image_url": "https://images.genius.com/ef9f736a86df3c3b1772f3fb7fbdb21c.1000x1000x1.jpg", "id": 52916, @@ -207,7 +207,7 @@ class TestGeniusEngine(SearxTestCase): # check lyric parsing r = results[0] self.assertEqual(r['url'], 'https://genius.com/Mc-jean-gab1-jtemmerde-lyrics') - self.assertEqual(r['title'], "J't'emmerde by MC Jean Gab'1") + self.assertEqual(r['title'], "J't'emmerde by MC Jean Gab'1") self.assertEqual(r['content'], "Sample lyrics") self.assertEqual(r['template'], 'videos.html') self.assertEqual(r['thumbnail'], 'https://images.genius.com/xxx.300x300x1.jpg') From 18a4e7035f72a3c31239ae0bd1ee67cc2ad354b8 Mon Sep 17 00:00:00 2001 From: Apply55gx Date: Wed, 25 Oct 2017 10:42:37 +0200 Subject: [PATCH 4/4] removed unused indicies array --- searx/engines/genius.py | 1 - 1 file changed, 1 deletion(-) diff --git a/searx/engines/genius.py b/searx/engines/genius.py index c7bce1c68..b265e9d76 100644 --- a/searx/engines/genius.py +++ b/searx/engines/genius.py @@ -20,7 +20,6 @@ paging = True language_support = False page_size = 5 -indicies = ['top_hit', 'song', 'lyric', 'artist', 'album', 'tag', 'video', 'article', 'user'] url = 'https://genius.com/api/' search_url = url + 'search/{index}?{query}&page={pageno}&per_page={page_size}'