diff --git a/searx/engines/piratebay.py b/searx/engines/piratebay.py index f6144faa2..207df276c 100644 --- a/searx/engines/piratebay.py +++ b/searx/engines/piratebay.py @@ -13,6 +13,7 @@ from cgi import escape from urllib import quote from lxml import html from operator import itemgetter +from searx.engines.xpath import extract_text # engine dependent config categories = ['videos', 'music', 'files'] @@ -29,7 +30,8 @@ search_types = {'files': '0', # specific xpath variables magnet_xpath = './/a[@title="Download this torrent using magnet"]' -content_xpath = './/font[@class="detDesc"]//text()' +torrent_xpath = './/a[@title="Download this torrent"]' +content_xpath = './/font[@class="detDesc"]' # do search-request @@ -59,8 +61,8 @@ def response(resp): for result in search_res[1:]: link = result.xpath('.//div[@class="detName"]//a')[0] href = urljoin(url, link.attrib.get('href')) - title = ' '.join(link.xpath('.//text()')) - content = escape(' '.join(result.xpath(content_xpath))) + title = extract_text(link) + content = escape(extract_text(result.xpath(content_xpath))) seed, leech = result.xpath('.//td[@align="right"]/text()')[:2] # convert seed to int if possible @@ -76,6 +78,7 @@ def response(resp): leech = 0 magnetlink = result.xpath(magnet_xpath)[0] + torrentfile = result.xpath(torrent_xpath)[0] # append result results.append({'url': href, @@ -83,7 +86,8 @@ def response(resp): 'content': content, 'seed': seed, 'leech': leech, - 'magnetlink': magnetlink.attrib['href'], + 'magnetlink': magnetlink.attrib.get('href'), + 'torrentfile': torrentfile.attrib.get('href'), 'template': 'torrent.html'}) # return results sorted by seeder diff --git a/searx/settings.yml b/searx/settings.yml index b0a2853c7..2c9441c34 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -152,9 +152,9 @@ engines: engine : photon shortcut : ph -# - name : piratebay -# engine : piratebay -# shortcut : tpb + - name : piratebay + engine : piratebay + shortcut : tpb - name : kickass engine : kickass diff --git a/searx/tests/engines/test_piratebay.py b/searx/tests/engines/test_piratebay.py new file mode 100644 index 000000000..7207c408a --- /dev/null +++ b/searx/tests/engines/test_piratebay.py @@ -0,0 +1,137 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +from searx.engines import piratebay +from searx.testing import SearxTestCase + + +class TestPiratebayEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + dicto['category'] = 'Toto' + params = piratebay.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('piratebay.cr', params['url']) + self.assertIn('0', params['url']) + + dicto['category'] = 'music' + params = piratebay.request(query, dicto) + self.assertIn('100', params['url']) + + def test_response(self): + self.assertRaises(AttributeError, piratebay.response, None) + self.assertRaises(AttributeError, piratebay.response, []) + self.assertRaises(AttributeError, piratebay.response, '') + self.assertRaises(AttributeError, piratebay.response, '[]') + + response = mock.Mock(text='') + self.assertEqual(piratebay.response(response), []) + + html = """ +
+ + (Anime) + |
+ + + + + + + + + + + + + + This is the content and should be OK + + | +13 | +334 | +
+ + (Anime) + |
+ + + + + + + + + + + + + + This is the content and should be OK + + | +s | +d | +