# Provenance: commit 9ad8013a4578dc8069227d3e9de88d734089828e
# Author: asciimoo -- Date: Tue, 22 Oct 2013 18:58:01 +0200
# Subject: [enh] piratebay engine added
# File: searx/engines/piratebay.py (new file)
"""The Pirate Bay engine: build the search URL and scrape the result table.

``request`` fills in ``params['url']`` for a query and ``response`` turns the
fetched HTML page into a list of ``{'url', 'title', 'content'}`` dicts.
"""

from lxml import html

try:
    # Python 3 locations.  NOTE: ``from html import escape`` only binds
    # ``escape``; the name ``html`` keeps referring to ``lxml.html``.
    from urllib.parse import quote, urljoin
    from html import escape
except ImportError:
    # Python 2 locations (the ones this module was written against).
    from urllib import quote
    from urlparse import urljoin
    from cgi import escape

categories = ['videos', 'music']

base_url = 'https://thepiratebay.sx/'
search_url = base_url + 'search/{search_term}/0/99/{search_type}'

# Maps an engine category name to the site's numeric category id.
search_types = {
    'videos': '200',
    'music': '100',
}

# Category id used when the requested category has no entry in
# ``search_types``; '0' searches every category on the site.
default_search_type = '0'


def request(query, params):
    """Store the search URL for *query* in ``params['url']``.

    Args:
        query: The search terms.
        params: Request parameter dict.  Its optional ``'category'`` entry
            selects the site category via ``search_types``; a missing or
            unknown category falls back to ``default_search_type`` instead of
            formatting ``None`` into the URL.

    Returns:
        The same *params* dict, with ``'url'`` set.
    """
    search_type = search_types.get(params.get('category'),
                                   default_search_type)
    # safe='' so that a '/' inside the query is percent-encoded rather than
    # being read as an extra path segment, which would shift the positional
    # fields that follow the search term.
    params['url'] = search_url.format(search_term=quote(query, safe=''),
                                      search_type=search_type)
    return params


def response(resp):
    """Extract search results from the fetched result page.

    Args:
        resp: Response object whose ``text`` attribute holds the page HTML.

    Returns:
        A list of dicts with the keys ``'url'`` (absolute link to the result),
        ``'title'`` (link text) and ``'content'`` (HTML-escaped description
        text).  The list is empty when the page has no body or no
        ``searchResult`` table rows.
    """
    results = []
    page = resp.text
    # lxml refuses to parse an empty document, so bail out early.
    if not page or not page.strip():
        return results

    dom = html.fromstring(page)
    rows = dom.xpath('//table[@id="searchResult"]//tr')

    # The first row is skipped (presumably the table header -- confirm
    # against the live markup).
    for row in rows[1:]:
        links = row.xpath('.//div[@class="detName"]//a')
        if not links:
            # Rows without a result link (e.g. a pagination row) carry no
            # result; skip them instead of raising IndexError.
            continue
        link = links[0]
        url = urljoin(base_url, link.attrib.get('href'))
        title = ' '.join(link.xpath('.//text()'))
        description = ' '.join(
            row.xpath('.//font[@class="detDesc"]//text()'))
        # quote=False matches cgi.escape's default on Python 2 (html.escape
        # would otherwise also escape quote characters).
        content = escape(description, quote=False)
        results.append({'url': url, 'title': title, 'content': content})
    return results