update piratebay engine and add comments

This commit is contained in:
Thomas Pointhuber 2014-09-02 17:56:32 +02:00
parent 334a286c18
commit dae88d862b
2 changed files with 35 additions and 9 deletions

View File

@ -1,39 +1,61 @@
## Piratebay (Videos, Music, Files)
#
# @website https://thepiratebay.se
# @provide-api no (nothing found)
#
# @using-api no
# @results HTML (using search portal)
# @stable yes (HTML can change)
# @parse url, title, content, seed, leech, magnetlink
from urlparse import urljoin from urlparse import urljoin
from cgi import escape from cgi import escape
from urllib import quote from urllib import quote
from lxml import html from lxml import html
from operator import itemgetter from operator import itemgetter
# engine dependent config
# Categories this engine is registered for, and whether it accepts a page
# number in the request parameters.
categories = ['videos', 'music', 'files']
paging = True

# search-url
# Path layout: search/<term>/<page>/99/<type>.
# NOTE(review): the literal "99" segment is fixed here; presumably it is the
# site's ordering parameter -- confirm against the site before changing it.
url = 'https://thepiratebay.se/'
search_url = url + 'search/{search_term}/{pageno}/99/{search_type}'

# piratebay specific type-definitions
# Maps a category name to the value placed in the {search_type} URL segment.
search_types = {'files': '0',
                'music': '100',
                'videos': '200'}

# specific xpath variables
# Both are evaluated relative to a single result row.
magnet_xpath = './/a[@title="Download this torrent using magnet"]'
content_xpath = './/font[@class="detDesc"]//text()'
# do search-request
def request(query, params):
    """Build the search URL for ``query`` and store it in ``params['url']``.

    Reads ``params['category']`` to choose the search type and
    ``params['pageno']`` for the page to fetch.

    :param query: search terms; URL-quoted before being placed in the path
    :param params: request parameter dict, modified in place
    :returns: the same ``params`` dict with ``'url'`` set
    """
    # Categories without an entry in search_types fall back to '0', the
    # value mapped to 'files'.
    search_type = search_types.get(params['category'], '0')

    # The incoming page number is decremented before it goes into the URL.
    # NOTE(review): quote() leaves '/' unescaped by default, so a query
    # containing '/' adds extra path segments -- confirm whether the site
    # accepts an encoded slash before changing this.
    params['url'] = search_url.format(search_term=quote(query),
                                      search_type=search_type,
                                      pageno=params['pageno'] - 1)

    return params
# get response from search-request
def response(resp): def response(resp):
results = [] results = []
dom = html.fromstring(resp.text) dom = html.fromstring(resp.text)
search_res = dom.xpath('//table[@id="searchResult"]//tr') search_res = dom.xpath('//table[@id="searchResult"]//tr')
# return empty array if nothing is found
if not search_res: if not search_res:
return results return []
# parse results
for result in search_res[1:]: for result in search_res[1:]:
link = result.xpath('.//div[@class="detName"]//a')[0] link = result.xpath('.//div[@class="detName"]//a')[0]
href = urljoin(url, link.attrib.get('href')) href = urljoin(url, link.attrib.get('href'))
@ -41,17 +63,21 @@ def response(resp):
content = escape(' '.join(result.xpath(content_xpath))) content = escape(' '.join(result.xpath(content_xpath)))
seed, leech = result.xpath('.//td[@align="right"]/text()')[:2] seed, leech = result.xpath('.//td[@align="right"]/text()')[:2]
# convert seed to int if possible
if seed.isdigit(): if seed.isdigit():
seed = int(seed) seed = int(seed)
else: else:
seed = 0 seed = 0
# convert leech to int if possible
if leech.isdigit(): if leech.isdigit():
leech = int(leech) leech = int(leech)
else: else:
leech = 0 leech = 0
magnetlink = result.xpath(magnet_xpath)[0] magnetlink = result.xpath(magnet_xpath)[0]
# append result
results.append({'url': href, results.append({'url': href,
'title': title, 'title': title,
'content': content, 'content': content,
@ -60,4 +86,5 @@ def response(resp):
'magnetlink': magnetlink.attrib['href'], 'magnetlink': magnetlink.attrib['href'],
'template': 'torrent.html'}) 'template': 'torrent.html'})
# return results sorted by seeder
return sorted(results, key=itemgetter('seed'), reverse=True) return sorted(results, key=itemgetter('seed'), reverse=True)

View File

@ -82,7 +82,6 @@ engines:
- name : piratebay - name : piratebay
engine : piratebay engine : piratebay
categories : videos, music, files
shortcut : tpb shortcut : tpb
- name : soundcloud - name : soundcloud