From 85dcfa2c7d66dbfde1c0aa349f01020b02195676 Mon Sep 17 00:00:00 2001
From: Cqoicebordel
Date: Wed, 21 Jan 2015 18:02:29 +0100
Subject: [PATCH] BTDigg and Mixcloud engines

---
 searx/engines/btdigg.py   | 109 ++++++++++++++++++++++++++++++++++++++
 searx/engines/mixcloud.py |  59 +++++++++++++++++++++
 searx/settings.yml        |   8 +++
 3 files changed, 176 insertions(+)
 create mode 100644 searx/engines/btdigg.py
 create mode 100644 searx/engines/mixcloud.py

diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py
new file mode 100644
index 000000000..59556a2ae
--- /dev/null
+++ b/searx/engines/btdigg.py
@@ -0,0 +1,109 @@
+## BTDigg (Videos, Music, Files)
+#
+# @website https://btdigg.org
+# @provide-api yes (on demand)
+#
+# @using-api no
+# @results HTML (using search portal)
+# @stable no (HTML can change)
+# @parse url, title, content, seed, leech, magnetlink
+
+from urlparse import urljoin
+from cgi import escape
+from urllib import quote
+from lxml import html
+from operator import itemgetter
+from searx.engines.xpath import extract_text
+
+# engine dependent config
+categories = ['videos', 'music', 'files']
+paging = True
+
+# search-url ({search_term} and {pageno} are filled in by request())
+url = 'https://btdigg.org'
+search_url = url + '/search?q={search_term}&p={pageno}'
+
+# specific xpath variables (not referenced by the functions in this module)
+magnet_xpath = './/a[@title="Torrent magnet link"]'
+torrent_xpath = './/a[@title="Download torrent file"]'
+content_xpath = './/span[@class="font11px lightgrey block"]'
+
+
+# do search-request: store the search URL (quoted query, page number minus one) in params['url']
+def request(query, params):
+    params['url'] = search_url.format(search_term=quote(query),
+                                      pageno=params['pageno']-1)
+
+    return params
+
+
+# get response from search-request: turn each row of the result table into a torrent result dict
+def response(resp):
+    results = []
+
+    dom = html.fromstring(resp.text)
+
+    search_res = dom.xpath('//div[@id="search_res"]/table/tr')
+
+    # return empty array if nothing is found
+    if not search_res:
+        return []
+
+    # parse results
+    for result in search_res:
+        link = result.xpath('.//td[@class="torrent_name"]//a')[0]
+        href = urljoin(url, link.attrib['href'])
+        title = escape(extract_text(link.xpath('.//text()')))
+        content = escape(extract_text(result.xpath('.//pre[@class="snippet"]')[0]))
+        content = "<br />".join(content.split("\n"))
+
+        filesize = result.xpath('.//span[@class="attr_val"]/text()')[0].split()[0]
+        filesize_multiplier = result.xpath('.//span[@class="attr_val"]/text()')[0].split()[1]
+        files = result.xpath('.//span[@class="attr_val"]/text()')[1]
+        seed = result.xpath('.//span[@class="attr_val"]/text()')[2]
+
+        # convert seed to int if possible
+        if seed.isdigit():
+            seed = int(seed)
+        else:
+            seed = 0
+
+        leech = 0
+
+        # convert filesize to byte if possible
+        try:
+            filesize = float(filesize)
+
+            # convert filesize to byte (the kilobyte unit is matched in either case)
+            if filesize_multiplier == 'TB':
+                filesize = int(filesize * 1024 * 1024 * 1024 * 1024)
+            elif filesize_multiplier == 'GB':
+                filesize = int(filesize * 1024 * 1024 * 1024)
+            elif filesize_multiplier == 'MB':
+                filesize = int(filesize * 1024 * 1024)
+            elif filesize_multiplier.upper() == 'KB':
+                filesize = int(filesize * 1024)
+        except (ValueError, OverflowError):
+            filesize = None
+
+        # convert files to int if possible
+        if files.isdigit():
+            files = int(files)
+        else:
+            files = None
+
+        magnetlink = result.xpath('.//td[@class="ttth"]//a')[0].attrib['href']
+
+        # append result
+        results.append({'url': href,
+                        'title': title,
+                        'content': content,
+                        'seed': seed,
+                        'leech': leech,
+                        'filesize': filesize,
+                        'files': files,
+                        'magnetlink': magnetlink,
+                        'template': 'torrent.html'})
+
+    # return results sorted by seeder
+    return sorted(results, key=itemgetter('seed'), reverse=True)
diff --git a/searx/engines/mixcloud.py b/searx/engines/mixcloud.py
new file mode 100644
index 000000000..676e6f845
--- /dev/null
+++ b/searx/engines/mixcloud.py
@@ -0,0 +1,59 @@
+## Mixcloud (Music)
+#
+# @website http://www.mixcloud.com/
+# @provide-api yes (http://www.mixcloud.com/developers/)
+#
+# @using-api yes
+# @results JSON
+# @stable yes
+# @parse url, title, content, embedded, publishedDate
+
+from json import loads
+from urllib import urlencode
+from dateutil import parser
+
+# engine dependent config
+categories = ['music']
+paging = True
+
+# search-url
+url = 'http://api.mixcloud.com/'
+search_url = url + 'search/?{query}&type=cloudcast&limit=10&offset={offset}'
+
+embedded_url = '<iframe scrolling="no" frameborder="0" allowTransparency="true" ' +\
+    'data-src="https://www.mixcloud.com/widget/iframe/?feed={url}" width="300" height="300"></iframe>'
+
+
+# do search-request: store the API URL (urlencoded query, offset of 10 results per page) in params['url']
+def request(query, params):
+    offset = (params['pageno'] - 1) * 10
+
+    params['url'] = search_url.format(query=urlencode({'q': query}),
+                                      offset=offset)
+
+    return params
+
+
+# get response from search-request: turn each entry of the JSON 'data' list into a result dict
+def response(resp):
+    results = []
+
+    search_res = loads(resp.text)
+
+    # parse results
+    for result in search_res.get('data', []):
+        title = result['name']
+        url = result['url']
+        content = result['user']['name']
+        embedded = embedded_url.format(url=url)
+        publishedDate = parser.parse(result['created_time'])
+
+        # append result
+        results.append({'url': url,
+                        'title': title,
+                        'embedded': embedded,
+                        'publishedDate': publishedDate,
+                        'content': content})
+
+    # return results
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
index 097358841..8f63203c9 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -33,6 +33,10 @@ engines:
     locale : en-US
     shortcut : bin
 
+  - name : btdigg
+    engine : btdigg
+    shortcut : bt
+
   - name : currency
     engine : currency_convert
     categories : general
@@ -136,6 +140,10 @@ engines:
     categories : music
     shortcut : gps
 
+  - name : mixcloud
+    engine : mixcloud
+    shortcut : mc
+
   - name : openstreetmap
     engine : openstreetmap
     shortcut : osm