Merge pull request #194 from Cqoicebordel/moar-engines

Moar engines
This commit is contained in:
Adam Tauber 2015-01-22 08:46:04 +01:00
commit c169fc3aa2
4 changed files with 178 additions and 0 deletions

109
searx/engines/btdigg.py Normal file
View File

@ -0,0 +1,109 @@
## BTDigg (Videos, Music, Files)
#
# @website https://btdigg.org
# @provide-api yes (on demand)
#
# @using-api no
# @results HTML (using search portal)
# @stable no (HTML can change)
# @parse url, title, content, seed, leech, magnetlink
from urlparse import urljoin
from cgi import escape
from urllib import quote
from lxml import html
from operator import itemgetter
from searx.engines.xpath import extract_text
# engine dependent config
categories = ['videos', 'music', 'files']
paging = True
# search-url
url = 'https://btdigg.org'
search_url = url + '/search?q=22%20jump%20street&p=1'
# specific xpath variables
magnet_xpath = './/a[@title="Torrent magnet link"]'
torrent_xpath = './/a[@title="Download torrent file"]'
content_xpath = './/span[@class="font11px lightgrey block"]'
# do search-request
def request(query, params):
params['url'] = search_url.format(search_term=quote(query),
pageno=params['pageno']-1)
return params
# get response from search-request
def response(resp):
results = []
dom = html.fromstring(resp.text)
search_res = dom.xpath('//div[@id="search_res"]/table/tr')
# return empty array if nothing is found
if not search_res:
return []
# parse results
for result in search_res:
link = result.xpath('.//td[@class="torrent_name"]//a')[0]
href = urljoin(url, link.attrib['href'])
title = escape(extract_text(link.xpath('.//text()')))
content = escape(extract_text(result.xpath('.//pre[@class="snippet"]')[0]))
content = "<br />".join(content.split("\n"))
filesize = result.xpath('.//span[@class="attr_val"]/text()')[0].split()[0]
filesize_multiplier = result.xpath('.//span[@class="attr_val"]/text()')[0].split()[1]
files = result.xpath('.//span[@class="attr_val"]/text()')[1]
seed = result.xpath('.//span[@class="attr_val"]/text()')[2]
# convert seed to int if possible
if seed.isdigit():
seed = int(seed)
else:
seed = 0
leech = 0
# convert filesize to byte if possible
try:
filesize = float(filesize)
# convert filesize to byte
if filesize_multiplier == 'TB':
filesize = int(filesize * 1024 * 1024 * 1024 * 1024)
elif filesize_multiplier == 'GB':
filesize = int(filesize * 1024 * 1024 * 1024)
elif filesize_multiplier == 'MB':
filesize = int(filesize * 1024 * 1024)
elif filesize_multiplier == 'kb':
filesize = int(filesize * 1024)
except:
filesize = None
# convert files to int if possible
if files.isdigit():
files = int(files)
else:
files = None
magnetlink = result.xpath('.//td[@class="ttth"]//a')[0].attrib['href']
# append result
results.append({'url': href,
'title': title,
'content': content,
'seed': seed,
'leech': leech,
'filesize': filesize,
'files': files,
'magnetlink': magnetlink,
'template': 'torrent.html'})
# return results sorted by seeder
return sorted(results, key=itemgetter('seed'), reverse=True)

59
searx/engines/mixcloud.py Normal file
View File

@ -0,0 +1,59 @@
## Mixcloud (Music)
#
# @website https://http://www.mixcloud.com/
# @provide-api yes (http://www.mixcloud.com/developers/
#
# @using-api yes
# @results JSON
# @stable yes
# @parse url, title, content, embedded, publishedDate
from json import loads
from urllib import urlencode
from dateutil import parser
# engine dependent config
categories = ['music']
paging = True
# search-url
url = 'http://api.mixcloud.com/'
search_url = url + 'search/?{query}&type=cloudcast&limit=10&offset={offset}'
embedded_url = '<iframe scrolling="no" frameborder="0" allowTransparency="true" ' +\
'data-src="https://www.mixcloud.com/widget/iframe/?feed={url}" width="300" height="300"></iframe>'
# do search-request
def request(query, params):
offset = (params['pageno'] - 1) * 10
params['url'] = search_url.format(query=urlencode({'q': query}),
offset=offset)
return params
# get response from search-request
def response(resp):
results = []
search_res = loads(resp.text)
# parse results
for result in search_res.get('data', []):
title = result['name']
url = result['url']
content = result['user']['name']
embedded = embedded_url.format(url=url)
publishedDate = parser.parse(result['created_time'])
# append result
results.append({'url': url,
'title': title,
'embedded': embedded,
'publishedDate': publishedDate,
'content': content})
# return results
return results

View File

@ -33,6 +33,10 @@ engines:
locale : en-US locale : en-US
shortcut : bin shortcut : bin
- name : btdigg
engine : btdigg
shortcut : bt
- name : currency - name : currency
engine : currency_convert engine : currency_convert
categories : general categories : general
@ -136,6 +140,10 @@ engines:
categories : music categories : music
shortcut : gps shortcut : gps
- name : mixcloud
engine : mixcloud
shortcut : mc
- name : openstreetmap - name : openstreetmap
engine : openstreetmap engine : openstreetmap
shortcut : osm shortcut : osm

View File

@ -18,6 +18,8 @@
{% macro result_sub_header(result) -%} {% macro result_sub_header(result) -%}
{% if result.publishedDate %}<time class="text-muted" datetime="{{ result.pubdate }}" >{{ result.publishedDate }}</time>{% endif %} {% if result.publishedDate %}<time class="text-muted" datetime="{{ result.pubdate }}" >{{ result.publishedDate }}</time>{% endif %}
<small><a class="text-info" href="https://web.archive.org/web/{{ result.url }}">{{ icon('link') }} {{ _('cached') }}</a></small> <small><a class="text-info" href="https://web.archive.org/web/{{ result.url }}">{{ icon('link') }} {{ _('cached') }}</a></small>
{% if result.magnetlink %}<small> &bull; <a href="{{ result.magnetlink }}" class="magnetlink">{{ icon('magnet') }} {{ _('magnet link') }}</a></small>{% endif %}
{% if result.torrentfile %}<small> &bull; <a href="{{ result.torrentfile }}" class="torrentfile">{{ icon('download-alt') }} {{ _('torrent file') }}</a></small>{% endif %}
{%- endmacro %} {%- endmacro %}
<!-- Draw result footer --> <!-- Draw result footer -->