searxng/searx/engines/vimeo.py
Cqoicebordel 4a195e0b28 Integrated media in results + Deezer Engine
New "embedded" item for the results, allow to give an iframe to display the media directly in the results.
Note that the attributes src of the iframes are not set, but instead data-src is set, allowing to only load the iframe when clicked.

Deezer engine based on public API (no key).
2015-01-05 02:04:23 +01:00

82 lines
2.4 KiB
Python

## Vimeo (Videos)
#
# @website https://vimeo.com/
# @provide-api yes (http://developer.vimeo.com/api),
# they have a maximum count of queries/hour
#
# @using-api no (TODO, rewrite to api)
# @results HTML (using search portal)
# @stable no (HTML can change)
# @parse url, title, publishedDate, thumbnail, embedded
#
# @todo rewrite to api
# @todo set content-parameter with correct data
from urllib import urlencode
from HTMLParser import HTMLParser
from lxml import html
from searx.engines.xpath import extract_text
from dateutil import parser
# engine dependent config
categories = ['videos']
paging = True
# search-url
base_url = 'https://vimeo.com'
search_url = base_url + '/search/page:{pageno}?{query}'
# specific xpath variables
url_xpath = './a/@href'
content_xpath = './a/img/@src'
title_xpath = './a/div[@class="data"]/p[@class="title"]/text()'
results_xpath = '//div[@id="browse_content"]/ol/li'
publishedDate_xpath = './/p[@class="meta"]//attribute::datetime'
embedded_url = '<iframe data-src="//player.vimeo.com/video{videoid}" ' +\
'width="540" height="304" frameborder="0" ' +\
'webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe>'
# do search-request
def request(query, params):
params['url'] = search_url.format(pageno=params['pageno'],
query=urlencode({'q': query}))
# TODO required?
params['cookies']['__utma'] =\
'00000000.000#0000000.0000000000.0000000000.0000000000.0'
return params
# get response from search-request
def response(resp):
results = []
dom = html.fromstring(resp.text)
p = HTMLParser()
# parse results
for result in dom.xpath(results_xpath):
videoid = result.xpath(url_xpath)[0]
url = base_url + videoid
title = p.unescape(extract_text(result.xpath(title_xpath)))
thumbnail = extract_text(result.xpath(content_xpath)[0])
publishedDate = parser.parse(extract_text(
result.xpath(publishedDate_xpath)[0]))
embedded = embedded_url.format(videoid=videoid)
# append result
results.append({'url': url,
'title': title,
'content': '',
'template': 'videos.html',
'publishedDate': publishedDate,
'embedded': embedded,
'thumbnail': thumbnail})
# return results
return results