[fix] dailymotion engine: no more HTML tags in the description

This commit is contained in:
Dalf 2014-01-05 13:55:17 +01:00
parent e88cf0a0a8
commit 49c85fce51
2 changed files with 10 additions and 3 deletions

View File

@ -82,5 +82,6 @@ categories = videos
[dailymotion] [dailymotion]
engine = dailymotion engine = dailymotion
locale = en_US
categories = videos categories = videos

View File

@ -1,16 +1,17 @@
from urllib import urlencode from urllib import urlencode
from lxml import html
from json import loads from json import loads
from cgi import escape from cgi import escape
categories = ['videos'] categories = ['videos']
localization = 'en' locale = 'en_US'
# see http://www.dailymotion.com/doc/api/obj-video.html # see http://www.dailymotion.com/doc/api/obj-video.html
search_url = 'https://api.dailymotion.com/videos?fields=title,description,duration,url,thumbnail_360_url&sort=relevance&limit=25&page=1&{query}' search_url = 'https://api.dailymotion.com/videos?fields=title,description,duration,url,thumbnail_360_url&sort=relevance&limit=25&page=1&{query}'
def request(query, params): def request(query, params):
global search_url global search_url
params['url'] = search_url.format(query=urlencode({'search': query, 'localization': localization })) params['url'] = search_url.format(query=urlencode({'search': query, 'localization': locale }))
return params return params
@ -27,6 +28,11 @@ def response(resp):
else: else:
content = '' content = ''
if res['description']: if res['description']:
content += escape(res['description'][:500]) description = text_content_from_html(res['description'])
content += description[:500]
results.append({'url': url, 'title': title, 'content': content}) results.append({'url': url, 'title': title, 'content': content})
return results return results
def text_content_from_html(html_string):
    """Return the text content of an HTML fragment, without its markup.

    ``html_string`` is parsed with lxml's ``html.fragment_fromstring``;
    ``create_parent=True`` is passed so the fragment is parsed under a
    single parent element, and that element's ``text_content()`` is
    returned.
    """
    return html.fragment_fromstring(html_string, create_parent=True).text_content()