mirror of https://github.com/searxng/searxng, synced 2024-01-01 19:24:07 +01:00
Merge branch 'master' of https://github.com/asciimoo/searx into code_results
Conflicts:
    searx/engines/searchcode_code.py
    searx/engines/searchcode_doc.py
    searx/static/oscar/js/searx.min.js
    searx/templates/oscar/result_templates/default.html
    searx/templates/oscar/result_templates/images.html
    searx/templates/oscar/result_templates/map.html
    searx/templates/oscar/result_templates/torrent.html
    searx/templates/oscar/result_templates/videos.html

commit 400b54191c
252 changed files with 1747 additions and 600 deletions
@@ -35,9 +35,9 @@ def request(query, params):

# get response from search-request
def response(resp):
    results = []

    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath('//div[@class="photo"]'):
        link = result.xpath('.//a')[0]
@@ -22,6 +22,10 @@ from imp import load_source
from flask.ext.babel import gettext
from operator import itemgetter
from searx import settings
from searx import logger

logger = logger.getChild('engines')

engine_dir = dirname(realpath(__file__))

@@ -81,7 +85,7 @@ def load_engine(engine_data):
        if engine_attr.startswith('_'):
            continue
        if getattr(engine, engine_attr) is None:
-           print('[E] Engine config error: Missing attribute "{0}.{1}"'\
+           logger.error('Missing engine config attribute: "{0}.{1}"'
                .format(engine.name, engine_attr))
            sys.exit(1)

@@ -100,9 +104,8 @@ def load_engine(engine_data):
        categories['general'].append(engine)

    if engine.shortcut:
        # TODO check duplications
        if engine.shortcut in engine_shortcuts:
-           print('[E] Engine config error: ambigious shortcut: {0}'\
+           logger.error('Engine config error: ambigious shortcut: {0}'
                .format(engine.shortcut))
            sys.exit(1)
        engine_shortcuts[engine.shortcut] = engine.name

@@ -199,7 +202,7 @@ def get_engines_stats():

if 'engines' not in settings or not settings['engines']:
-   print '[E] Error no engines found. Edit your settings.yml'
+   logger.error('No engines found. Edit your settings.yml')
    exit(2)

for engine_data in settings['engines']:
@@ -6,12 +6,14 @@
# @using-api yes
# @results JSON
# @stable yes
- # @parse url, title, thumbnail
+ # @parse url, title, thumbnail, publishedDate, embedded
#
# @todo set content-parameter with correct data

from urllib import urlencode
from json import loads
from cgi import escape
from datetime import datetime

# engine dependent config
categories = ['videos']

@@ -20,7 +22,9 @@ language_support = True

# search-url
# see http://www.dailymotion.com/doc/api/obj-video.html
- search_url = 'https://api.dailymotion.com/videos?fields=title,description,duration,url,thumbnail_360_url&sort=relevance&limit=5&page={pageno}&{query}' # noqa
+ search_url = 'https://api.dailymotion.com/videos?fields=created_time,title,description,duration,url,thumbnail_360_url,id&sort=relevance&limit=5&page={pageno}&{query}' # noqa
embedded_url = '<iframe frameborder="0" width="540" height="304" ' +\
    'data-src="//www.dailymotion.com/embed/video/{videoid}" allowfullscreen></iframe>'


# do search-request

@@ -51,14 +55,17 @@ def response(resp):
    for res in search_res['list']:
        title = res['title']
        url = res['url']
-       #content = res['description']
-       content = ''
+       content = escape(res['description'])
        thumbnail = res['thumbnail_360_url']
        publishedDate = datetime.fromtimestamp(res['created_time'], None)
        embedded = embedded_url.format(videoid=res['id'])

        results.append({'template': 'videos.html',
                        'url': url,
                        'title': title,
                        'content': content,
                        'publishedDate': publishedDate,
                        'embedded': embedded,
                        'thumbnail': thumbnail})

    # return results
searx/engines/deezer.py (new file, 61 lines)
@@ -0,0 +1,61 @@
## Deezer (Music)
#
# @website https://deezer.com
# @provide-api yes (http://developers.deezer.com/api/)
#
# @using-api yes
# @results JSON
# @stable yes
# @parse url, title, content, embedded

from json import loads
from urllib import urlencode

# engine dependent config
categories = ['music']
paging = True

# search-url
url = 'http://api.deezer.com/'
search_url = url + 'search?{query}&index={offset}'

embedded_url = '<iframe scrolling="no" frameborder="0" allowTransparency="true" ' +\
    'data-src="http://www.deezer.com/plugins/player?type=tracks&id={audioid}" ' +\
    'width="540" height="80"></iframe>'


# do search-request
def request(query, params):
    offset = (params['pageno'] - 1) * 25

    params['url'] = search_url.format(query=urlencode({'q': query}),
                                      offset=offset)

    return params


# get response from search-request
def response(resp):
    results = []

    search_res = loads(resp.text)

    # parse results
    for result in search_res.get('data', []):
        if result['type'] == 'track':
            title = result['title']
            url = result['link']
            content = result['artist']['name'] +\
                " • " +\
                result['album']['title'] +\
                " • " + result['title']
            embedded = embedded_url.format(audioid=result['id'])

            # append result
            results.append({'url': url,
                            'title': title,
                            'embedded': embedded,
                            'content': content})

    # return results
    return results
searx/engines/digg.py (new file, 70 lines)
@@ -0,0 +1,70 @@
## Digg (News, Social media)
#
# @website https://digg.com/
# @provide-api no
#
# @using-api no
# @results HTML (using search portal)
# @stable no (HTML can change)
# @parse url, title, content, publishedDate, thumbnail

from urllib import quote_plus
from json import loads
from lxml import html
from cgi import escape
from dateutil import parser

# engine dependent config
categories = ['news', 'social media']
paging = True

# search-url
base_url = 'https://digg.com/'
search_url = base_url+'api/search/{query}.json?position={position}&format=html'

# specific xpath variables
results_xpath = '//article'
link_xpath = './/small[@class="time"]//a'
title_xpath = './/h2//a//text()'
content_xpath = './/p//text()'
pubdate_xpath = './/time'


# do search-request
def request(query, params):
    offset = (params['pageno'] - 1) * 10
    params['url'] = search_url.format(position=offset,
                                      query=quote_plus(query))
    return params


# get response from search-request
def response(resp):
    results = []

    search_result = loads(resp.text)

    if search_result['html'] == '':
        return results

    dom = html.fromstring(search_result['html'])

    # parse results
    for result in dom.xpath(results_xpath):
        url = result.attrib.get('data-contenturl')
        thumbnail = result.xpath('.//img')[0].attrib.get('src')
        title = ''.join(result.xpath(title_xpath))
        content = escape(''.join(result.xpath(content_xpath)))
        pubdate = result.xpath(pubdate_xpath)[0].attrib.get('datetime')
        publishedDate = parser.parse(pubdate)

        # append result
        results.append({'url': url,
                        'title': title,
                        'content': content,
                        'template': 'videos.html',
                        'publishedDate': publishedDate,
                        'thumbnail': thumbnail})

    # return results
    return results
@@ -1,6 +1,7 @@
import json
from urllib import urlencode
from lxml import html
from searx.utils import html_to_text
from searx.engines.xpath import extract_text

url = 'https://api.duckduckgo.com/'\

@@ -17,11 +18,6 @@ def result_to_text(url, text, htmlResult):
    return text


- def html_to_text(htmlFragment):
-     dom = html.fromstring(htmlFragment)
-     return extract_text(dom)


def request(query, params):
    # TODO add kl={locale}
    params['url'] = url.format(query=urlencode({'q': query}))
searx/engines/flickr-noapi.py (new file, 95 lines)
@@ -0,0 +1,95 @@
#!/usr/bin/env python

# Flickr (Images)
#
# @website https://www.flickr.com
# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)
#
# @using-api no
# @results HTML
# @stable no
# @parse url, title, thumbnail, img_src

from urllib import urlencode
from json import loads
import re

categories = ['images']

url = 'https://secure.flickr.com/'
search_url = url+'search/?{query}&page={page}'
photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
regex = re.compile(r"\"search-photos-models\",\"photos\":(.*}),\"totalItems\":", re.DOTALL)
image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's')

paging = True


def build_flickr_url(user_id, photo_id):
    return photo_url.format(userid=user_id, photoid=photo_id)


def request(query, params):
    params['url'] = search_url.format(query=urlencode({'text': query}),
                                      page=params['pageno'])
    return params


def response(resp):
    results = []

    matches = regex.search(resp.text)

    if matches is None:
        return results

    match = matches.group(1)
    search_results = loads(match)

    if '_data' not in search_results:
        return []

    photos = search_results['_data']

    for photo in photos:

        # In paged configuration, the first pages' photos
        # are represented by a None object
        if photo is None:
            continue

        img_src = None
        # From the biggest to the lowest format
        for image_size in image_sizes:
            if image_size in photo['sizes']:
                img_src = photo['sizes'][image_size]['displayUrl']
                break

        if not img_src:
            continue

        if 'id' not in photo['owner']:
            continue

        url = build_flickr_url(photo['owner']['id'], photo['id'])

        title = photo['title']

        content = '<span class="photo-author">' +\
                  photo['owner']['username'] +\
                  '</span><br />'

        if 'description' in photo:
            content = content +\
                      '<span class="description">' +\
                      photo['description'] +\
                      '</span>'

        # append result
        results.append({'url': url,
                        'title': title,
                        'img_src': img_src,
                        'content': content,
                        'template': 'images.html'})

    return results
@@ -1,54 +1,87 @@
#!/usr/bin/env python

## Flickr (Images)
#
# @website https://www.flickr.com
# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)
#
+ # @using-api yes
+ # @results JSON
+ # @stable yes
# @parse url, title, thumbnail, img_src
+ #More info on api-key : https://www.flickr.com/services/apps/create/

from urllib import urlencode
- #from json import loads
- from urlparse import urljoin
- from lxml import html
- from time import time
+ from json import loads

categories = ['images']

- url = 'https://secure.flickr.com/'
- search_url = url+'search/?{query}&page={page}'
- results_xpath = '//div[@class="view display-item-tile"]/figure/div'
- paging = True
+ nb_per_page = 15
+ api_key = None

+ url = 'https://api.flickr.com/services/rest/?method=flickr.photos.search' +\
+     '&api_key={api_key}&{text}&sort=relevance' +\
+     '&extras=description%2C+owner_name%2C+url_o%2C+url_z' +\
+     '&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}'
+ photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'

+ paging = True


+ def build_flickr_url(user_id, photo_id):
+     return photo_url.format(userid=user_id, photoid=photo_id)


def request(query, params):
-     params['url'] = search_url.format(query=urlencode({'text': query}),
-                                       page=params['pageno'])
-     time_string = str(int(time())-3)
-     params['cookies']['BX'] = '3oqjr6d9nmpgl&b=3&s=dh'
-     params['cookies']['xb'] = '421409'
-     params['cookies']['localization'] = 'en-us'
-     params['cookies']['flrbp'] = time_string +\
-         '-3a8cdb85a427a33efda421fbda347b2eaf765a54'
-     params['cookies']['flrbs'] = time_string +\
-         '-ed142ae8765ee62c9ec92a9513665e0ee1ba6776'
-     params['cookies']['flrb'] = '9'
+     params['url'] = url.format(text=urlencode({'text': query}),
+                                api_key=api_key,
+                                nb_per_page=nb_per_page,
+                                page=params['pageno'])
    return params


def response(resp):
    results = []
-     dom = html.fromstring(resp.text)
-     for result in dom.xpath(results_xpath):
-         img = result.xpath('.//img')
-
-         if not img:
-             continue
-
-         img = img[0]
-         img_src = 'https:'+img.attrib.get('src')
-
-         if not img_src:
-             continue
-
-         href = urljoin(url, result.xpath('.//a')[0].attrib.get('href'))
-         title = img.attrib.get('alt', '')
-         results.append({'url': href,
+     search_results = loads(resp.text)
+
+     # return empty array if there are no results
+     if not 'photos' in search_results:
+         return []
+
+     if not 'photo' in search_results['photos']:
+         return []
+
+     photos = search_results['photos']['photo']
+
+     # parse results
+     for photo in photos:
+         if 'url_o' in photo:
+             img_src = photo['url_o']
+         elif 'url_z' in photo:
+             img_src = photo['url_z']
+         else:
+             continue
+
+         url = build_flickr_url(photo['owner'], photo['id'])
+
+         title = photo['title']
+
+         content = '<span class="photo-author">' +\
+                   photo['ownername'] +\
+                   '</span><br />' +\
+                   '<span class="description">' +\
+                   photo['description']['_content'] +\
+                   '</span>'
+
+         # append result
+         results.append({'url': url,
                        'title': title,
                        'img_src': img_src,
+                       'content': content,
                        'template': 'images.html'})

    # return results
    return results
@@ -24,7 +24,7 @@ search_url = url + 'search/{search_term}/{pageno}/'

# specific xpath variables
magnet_xpath = './/a[@title="Torrent magnet link"]'
- #content_xpath = './/font[@class="detDesc"]//text()'
+ content_xpath = './/span[@class="font11px lightgrey block"]'


# do search-request

@@ -56,7 +56,8 @@ def response(resp):
        link = result.xpath('.//a[@class="cellMainLink"]')[0]
        href = urljoin(url, link.attrib['href'])
        title = ' '.join(link.xpath('.//text()'))
-       content = escape(html.tostring(result.xpath('.//span[@class="font11px lightgrey block"]')[0], method="text"))
+       content = escape(html.tostring(result.xpath(content_xpath)[0],
+                                      method="text"))
        seed = result.xpath('.//td[contains(@class, "green")]/text()')[0]
        leech = result.xpath('.//td[contains(@class, "red")]/text()')[0]
@@ -38,10 +38,14 @@ def response(resp):
    for result in search_results['results']:
        href = result['url']
        title = "[" + result['type'] + "] " +\
-               result['namespace'] + " " + result['name']
-       content = '<span class="highlight">[' + result['type'] + "] " +\
-               result['name'] + " " + result['synopsis'] +\
-               "</span><br />" + result['description']
+               result['namespace'] +\
+               " " + result['name']
+       content = '<span class="highlight">[' +\
+                 result['type'] + "] " +\
+                 result['name'] + " " +\
+                 result['synopsis'] +\
+                 "</span><br />" +\
+                 result['description']

        # append result
        results.append({'url': href,
@@ -6,10 +6,11 @@
# @using-api yes
# @results JSON
# @stable yes
- # @parse url, title, content
+ # @parse url, title, content, publishedDate, embedded

from json import loads
- from urllib import urlencode
+ from urllib import urlencode, quote_plus
from dateutil import parser

# engine dependent config
categories = ['music']

@@ -27,6 +28,10 @@ search_url = url + 'search?{query}'\
    '&linked_partitioning=1'\
    '&client_id={client_id}' # noqa

embedded_url = '<iframe width="100%" height="166" ' +\
    'scrolling="no" frameborder="no" ' +\
    'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>'


# do search-request
def request(query, params):

@@ -50,10 +55,15 @@ def response(resp):
        if result['kind'] in ('track', 'playlist'):
            title = result['title']
            content = result['description']
            publishedDate = parser.parse(result['last_modified'])
            uri = quote_plus(result['uri'])
            embedded = embedded_url.format(uri=uri)

            # append result
            results.append({'url': result['permalink_url'],
                            'title': title,
                            'publishedDate': publishedDate,
                            'embedded': embedded,
                            'content': content})

    # return results
@@ -66,7 +66,10 @@ def response(resp):
            continue
        link = links[0]
        url = link.attrib.get('href')
-       title = escape(link.text_content())
+       try:
+           title = escape(link.text_content())
+       except UnicodeDecodeError:
+           continue

        # block google-ad url's
        if re.match("^http(s|)://www.google.[a-z]+/aclk.*$", url):
searx/engines/subtitleseeker.py (new file, 78 lines)
@@ -0,0 +1,78 @@
## Subtitleseeker (Video)
#
# @website http://www.subtitleseeker.com
# @provide-api no
#
# @using-api no
# @results HTML
# @stable no (HTML can change)
# @parse url, title, content

from cgi import escape
from urllib import quote_plus
from lxml import html
from searx.languages import language_codes

# engine dependent config
categories = ['videos']
paging = True
language = ""

# search-url
url = 'http://www.subtitleseeker.com/'
search_url = url+'search/TITLES/{query}&p={pageno}'

# specific xpath variables
results_xpath = '//div[@class="boxRows"]'


# do search-request
def request(query, params):
    params['url'] = search_url.format(query=quote_plus(query),
                                      pageno=params['pageno'])
    return params


# get response from search-request
def response(resp):
    results = []

    dom = html.fromstring(resp.text)

    search_lang = ""

    if resp.search_params['language'] != 'all':
        search_lang = [lc[1]
                       for lc in language_codes
                       if lc[0][:2] == resp.search_params['language']][0]

    # parse results
    for result in dom.xpath(results_xpath):
        link = result.xpath(".//a")[0]
        href = link.attrib.get('href')

        if language is not "":
            href = href + language + '/'
        elif search_lang:
            href = href + search_lang + '/'

        title = escape(link.xpath(".//text()")[0])

        content = result.xpath('.//div[contains(@class,"red")]//text()')[0]
        content = content + " - "
        text = result.xpath('.//div[contains(@class,"grey-web")]')[0]
        content = content + html.tostring(text, method='text')

        if result.xpath(".//span") != []:
            content = content +\
                " - (" +\
                result.xpath(".//span//text()")[0].strip() +\
                ")"

        # append result
        results.append({'url': href,
                        'title': title,
                        'content': escape(content)})

    # return results
    return results
@@ -1,6 +1,6 @@
## Twitter (Social media)
#
- # @website https://www.bing.com/news
+ # @website https://twitter.com/
# @provide-api yes (https://dev.twitter.com/docs/using-search)
#
# @using-api no

@@ -14,6 +14,7 @@ from urlparse import urljoin
from urllib import urlencode
from lxml import html
from cgi import escape
from datetime import datetime

# engine dependent config
categories = ['social media']

@@ -27,7 +28,8 @@ search_url = base_url+'search?'
results_xpath = '//li[@data-item-type="tweet"]'
link_xpath = './/small[@class="time"]//a'
title_xpath = './/span[@class="username js-action-profile-name"]//text()'
- content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()'
+ content_xpath = './/p[@class="js-tweet-text tweet-text"]'
timestamp_xpath = './/span[contains(@class,"_timestamp")]'


# do search-request

@@ -52,12 +54,21 @@ def response(resp):
        link = tweet.xpath(link_xpath)[0]
        url = urljoin(base_url, link.attrib.get('href'))
        title = ''.join(tweet.xpath(title_xpath))
-       content = escape(''.join(tweet.xpath(content_xpath)))
-
-       # append result
-       results.append({'url': url,
-                       'title': title,
-                       'content': content})
+       content = escape(html.tostring(tweet.xpath(content_xpath)[0], method='text', encoding='UTF-8').decode("utf-8"))
+       pubdate = tweet.xpath(timestamp_xpath)
+       if len(pubdate) > 0:
+           timestamp = float(pubdate[0].attrib.get('data-time'))
+           publishedDate = datetime.fromtimestamp(timestamp, None)
+           # append result
+           results.append({'url': url,
+                           'title': title,
+                           'content': content,
+                           'publishedDate': publishedDate})
+       else:
+           # append result
+           results.append({'url': url,
+                           'title': title,
+                           'content': content})

    # return results
    return results
@@ -1,4 +1,4 @@
- ## Vimeo (Videos)
+ # Vimeo (Videos)
#
# @website https://vimeo.com/
# @provide-api yes (http://developer.vimeo.com/api),

@@ -7,14 +7,14 @@
# @using-api no (TODO, rewrite to api)
# @results HTML (using search portal)
# @stable no (HTML can change)
- # @parse url, title, publishedDate, thumbnail
+ # @parse url, title, publishedDate, thumbnail, embedded
#
# @todo rewrite to api
# @todo set content-parameter with correct data

from urllib import urlencode
- from HTMLParser import HTMLParser
from lxml import html
+ from HTMLParser import HTMLParser
from searx.engines.xpath import extract_text
from dateutil import parser


@@ -23,26 +23,26 @@ categories = ['videos']
paging = True

# search-url
- base_url = 'https://vimeo.com'
+ base_url = 'http://vimeo.com'
search_url = base_url + '/search/page:{pageno}?{query}'

# specific xpath variables
- url_xpath = './a/@href'
- content_xpath = './a/img/@src'
- title_xpath = './a/div[@class="data"]/p[@class="title"]/text()'
results_xpath = '//div[@id="browse_content"]/ol/li'
+ url_xpath = './a/@href'
+ title_xpath = './a/div[@class="data"]/p[@class="title"]'
+ content_xpath = './a/img/@src'
+ publishedDate_xpath = './/p[@class="meta"]//attribute::datetime'

embedded_url = '<iframe data-src="//player.vimeo.com/video{videoid}" ' +\
    'width="540" height="304" frameborder="0" ' +\
    'webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe>'


# do search-request
def request(query, params):
    params['url'] = search_url.format(pageno=params['pageno'],
                                      query=urlencode({'q': query}))

    # TODO required?
    params['cookies']['__utma'] =\
        '00000000.000#0000000.0000000000.0000000000.0000000000.0'

    return params


@@ -51,16 +51,17 @@ def response(resp):
    results = []

    dom = html.fromstring(resp.text)

    p = HTMLParser()

    # parse results
    for result in dom.xpath(results_xpath):
-       url = base_url + result.xpath(url_xpath)[0]
+       videoid = result.xpath(url_xpath)[0]
+       url = base_url + videoid
        title = p.unescape(extract_text(result.xpath(title_xpath)))
        thumbnail = extract_text(result.xpath(content_xpath)[0])
        publishedDate = parser.parse(extract_text(
            result.xpath(publishedDate_xpath)[0]))
        embedded = embedded_url.format(videoid=videoid)

        # append result
        results.append({'url': url,

@@ -68,6 +69,7 @@ def response(resp):
                        'content': '',
                        'template': 'videos.html',
                        'publishedDate': publishedDate,
                        'embedded': embedded,
                        'thumbnail': thumbnail})

    # return results
@@ -1,6 +1,8 @@
import json
from requests import get
from urllib import urlencode
import locale
import dateutil.parser

result_count = 1
wikidata_host = 'https://www.wikidata.org'

@@ -35,6 +37,16 @@ def response(resp):
    language = resp.search_params['language'].split('_')[0]
    if language == 'all':
        language = 'en'

    try:
        locale.setlocale(locale.LC_ALL, str(resp.search_params['language']))
    except:
        try:
            locale.setlocale(locale.LC_ALL, 'en_US')
        except:
            pass
        pass

    url = url_detail.format(query=urlencode({'ids': '|'.join(wikidata_ids),
                                             'languages': language + '|en'}))

@@ -164,10 +176,12 @@ def getDetail(jsonresponse, wikidata_id, language):

    date_of_birth = get_time(claims, 'P569', None)
    if date_of_birth is not None:
        date_of_birth = dateutil.parser.parse(date_of_birth[8:]).strftime(locale.nl_langinfo(locale.D_FMT))
        attributes.append({'label': 'Date of birth', 'value': date_of_birth})

    date_of_death = get_time(claims, 'P570', None)
    if date_of_death is not None:
        date_of_death = dateutil.parser.parse(date_of_death[8:]).strftime(locale.nl_langinfo(locale.D_FMT))
        attributes.append({'label': 'Date of death', 'value': date_of_death})

    if len(attributes) == 0 and len(urls) == 2 and len(description) == 0:
@@ -6,7 +6,7 @@
# @using-api yes
# @results JSON
# @stable yes
- # @parse url, title, content, publishedDate, thumbnail
+ # @parse url, title, content, publishedDate, thumbnail, embedded

from json import loads
from urllib import urlencode

@@ -19,7 +19,11 @@ language_support = True

# search-url
base_url = 'https://gdata.youtube.com/feeds/api/videos'
- search_url = base_url + '?alt=json&{query}&start-index={index}&max-results=5' # noqa
+ search_url = base_url + '?alt=json&{query}&start-index={index}&max-results=5'

embedded_url = '<iframe width="540" height="304" ' +\
    'data-src="//www.youtube-nocookie.com/embed/{videoid}" ' +\
    'frameborder="0" allowfullscreen></iframe>'


# do search-request

@@ -60,6 +64,8 @@ def response(resp):
        if url.endswith('&'):
            url = url[:-1]

        videoid = url[32:]

        title = result['title']['$t']
        content = ''
        thumbnail = ''

@@ -72,12 +78,15 @@

        content = result['content']['$t']

        embedded = embedded_url.format(videoid=videoid)

        # append result
        results.append({'url': url,
                        'title': title,
                        'content': content,
                        'template': 'videos.html',
                        'publishedDate': publishedDate,
                        'embedded': embedded,
                        'thumbnail': thumbnail})

    # return results