diff --git a/searx/engines/500px.py b/searx/engines/500px.py
index 5d53af32c..3b95619a1 100644
--- a/searx/engines/500px.py
+++ b/searx/engines/500px.py
@@ -35,9 +35,9 @@ def request(query, params):
# get response from search-request
def response(resp):
results = []
-
+
dom = html.fromstring(resp.text)
-
+
# parse results
for result in dom.xpath('//div[@class="photo"]'):
link = result.xpath('.//a')[0]
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index d42339af8..9bc5cdfd4 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -81,7 +81,7 @@ def load_engine(engine_data):
if engine_attr.startswith('_'):
continue
if getattr(engine, engine_attr) is None:
- print('[E] Engine config error: Missing attribute "{0}.{1}"'\
+ print('[E] Engine config error: Missing attribute "{0}.{1}"'
.format(engine.name, engine_attr))
sys.exit(1)
@@ -102,7 +102,7 @@ def load_engine(engine_data):
if engine.shortcut:
# TODO check duplications
if engine.shortcut in engine_shortcuts:
- print('[E] Engine config error: ambigious shortcut: {0}'\
+ print('[E] Engine config error: ambigious shortcut: {0}'
.format(engine.shortcut))
sys.exit(1)
engine_shortcuts[engine.shortcut] = engine.name
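Note: dropping the trailing backslashes is safe here because each string
literal sits inside print()'s parentheses, so the expression already
continues across lines without an explicit continuation. A minimal sketch of
the same pattern, with hypothetical engine/attribute values:

    name, attr = 'flickr', 'api_key'
    print('[E] Engine config error: Missing attribute "{0}.{1}"'
          .format(name, attr))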
diff --git a/searx/engines/digg.py b/searx/engines/digg.py
new file mode 100644
index 000000000..241234fdb
--- /dev/null
+++ b/searx/engines/digg.py
@@ -0,0 +1,67 @@
+## Digg (News, Social media)
+#
+# @website     https://digg.com/
+# @provide-api no
+#
+# @using-api   no
+# @results     HTML (using search portal)
+# @stable      no (HTML can change)
+# @parse       url, title, content, publishedDate, thumbnail
+
+from urllib import quote_plus
+from json import loads
+from lxml import html
+from cgi import escape
+from dateutil import parser
+
+# engine dependent config
+categories = ['news', 'social media']
+paging = True
+
+# search-url
+base_url = 'https://digg.com/'
+search_url = base_url+'api/search/{query}.json?position={position}&format=html'
+
+# specific xpath variables
+results_xpath = '//article'
+link_xpath = './/small[@class="time"]//a'
+title_xpath = './/h2//a//text()'
+content_xpath = './/p//text()'
+pubdate_xpath = './/time'
+
+
+# do search-request
+def request(query, params):
+ offset = (params['pageno'] - 1) * 10
+ params['url'] = search_url.format(position=offset,
+ query=quote_plus(query))
+ return params
+
+
+# get response from search-request
+def response(resp):
+ results = []
+
+ search_result = loads(resp.text)
+
+ dom = html.fromstring(search_result['html'])
+
+ # parse results
+ for result in dom.xpath(results_xpath):
+ url = result.attrib.get('data-contenturl')
+ thumbnail = result.xpath('.//img')[0].attrib.get('src')
+ title = ''.join(result.xpath(title_xpath))
+ content = escape(''.join(result.xpath(content_xpath)))
+ pubdate = result.xpath(pubdate_xpath)[0].attrib.get('datetime')
+ publishedDate = parser.parse(pubdate)
+
+ # append result
+ results.append({'url': url,
+ 'title': title,
+ 'content': content,
+ 'template': 'videos.html',
+ 'publishedDate': publishedDate,
+ 'thumbnail': thumbnail})
+
+ # return results
+ return results
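Note: digg's search endpoint pages by item offset rather than page number, so
request() multiplies (pageno - 1) by 10 before substituting it into the
position parameter; response() then expects a JSON body whose 'html' field
carries the markup to parse. A standalone sketch of the URL construction
(re-implemented here for illustration, not imported from the engine module):

    from urllib import quote_plus

    search_url = 'https://digg.com/api/search/{query}.json' \
                 '?position={position}&format=html'

    offset = (3 - 1) * 10  # pageno == 3
    print(search_url.format(position=offset, query=quote_plus('open source')))
    # https://digg.com/api/search/open+source.json?position=20&format=html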
diff --git a/searx/engines/flickr-noapi.py b/searx/engines/flickr-noapi.py
index 522503b53..f90903647 100644
--- a/searx/engines/flickr-noapi.py
+++ b/searx/engines/flickr-noapi.py
@@ -53,7 +53,8 @@ def response(resp):
for photo in photos:
- # In paged configuration, the first pages' photos are represented by a None object
+ # In paged configuration, the first pages' photos
+ # are represented by a None object
if photo is None:
continue
@@ -74,10 +75,15 @@ def response(resp):
title = photo['title']
-        content = '<br />'
+        content = '<br />'
         if 'description' in photo:
-            content = content + '<span class="description">' + photo['description'] + '</span>'
+            content = content +\
+                      '<span class="description">' +\
+                      photo['description'] +\
+                      '</span>'
# append result
results.append({'url': url,
diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py
index 2fa5ed7ec..4dadd80a6 100644
--- a/searx/engines/flickr.py
+++ b/searx/engines/flickr.py
@@ -1,10 +1,10 @@
#!/usr/bin/env python
## Flickr (Images)
-#
+#
# @website https://www.flickr.com
-# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)
-#
+# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)
+#
# @using-api yes
# @results JSON
# @stable yes
@@ -18,16 +18,20 @@ categories = ['images']
nb_per_page = 15
paging = True
-api_key= None
+api_key = None
-url = 'https://api.flickr.com/services/rest/?method=flickr.photos.search&api_key={api_key}&{text}&sort=relevance&extras=description%2C+owner_name%2C+url_o%2C+url_z&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}'
+url = 'https://api.flickr.com/services/rest/?method=flickr.photos.search' +\
+ '&api_key={api_key}&{text}&sort=relevance' +\
+ '&extras=description%2C+owner_name%2C+url_o%2C+url_z' +\
+ '&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}'
photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
paging = True
+
def build_flickr_url(user_id, photo_id):
- return photo_url.format(userid=user_id,photoid=photo_id)
+ return photo_url.format(userid=user_id, photoid=photo_id)
def request(query, params):
@@ -40,7 +44,7 @@ def request(query, params):
def response(resp):
results = []
-
+
search_results = loads(resp.text)
# return empty array if there are no results
@@ -64,11 +68,14 @@ def response(resp):
url = build_flickr_url(photo['owner'], photo['id'])
title = photo['title']
-
-        content = '<br />'
-
-        content = content + '<span class="description">' + photo['description']['_content'] + '</span>'
-
+
+        content = '<br />' +\
+                  '<span class="description">' +\
+                  photo['description']['_content'] +\
+                  '</span>'
+
# append result
results.append({'url': url,
'title': title,
diff --git a/searx/engines/kickass.py b/searx/engines/kickass.py
index f1fcd9e1a..16e9d6de6 100644
--- a/searx/engines/kickass.py
+++ b/searx/engines/kickass.py
@@ -24,7 +24,7 @@ search_url = url + 'search/{search_term}/{pageno}/'
# specific xpath variables
magnet_xpath = './/a[@title="Torrent magnet link"]'
-#content_xpath = './/font[@class="detDesc"]//text()'
+content_xpath = './/span[@class="font11px lightgrey block"]'
# do search-request
@@ -56,7 +56,8 @@ def response(resp):
link = result.xpath('.//a[@class="cellMainLink"]')[0]
href = urljoin(url, link.attrib['href'])
title = ' '.join(link.xpath('.//text()'))
- content = escape(html.tostring(result.xpath('.//span[@class="font11px lightgrey block"]')[0], method="text"))
+ content = escape(html.tostring(result.xpath(content_xpath)[0],
+ method="text"))
seed = result.xpath('.//td[contains(@class, "green")]/text()')[0]
leech = result.xpath('.//td[contains(@class, "red")]/text()')[0]
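Note: tostring(node, method="text") serializes only the text content of the
node, which is why the torrent description can be pulled straight from the
span without walking its children. A minimal sketch with made-up markup:

    from lxml import html

    span = html.fromstring('<span class="font11px lightgrey block">'
                           'Size: 700 MB in 2 files</span>')
    print(html.tostring(span, method='text'))
    # Size: 700 MB in 2 files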
diff --git a/searx/engines/searchcode_code.py b/searx/engines/searchcode_code.py
index 2ba0e52f1..0f98352c1 100644
--- a/searx/engines/searchcode_code.py
+++ b/searx/engines/searchcode_code.py
@@ -11,7 +11,6 @@
from urllib import urlencode
from json import loads
import cgi
-import re
# engine dependent config
categories = ['it']
@@ -33,7 +32,7 @@ def request(query, params):
# get response from search-request
def response(resp):
results = []
-
+
search_results = loads(resp.text)
# parse results
@@ -41,21 +40,22 @@ def response(resp):
href = result['url']
title = "" + result['name'] + " - " + result['filename']
content = result['repo'] + "
"
-
+
lines = dict()
for line, code in result['lines'].items():
             lines[int(line)] = code
         content = content + '<table class="code">'
         for line, code in sorted(lines.items()):
-            content = content + '<tr><td class="line-number" style="padding-right:5px;">'
-            content = content + str(line) + '</td><td class="code-snippet">'
-            # Replace every two spaces with ' &nbps;' to keep formatting while allowing the browser to break the line if necessary
-            content = content + cgi.escape(code).replace('\t', '    ').replace('  ', '&nbsp; ').replace('  ', ' &nbsp;')
+            content = content + '<tr><td class="line-number" style="padding-right:5px;">'
+            content = content + str(line) + '</td><td class="code-snippet">'
+            # Replace every two spaces with ' &nbps;' to keep formatting
+            # while allowing the browser to break the line if necessary
+            content = content + cgi.escape(code).replace('\t', '    ').replace('  ', '&nbsp; ').replace('  ', ' &nbsp;')
             content = content + "</td></tr>"
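Note: the escape chain expands tabs to four spaces, rewrites each pair of
spaces as '&nbsp; ', and converts any double spaces that remain into
' &nbsp;', so indentation survives HTML rendering while the browser can still
break the line. A worked example of the same chain (standalone, Python 2
stdlib):

    import cgi

    code = '\tif x < 1:  return'
    out = cgi.escape(code).replace('\t', '    ') \
                          .replace('  ', '&nbsp; ') \
                          .replace('  ', ' &nbsp;')
    print(out)
    # &nbsp; &nbsp; if x &lt; 1:&nbsp; return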
' + content = content + str(line) + ' | ' + # Replace every two spaces with ' &nbps;' to keep formatting + # while allowing the browser to break the line if necessary + content = content + cgi.escape(code).replace('\t', ' ').replace(' ', ' ').replace(' ', ' ') content = content + " |