diff --git a/.gitignore b/.gitignore
index ed5ed1624..0b9057a41 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,20 +1,24 @@
-env
-engines.cfg
-.installed.cfg
.coverage
-covearge/
+.installed.cfg
+engines.cfg
+env
+robot_log.html
+robot_output.xml
+robot_report.html
setup.cfg
*.pyc
*/*.pyc
bin/
+build/
+covearge/
+develop-eggs/
+dist/
+eggs/
include/
lib/
-build/
-develop-eggs/
-eggs/
local/
-searx.egg-info/
parts/
+searx.egg-info/
var/
diff --git a/Makefile b/Makefile
index da59ad659..cc5967581 100644
--- a/Makefile
+++ b/Makefile
@@ -21,11 +21,7 @@ $(python):
tests: .installed.cfg
@bin/test
-enginescfg:
- @test -f ./engines.cfg || echo "Copying engines.cfg ..."
- @cp --no-clobber engines.cfg_sample engines.cfg
-
-robot: .installed.cfg enginescfg
+robot: .installed.cfg
@bin/robot
flake8: .installed.cfg
@@ -37,18 +33,21 @@ coverage: .installed.cfg
@bin/coverage report --show-missing
@bin/coverage html --directory ./coverage
-production: bin/buildout production.cfg setup.py enginescfg
+production: bin/buildout production.cfg setup.py
bin/buildout -c production.cfg $(options)
@echo "* Please modify `readlink --canonicalize-missing ./searx/settings.py`"
@echo "* Hint 1: on production, disable debug mode and change secret_key"
@echo "* Hint 2: searx will be executed at server startup by crontab"
@echo "* Hint 3: to run immediatley, execute 'bin/supervisord'"
-minimal: bin/buildout minimal.cfg setup.py enginescfg
+minimal: bin/buildout minimal.cfg setup.py
bin/buildout -c minimal.cfg $(options)
+locales:
+ @pybabel compile -d searx/translations
+
clean:
@rm -rf .installed.cfg .mr.developer.cfg bin parts develop-eggs \
searx.egg-info lib include .coverage coverage
-.PHONY: all tests enginescfg robot flake8 coverage production minimal clean
+.PHONY: all tests robot flake8 coverage production minimal locales clean
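
The new `locales` target compiles the gettext catalogs under searx/translations into the binary form the application reads at runtime. A minimal sketch of loading one of those compiled catalogs with Babel — the directory and the `hu` locale come from this diff, but the loading code itself is an illustration, not part of the commit:

from babel.support import Translations

# assumes `pybabel compile -d searx/translations` (i.e. `make locales`) has run
translations = Translations.load('searx/translations', ['hu'])
print(translations.gettext('preferences'))
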
diff --git a/README.md b/README.md
deleted file mode 100644
index e6638cf74..000000000
--- a/README.md
+++ /dev/null
@@ -1,122 +0,0 @@
-searx
-=====
-
-A privacy-respecting, hackable [metasearch engine](https://en.wikipedia.org/wiki/Metasearch_engine).
-
-List of [running instances](https://github.com/asciimoo/searx/wiki/Searx-instances).
-
-[![Flattr searx](http://api.flattr.com/button/flattr-badge-large.png)](https://flattr.com/submit/auto?user_id=asciimoo&url=https://github.com/asciimoo/searx&title=searx&language=&tags=github&category=software)
-
-
-### Features
-
-* Tracking free
-* Modular (see [examples](https://github.com/asciimoo/searx/blob/master/examples))
-* Parallel queries
-* Supports multiple output formats
- * json `curl https://searx.0x2a.tk/?format=json&q=[query]`
- * csv `curl https://searx.0x2a.tk/?format=csv&q=[query]`
- * opensearch/rss `curl https://searx.0x2a.tk/?format=rss&q=[query]`
-* Opensearch support (you can set as default search engine)
-* Configurable search engines/categories
-
-
-### Installation
-
-* clone source: `git clone git@github.com:asciimoo/searx.git && cd searx`
-* install dependencies: `pip install -r requirements.txt`
-* edit your [settings.yml](https://github.com/asciimoo/searx/blob/master/settings.yml) (set your `secret_key`!)
-* run `python searx/webapp.py` to start the application
-
-For all the details, follow this [step by step installation](https://github.com/asciimoo/searx/wiki/Installation)
-
-
-### Alternative (Recommended) Installation
-
-* clone source: `git clone git@github.com:asciimoo/searx.git && cd searx`
-* build in current folder: `make minimal`
-* run `bin/searx-run` to start the application
-
-
-### Development
-
-Just run `make`. Versions of dependencies are pinned down inside `versions.cfg` to produce most stable build. Also remember, NO make command should be run as root, not even `make production`
-
-
-### Deployment
-
-* clone source: `git clone git@github.com:asciimoo/searx.git && cd searx`
-* build in current folder: `make production`
-* run `bin/supervisord` to start the application
-
-
-### Upgrading
-
-* inside previously cloned searx directory run: `git stash` to temporarily save any changes you have made
-* pull source: `git pull origin master`
-* re-build in current folder: `make production`
-* run `bin/supervisorctl stop searx` to stop searx, if it does not, then run `fuser -k 8888/tcp`
-* run `bin/supervisorctl reload` to re-read supervisor config and start searx
-
-
-### Command make
-
-##### `make`
-
-Builds development environment with testing support.
-
-##### `make tests`
-
-Runs tests. You can write tests [here](https://github.com/asciimoo/searx/tree/master/searx/tests) and remember 'untested code is broken code'.
-
-##### `make robot`
-
-Runs robot (Selenium) tests, you must have `firefox` installed because this functional tests actually run the browser and perform operations on it. Also searx is executed with [settings_robot](https://github.com/asciimoo/searx/blob/master/searx/settings_robot.py).
-
-##### `make flake8`
-
-'pep8 is a tool to check your Python code against some of the style conventions in [PEP 8](http://www.python.org/dev/peps/pep-0008/).'
-
-##### `make coverage`
-
-Checks coverage of tests, after running this, execute this: `firefox ./coverage/index.html`
-
-##### `make production`
-
-Used to make co-called production environment - without tests (you should ran tests before deploying searx on the server). This installs supervisord, so if searx crashes, it will try to pick itself up again. And crontab entry is added to start supervisord at server boot.
-
-##### `make minimal`
-
-Minimal build - without test frameworks, the quickest build option.
-
-##### `make clean`
-
-Deletes several folders and files (see `Makefile` for more), so that next time you run any other `make` command it will rebuild everithing.
-
-
-### TODO
-
-* Moar engines
-* Better ui
-* Language support
-* Documentation
-* Pagination
-* Fix `flake8` errors, `make flake8` will be merged into `make tests` when it does not fail anymore
-* Tests
-* When we have more tests, we can integrate Travis-CI
-
-
-### Bugs
-
-Bugs or suggestions? Visit the [issue tracker](https://github.com/asciimoo/searx/issues).
-
-
-### [License](https://github.com/asciimoo/searx/blob/master/LICENSE)
-
-
-### More about searx
-
-* [ohloh](https://www.ohloh.net/p/searx/)
-* [twitter](https://twitter.com/Searx_engine)
-* IRC: #searx @ freenode
-
diff --git a/README.rst b/README.rst
new file mode 100644
index 000000000..7f9a3e598
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,159 @@
+searx
+=====
+
+A privacy-respecting, hackable `metasearch
+engine <https://en.wikipedia.org/wiki/Metasearch_engine>`__.
diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py
--- a/searx/engines/dailymotion.py
+++ b/searx/engines/dailymotion.py
+
def request(query, params):
global search_url
- params['url'] = search_url.format(query=urlencode({'search': query, 'localization': locale }))
+ params['url'] = search_url.format(
+ query=urlencode({'search': query, 'localization': locale}))
return params
@@ -24,7 +28,7 @@ def response(resp):
title = res['title']
url = res['url']
if res['thumbnail_360_url']:
-            content = '<a href="{0}"><img src="{1}" /></a>'.format(url, res['thumbnail_360_url'])
+ content = content_tpl.format(url, res['thumbnail_360_url'])
else:
content = ''
if res['description']:
@@ -33,6 +37,7 @@ def response(resp):
results.append({'url': url, 'title': title, 'content': content})
return results
+
def text_content_from_html(html_string):
desc_html = html.fragment_fromstring(html_string, create_parent=True)
return desc_html.text_content()
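
Every engine touched by this diff follows the same two-function contract: request(query, params) fills in the outgoing HTTP parameters (at minimum params['url']), and response(resp) parses the reply into a list of result dicts. A hedged sketch of driving one engine by hand — illustrative only; searx's actual search loop is not part of this diff:

import requests
from searx.engines import dailymotion

# start from an empty header dict, since some engines (e.g. github) add to it
params = dailymotion.request('searx', {'headers': {}})
resp = requests.get(params['url'], headers=params['headers'])
for result in dailymotion.response(resp):
    print('%s  %s' % (result['url'], result['title']))
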
diff --git a/searx/engines/deviantart.py b/searx/engines/deviantart.py
index 9a4a8abde..94a94bf16 100644
--- a/searx/engines/deviantart.py
+++ b/searx/engines/deviantart.py
@@ -7,6 +7,7 @@ categories = ['images']
base_url = 'https://www.deviantart.com/'
search_url = base_url+'search?'
+
def request(query, params):
global search_url
params['url'] = search_url + urlencode({'q': query})
@@ -22,8 +23,11 @@ def response(resp):
for result in dom.xpath('//div[contains(@class, "tt-a tt-fh")]'):
link = result.xpath('.//a[contains(@class, "thumb")]')[0]
url = urljoin(base_url, link.attrib.get('href'))
- title_links = result.xpath('.//span[@class="details"]//a[contains(@class, "t")]')
+ title_links = result.xpath('.//span[@class="details"]//a[contains(@class, "t")]') # noqa
title = ''.join(title_links[0].xpath('.//text()'))
img_src = link.xpath('.//img')[0].attrib['src']
- results.append({'url': url, 'title': title, 'img_src': img_src, 'template': 'images.html'})
+ results.append({'url': url,
+ 'title': title,
+ 'img_src': img_src,
+ 'template': 'images.html'})
return results
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index 4bf770972..7cae87d95 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -6,8 +6,11 @@ url = 'https://duckduckgo.com/'
search_url = url + 'd.js?{query}&p=1&s=0'
locale = 'us-en'
+
def request(query, params):
- params['url'] = search_url.format(query=urlencode({'q': query, 'l': locale}))
+ q = urlencode({'q': query,
+ 'l': locale})
+ params['url'] = search_url.format(query=q)
return params
@@ -17,8 +20,7 @@ def response(resp):
for r in search_res:
if not r.get('t'):
continue
- results.append({'title': r['t']
- ,'content': html_to_text(r['a'])
- ,'url': r['u']
- })
+ results.append({'title': r['t'],
+ 'content': html_to_text(r['a']),
+ 'url': r['u']})
return results
diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py
index 7b3950b85..3037aae53 100644
--- a/searx/engines/duckduckgo_definitions.py
+++ b/searx/engines/duckduckgo_definitions.py
@@ -3,8 +3,9 @@ from urllib import urlencode
url = 'http://api.duckduckgo.com/?{query}&format=json&pretty=0&no_redirect=1'
+
def request(query, params):
- params['url'] = url.format(query=urlencode({'q': query}))
+ params['url'] = url.format(query=urlencode({'q': query}))
return params
@@ -13,11 +14,10 @@ def response(resp):
results = []
if 'Definition' in search_res:
if search_res.get('AbstractURL'):
- res = {'title' : search_res.get('Heading', '')
- ,'content' : search_res.get('Definition', '')
- ,'url' : search_res.get('AbstractURL', '')
- ,'class' : 'definition_result'
- }
+ res = {'title': search_res.get('Heading', ''),
+ 'content': search_res.get('Definition', ''),
+ 'url': search_res.get('AbstractURL', ''),
+ 'class': 'definition_result'}
results.append(res)
return results
diff --git a/searx/engines/filecrop.py b/searx/engines/filecrop.py
index 52426b84a..81340e601 100644
--- a/searx/engines/filecrop.py
+++ b/searx/engines/filecrop.py
@@ -2,7 +2,8 @@ from urllib import urlencode
from HTMLParser import HTMLParser
url = 'http://www.filecrop.com/'
-search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1'
+search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1' # noqa
+
class FilecropResultParser(HTMLParser):
def __init__(self):
@@ -18,22 +19,28 @@ class FilecropResultParser(HTMLParser):
def handle_starttag(self, tag, attrs):
if tag == 'tr':
- if ('bgcolor', '#edeff5') in attrs or ('bgcolor', '#ffffff') in attrs:
+ if ('bgcolor', '#edeff5') in attrs or\
+ ('bgcolor', '#ffffff') in attrs:
self.__start_processing = True
if not self.__start_processing:
return
if tag == 'label':
- self.result['title'] = [attr[1] for attr in attrs if attr[0] == 'title'][0]
- elif tag == 'a' and ('rel', 'nofollow') in attrs and ('class', 'sourcelink') in attrs:
+ self.result['title'] = [attr[1] for attr in attrs
+ if attr[0] == 'title'][0]
+ elif tag == 'a' and ('rel', 'nofollow') in attrs\
+ and ('class', 'sourcelink') in attrs:
if 'content' in self.result:
- self.result['content'] += [attr[1] for attr in attrs if attr[0] == 'title'][0]
+ self.result['content'] += [attr[1] for attr in attrs
+ if attr[0] == 'title'][0]
else:
- self.result['content'] = [attr[1] for attr in attrs if attr[0] == 'title'][0]
+ self.result['content'] = [attr[1] for attr in attrs
+ if attr[0] == 'title'][0]
self.result['content'] += ' '
elif tag == 'a':
- self.result['url'] = url + [attr[1] for attr in attrs if attr[0] == 'href'][0]
+ self.result['url'] = url + [attr[1] for attr in attrs
+ if attr[0] == 'href'][0]
def handle_endtag(self, tag):
if self.__start_processing is False:
@@ -60,10 +67,12 @@ class FilecropResultParser(HTMLParser):
self.data_counter += 1
+
def request(query, params):
- params['url'] = search_url.format(query=urlencode({'w' :query}))
+ params['url'] = search_url.format(query=urlencode({'w': query}))
return params
+
def response(resp):
parser = FilecropResultParser()
parser.feed(resp.text)
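
Unlike the lxml-based engines, filecrop parses results with a stdlib HTMLParser subclass; response() simply feeds the page into it. A small usage sketch, assuming the parser accumulates finished rows on a `results` attribute (a hypothetical name — the tail of response() is not shown in this diff) and using a made-up HTML snippet:

parser = FilecropResultParser()
parser.feed('<table><tr bgcolor="#edeff5">...</tr></table>')
# assumed attribute; entries would be {'title', 'content', 'url'} dicts
print(parser.results)
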
diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py
index a9832856d..d9554b99a 100644
--- a/searx/engines/flickr.py
+++ b/searx/engines/flickr.py
@@ -8,21 +8,27 @@ categories = ['images']
url = 'https://secure.flickr.com/'
search_url = url+'search/?{query}'
+results_xpath = '//div[@id="thumbnails"]//a[@class="rapidnofollow photo-click" and @data-track="photo-click"]' # noqa
+
def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}))
return params
+
def response(resp):
global base_url
results = []
dom = html.fromstring(resp.text)
- for result in dom.xpath('//div[@id="thumbnails"]//a[@class="rapidnofollow photo-click" and @data-track="photo-click"]'):
+ for result in dom.xpath(results_xpath):
href = urljoin(url, result.attrib.get('href'))
img = result.xpath('.//img')[0]
title = img.attrib.get('alt', '')
img_src = img.attrib.get('data-defer-src')
if not img_src:
continue
- results.append({'url': href, 'title': title, 'img_src': img_src, 'template': 'images.html'})
+ results.append({'url': href,
+ 'title': title,
+ 'img_src': img_src,
+ 'template': 'images.html'})
return results
diff --git a/searx/engines/github.py b/searx/engines/github.py
index b4baea6e8..be2cfe7c5 100644
--- a/searx/engines/github.py
+++ b/searx/engines/github.py
@@ -4,12 +4,15 @@ from cgi import escape
categories = ['it']
-search_url = 'https://api.github.com/search/repositories?sort=stars&order=desc&{query}'
+search_url = 'https://api.github.com/search/repositories?sort=stars&order=desc&{query}' # noqa
+
+accept_header = 'application/vnd.github.preview.text-match+json'
+
def request(query, params):
global search_url
params['url'] = search_url.format(query=urlencode({'q': query}))
- params['headers']['Accept'] = 'application/vnd.github.preview.text-match+json'
+ params['headers']['Accept'] = accept_header
return params
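
Hoisting the Accept header into accept_header keeps the line length down; the value opts the request into GitHub's text-match preview media type for the repository search API. A rough equivalent of the request searx ends up making (illustrative — searx issues it through the request()/params machinery rather than calling requests directly here):

import requests
from urllib import urlencode

resp = requests.get(
    'https://api.github.com/search/repositories?sort=stars&order=desc&'
    + urlencode({'q': 'searx'}),
    headers={'Accept': 'application/vnd.github.preview.text-match+json'})
print(resp.json()['items'][0]['html_url'])  # standard GitHub search fields
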
diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py
index d828a9c4b..57e749265 100644
--- a/searx/engines/google_images.py
+++ b/searx/engines/google_images.py
@@ -6,12 +6,14 @@ from json import loads
categories = ['images']
url = 'https://ajax.googleapis.com/'
-search_url = url + 'ajax/services/search/images?v=1.0&start=0&rsz=large&safe=off&filter=off&{query}'
+search_url = url + 'ajax/services/search/images?v=1.0&start=0&rsz=large&safe=off&filter=off&{query}' # noqa
+
def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}))
return params
+
def response(resp):
results = []
search_res = loads(resp.text)
@@ -24,5 +26,9 @@ def response(resp):
title = result['title']
if not result['url']:
continue
- results.append({'url': href, 'title': title, 'content': '', 'img_src': result['url'], 'template': 'images.html'})
+ results.append({'url': href,
+ 'title': title,
+ 'content': '',
+ 'img_src': result['url'],
+ 'template': 'images.html'})
return results
diff --git a/searx/engines/json_engine.py b/searx/engines/json_engine.py
index 0386d53f7..e7cc808bb 100644
--- a/searx/engines/json_engine.py
+++ b/searx/engines/json_engine.py
@@ -2,12 +2,13 @@ from urllib import urlencode
from json import loads
from collections import Iterable
-search_url    = None
-url_query     = None
+search_url = None
+url_query = None
 content_query = None
-title_query   = None
+title_query = None
#suggestion_xpath = ''
+
def iterate(iterable):
if type(iterable) == dict:
it = iterable.iteritems()
@@ -17,11 +18,15 @@ def iterate(iterable):
for index, value in it:
yield str(index), value
+
def is_iterable(obj):
- if type(obj) == str: return False
- if type(obj) == unicode: return False
+ if type(obj) == str:
+ return False
+ if type(obj) == unicode:
+ return False
return isinstance(obj, Iterable)
+
def parse(query):
q = []
for part in query.split('/'):
@@ -31,6 +36,7 @@ def parse(query):
q.append(part)
return q
+
def do_query(data, q):
ret = []
if not len(q):
@@ -38,7 +44,7 @@ def do_query(data, q):
qkey = q[0]
- for key,value in iterate(data):
+ for key, value in iterate(data):
if len(q) == 1:
if key == qkey:
@@ -54,11 +60,13 @@ def do_query(data, q):
ret.extend(do_query(value, q))
return ret
+
def query(data, query_string):
q = parse(query_string)
return do_query(data, q)
+
def request(query, params):
query = urlencode({'q': query})[2:]
params['url'] = search_url.format(query=query)
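
json_engine resolves '/'-separated query paths against the parsed JSON: parse() splits the path and do_query() walks dicts and lists, collecting every match. A worked example whose data shape mirrors the google entry in searx/settings.yml below:

data = {'responseData': {'results': [
    {'unescapedUrl': 'https://example.com', 'content': 'example'},
]}}
# lists are traversed transparently, so one path can yield many matches
print(query(data, 'responseData/results/unescapedUrl'))
# -> ['https://example.com']
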
diff --git a/searx/engines/mediawiki.py b/searx/engines/mediawiki.py
index 00ad0f106..bc4aab6df 100644
--- a/searx/engines/mediawiki.py
+++ b/searx/engines/mediawiki.py
@@ -3,10 +3,12 @@ from urllib import urlencode, quote
url = 'https://en.wikipedia.org/'
+search_url = url + 'w/api.php?action=query&list=search&{query}&srprop=timestamp&format=json' # noqa
+
number_of_results = 10
+
def request(query, params):
- search_url = url + 'w/api.php?action=query&list=search&{query}&srprop=timestamp&format=json'
params['url'] = search_url.format(query=urlencode({'srsearch': query}))
return params
@@ -14,7 +16,5 @@ def request(query, params):
def response(resp):
search_results = loads(resp.text)
res = search_results.get('query', {}).get('search', [])
-
- return [{'url': url + 'wiki/' + quote(result['title'].replace(' ', '_').encode('utf-8')),
+ return [{'url': url + 'wiki/' + quote(result['title'].replace(' ', '_').encode('utf-8')), # noqa
'title': result['title']} for result in res[:int(number_of_results)]]
-
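
With search_url hoisted to module level, request() only has to urlencode the srsearch term into the fixed slot; for example:

params = request('metasearch engine', {})
print(params['url'])
# https://en.wikipedia.org/w/api.php?action=query&list=search
#   &srsearch=metasearch+engine&srprop=timestamp&format=json
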
diff --git a/searx/engines/piratebay.py b/searx/engines/piratebay.py
index 9cf410106..7319b49c1 100644
--- a/searx/engines/piratebay.py
+++ b/searx/engines/piratebay.py
@@ -7,13 +7,18 @@ categories = ['videos', 'music']
url = 'https://thepiratebay.se/'
search_url = url + 'search/{search_term}/0/99/{search_type}'
-search_types = {'videos': '200'
- ,'music' : '100'
- ,'files' : '0'
- }
+search_types = {'videos': '200',
+ 'music': '100',
+ 'files': '0'}
+
+magnet_xpath = './/a[@title="Download this torrent using magnet"]'
+content_xpath = './/font[@class="detDesc"]//text()'
+
def request(query, params):
- params['url'] = search_url.format(search_term=quote(query), search_type=search_types.get(params['category']))
+ search_type = search_types.get(params['category'])
+ params['url'] = search_url.format(search_term=quote(query),
+ search_type=search_type)
return params
@@ -27,10 +32,14 @@ def response(resp):
link = result.xpath('.//div[@class="detName"]//a')[0]
href = urljoin(url, link.attrib.get('href'))
title = ' '.join(link.xpath('.//text()'))
- content = escape(' '.join(result.xpath('.//font[@class="detDesc"]//text()')))
+ content = escape(' '.join(result.xpath(content_xpath)))
seed, leech = result.xpath('.//td[@align="right"]/text()')[:2]
- magnetlink = result.xpath('.//a[@title="Download this torrent using magnet"]')[0]
- results.append({'url': href, 'title': title, 'content': content,
- 'seed': seed, 'leech': leech, 'magnetlink': magnetlink.attrib['href'],
+ magnetlink = result.xpath(magnet_xpath)[0]
+ results.append({'url': href,
+ 'title': title,
+ 'content': content,
+ 'seed': seed,
+ 'leech': leech,
+ 'magnetlink': magnetlink.attrib['href'],
'template': 'torrent.html'})
return results
diff --git a/searx/engines/soundcloud.py b/searx/engines/soundcloud.py
index 50414f153..b1930b2ee 100644
--- a/searx/engines/soundcloud.py
+++ b/searx/engines/soundcloud.py
@@ -5,7 +5,8 @@ categories = ['music']
guest_client_id = 'b45b1aa10f1ac2941910a7f0d10f8e28'
url = 'https://api.soundcloud.com/'
-search_url = url + 'search?{query}&facet=model&limit=20&offset=0&linked_partitioning=1&client_id='+guest_client_id
+search_url = url + 'search?{query}&facet=model&limit=20&offset=0&linked_partitioning=1&client_id='+guest_client_id # noqa
+
def request(query, params):
global search_url
@@ -21,5 +22,7 @@ def response(resp):
if result['kind'] in ('track', 'playlist'):
title = result['title']
content = result['description']
- results.append({'url': result['permalink_url'], 'title': title, 'content': content})
+ results.append({'url': result['permalink_url'],
+ 'title': title,
+ 'content': content})
return results
diff --git a/searx/engines/stackoverflow.py b/searx/engines/stackoverflow.py
index 9ee89bc6e..35230600f 100644
--- a/searx/engines/stackoverflow.py
+++ b/searx/engines/stackoverflow.py
@@ -7,6 +7,8 @@ categories = ['it']
url = 'http://stackoverflow.com/'
search_url = url+'search?'
+result_xpath = './/div[@class="excerpt"]//text()'
+
def request(query, params):
params['url'] = search_url + urlencode({'q': query})
@@ -20,6 +22,6 @@ def response(resp):
link = result.xpath('.//div[@class="result-link"]//a')[0]
href = urljoin(url, link.attrib.get('href'))
title = escape(' '.join(link.xpath('.//text()')))
- content = escape(' '.join(result.xpath('.//div[@class="excerpt"]//text()')))
+ content = escape(' '.join(result.xpath(result_xpath)))
results.append({'url': href, 'title': title, 'content': content})
return results
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py
index 87c091e2d..d6d7cf44d 100644
--- a/searx/engines/startpage.py
+++ b/searx/engines/startpage.py
@@ -1,11 +1,10 @@
from urllib import urlencode
from lxml import html
-from urlparse import urlparse
-from cgi import escape
base_url = 'https://startpage.com/'
search_url = base_url+'do/search'
+
def request(query, params):
global search_url
query = urlencode({'q': query})[2:]
@@ -20,11 +19,10 @@ def response(resp):
results = []
dom = html.fromstring(resp.content)
# ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]
- # not ads : div[@class="result"] are the direct childs of div[@id="results"]
+ # not ads: div[@class="result"] are the direct childs of div[@id="results"]
for result in dom.xpath('//div[@id="results"]/div[@class="result"]'):
link = result.xpath('.//h3/a')[0]
url = link.attrib.get('href')
- parsed_url = urlparse(url)
title = link.text_content()
content = result.xpath('./p[@class="desc"]')[0].text_content()
results.append({'url': url, 'title': title, 'content': content})
diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py
index f9d9e26ad..23393ac4d 100644
--- a/searx/engines/twitter.py
+++ b/searx/engines/twitter.py
@@ -7,6 +7,9 @@ categories = ['social media']
base_url = 'https://twitter.com/'
search_url = base_url+'search?'
+title_xpath = './/span[@class="username js-action-profile-name"]//text()'
+content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()'
+
def request(query, params):
global search_url
@@ -21,7 +24,9 @@ def response(resp):
for tweet in dom.xpath('//li[@data-item-type="tweet"]'):
link = tweet.xpath('.//small[@class="time"]//a')[0]
url = urljoin(base_url, link.attrib.get('href'))
- title = ''.join(tweet.xpath('.//span[@class="username js-action-profile-name"]//text()'))
- content = escape(''.join(tweet.xpath('.//p[@class="js-tweet-text tweet-text"]//text()')))
- results.append({'url': url, 'title': title, 'content': content})
+ title = ''.join(tweet.xpath(title_xpath))
+ content = escape(''.join(tweet.xpath(content_xpath)))
+ results.append({'url': url,
+ 'title': title,
+ 'content': content})
return results
diff --git a/searx/engines/vimeo.py b/searx/engines/vimeo.py
index 35bc3d50a..924497a99 100644
--- a/searx/engines/vimeo.py
+++ b/searx/engines/vimeo.py
@@ -5,27 +5,31 @@ from lxml import html
base_url = 'http://vimeo.com'
search_url = base_url + '/search?{query}'
-url_xpath     = None
+url_xpath = None
 content_xpath = None
-title_xpath   = None
+title_xpath = None
results_xpath = ''
+content_tpl = '<a href="{0}">  <img src="{2}"/> </a>'
-# the cookie set by vimeo contains all the following values, but only __utma seems to be requiered
+# the cookie set by vimeo contains all the following values,
+# but only __utma seems to be requiered
cookie = {
#'vuid':'918282893.1027205400'
# 'ab_bs':'%7B%223%22%3A279%7D'
- '__utma':'00000000.000#0000000.0000000000.0000000000.0000000000.0'
+ '__utma': '00000000.000#0000000.0000000000.0000000000.0000000000.0'
# '__utmb':'18302654.1.10.1388942090'
#, '__utmc':'18302654'
- #, '__utmz':'18#302654.1388942090.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'
+ #, '__utmz':'18#302654.1388942090.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)' # noqa
#, '__utml':'search'
}
+
def request(query, params):
- params['url'] = search_url.format(query=urlencode({'q' :query}))
+ params['url'] = search_url.format(query=urlencode({'q': query}))
params['cookies'] = cookie
return params
+
def response(resp):
results = []
dom = html.fromstring(resp.text)
@@ -36,10 +40,9 @@ def response(resp):
url = base_url + result.xpath(url_xpath)[0]
title = p.unescape(extract_text(result.xpath(title_xpath)))
thumbnail = extract_text(result.xpath(content_xpath)[0])
-    content = '<a href="{0}">  <img src="{2}"/> </a>'.format(url, title, thumbnail)
- results.append({'url': url
- , 'title': title
- , 'content': content
- , 'template':'videos.html'
- , 'thumbnail': thumbnail})
+ results.append({'url': url,
+ 'title': title,
+ 'content': content_tpl.format(url, title, thumbnail),
+ 'template': 'videos.html',
+ 'thumbnail': thumbnail})
return results
diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py
index 5e2c3c38b..8960b5f21 100644
--- a/searx/engines/xpath.py
+++ b/searx/engines/xpath.py
@@ -1,21 +1,25 @@
from lxml import html
from urllib import urlencode, unquote
from urlparse import urlparse, urljoin
-from cgi import escape
from lxml.etree import _ElementStringResult
+from searx.utils import html_to_text
-search_url    = None
-url_xpath     = None
+search_url = None
+url_xpath = None
 content_xpath = None
-title_xpath   = None
+title_xpath = None
suggestion_xpath = ''
results_xpath = ''
+
'''
if xpath_results is list, extract the text from each result and concat the list
-if xpath_results is a xml element, extract all the text node from it ( text_content() method from lxml )
+if xpath_results is a xml element, extract all the text node from it
+ ( text_content() method from lxml )
if xpath_results is a string element, then it's already done
'''
+
+
def extract_text(xpath_results):
if type(xpath_results) == list:
# it's list of result : concat everything using recursive call
@@ -30,7 +34,7 @@ def extract_text(xpath_results):
return ''.join(xpath_results)
else:
# it's a element
- return xpath_results.text_content()
+ return html_to_text(xpath_results.text_content())
def extract_url(xpath_results):
@@ -60,7 +64,8 @@ def normalize_url(url):
url += '/'
# FIXME : hack for yahoo
- if parsed_url.hostname == 'search.yahoo.com' and parsed_url.path.startswith('/r'):
+ if parsed_url.hostname == 'search.yahoo.com'\
+ and parsed_url.path.startswith('/r'):
p = parsed_url.path
mark = p.find('/**')
if mark != -1:
@@ -82,15 +87,15 @@ def response(resp):
if results_xpath:
for result in dom.xpath(results_xpath):
url = extract_url(result.xpath(url_xpath))
- title = extract_text(result.xpath(title_xpath)[0 ])
+ title = extract_text(result.xpath(title_xpath)[0])
content = extract_text(result.xpath(content_xpath)[0])
results.append({'url': url, 'title': title, 'content': content})
else:
for url, title, content in zip(
- map(extract_url, dom.xpath(url_xpath)), \
- map(extract_text, dom.xpath(title_xpath)), \
- map(extract_text, dom.xpath(content_xpath)), \
- ):
+ map(extract_url, dom.xpath(url_xpath)),
+ map(extract_text, dom.xpath(title_xpath)),
+ map(extract_text, dom.xpath(content_xpath))
+ ):
results.append({'url': url, 'title': title, 'content': content})
if not suggestion_xpath:
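
extract_text now folds element text through html_to_text as well, and both helpers accept raw xpath() results. A small sketch on a hand-built fragment (the module-level xpaths themselves are normally filled in from configuration, e.g. by the yahoo entry in searx/settings.yml below):

from lxml import html

dom = html.fromstring('<h3><a href="https://example.com/">An example</a></h3>')
print(extract_text(dom.xpath('//h3/a')))       # 'An example'
print(extract_url(dom.xpath('//h3/a/@href')))  # 'https://example.com/'
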
diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py
index c93ac522f..a4a41ac3b 100644
--- a/searx/engines/yacy.py
+++ b/searx/engines/yacy.py
@@ -4,10 +4,12 @@ from urllib import urlencode
url = 'http://localhost:8090'
search_url = '/yacysearch.json?{query}&maximumRecords=10'
+
def request(query, params):
- params['url'] = url + search_url.format(query=urlencode({'query':query}))
+ params['url'] = url + search_url.format(query=urlencode({'query': query}))
return params
+
def response(resp):
raw_search_results = loads(resp.text)
@@ -25,7 +27,7 @@ def response(resp):
tmp_result['content'] = ''
if len(result['description']):
- tmp_result['content'] += result['description'] +"
"
+ tmp_result['content'] += result['description'] + "
"
if len(result['pubDate']):
tmp_result['content'] += result['pubDate'] + "
"
diff --git a/searx/engines/youtube.py b/searx/engines/youtube.py
index cefdb6536..62884702f 100644
--- a/searx/engines/youtube.py
+++ b/searx/engines/youtube.py
@@ -5,6 +5,7 @@ categories = ['videos']
search_url = 'https://gdata.youtube.com/feeds/api/videos?alt=json&{query}'
+
def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}))
return params
@@ -30,17 +31,16 @@ def response(resp):
thumbnail = ''
if len(result['media$group']['media$thumbnail']):
thumbnail = result['media$group']['media$thumbnail'][0]['url']
-        content += '<a href="{0}"><img src="{1}"/></a>'.format(url, thumbnail)
+        content += '<a href="{0}"><img src="{1}"/></a>'.format(url, thumbnail)  # noqa
if len(content):
            content += '<br />' + result['content']['$t']
else:
content = result['content']['$t']
- results.append({'url': url
- , 'title': title
- , 'content': content
- , 'template':'videos.html'
- , 'thumbnail':thumbnail})
+ results.append({'url': url,
+ 'title': title,
+ 'content': content,
+ 'template': 'videos.html',
+ 'thumbnail': thumbnail})
return results
-
diff --git a/searx/settings.yml b/searx/settings.yml
new file mode 100644
index 000000000..c207f3f57
--- /dev/null
+++ b/searx/settings.yml
@@ -0,0 +1,111 @@
+server:
+ port : 8888
+ secret_key : "ultrasecretkey" # change this!
+ debug : True
+ request_timeout : 3.0 # seconds
+ base_url: False
+
+engines:
+ - name : wikipedia
+ engine : mediawiki
+ url : https://en.wikipedia.org/
+ number_of_results : 1
+
+ - name : bing
+ engine : bing
+ locale : en-US
+
+ - name : currency
+ engine : currency_convert
+ categories : general
+
+ - name : deviantart
+ engine : deviantart
+ categories : images
+
+ - name : ddg definitions
+ engine : duckduckgo_definitions
+
+ - name : duckduckgo
+ engine : duckduckgo
+ locale : en-us
+
+ - name : filecrop
+ engine : filecrop
+ categories : files
+
+ - name : flickr
+ engine : flickr
+ categories : images
+
+ - name : github
+ engine : github
+ categories : it
+
+ - name : google
+ engine : json_engine
+ search_url : https://ajax.googleapis.com/ajax/services/search/web?v=2.0&start=0&rsz=large&safe=off&filter=off&q={query}
+ categories : general
+ url_query : /responseData/results/unescapedUrl
+ content_query : /responseData/results/content
+ title_query : /responseData/results/titleNoFormatting
+
+ - name : google images
+ engine : google_images
+ categories : images
+
+ - name : piratebay
+ engine : piratebay
+ categories : videos, music, files
+
+ - name : soundcloud
+ engine : soundcloud
+ categories : music
+
+ - name : stackoverflow
+ engine : stackoverflow
+ categories : it
+
+ - name : startpage
+ engine : startpage
+
+ - name : twitter
+ engine : twitter
+ categories : social media
+
+ - name : urbandictionary
+ engine : xpath
+ search_url : http://www.urbandictionary.com/define.php?term={query}
+ url_xpath : //div[@class="word"]//a/@href
+ title_xpath : //div[@class="word"]//a
+ content_xpath : //div[@class="definition"]
+
+ - name : yahoo
+ engine : xpath
+ search_url : http://search.yahoo.com/search?p={query}
+ results_xpath : //div[@class="res"]
+ url_xpath : .//h3/a/@href
+ title_xpath : .//h3/a
+ content_xpath : .//div[@class="abstr"]
+ suggestion_xpath : //div[@id="satat"]//a
+
+ - name : youtube
+ engine : youtube
+ categories : videos
+
+ - name : dailymotion
+ engine : dailymotion
+ locale : en_US
+ categories : videos
+
+ - name : vimeo
+ engine : vimeo
+ categories : videos
+ results_xpath : //div[@id="browse_content"]/ol/li
+ url_xpath : ./a/@href
+ title_xpath : ./a/div[@class="data"]/p[@class="title"]/text()
+ content_xpath : ./a/img/@src
+
+locales:
+ en : English
+ hu : Magyar
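
settings.yml replaces the old engines.cfg: each engine entry names a module from searx/engines and overrides its module-level attributes (locale, categories, the xpath/json query strings above). A hedged sketch of how such an entry can be applied to its module — searx's real loader lives in searx/engines/__init__.py, which this diff does not show:

from yaml import load

with open('searx/settings.yml') as f:
    settings = load(f)

for entry in settings['engines']:
    module = __import__('searx.engines.' + entry['engine'],
                        fromlist=['searx.engines'])
    for key, value in entry.items():
        if key not in ('name', 'engine'):
            setattr(module, key, value)  # e.g. url_xpath, locale, categories
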
diff --git a/searx/settings_robot.py b/searx/settings_robot.py
deleted file mode 100644
index 004add2a1..000000000
--- a/searx/settings_robot.py
+++ /dev/null
@@ -1,16 +0,0 @@
-
-port = 11111
-
-secret_key = "ultrasecretkey" # change this!
-
-debug = False
-
-request_timeout = 5.0 # seconds
-
-weights = {} # 'search_engine_name': float(weight) | default is 1.0
-
-blacklist = [] # search engine blacklist
-
-categories = {} # custom search engine categories
-
-base_url = None # "https://your.domain.tld/" or None (to use request parameters)
diff --git a/settings.yml b/searx/settings_robot.yml
similarity index 98%
rename from settings.yml
rename to searx/settings_robot.yml
index b7c82cc72..d60ed3272 100644
--- a/settings.yml
+++ b/searx/settings_robot.yml
@@ -1,7 +1,7 @@
server:
- port : 8888
+ port : 11111
secret_key : "ultrasecretkey" # change this!
- debug : True
+ debug : False
request_timeout : 3.0 # seconds
base_url: False
diff --git a/searx/static/css/style.css b/searx/static/css/style.css
index 83d281806..4163e753d 100644
--- a/searx/static/css/style.css
+++ b/searx/static/css/style.css
@@ -49,6 +49,8 @@ input[type="submit"] { border: 1px solid #666666; color: #444444; padding: 4px;
input[type="checkbox"] { visibility: hidden; }
+fieldset { margin: 8px; }
+
#categories { margin: 0 10px; }
.checkbox_container { display: inline-block; position: relative; margin: 0 3px; padding: 0px; }
@@ -79,7 +81,6 @@ a { text-decoration: none; color: #1a11be; }
a:visited { color: #7b11be; }
.result { margin: 19px 0 18px 0; padding: 0; max-width: 55em; clear: both; }
-.result:hover { background: #e8e7e6; }
.result_title { margin-bottom: 0; }
.result h3 { font-size: 1em; word-wrap:break-word; margin: 5px 0 1px 0; padding: 0 }
.result .content { font-size: 0.8em; margin: 0; padding: 0; max-width: 54em; word-wrap:break-word; line-height: 1.24; }
@@ -201,3 +202,5 @@ tr:hover td { background: #DDDDDD; }
.result img { max-width: 90%; width: auto; height: auto }
}
+
+.favicon { float: left; margin-right: 4px; }
diff --git a/searx/templates/about.html b/searx/templates/about.html
index 4e3f4bf4e..bb0a3e882 100644
--- a/searx/templates/about.html
+++ b/searx/templates/about.html
@@ -8,25 +8,25 @@
-If you do care about privacy, want to be a conscious user, moreover believe
+If you do care about privacy, want to be a conscious user, or otherwise believe
 in digital freedom, make Searx your default search engine or run it on your own server
 Searx is a metasearch engine,
 inspired by the seeks project.
-It provides basic privacy by mixing your queries with searches on other platforms without storing search data. Queries are made using a POST request on every browser (except chrome*). Therefore they don't show up in our logs, neither in your url history. In case of Chrome* users there is an exception, Searx uses the search bar to perform GET requests.
-Searx can be added to your browser's search bar, moreover it can be set as the default search engine.
-Searx appreciates your suspicion regarding logs, so take the code and run it yourself!
-Add your Searx to this list to help other people to have privacy and make the Internet freer!
-The more decentralized the Internet is the more freedom we have!
+It provides basic privacy by mixing your queries with searches on other platforms without storing search data. Queries are made using a POST request on every browser (except chrome*). Therefore they show up in neither our logs, nor your url history. In case of Chrome* users there is an exception, Searx uses the search bar to perform GET requests.
+Searx can be added to your browser's search bar; moreover, it can be set as the default search engine.
+Searx appreciates your concern regarding logs, so take the code and run it yourself!
+Add your Searx to this list to help other people reclaim their privacy and make the Internet freer!
+The more decentralized the Internet is, the more freedom we have!
 Don't forget to restart searx after config edit!
@@ -48,7 +48,7 @@ Searx can be added to your browser's search bar, moreover it can be set as the d
 See the installation and setup wiki page
-Stats page contains some useful data about the used engines.
+Stats page contains some useful data about the engines used.
 {% endblock %}
diff --git a/searx/templates/categories.html b/searx/templates/categories.html
index b1fd3d1fc..57e63c85d 100644
--- a/searx/templates/categories.html
+++ b/searx/templates/categories.html
@@ -1,7 +1,7 @@
-<th>Engine name</th>
-<th>Category</th>
+<th>{{ _('Engine name') }}</th>
+<th>{{ _('Category') }}</th>
 Please add more engines to this list, pull requests are welcome!
-about
-preferences
+{{ _('about') }}
+{{ _('preferences') }}
- +- about - preferences + {{ _('about') }} + {{ _('preferences') }}
{% endblock %} diff --git a/searx/templates/preferences.html b/searx/templates/preferences.html index 705139e58..3c2afef21 100644 --- a/searx/templates/preferences.html +++ b/searx/templates/preferences.html @@ -2,18 +2,28 @@ {% block head %} {% endblock %} {% block content %} {% endblock %} diff --git a/searx/templates/result_templates/default.html b/searx/templates/result_templates/default.html index ab6d469b4..d06a4598a 100644 --- a/searx/templates/result_templates/default.html +++ b/searx/templates/result_templates/default.html @@ -1,13 +1,11 @@{% if result.content %}{{ result.content|safe }}
{% endif %}
{{ result.pretty_url }}
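
The template changes wrap user-facing strings in _(), the gettext callable that pairs with the new `make locales` target and the locales section of settings.yml. This diff shows only the template and build halves of the i18n work; a minimal sketch of the missing app-side wiring, assuming Flask-Babel:

from flask import Flask, request
from flask.ext.babel import Babel  # 2014-era import path for Flask-Babel

app = Flask(__name__)
babel = Babel(app)

@babel.localeselector
def get_locale():
    # 'en' and 'hu' come from the locales section of searx/settings.yml
    return request.accept_languages.best_match(['en', 'hu'])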