diff --git a/.travis.yml b/.travis.yml index 0a174ff66..b6017cd93 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,6 +9,7 @@ addons: language: python python: - "2.7" + - "3.6" before_install: - "export DISPLAY=:99.0" - "sh -e /etc/init.d/xvfb start" @@ -24,9 +25,9 @@ script: - ./manage.sh styles - ./manage.sh grunt_build - ./manage.sh tests - - ./manage.sh py_test_coverage after_success: - coveralls + - ./manage.sh py_test_coverage + - coveralls notifications: irc: channels: diff --git a/requirements-dev.txt b/requirements-dev.txt index 01d1e1497..691a1e7ba 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,8 +3,7 @@ mock==2.0.0 nose2[coverage-plugin] pep8==1.7.0 plone.testing==5.0.0 -robotframework-selenium2library==1.8.0 -robotsuite==1.7.0 +splinter==0.7.5 transifex-client==0.12.2 unittest2==1.1.0 zope.testrunner==4.5.1 diff --git a/searx/answerers/__init__.py b/searx/answerers/__init__.py index 8f5951c75..444316f11 100644 --- a/searx/answerers/__init__.py +++ b/searx/answerers/__init__.py @@ -1,8 +1,12 @@ from os import listdir from os.path import realpath, dirname, join, isdir +from sys import version_info from searx.utils import load_module from collections import defaultdict +if version_info[0] == 3: + unicode = str + answerers_dir = dirname(realpath(__file__)) @@ -10,7 +14,7 @@ answerers_dir = dirname(realpath(__file__)) def load_answerers(): answerers = [] for filename in listdir(answerers_dir): - if not isdir(join(answerers_dir, filename)): + if not isdir(join(answerers_dir, filename)) or filename.startswith('_'): continue module = load_module('answerer.py', join(answerers_dir, filename)) if not hasattr(module, 'keywords') or not isinstance(module.keywords, tuple) or not len(module.keywords): @@ -30,12 +34,12 @@ def get_answerers_by_keywords(answerers): def ask(query): results = [] - query_parts = filter(None, query.query.split()) + query_parts = list(filter(None, query.query.split())) - if query_parts[0] not in answerers_by_keywords: + if query_parts[0].decode('utf-8') not in answerers_by_keywords: return results - for answerer in answerers_by_keywords[query_parts[0]]: + for answerer in answerers_by_keywords[query_parts[0].decode('utf-8')]: result = answerer(query) if result: results.append(result) diff --git a/searx/answerers/random/answerer.py b/searx/answerers/random/answerer.py index 510d9f5be..f2b8bf3e5 100644 --- a/searx/answerers/random/answerer.py +++ b/searx/answerers/random/answerer.py @@ -1,5 +1,6 @@ import random import string +import sys from flask_babel import gettext # required answerer attribute @@ -8,7 +9,11 @@ keywords = ('random',) random_int_max = 2**31 -random_string_letters = string.lowercase + string.digits + string.uppercase +if sys.version_info[0] == 2: + random_string_letters = string.lowercase + string.digits + string.uppercase +else: + unicode = str + random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase def random_string(): @@ -24,9 +29,9 @@ def random_int(): return unicode(random.randint(-random_int_max, random_int_max)) -random_types = {u'string': random_string, - u'int': random_int, - u'float': random_float} +random_types = {b'string': random_string, + b'int': random_int, + b'float': random_float} # required answerer function diff --git a/searx/answerers/statistics/answerer.py b/searx/answerers/statistics/answerer.py index a04695f56..73dd25cfd 100644 --- a/searx/answerers/statistics/answerer.py +++ b/searx/answerers/statistics/answerer.py @@ -1,8 +1,12 @@ +from sys import version_info from functools import reduce from operator import mul from flask_babel import gettext +if version_info[0] == 3: + unicode = str + keywords = ('min', 'max', 'avg', @@ -19,22 +23,22 @@ def answer(query): return [] try: - args = map(float, parts[1:]) + args = list(map(float, parts[1:])) except: return [] func = parts[0] answer = None - if func == 'min': + if func == b'min': answer = min(args) - elif func == 'max': + elif func == b'max': answer = max(args) - elif func == 'avg': + elif func == b'avg': answer = sum(args) / len(args) - elif func == 'sum': + elif func == b'sum': answer = sum(args) - elif func == 'prod': + elif func == b'prod': answer = reduce(mul, args, 1) if answer is None: diff --git a/searx/autocomplete.py b/searx/autocomplete.py index b360af9f6..de0623a8a 100644 --- a/searx/autocomplete.py +++ b/searx/autocomplete.py @@ -18,7 +18,6 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. from lxml import etree from json import loads -from urllib import urlencode from searx import settings from searx.languages import language_codes from searx.engines import ( @@ -26,6 +25,11 @@ from searx.engines import ( ) from searx.poolrequests import get as http_get +try: + from urllib import urlencode +except: + from urllib.parse import urlencode + def get(*args, **kwargs): if 'timeout' not in kwargs: diff --git a/searx/engines/1337x.py b/searx/engines/1337x.py index c6bc3cb6d..0de04bd95 100644 --- a/searx/engines/1337x.py +++ b/searx/engines/1337x.py @@ -1,8 +1,7 @@ -from urllib import quote from lxml import html from searx.engines.xpath import extract_text from searx.utils import get_torrent_size -from urlparse import urljoin +from searx.url_utils import quote, urljoin url = 'https://1337x.to/' search_url = url + 'search/{search_term}/{pageno}/' diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 77184a282..023ec409a 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -72,12 +72,11 @@ def load_engine(engine_data): if engine_data['categories'] == 'none': engine.categories = [] else: - engine.categories = map( - str.strip, engine_data['categories'].split(',')) + engine.categories = list(map(str.strip, engine_data['categories'].split(','))) continue setattr(engine, param_name, engine_data[param_name]) - for arg_name, arg_value in engine_default_args.iteritems(): + for arg_name, arg_value in engine_default_args.items(): if not hasattr(engine, arg_name): setattr(engine, arg_name, arg_value) diff --git a/searx/engines/archlinux.py b/searx/engines/archlinux.py index dca825790..cad06f8c6 100644 --- a/searx/engines/archlinux.py +++ b/searx/engines/archlinux.py @@ -11,10 +11,9 @@ @parse url, title """ -from urlparse import urljoin -from urllib import urlencode from lxml import html from searx.engines.xpath import extract_text +from searx.url_utils import urlencode, urljoin # engine dependent config categories = ['it'] diff --git a/searx/engines/base.py b/searx/engines/base.py index a552453ce..ff006a3bc 100755 --- a/searx/engines/base.py +++ b/searx/engines/base.py @@ -14,10 +14,10 @@ """ from lxml import etree -from urllib import urlencode -from searx.utils import searx_useragent from datetime import datetime import re +from searx.url_utils import urlencode +from searx.utils import searx_useragent categories = ['science'] @@ -73,7 +73,7 @@ def request(query, params): def response(resp): results = [] - search_results = etree.XML(resp.content) + search_results = etree.XML(resp.text) for entry in search_results.xpath('./result/doc'): content = "No description available" diff --git a/searx/engines/bing.py b/searx/engines/bing.py index 4e7ead82d..052d567ea 100644 --- a/searx/engines/bing.py +++ b/searx/engines/bing.py @@ -13,9 +13,9 @@ @todo publishedDate """ -from urllib import urlencode from lxml import html from searx.engines.xpath import extract_text +from searx.url_utils import urlencode # engine dependent config categories = ['general'] diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py index 97f6dca37..e79740e50 100644 --- a/searx/engines/bing_images.py +++ b/searx/engines/bing_images.py @@ -15,11 +15,11 @@ limited response to 10 images """ -from urllib import urlencode from lxml import html from json import loads import re from searx.engines.bing import _fetch_supported_languages, supported_languages_url +from searx.url_utils import urlencode # engine dependent config categories = ['images'] diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py index 765bcd38e..8e3cc517e 100644 --- a/searx/engines/bing_news.py +++ b/searx/engines/bing_news.py @@ -11,13 +11,12 @@ @parse url, title, content, publishedDate, thumbnail """ -from urllib import urlencode -from urlparse import urlparse, parse_qsl from datetime import datetime from dateutil import parser from lxml import etree from searx.utils import list_get from searx.engines.bing import _fetch_supported_languages, supported_languages_url +from searx.url_utils import urlencode, urlparse, parse_qsl # engine dependent config categories = ['news'] @@ -86,7 +85,7 @@ def request(query, params): def response(resp): results = [] - rss = etree.fromstring(resp.content) + rss = etree.fromstring(resp.text) ns = rss.nsmap diff --git a/searx/engines/blekko_images.py b/searx/engines/blekko_images.py index c0664f390..f71645634 100644 --- a/searx/engines/blekko_images.py +++ b/searx/engines/blekko_images.py @@ -11,7 +11,7 @@ """ from json import loads -from urllib import urlencode +from searx.url_utils import urlencode # engine dependent config categories = ['images'] diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py index 33c8355de..40438673f 100644 --- a/searx/engines/btdigg.py +++ b/searx/engines/btdigg.py @@ -10,11 +10,10 @@ @parse url, title, content, seed, leech, magnetlink """ -from urlparse import urljoin -from urllib import quote from lxml import html from operator import itemgetter from searx.engines.xpath import extract_text +from searx.url_utils import quote, urljoin from searx.utils import get_torrent_size # engine dependent config @@ -38,7 +37,7 @@ def request(query, params): def response(resp): results = [] - dom = html.fromstring(resp.content) + dom = html.fromstring(resp.text) search_res = dom.xpath('//div[@id="search_res"]/table/tr') diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py index bc839cfb5..1218d4849 100644 --- a/searx/engines/currency_convert.py +++ b/searx/engines/currency_convert.py @@ -1,21 +1,25 @@ -from datetime import datetime +import json import re import os -import json +import sys import unicodedata +from datetime import datetime + +if sys.version_info[0] == 3: + unicode = str categories = [] url = 'https://download.finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X' weight = 100 -parser_re = re.compile(u'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) # noqa +parser_re = re.compile(b'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) db = 1 def normalize_name(name): - name = name.lower().replace('-', ' ').rstrip('s') + name = name.decode('utf-8').lower().replace('-', ' ').rstrip('s') name = re.sub(' +', ' ', name) return unicodedata.normalize('NFKD', name).lower() @@ -35,7 +39,7 @@ def iso4217_to_name(iso4217, language): def request(query, params): - m = parser_re.match(unicode(query, 'utf8')) + m = parser_re.match(query) if not m: # wrong query return params diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py index 8c69aafe0..fad7e596c 100644 --- a/searx/engines/dailymotion.py +++ b/searx/engines/dailymotion.py @@ -12,10 +12,9 @@ @todo set content-parameter with correct data """ -from urllib import urlencode from json import loads from datetime import datetime -from requests import get +from searx.url_utils import urlencode # engine dependent config categories = ['videos'] diff --git a/searx/engines/deezer.py b/searx/engines/deezer.py index 3db1af3d2..af63478fb 100644 --- a/searx/engines/deezer.py +++ b/searx/engines/deezer.py @@ -11,7 +11,7 @@ """ from json import loads -from urllib import urlencode +from searx.url_utils import urlencode # engine dependent config categories = ['music'] @@ -30,8 +30,7 @@ embedded_url = '' +cid_re = re.compile(r'client_id:"([^"]*)"', re.I | re.U) + def get_client_id(): response = http_get("https://soundcloud.com") - rx_namespace = {"re": "http://exslt.org/regular-expressions"} if response.ok: - tree = etree.parse(StringIO(response.content), etree.HTMLParser()) - script_tags = tree.xpath("//script[re:match(@src, '(.*app.*js)')]", namespaces=rx_namespace) + tree = html.fromstring(response.content) + script_tags = tree.xpath("//script[contains(@src, '/assets/app')]") app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None] # extracts valid app_js urls from soundcloud.com content @@ -51,7 +56,7 @@ def get_client_id(): # gets app_js and searches for the clientid response = http_get(app_js_url) if response.ok: - cids = re.search(r'client_id:"([^"]*)"', response.content, re.M | re.I) + cids = cid_re.search(response.text) if cids is not None and len(cids.groups()): return cids.groups()[0] logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!") diff --git a/searx/engines/spotify.py b/searx/engines/spotify.py index 249ba91ef..aed756be3 100644 --- a/searx/engines/spotify.py +++ b/searx/engines/spotify.py @@ -11,7 +11,7 @@ """ from json import loads -from urllib import urlencode +from searx.url_utils import urlencode # engine dependent config categories = ['music'] @@ -29,8 +29,7 @@ embedded_url = '
""" - response = mock.Mock(content=html) + response = mock.Mock(text=html.encode('utf-8')) results = startpage.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 1) @@ -133,7 +133,7 @@ class TestStartpageEngine(SearxTestCase): """ - response = mock.Mock(content=html) + response = mock.Mock(text=html.encode('utf-8')) results = startpage.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 1) diff --git a/tests/unit/engines/test_swisscows.py b/tests/unit/engines/test_swisscows.py index 27f33d70a..53890be78 100644 --- a/tests/unit/engines/test_swisscows.py +++ b/tests/unit/engines/test_swisscows.py @@ -33,13 +33,13 @@ class TestSwisscowsEngine(SearxTestCase): self.assertRaises(AttributeError, swisscows.response, '') self.assertRaises(AttributeError, swisscows.response, '[]') - response = mock.Mock(content='') + response = mock.Mock(text=b'') self.assertEqual(swisscows.response(response), []) - response = mock.Mock(content='') + response = mock.Mock(text=b'') self.assertEqual(swisscows.response(response), []) - html = u""" + html = b""" """ - response = mock.Mock(content=html) + response = mock.Mock(text=html) results = swisscows.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 3) diff --git a/tests/unit/engines/test_tokyotoshokan.py b/tests/unit/engines/test_tokyotoshokan.py index efe7dbfc2..b5c6fad17 100644 --- a/tests/unit/engines/test_tokyotoshokan.py +++ b/tests/unit/engines/test_tokyotoshokan.py @@ -91,7 +91,7 @@ class TestTokyotoshokanEngine(SearxTestCase): self.assertEqual(r['title'], 'Koyomimonogatari') self.assertEqual(r['magnetlink'], 'magnet:?xt=urn:btih:4c19eb46b5113685fbd2288ed2531b0b') self.assertEqual(r['filesize'], int(1024 * 1024 * 10.5)) - self.assertEqual(r['publishedDate'], datetime(2016, 03, 26, 16, 41)) + self.assertEqual(r['publishedDate'], datetime(2016, 3, 26, 16, 41)) self.assertEqual(r['content'], 'Comment: sample comment') self.assertEqual(r['seed'], 53) self.assertEqual(r['leech'], 18) diff --git a/tests/unit/engines/test_wikidata.py b/tests/unit/engines/test_wikidata.py index ec5f52ef9..aa69f116e 100644 --- a/tests/unit/engines/test_wikidata.py +++ b/tests/unit/engines/test_wikidata.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -from json import loads from lxml.html import fromstring from collections import defaultdict import mock @@ -31,7 +30,7 @@ class TestWikidataEngine(SearxTestCase): self.assertRaises(AttributeError, wikidata.response, '') self.assertRaises(AttributeError, wikidata.response, '[]') - response = mock.Mock(content='', search_params={"language": "all"}) + response = mock.Mock(text='', search_params={"language": "all"}) self.assertEqual(wikidata.response(response), []) def test_getDetail(self): diff --git a/tests/unit/engines/test_wikipedia.py b/tests/unit/engines/test_wikipedia.py index 988080b6a..7a86514c7 100644 --- a/tests/unit/engines/test_wikipedia.py +++ b/tests/unit/engines/test_wikipedia.py @@ -13,15 +13,15 @@ class TestWikipediaEngine(SearxTestCase): query = 'test_query' dicto = defaultdict(dict) dicto['language'] = 'fr-FR' - params = wikipedia.request(query, dicto) + params = wikipedia.request(query.encode('utf-8'), dicto) self.assertIn('url', params) self.assertIn(query, params['url']) self.assertIn('test_query', params['url']) self.assertIn('Test_Query', params['url']) self.assertIn('fr.wikipedia.org', params['url']) - query = 'Test_Query' - params = wikipedia.request(query, dicto) + query = u'Test_Query' + params = wikipedia.request(query.encode('utf-8'), dicto) self.assertIn('Test_Query', params['url']) self.assertNotIn('test_query', params['url']) @@ -57,7 +57,7 @@ class TestWikipediaEngine(SearxTestCase): } } }""" - response = mock.Mock(content=json, search_params=dicto) + response = mock.Mock(text=json, search_params=dicto) self.assertEqual(wikipedia.response(response), []) # normal case @@ -80,7 +80,7 @@ class TestWikipediaEngine(SearxTestCase): } } }""" - response = mock.Mock(content=json, search_params=dicto) + response = mock.Mock(text=json, search_params=dicto) results = wikipedia.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 2) @@ -108,10 +108,10 @@ class TestWikipediaEngine(SearxTestCase): } } }""" - response = mock.Mock(content=json, search_params=dicto) + response = mock.Mock(text=json, search_params=dicto) results = wikipedia.response(response) self.assertEqual(type(results), list) - self.assertEqual(len(results), 0) + self.assertEqual(len(results), 2) # no image json = """ @@ -130,7 +130,7 @@ class TestWikipediaEngine(SearxTestCase): } } }""" - response = mock.Mock(content=json, search_params=dicto) + response = mock.Mock(text=json, search_params=dicto) results = wikipedia.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 2) @@ -158,7 +158,7 @@ class TestWikipediaEngine(SearxTestCase): } } }""" - response = mock.Mock(content=json, search_params=dicto) + response = mock.Mock(text=json, search_params=dicto) results = wikipedia.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 2) diff --git a/tests/unit/engines/test_wolframalpha_api.py b/tests/unit/engines/test_wolframalpha_api.py index 64a64ceb3..30d337645 100644 --- a/tests/unit/engines/test_wolframalpha_api.py +++ b/tests/unit/engines/test_wolframalpha_api.py @@ -35,11 +35,11 @@ class TestWolframAlphaAPIEngine(SearxTestCase): xml = '''first test content
first test content