From 52e615dede8538c36f569d2cf07835427a9a0db6 Mon Sep 17 00:00:00 2001
From: Adam Tauber <asciimoo@gmail.com>
Date: Wed, 30 Nov 2016 18:43:03 +0100
Subject: [PATCH] [enh] py3 compatibility

---
 .travis.yml                                  |   5 +-
 requirements-dev.txt                         |   3 +-
 searx/answerers/__init__.py                  |  12 +-
 searx/answerers/random/answerer.py           |  13 +-
 searx/answerers/statistics/answerer.py       |  16 +-
 searx/autocomplete.py                        |   6 +-
 searx/engines/1337x.py                       |   3 +-
 searx/engines/__init__.py                    |   5 +-
 searx/engines/archlinux.py                   |   3 +-
 searx/engines/base.py                        |   6 +-
 searx/engines/bing.py                        |   2 +-
 searx/engines/bing_images.py                 |   2 +-
 searx/engines/bing_news.py                   |   5 +-
 searx/engines/blekko_images.py               |   2 +-
 searx/engines/btdigg.py                      |   5 +-
 searx/engines/currency_convert.py            |  14 +-
 searx/engines/dailymotion.py                 |   3 +-
 searx/engines/deezer.py                      |   5 +-
 searx/engines/deviantart.py                  |   2 +-
 searx/engines/dictzone.py                    |   6 +-
 searx/engines/digbt.py                       |   8 +-
 searx/engines/digg.py                        |   4 +-
 searx/engines/doku.py                        |   2 +-
 searx/engines/duckduckgo.py                  |   2 +-
 searx/engines/duckduckgo_definitions.py      |   6 +-
 searx/engines/faroo.py                       |   2 +-
 searx/engines/fdroid.py                      |   7 +-
 searx/engines/filecrop.py                    |  11 +-
 searx/engines/flickr.py                      |   2 +-
 searx/engines/flickr_noapi.py                |   2 +-
 searx/engines/framalibre.py                  |   4 +-
 searx/engines/frinkiac.py                    |   2 +-
 searx/engines/gigablast.py                   |   3 +-
 searx/engines/github.py                      |   2 +-
 searx/engines/google.py                      |   5 +-
 searx/engines/google_images.py               |   2 +-
 searx/engines/google_news.py                 |   3 +-
 searx/engines/ina.py                         |  10 +-
 searx/engines/json_engine.py                 |  11 +-
 searx/engines/kickass.py                     |   3 +-
 searx/engines/mediawiki.py                   |   2 +-
 searx/engines/mixcloud.py                    |   2 +-
 searx/engines/nyaa.py                        |   2 +-
 searx/engines/openstreetmap.py               |   4 -
 searx/engines/photon.py                      |   2 +-
 searx/engines/piratebay.py                   |   3 +-
 searx/engines/qwant.py                       |   3 +-
 searx/engines/reddit.py                      |   6 +-
 searx/engines/scanr_structures.py            |   4 +-
 searx/engines/searchcode_code.py             |   5 +-
 searx/engines/searchcode_doc.py              |   5 +-
 searx/engines/seedpeer.py                    |   4 +-
 searx/engines/soundcloud.py                  |  19 ++-
 searx/engines/spotify.py                     |   5 +-
 searx/engines/stackoverflow.py               |   6 +-
 searx/engines/startpage.py                   |   2 +-
 searx/engines/subtitleseeker.py              |   2 +-
 searx/engines/swisscows.py                   |  27 ++--
 searx/engines/tokyotoshokan.py               |  11 +-
 searx/engines/torrentz.py                    |   8 +-
 searx/engines/translated.py                  |   4 +
 searx/engines/twitter.py                     |   3 +-
 searx/engines/vimeo.py                       |   2 +-
 searx/engines/wikidata.py                    |  13 +-
 searx/engines/wikipedia.py                   |  21 ++-
 searx/engines/wolframalpha_api.py            |  13 +-
 searx/engines/wolframalpha_noapi.py          |   9 +-
 searx/engines/www1x.py                       |   6 +-
 searx/engines/www500px.py                    |   3 +-
 searx/engines/xpath.py                       |   4 +-
 searx/engines/yacy.py                        |   2 +-
 searx/engines/yahoo.py                       |   3 +-
 searx/engines/yahoo_news.py                  |   6 +-
 searx/engines/yandex.py                      |   4 +-
 searx/engines/youtube_api.py                 |   2 +-
 searx/engines/youtube_noapi.py               |   2 +-
 searx/plugins/__init__.py                    |   5 +-
 searx/plugins/doai_rewrite.py                |   2 +-
 searx/plugins/https_rewrite.py               |   5 +-
 searx/plugins/self_info.py                   |   4 +-
 searx/plugins/tracker_url_remover.py         |   2 +-
 searx/preferences.py                         |  18 +--
 searx/query.py                               |   8 +-
 searx/results.py                             |   6 +-
 searx/search.py                              |  12 +-
 searx/settings_robot.yml                     |   2 +-
 searx/templates/courgette/404.html           |   2 +-
 searx/templates/legacy/404.html              |   2 +-
 searx/templates/oscar/404.html               |   2 +-
 searx/templates/pix-art/404.html             |   2 +-
 searx/testing.py                             |  42 ++++--
 searx/url_utils.py                           |  28 ++++
 searx/utils.py                               |  26 +++-
 searx/webapp.py                              |  36 +++--
 tests/robot/__init__.py                      |  75 ++++++++++
 tests/robot/test_basic.robot                 | 153 --------------------
 tests/unit/engines/test_archlinux.py         |   4 +-
 tests/unit/engines/test_bing.py              |   6 +-
 tests/unit/engines/test_bing_news.py         |  12 +-
 tests/unit/engines/test_btdigg.py            |  12 +-
 tests/unit/engines/test_currency_convert.py  |   4 +-
 tests/unit/engines/test_digbt.py             |   4 +-
 tests/unit/engines/test_duckduckgo.py        |   3 +-
 tests/unit/engines/test_frinkiac.py          |   5 +-
 tests/unit/engines/test_gigablast.py         |   1 +
 tests/unit/engines/test_soundcloud.py        |   2 +-
 tests/unit/engines/test_startpage.py         |   6 +-
 tests/unit/engines/test_swisscows.py         |   8 +-
 tests/unit/engines/test_tokyotoshokan.py     |   2 +-
 tests/unit/engines/test_wikidata.py          |   3 +-
 tests/unit/engines/test_wikipedia.py         |  18 +--
 tests/unit/engines/test_wolframalpha_api.py  |  10 +-
 tests/unit/test_plugins.py                   |  16 +-
 tests/unit/test_utils.py                     |   8 +-
 tests/unit/test_webapp.py                    |  46 +++---
 115 files changed, 517 insertions(+), 513 deletions(-)
 create mode 100644 searx/url_utils.py
 delete mode 100644 tests/robot/test_basic.robot

diff --git a/.travis.yml b/.travis.yml
index 0a174ff66..b6017cd93 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,6 +9,7 @@ addons:
 language: python
 python:
   - "2.7"
+  - "3.6"
 before_install:
   - "export DISPLAY=:99.0"
   - "sh -e /etc/init.d/xvfb start"
@@ -24,9 +25,9 @@ script:
   - ./manage.sh styles
   - ./manage.sh grunt_build
   - ./manage.sh tests
-  - ./manage.sh py_test_coverage
 after_success:
-  coveralls
+  - ./manage.sh py_test_coverage
+  - coveralls
 notifications:
   irc:
     channels:
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 01d1e1497..691a1e7ba 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -3,8 +3,7 @@ mock==2.0.0
 nose2[coverage-plugin]
 pep8==1.7.0
 plone.testing==5.0.0
-robotframework-selenium2library==1.8.0
-robotsuite==1.7.0
+splinter==0.7.5
 transifex-client==0.12.2
 unittest2==1.1.0
 zope.testrunner==4.5.1
diff --git a/searx/answerers/__init__.py b/searx/answerers/__init__.py
index 8f5951c75..444316f11 100644
--- a/searx/answerers/__init__.py
+++ b/searx/answerers/__init__.py
@@ -1,8 +1,12 @@
 from os import listdir
 from os.path import realpath, dirname, join, isdir
+from sys import version_info
 from searx.utils import load_module
 from collections import defaultdict
 
+if version_info[0] == 3:
+    unicode = str
+
 answerers_dir = dirname(realpath(__file__))
 
 
@@ -10,7 +14,7 @@ answerers_dir = dirname(realpath(__file__))
 def load_answerers():
     answerers = []
     for filename in listdir(answerers_dir):
-        if not isdir(join(answerers_dir, filename)):
+        if not isdir(join(answerers_dir, filename)) or filename.startswith('_'):
             continue
         module = load_module('answerer.py', join(answerers_dir, filename))
         if not hasattr(module, 'keywords') or not isinstance(module.keywords, tuple) or not len(module.keywords):
@@ -30,12 +34,12 @@ def get_answerers_by_keywords(answerers):
 def ask(query):
     results = []
-    query_parts = filter(None, query.query.split())
+    query_parts = list(filter(None, query.query.split()))
 
-    if query_parts[0] not in answerers_by_keywords:
+    if query_parts[0].decode('utf-8') not in answerers_by_keywords:
         return results
 
-    for answerer in answerers_by_keywords[query_parts[0]]:
+    for answerer in answerers_by_keywords[query_parts[0].decode('utf-8')]:
         result = answerer(query)
         if result:
             results.append(result)
diff --git a/searx/answerers/random/answerer.py b/searx/answerers/random/answerer.py
index 510d9f5be..f2b8bf3e5 100644
--- a/searx/answerers/random/answerer.py
+++ b/searx/answerers/random/answerer.py
@@ -1,5 +1,6 @@
 import random
 import string
+import sys
 from flask_babel import gettext
 
 # required answerer attribute
@@ -8,7 +9,11 @@ keywords = ('random',)
 
 random_int_max = 2**31
 
-random_string_letters = string.lowercase + string.digits + string.uppercase
+if sys.version_info[0] == 2:
+    random_string_letters = string.lowercase + string.digits + string.uppercase
+else:
+    unicode = str
+    random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase
 
 
 def random_string():
@@ -24,9 +29,9 @@ def random_int():
     return unicode(random.randint(-random_int_max, random_int_max))
 
 
-random_types = {u'string': random_string,
-                u'int': random_int,
-                u'float': random_float}
+random_types = {b'string': random_string,
+                b'int': random_int,
+                b'float': random_float}
 
 
 # required answerer function
diff --git a/searx/answerers/statistics/answerer.py b/searx/answerers/statistics/answerer.py
index a04695f56..73dd25cfd 100644
--- a/searx/answerers/statistics/answerer.py
+++ b/searx/answerers/statistics/answerer.py
@@ -1,8 +1,12 @@
+from sys import version_info
 from functools import reduce
 from operator import mul
 
 from flask_babel import gettext
 
+if version_info[0] == 3:
+    unicode = str
+
 keywords = ('min',
             'max',
             'avg',
@@ -19,22 +23,22 @@ def answer(query):
         return []
 
     try:
-        args = map(float, parts[1:])
+        args = list(map(float, parts[1:]))
     except:
         return []
 
     func = parts[0]
     answer = None
 
-    if func == 'min':
+    if func == b'min':
         answer = min(args)
-    elif func == 'max':
+    elif func == b'max':
         answer = max(args)
-    elif func == 'avg':
+    elif func == b'avg':
         answer = sum(args) / len(args)
-    elif func == 'sum':
+    elif func == b'sum':
         answer = sum(args)
-    elif func == 'prod':
+    elif func == b'prod':
         answer = reduce(mul, args, 1)
 
     if answer is None:
diff --git a/searx/autocomplete.py b/searx/autocomplete.py
index b360af9f6..de0623a8a 100644
--- a/searx/autocomplete.py
+++ b/searx/autocomplete.py
@@ -18,7 +18,6 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 
 from lxml import etree
 from json import loads
-from urllib import urlencode
 from searx import settings
 from searx.languages import language_codes
 from searx.engines import (
@@ -26,6 +25,11 @@ from searx.engines import (
 )
 from searx.poolrequests import get as http_get
 
+try:
+    from urllib import urlencode
+except:
+    from urllib.parse import urlencode
+
 
 def get(*args, **kwargs):
     if 'timeout' not in kwargs:
diff --git a/searx/engines/1337x.py b/searx/engines/1337x.py
index c6bc3cb6d..0de04bd95 100644
--- a/searx/engines/1337x.py
+++ b/searx/engines/1337x.py
@@ -1,8 +1,7 @@
-from urllib import quote
 from lxml import html
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size
-from urlparse import urljoin
+from searx.url_utils import quote, urljoin
 
 url = 'https://1337x.to/'
 search_url = url + 'search/{search_term}/{pageno}/'
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index 77184a282..023ec409a 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -72,12 +72,11 @@ def load_engine(engine_data):
             if engine_data['categories'] == 'none':
                 engine.categories = []
             else:
-                engine.categories = map(
-                    str.strip, engine_data['categories'].split(','))
+                engine.categories = list(map(str.strip, engine_data['categories'].split(',')))
             continue
         setattr(engine, param_name, engine_data[param_name])
 
-    for arg_name, arg_value in engine_default_args.iteritems():
+    for arg_name, arg_value in engine_default_args.items():
         if not hasattr(engine, arg_name):
             setattr(engine, arg_name, arg_value)
diff --git a/searx/engines/archlinux.py b/searx/engines/archlinux.py
index dca825790..cad06f8c6 100644
--- a/searx/engines/archlinux.py
+++ b/searx/engines/archlinux.py
@@ -11,10 +11,9 @@
  @parse url, title
 """
 
-from urlparse import urljoin
-from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode, urljoin
 
 # engine dependent config
 categories = ['it']
diff --git a/searx/engines/base.py b/searx/engines/base.py
index a552453ce..ff006a3bc 100755
--- a/searx/engines/base.py
+++ b/searx/engines/base.py
@@ -14,10 +14,10 @@
 """
 
 from lxml import etree
-from urllib import urlencode
-from searx.utils import searx_useragent
 from datetime import datetime
 import re
+from searx.url_utils import urlencode
+from searx.utils import searx_useragent
 
 categories = ['science']
 
@@ -73,7 +73,7 @@ def request(query, params):
 def response(resp):
     results = []
 
-    search_results = etree.XML(resp.content)
+    search_results = etree.XML(resp.text)
 
     for entry in search_results.xpath('./result/doc'):
         content = "No description available"
diff --git a/searx/engines/bing.py b/searx/engines/bing.py
index 4e7ead82d..052d567ea 100644
--- a/searx/engines/bing.py
+++ b/searx/engines/bing.py
@@ -13,9 +13,9 @@
  @todo publishedDate
 """
 
-from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['general']
diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py
index 97f6dca37..e79740e50 100644
--- a/searx/engines/bing_images.py
+++ b/searx/engines/bing_images.py
@@ -15,11 +15,11 @@
  limited response to 10 images
 """
 
-from urllib import urlencode
 from lxml import html
 from json import loads
 import re
 from searx.engines.bing import _fetch_supported_languages, supported_languages_url
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['images']
diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py
index 765bcd38e..8e3cc517e 100644
--- a/searx/engines/bing_news.py
+++ b/searx/engines/bing_news.py
@@ -11,13 +11,12 @@
  @parse url, title, content, publishedDate, thumbnail
 """
 
-from urllib import urlencode
-from urlparse import urlparse, parse_qsl
 from datetime import datetime
 from dateutil import parser
 from lxml import etree
 from searx.utils import list_get
 from searx.engines.bing import _fetch_supported_languages, supported_languages_url
+from searx.url_utils import urlencode, urlparse, parse_qsl
 
 # engine dependent config
 categories = ['news']
@@ -86,7 +85,7 @@ def request(query, params):
 def response(resp):
     results = []
 
-    rss = etree.fromstring(resp.content)
+    rss = etree.fromstring(resp.text)
 
     ns = rss.nsmap
diff --git a/searx/engines/blekko_images.py b/searx/engines/blekko_images.py
index c0664f390..f71645634 100644
--- a/searx/engines/blekko_images.py
+++ b/searx/engines/blekko_images.py
@@ -11,7 +11,7 @@
 """
 
 from json import loads
-from urllib import urlencode
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['images']
diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py
index 33c8355de..40438673f 100644
--- a/searx/engines/btdigg.py
+++ b/searx/engines/btdigg.py
@@ -10,11 +10,10 @@
  @parse url, title, content, seed, leech, magnetlink
 """
 
-from urlparse import urljoin
-from urllib import quote
 from lxml import html
 from operator import itemgetter
 from searx.engines.xpath import extract_text
+from searx.url_utils import quote, urljoin
 from searx.utils import get_torrent_size
 
 # engine dependent config
@@ -38,7 +37,7 @@ def request(query, params):
 def response(resp):
     results = []
 
-    dom = html.fromstring(resp.content)
+    dom = html.fromstring(resp.text)
 
     search_res = dom.xpath('//div[@id="search_res"]/table/tr')
diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py
index bc839cfb5..1218d4849 100644
--- a/searx/engines/currency_convert.py
+++ b/searx/engines/currency_convert.py
@@ -1,21 +1,25 @@
-from datetime import datetime
+import json
 import re
 import os
-import json
+import sys
 import unicodedata
+from datetime import datetime
+
+if sys.version_info[0] == 3:
+    unicode = str
 
 categories = []
 url = 'https://download.finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X'
 weight = 100
 
-parser_re = re.compile(u'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)  # noqa
+parser_re = re.compile(b'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
 
 db = 1
 
 
 def normalize_name(name):
-    name = name.lower().replace('-', ' ').rstrip('s')
+    name = name.decode('utf-8').lower().replace('-', ' ').rstrip('s')
     name = re.sub(' +', ' ', name)
     return unicodedata.normalize('NFKD', name).lower()
 
 
@@ -35,7 +39,7 @@ def iso4217_to_name(iso4217, language):
 
 def request(query, params):
-    m = parser_re.match(unicode(query, 'utf8'))
+    m = parser_re.match(query)
 
     if not m:
         # wrong query
         return params
diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py
index 8c69aafe0..fad7e596c 100644
--- a/searx/engines/dailymotion.py
+++ b/searx/engines/dailymotion.py
@@ -12,10 +12,9 @@
  @todo set content-parameter with correct data
 """
 
-from urllib import urlencode
 from json import loads
 from datetime import datetime
-from requests import get
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['videos']
diff --git a/searx/engines/deezer.py b/searx/engines/deezer.py
index 3db1af3d2..af63478fb 100644
--- a/searx/engines/deezer.py
+++ b/searx/engines/deezer.py
@@ -11,7 +11,7 @@
 """
 
 from json import loads
-from urllib import urlencode
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['music']
@@ -30,8 +30,7 @@ embedded_url = ''
+cid_re = re.compile(r'client_id:"([^"]*)"', re.I | re.U)
+
 
 def get_client_id():
     response = http_get("https://soundcloud.com")
-    rx_namespace = {"re": "http://exslt.org/regular-expressions"}
 
     if response.ok:
-        tree = etree.parse(StringIO(response.content), etree.HTMLParser())
-        script_tags = tree.xpath("//script[re:match(@src, '(.*app.*js)')]", namespaces=rx_namespace)
+        tree = html.fromstring(response.content)
+        script_tags = tree.xpath("//script[contains(@src, '/assets/app')]")
         app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None]
 
         # extracts valid app_js urls from soundcloud.com content
@@ -51,7 +56,7 @@ def get_client_id():
             # gets app_js and searches for the clientid
             response = http_get(app_js_url)
             if response.ok:
-                cids = re.search(r'client_id:"([^"]*)"', response.content, re.M | re.I)
+                cids = cid_re.search(response.text)
                 if cids is not None and len(cids.groups()):
                     return cids.groups()[0]
 
     logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")
diff --git a/searx/engines/spotify.py b/searx/engines/spotify.py
index 249ba91ef..aed756be3 100644
--- a/searx/engines/spotify.py
+++ b/searx/engines/spotify.py
@@ -11,7 +11,7 @@
 """
 
 from json import loads
-from urllib import urlencode
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['music']
@@ -29,8 +29,7 @@ embedded_url = '
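
The 28-line body of the new searx/url_utils.py is not visible in this excerpt, but the names the engine hunks import from it are: quote, urljoin, urlencode, urlparse and parse_qsl all appear above. A minimal sketch of such a shim, assuming it does nothing more than re-export the stdlib names from their Python 2 and Python 3 locations (an assumption about the file's contents, not the verbatim module):

    # sketch of searx/url_utils.py; assumed contents, not the actual file
    from sys import version_info

    if version_info[0] == 2:
        # Python 2 spreads these helpers across two modules
        from urllib import quote, urlencode
        from urlparse import parse_qsl, urljoin, urlparse
    else:
        # Python 3 consolidates them under urllib.parse
        from urllib.parse import parse_qsl, quote, urlencode, urljoin, urlparse

With such a module in place, an engine only ever writes "from searx.url_utils import urlencode" and never branches on the interpreter version itself, which is the pattern every engine hunk above switches to.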
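
A second recurring change above is wrapping map() and filter() in list(...), as in searx/answerers/__init__.py, searx/engines/__init__.py and the statistics answerer. On Python 3 these builtins return one-shot iterators, so later len() calls or indexing would fail. A small illustration of the failure mode, using made-up values rather than code from the patch:

    # why the patch wraps map()/filter() in list() for py3
    parts = 'avg 1 2 3'.split()

    lazy = map(float, parts[1:])    # py3: a lazy map object, not a list
    # len(lazy) and lazy[0] both raise TypeError here, and a single
    # pass over lazy exhausts it for good.

    args = list(map(float, parts[1:]))   # materialized, py2-like semantics
    print(sum(args) / len(args))         # -> 2.0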