diff --git a/Makefile b/Makefile index 2f10c62bc..636ed275b 100644 --- a/Makefile +++ b/Makefile @@ -213,10 +213,6 @@ gecko.driver: PHONY += test test.sh test.pylint test.pep8 test.unit test.coverage test.robot test: buildenv test.pylint test.pep8 test.unit gecko.driver test.robot -ifeq ($(PY),2) -test.pylint: - @echo "LINT skip liniting py2" -else # TODO: balance linting with pylint test.pylint: pyenvinstall @@ -225,7 +221,6 @@ test.pylint: pyenvinstall searx/testing.py \ searx/engines/gigablast.py \ ) -endif # ignored rules: # E402 module level import not at top of file diff --git a/manage.sh b/manage.sh index b3c57bf88..78571e45b 100755 --- a/manage.sh +++ b/manage.sh @@ -39,7 +39,7 @@ install_geckodriver() { return fi GECKODRIVER_VERSION="v0.24.0" - PLATFORM="`python -c "import six; import platform; six.print_(platform.system().lower(), platform.architecture()[0])"`" + PLATFORM="`python3 -c "import platform; print(platform.system().lower(), platform.architecture()[0])"`" case "$PLATFORM" in "linux 32bit" | "linux2 32bit") ARCH="linux32";; "linux 64bit" | "linux2 64bit") ARCH="linux64";; @@ -136,7 +136,7 @@ docker_build() { # Check consistency between the git tag and the searx/version.py file # /!\ HACK : parse Python file with bash /!\ # otherwise it is not possible build the docker image without all Python dependencies ( version.py loads __init__.py ) - # SEARX_PYTHON_VERSION=$(python -c "import six; import searx.version; six.print_(searx.version.VERSION_STRING)") + # SEARX_PYTHON_VERSION=$(python3 -c "import six; import searx.version; six.print_(searx.version.VERSION_STRING)") SEARX_PYTHON_VERSION=$(cat searx/version.py | grep "\(VERSION_MAJOR\|VERSION_MINOR\|VERSION_BUILD\) =" | cut -d\= -f2 | sed -e 's/^[[:space:]]*//' | paste -sd "." 
-) if [ $(echo "$SEARX_GIT_VERSION" | cut -d- -f1) != "$SEARX_PYTHON_VERSION" ]; then echo "Inconsistency between the last git tag and the searx/version.py file" diff --git a/searx/__init__.py b/searx/__init__.py index 1ba03ad63..80a7ffc76 100644 --- a/searx/__init__.py +++ b/searx/__init__.py @@ -21,12 +21,8 @@ from os import environ from os.path import realpath, dirname, join, abspath, isfile from io import open from ssl import OPENSSL_VERSION_INFO, OPENSSL_VERSION -try: - from yaml import safe_load -except: - from sys import exit, stderr - stderr.write('[E] install pyyaml\n') - exit(2) +from yaml import safe_load + searx_dir = abspath(dirname(__file__)) engine_dir = dirname(realpath(__file__)) diff --git a/searx/answerers/__init__.py b/searx/answerers/__init__.py index 444316f11..97e7e5854 100644 --- a/searx/answerers/__init__.py +++ b/searx/answerers/__init__.py @@ -1,12 +1,8 @@ from os import listdir from os.path import realpath, dirname, join, isdir -from sys import version_info from searx.utils import load_module from collections import defaultdict -if version_info[0] == 3: - unicode = str - answerers_dir = dirname(realpath(__file__)) @@ -36,10 +32,10 @@ def ask(query): results = [] query_parts = list(filter(None, query.query.split())) - if query_parts[0].decode('utf-8') not in answerers_by_keywords: + if query_parts[0] not in answerers_by_keywords: return results - for answerer in answerers_by_keywords[query_parts[0].decode('utf-8')]: + for answerer in answerers_by_keywords[query_parts[0]]: result = answerer(query) if result: results.append(result) diff --git a/searx/answerers/random/answerer.py b/searx/answerers/random/answerer.py index 4aafa2cfd..d5223e517 100644 --- a/searx/answerers/random/answerer.py +++ b/searx/answerers/random/answerer.py @@ -1,7 +1,6 @@ import hashlib import random import string -import sys import uuid from flask_babel import gettext @@ -10,12 +9,7 @@ from flask_babel import gettext keywords = ('random',) random_int_max = 2**31 - 
-if sys.version_info[0] == 2: - random_string_letters = string.lowercase + string.digits + string.uppercase -else: - unicode = str - random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase +random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase def random_characters(): @@ -24,32 +18,32 @@ def random_characters(): def random_string(): - return u''.join(random_characters()) + return ''.join(random_characters()) def random_float(): - return unicode(random.random()) + return str(random.random()) def random_int(): - return unicode(random.randint(-random_int_max, random_int_max)) + return str(random.randint(-random_int_max, random_int_max)) def random_sha256(): m = hashlib.sha256() m.update(''.join(random_characters()).encode()) - return unicode(m.hexdigest()) + return str(m.hexdigest()) def random_uuid(): - return unicode(uuid.uuid4()) + return str(uuid.uuid4()) -random_types = {b'string': random_string, - b'int': random_int, - b'float': random_float, - b'sha256': random_sha256, - b'uuid': random_uuid} +random_types = {'string': random_string, + 'int': random_int, + 'float': random_float, + 'sha256': random_sha256, + 'uuid': random_uuid} # required answerer function @@ -70,4 +64,4 @@ def answer(query): def self_info(): return {'name': gettext('Random value generator'), 'description': gettext('Generate different random values'), - 'examples': [u'random {}'.format(x.decode('utf-8')) for x in random_types]} + 'examples': ['random {}'.format(x) for x in random_types]} diff --git a/searx/answerers/statistics/answerer.py b/searx/answerers/statistics/answerer.py index 73dd25cfd..abd4be7f5 100644 --- a/searx/answerers/statistics/answerer.py +++ b/searx/answerers/statistics/answerer.py @@ -1,11 +1,8 @@ -from sys import version_info from functools import reduce from operator import mul from flask_babel import gettext -if version_info[0] == 3: - unicode = str keywords = ('min', 'max', @@ -30,21 +27,21 @@ def 
answer(query): func = parts[0] answer = None - if func == b'min': + if func == 'min': answer = min(args) - elif func == b'max': + elif func == 'max': answer = max(args) - elif func == b'avg': + elif func == 'avg': answer = sum(args) / len(args) - elif func == b'sum': + elif func == 'sum': answer = sum(args) - elif func == b'prod': + elif func == 'prod': answer = reduce(mul, args, 1) if answer is None: return [] - return [{'answer': unicode(answer)}] + return [{'answer': str(answer)}] # required answerer function diff --git a/searx/autocomplete.py b/searx/autocomplete.py index 00a9f9553..9bc6a98f2 100644 --- a/searx/autocomplete.py +++ b/searx/autocomplete.py @@ -16,19 +16,16 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. ''' -import sys from lxml import etree from json import loads +from urllib.parse import urlencode + from searx import settings from searx.languages import language_codes from searx.engines import ( categories, engines, engine_shortcuts ) from searx.poolrequests import get as http_get -from searx.url_utils import urlencode - -if sys.version_info[0] == 3: - unicode = str def get(*args, **kwargs): @@ -85,22 +82,22 @@ def searx_bang(full_query): engine_query = full_query.getSearchQuery()[1:] for lc in language_codes: - lang_id, lang_name, country, english_name = map(unicode.lower, lc) + lang_id, lang_name, country, english_name = map(str.lower, lc) # check if query starts with language-id if lang_id.startswith(engine_query): if len(engine_query) <= 2: - results.append(u':{lang_id}'.format(lang_id=lang_id.split('-')[0])) + results.append(':{lang_id}'.format(lang_id=lang_id.split('-')[0])) else: - results.append(u':{lang_id}'.format(lang_id=lang_id)) + results.append(':{lang_id}'.format(lang_id=lang_id)) # check if query starts with language name if lang_name.startswith(engine_query) or english_name.startswith(engine_query): - results.append(u':{lang_name}'.format(lang_name=lang_name)) + 
results.append(':{lang_name}'.format(lang_name=lang_name)) # check if query starts with country if country.startswith(engine_query.replace('_', ' ')): - results.append(u':{country}'.format(country=country.replace(' ', '_'))) + results.append(':{country}'.format(country=country.replace(' ', '_'))) # remove duplicates result_set = set(results) diff --git a/searx/engines/1337x.py b/searx/engines/1337x.py index 0de04bd95..76a7a1634 100644 --- a/searx/engines/1337x.py +++ b/searx/engines/1337x.py @@ -1,7 +1,8 @@ +from urllib.parse import quote, urljoin from lxml import html from searx.engines.xpath import extract_text from searx.utils import get_torrent_size -from searx.url_utils import quote, urljoin + url = 'https://1337x.to/' search_url = url + 'search/{search_term}/{pageno}/' diff --git a/searx/engines/acgsou.py b/searx/engines/acgsou.py index cca28f0db..d5d3e3178 100644 --- a/searx/engines/acgsou.py +++ b/searx/engines/acgsou.py @@ -9,9 +9,9 @@ @parse url, title, content, seed, leech, torrentfile """ +from urllib.parse import urlencode from lxml import html from searx.engines.xpath import extract_text -from searx.url_utils import urlencode from searx.utils import get_torrent_size, int_or_zero # engine dependent config @@ -63,7 +63,7 @@ def response(resp): except: pass # I didn't add download/seed/leech count since as I figured out they are generated randomly everytime - content = u'Category: "{category}".' + content = 'Category: "{category}".' 
content = content.format(category=category) results.append({'url': href, diff --git a/searx/engines/apkmirror.py b/searx/engines/apkmirror.py index f2ee12b29..4e6dcd486 100644 --- a/searx/engines/apkmirror.py +++ b/searx/engines/apkmirror.py @@ -9,9 +9,10 @@ @parse url, title, thumbnail_src """ +from urllib.parse import urlencode from lxml import html from searx.engines.xpath import extract_text -from searx.url_utils import urlencode + # engine dependent config categories = ['it'] diff --git a/searx/engines/archlinux.py b/searx/engines/archlinux.py index dce862f55..e2f44b0f5 100644 --- a/searx/engines/archlinux.py +++ b/searx/engines/archlinux.py @@ -11,9 +11,9 @@ @parse url, title """ +from urllib.parse import urlencode, urljoin from lxml import html from searx.engines.xpath import extract_text -from searx.url_utils import urlencode, urljoin # engine dependent config categories = ['it'] @@ -105,7 +105,7 @@ def request(query, params): # if our language is hosted on the main site, we need to add its name # to the query in order to narrow the results to that language if language in main_langs: - query += b' (' + main_langs[language] + b')' + query += ' (' + main_langs[language] + ')' # prepare the request parameters query = urlencode({'search': query}) diff --git a/searx/engines/arxiv.py b/searx/engines/arxiv.py index e3c871d17..77ddc572e 100644 --- a/searx/engines/arxiv.py +++ b/searx/engines/arxiv.py @@ -11,9 +11,9 @@ More info on api: https://arxiv.org/help/api/user-manual """ +from urllib.parse import urlencode from lxml import html from datetime import datetime -from searx.url_utils import urlencode categories = ['science'] @@ -30,7 +30,7 @@ def request(query, params): # basic search offset = (params['pageno'] - 1) * number_of_results - string_args = dict(query=query.decode('utf-8'), + string_args = dict(query=query, offset=offset, number_of_results=number_of_results) diff --git a/searx/engines/base.py b/searx/engines/base.py index f1b1cf671..0114f9798 100755 
--- a/searx/engines/base.py +++ b/searx/engines/base.py @@ -13,10 +13,10 @@ More info on api: http://base-search.net/about/download/base_interface.pdf """ +from urllib.parse import urlencode from lxml import etree from datetime import datetime import re -from searx.url_utils import urlencode from searx.utils import searx_useragent @@ -55,7 +55,7 @@ shorcut_dict = { def request(query, params): # replace shortcuts with API advanced search keywords for key in shorcut_dict.keys(): - query = re.sub(key, shorcut_dict[key], str(query)) + query = re.sub(key, shorcut_dict[key], query) # basic search offset = (params['pageno'] - 1) * number_of_results diff --git a/searx/engines/bing.py b/searx/engines/bing.py index afb776acd..c7b619369 100644 --- a/searx/engines/bing.py +++ b/searx/engines/bing.py @@ -14,10 +14,10 @@ """ import re +from urllib.parse import urlencode from lxml import html from searx import logger, utils from searx.engines.xpath import extract_text -from searx.url_utils import urlencode from searx.utils import match_language, gen_useragent, eval_xpath logger = logger.getChild('bing engine') @@ -47,7 +47,7 @@ def request(query, params): else: lang = match_language(params['language'], supported_languages, language_aliases) - query = u'language:{} {}'.format(lang.split('-')[0].upper(), query.decode('utf-8')).encode('utf-8') + query = 'language:{} {}'.format(lang.split('-')[0].upper(), query) search_path = search_string.format( query=urlencode({'q': query}), diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py index 93b25008c..10da42b5c 100644 --- a/searx/engines/bing_images.py +++ b/searx/engines/bing_images.py @@ -12,10 +12,10 @@ """ +from urllib.parse import urlencode from lxml import html from json import loads import re -from searx.url_utils import urlencode from searx.utils import match_language from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases @@ -91,7 +91,7 @@ def response(resp): # 
strip 'Unicode private use area' highlighting, they render to Tux # the Linux penguin and a standing diamond on my machine... - title = m.get('t', '').replace(u'\ue000', '').replace(u'\ue001', '') + title = m.get('t', '').replace('\ue000', '').replace('\ue001', '') results.append({'template': 'images.html', 'url': m['purl'], 'thumbnail_src': m['turl'], diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py index d13be777c..fbe51faed 100644 --- a/searx/engines/bing_news.py +++ b/searx/engines/bing_news.py @@ -13,10 +13,9 @@ from datetime import datetime from dateutil import parser +from urllib.parse import urlencode, urlparse, parse_qsl from lxml import etree from searx.utils import list_get, match_language -from searx.url_utils import urlencode, urlparse, parse_qsl - from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases # engine dependent config diff --git a/searx/engines/bing_videos.py b/searx/engines/bing_videos.py index f048f0d8e..63264de6f 100644 --- a/searx/engines/bing_videos.py +++ b/searx/engines/bing_videos.py @@ -12,7 +12,7 @@ from json import loads from lxml import html -from searx.url_utils import urlencode +from urllib.parse import urlencode from searx.utils import match_language from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py index 82eedc24b..2faade3e2 100644 --- a/searx/engines/btdigg.py +++ b/searx/engines/btdigg.py @@ -12,8 +12,8 @@ from lxml import html from operator import itemgetter +from urllib.parse import quote, urljoin from searx.engines.xpath import extract_text -from searx.url_utils import quote, urljoin from searx.utils import get_torrent_size # engine dependent config diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py index 8eab8f673..c6067c4a8 100644 --- a/searx/engines/currency_convert.py +++ b/searx/engines/currency_convert.py @@ 
-1,26 +1,23 @@ import json import re import os -import sys import unicodedata from io import open from datetime import datetime -if sys.version_info[0] == 3: - unicode = str categories = [] url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}' weight = 100 -parser_re = re.compile(b'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) +parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) db = 1 def normalize_name(name): - name = name.decode('utf-8').lower().replace('-', ' ').rstrip('s') + name = name.lower().replace('-', ' ').rstrip('s') name = re.sub(' +', ' ', name) return unicodedata.normalize('NFKD', name).lower() diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py index 1038e64bf..1e24e41da 100644 --- a/searx/engines/dailymotion.py +++ b/searx/engines/dailymotion.py @@ -14,7 +14,7 @@ from json import loads from datetime import datetime -from searx.url_utils import urlencode +from urllib.parse import urlencode from searx.utils import match_language, html_to_text # engine dependent config diff --git a/searx/engines/deezer.py b/searx/engines/deezer.py index af63478fb..48c0429a7 100644 --- a/searx/engines/deezer.py +++ b/searx/engines/deezer.py @@ -11,7 +11,7 @@ """ from json import loads -from searx.url_utils import urlencode +from urllib.parse import urlencode # engine dependent config categories = ['music'] @@ -50,7 +50,7 @@ def response(resp): if url.startswith('http://'): url = 'https' + url[4:] - content = u'{} - {} - {}'.format( + content = '{} - {} - {}'.format( result['artist']['name'], result['album']['title'], result['title']) diff --git a/searx/engines/deviantart.py b/searx/engines/deviantart.py index a0e27e622..2bd21fa5d 100644 --- a/searx/engines/deviantart.py +++ b/searx/engines/deviantart.py @@ -14,8 +14,9 @@ from lxml import html import re +from urllib.parse import urlencode from searx.engines.xpath import extract_text -from searx.url_utils import urlencode + # engine dependent 
config categories = ['images'] diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py index 423af0971..5a1fea3cf 100644 --- a/searx/engines/dictzone.py +++ b/searx/engines/dictzone.py @@ -10,15 +10,15 @@ """ import re +from urllib.parse import urljoin from lxml import html from searx.utils import is_valid_lang, eval_xpath -from searx.url_utils import urljoin categories = ['general'] -url = u'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}' +url = 'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}' weight = 100 -parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I) +parser_re = re.compile('.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I) results_xpath = './/table[@id="r"]/tr' @@ -37,7 +37,7 @@ def request(query, params): params['url'] = url.format(from_lang=from_lang[2], to_lang=to_lang[2], - query=query.decode('utf-8')) + query=query) return params diff --git a/searx/engines/digbt.py b/searx/engines/digbt.py index ff2f94593..e2c0389c6 100644 --- a/searx/engines/digbt.py +++ b/searx/engines/digbt.py @@ -10,14 +10,11 @@ @parse url, title, content, magnetlink """ -from sys import version_info +from urllib.parse import urljoin from lxml import html from searx.engines.xpath import extract_text from searx.utils import get_torrent_size -from searx.url_utils import urljoin -if version_info[0] == 3: - unicode = str categories = ['videos', 'music', 'files'] paging = True diff --git a/searx/engines/digg.py b/searx/engines/digg.py index 073410eb0..24a932d53 100644 --- a/searx/engines/digg.py +++ b/searx/engines/digg.py @@ -14,8 +14,8 @@ import random import string from dateutil import parser from json import loads +from urllib.parse import urlencode from lxml import html -from searx.url_utils import urlencode from datetime import datetime # engine dependent config diff --git a/searx/engines/doku.py b/searx/engines/doku.py index d20e66026..513ffda89 100644 --- a/searx/engines/doku.py +++ b/searx/engines/doku.py @@ -9,10 +9,10 @@ # @stable yes 
# @parse (general) url, title, content +from urllib.parse import urlencode from lxml.html import fromstring from searx.engines.xpath import extract_text from searx.utils import eval_xpath -from searx.url_utils import urlencode # engine dependent config categories = ['general'] # TODO , 'images', 'music', 'videos', 'files' diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index 6e07b5021..fb1ea2b2d 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -15,9 +15,9 @@ from lxml.html import fromstring from json import loads +from urllib.parse import urlencode from searx.engines.xpath import extract_text from searx.poolrequests import get -from searx.url_utils import urlencode from searx.utils import match_language, eval_xpath # engine dependent config diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py index 79d10c303..73154a525 100644 --- a/searx/engines/duckduckgo_definitions.py +++ b/searx/engines/duckduckgo_definitions.py @@ -10,11 +10,11 @@ DuckDuckGo (definitions) """ import json +from urllib.parse import urlencode from lxml import html from re import compile from searx.engines.xpath import extract_text from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url, language_aliases -from searx.url_utils import urlencode from searx.utils import html_to_text, match_language url = 'https://api.duckduckgo.com/'\ diff --git a/searx/engines/duckduckgo_images.py b/searx/engines/duckduckgo_images.py index 89924b71c..38e141f8b 100644 --- a/searx/engines/duckduckgo_images.py +++ b/searx/engines/duckduckgo_images.py @@ -14,13 +14,13 @@ """ from json import loads +from urllib.parse import urlencode from searx.engines.xpath import extract_text from searx.engines.duckduckgo import ( _fetch_supported_languages, supported_languages_url, get_region_code, language_aliases ) from searx.poolrequests import get -from searx.url_utils import urlencode # engine dependent 
config categories = ['images'] diff --git a/searx/engines/duden.py b/searx/engines/duden.py index cf2f1a278..a711f422e 100644 --- a/searx/engines/duden.py +++ b/searx/engines/duden.py @@ -10,9 +10,9 @@ from lxml import html, etree import re +from urllib.parse import quote, urljoin from searx.engines.xpath import extract_text from searx.utils import eval_xpath -from searx.url_utils import quote, urljoin from searx import logger categories = ['general'] diff --git a/searx/engines/etools.py b/searx/engines/etools.py index a9eb0980d..efc102ef6 100644 --- a/searx/engines/etools.py +++ b/searx/engines/etools.py @@ -10,8 +10,8 @@ """ from lxml import html +from urllib.parse import quote from searx.engines.xpath import extract_text -from searx.url_utils import quote from searx.utils import eval_xpath categories = ['general'] diff --git a/searx/engines/fdroid.py b/searx/engines/fdroid.py index 4066dc716..a2a5114df 100644 --- a/searx/engines/fdroid.py +++ b/searx/engines/fdroid.py @@ -9,9 +9,9 @@ @parse url, title, content """ +from urllib.parse import urlencode from lxml import html from searx.engines.xpath import extract_text -from searx.url_utils import urlencode # engine dependent config categories = ['files'] diff --git a/searx/engines/filecrop.py b/searx/engines/filecrop.py index ed57a6bf3..eef5be6e8 100644 --- a/searx/engines/filecrop.py +++ b/searx/engines/filecrop.py @@ -1,9 +1,6 @@ -from searx.url_utils import urlencode +from html.parser import HTMLParser +from urllib.parse import urlencode -try: - from HTMLParser import HTMLParser -except: - from html.parser import HTMLParser url = 'http://www.filecrop.com/' search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}' # noqa diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py index de1769370..b23c447b8 100644 --- a/searx/engines/flickr.py +++ b/searx/engines/flickr.py @@ -14,7 +14,7 @@ """ from json import loads -from 
searx.url_utils import urlencode +from urllib.parse import urlencode categories = ['images'] diff --git a/searx/engines/flickr_noapi.py b/searx/engines/flickr_noapi.py index 1cbb3e0a9..4bcf837cb 100644 --- a/searx/engines/flickr_noapi.py +++ b/searx/engines/flickr_noapi.py @@ -15,8 +15,8 @@ from json import loads from time import time import re +from urllib.parse import urlencode from searx.engines import logger -from searx.url_utils import urlencode from searx.utils import ecma_unescape, html_to_text logger = logger.getChild('flickr-noapi') @@ -117,10 +117,10 @@ def response(resp): 'img_format': img_format, 'template': 'images.html' } - result['author'] = author.encode('utf-8', 'ignore').decode('utf-8') - result['source'] = source.encode('utf-8', 'ignore').decode('utf-8') - result['title'] = title.encode('utf-8', 'ignore').decode('utf-8') - result['content'] = content.encode('utf-8', 'ignore').decode('utf-8') + result['author'] = author.encode(errors='ignore').decode() + result['source'] = source.encode(errors='ignore').decode() + result['title'] = title.encode(errors='ignore').decode() + result['content'] = content.encode(errors='ignore').decode() results.append(result) return results diff --git a/searx/engines/framalibre.py b/searx/engines/framalibre.py index f3441fa5f..14b659b5f 100644 --- a/searx/engines/framalibre.py +++ b/searx/engines/framalibre.py @@ -10,13 +10,10 @@ @parse url, title, content, thumbnail, img_src """ -try: - from cgi import escape -except: - from html import escape +from html import escape +from urllib.parse import urljoin, urlencode from lxml import html from searx.engines.xpath import extract_text -from searx.url_utils import urljoin, urlencode # engine dependent config categories = ['it'] diff --git a/searx/engines/frinkiac.py b/searx/engines/frinkiac.py index a67b42dbe..5b174a687 100644 --- a/searx/engines/frinkiac.py +++ b/searx/engines/frinkiac.py @@ -10,7 +10,7 @@ Frinkiac (Images) """ from json import loads -from searx.url_utils 
import urlencode +from urllib.parse import urlencode categories = ['images'] diff --git a/searx/engines/genius.py b/searx/engines/genius.py index aa5afad9b..feb7d79d1 100644 --- a/searx/engines/genius.py +++ b/searx/engines/genius.py @@ -11,7 +11,7 @@ Genius """ from json import loads -from searx.url_utils import urlencode +from urllib.parse import urlencode from datetime import datetime # engine dependent config diff --git a/searx/engines/gentoo.py b/searx/engines/gentoo.py index a7a966cc9..b6bc99fab 100644 --- a/searx/engines/gentoo.py +++ b/searx/engines/gentoo.py @@ -11,9 +11,9 @@ @parse url, title """ +from urllib.parse import urlencode, urljoin from lxml import html from searx.engines.xpath import extract_text -from searx.url_utils import urlencode, urljoin # engine dependent config categories = ['it'] @@ -90,7 +90,7 @@ def request(query, params): # if our language is hosted on the main site, we need to add its name # to the query in order to narrow the results to that language if language in main_langs: - query += b' (' + (main_langs[language]).encode('utf-8') + b')' + query += ' (' + main_langs[language] + ')' # prepare the request parameters query = urlencode({'search': query}) diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py index b139c2a9f..1d71b18e9 100644 --- a/searx/engines/gigablast.py +++ b/searx/engines/gigablast.py @@ -14,8 +14,8 @@ import re from json import loads +from urllib.parse import urlencode # from searx import logger -from searx.url_utils import urlencode from searx.poolrequests import get # engine dependent config diff --git a/searx/engines/github.py b/searx/engines/github.py index eaa00da4f..80b50ceda 100644 --- a/searx/engines/github.py +++ b/searx/engines/github.py @@ -11,7 +11,7 @@ """ from json import loads -from searx.url_utils import urlencode +from urllib.parse import urlencode # engine dependent config categories = ['it'] diff --git a/searx/engines/google.py b/searx/engines/google.py index 
093ad6bd7..dfc8a0ab8 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -18,11 +18,11 @@ Definitions`_. # pylint: disable=invalid-name, missing-function-docstring +from urllib.parse import urlencode, urlparse from lxml import html from flask_babel import gettext from searx.engines.xpath import extract_text from searx import logger -from searx.url_utils import urlencode, urlparse from searx.utils import match_language, eval_xpath logger = logger.getChild('google engine') diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py index f0e9e27e3..9dd5fad2c 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -24,11 +24,10 @@ Definitions`_. """ -import urllib +from urllib.parse import urlencode, urlparse, unquote from lxml import html from flask_babel import gettext from searx import logger -from searx.url_utils import urlencode, urlparse from searx.utils import eval_xpath from searx.engines.xpath import extract_text @@ -87,7 +86,7 @@ def scrap_img_by_id(script, data_id): if 'gstatic.com/images' in line and data_id in line: url_line = _script[i + 1] img_url = url_line.split('"')[1] - img_url = urllib.parse.unquote(img_url.replace(r'\u00', r'%')) + img_url = unquote(img_url.replace(r'\u00', r'%')) return img_url diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py index c9cc75435..08875328c 100644 --- a/searx/engines/google_news.py +++ b/searx/engines/google_news.py @@ -10,9 +10,9 @@ @parse url, title, content, publishedDate """ +from urllib.parse import urlencode from lxml import html from searx.engines.google import _fetch_supported_languages, supported_languages_url -from searx.url_utils import urlencode from searx.utils import match_language # search-url diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py index fd6b2e3be..08af55902 100644 --- a/searx/engines/google_videos.py +++ b/searx/engines/google_videos.py @@ -12,9 +12,9 @@ from datetime import 
date, timedelta from json import loads +from urllib.parse import urlencode from lxml import html from searx.engines.xpath import extract_text -from searx.url_utils import urlencode import re # engine dependent config diff --git a/searx/engines/ina.py b/searx/engines/ina.py index ea509649f..cce580273 100644 --- a/searx/engines/ina.py +++ b/searx/engines/ina.py @@ -12,15 +12,12 @@ # @todo embedded (needs some md5 from video page) from json import loads +from urllib.parse import urlencode from lxml import html from dateutil import parser +from html.parser import HTMLParser from searx.engines.xpath import extract_text -from searx.url_utils import urlencode -try: - from HTMLParser import HTMLParser -except: - from html.parser import HTMLParser # engine dependent config categories = ['videos'] diff --git a/searx/engines/invidious.py b/searx/engines/invidious.py index cf76fd215..6ea942699 100644 --- a/searx/engines/invidious.py +++ b/searx/engines/invidious.py @@ -8,7 +8,7 @@ # @stable yes # @parse url, title, content, publishedDate, thumbnail, embedded, author, length -from searx.url_utils import quote_plus +from urllib.parse import quote_plus from dateutil import parser import time diff --git a/searx/engines/json_engine.py b/searx/engines/json_engine.py index 785b0c490..1e5c39ac4 100644 --- a/searx/engines/json_engine.py +++ b/searx/engines/json_engine.py @@ -1,11 +1,8 @@ from collections import Iterable from json import loads -from sys import version_info -from searx.url_utils import urlencode +from urllib.parse import urlencode from searx.utils import to_string -if version_info[0] == 3: - unicode = str search_url = None url_query = None @@ -37,8 +34,6 @@ def iterate(iterable): def is_iterable(obj): if type(obj) == str: return False - if type(obj) == unicode: - return False return isinstance(obj, Iterable) diff --git a/searx/engines/kickass.py b/searx/engines/kickass.py index 5e897c96f..af48d990b 100644 --- a/searx/engines/kickass.py +++ b/searx/engines/kickass.py @@ 
-12,9 +12,9 @@ from lxml import html from operator import itemgetter +from urllib.parse import quote, urljoin from searx.engines.xpath import extract_text from searx.utils import get_torrent_size, convert_str_to_int -from searx.url_utils import quote, urljoin # engine dependent config categories = ['videos', 'music', 'files'] diff --git a/searx/engines/mediawiki.py b/searx/engines/mediawiki.py index 0607ac93b..50ba74efc 100644 --- a/searx/engines/mediawiki.py +++ b/searx/engines/mediawiki.py @@ -14,7 +14,7 @@ from json import loads from string import Formatter -from searx.url_utils import urlencode, quote +from urllib.parse import urlencode, quote # engine dependent config categories = ['general'] @@ -79,7 +79,7 @@ def response(resp): if result.get('snippet', '').startswith('#REDIRECT'): continue url = base_url.format(language=resp.search_params['language']) +\ - 'wiki/' + quote(result['title'].replace(' ', '_').encode('utf-8')) + 'wiki/' + quote(result['title'].replace(' ', '_').encode()) # append result results.append({'url': url, diff --git a/searx/engines/microsoft_academic.py b/searx/engines/microsoft_academic.py index 9bac0069c..7426eef7e 100644 --- a/searx/engines/microsoft_academic.py +++ b/searx/engines/microsoft_academic.py @@ -12,8 +12,7 @@ Microsoft Academic (Science) from datetime import datetime from json import loads from uuid import uuid4 - -from searx.url_utils import urlencode +from urllib.parse import urlencode from searx.utils import html_to_text categories = ['images'] diff --git a/searx/engines/mixcloud.py b/searx/engines/mixcloud.py index 470c007ea..0606350a9 100644 --- a/searx/engines/mixcloud.py +++ b/searx/engines/mixcloud.py @@ -12,7 +12,7 @@ from json import loads from dateutil import parser -from searx.url_utils import urlencode +from urllib.parse import urlencode # engine dependent config categories = ['music'] diff --git a/searx/engines/nyaa.py b/searx/engines/nyaa.py index c57979a5f..ed8897ddc 100644 --- a/searx/engines/nyaa.py +++ 
b/searx/engines/nyaa.py @@ -10,8 +10,8 @@ """ from lxml import html +from urllib.parse import urlencode from searx.engines.xpath import extract_text -from searx.url_utils import urlencode from searx.utils import get_torrent_size, int_or_zero # engine dependent config diff --git a/searx/engines/openstreetmap.py b/searx/engines/openstreetmap.py index 257b1a1b3..5475c7a6d 100644 --- a/searx/engines/openstreetmap.py +++ b/searx/engines/openstreetmap.py @@ -30,8 +30,8 @@ route_re = re.compile('(?:from )?(.+) to (.+)') # do search-request def request(query, params): - params['url'] = base_url + search_string.format(query=query.decode('utf-8')) - params['route'] = route_re.match(query.decode('utf-8')) + params['url'] = base_url + search_string.format(query=query) + params['route'] = route_re.match(query) return params @@ -52,7 +52,7 @@ def response(resp): if 'display_name' not in r: continue - title = r['display_name'] or u'' + title = r['display_name'] or '' osm_type = r.get('osm_type', r.get('type')) url = result_base_url.format(osm_type=osm_type, osm_id=r['osm_id']) @@ -64,7 +64,7 @@ def response(resp): # if no geojson is found and osm_type is a node, add geojson Point if not geojson and osm_type == 'node': - geojson = {u'type': u'Point', u'coordinates': [r['lon'], r['lat']]} + geojson = {'type': 'Point', 'coordinates': [r['lon'], r['lat']]} address_raw = r.get('address') address = {} diff --git a/searx/engines/peertube.py b/searx/engines/peertube.py index b3795bf83..58ff38c02 100644 --- a/searx/engines/peertube.py +++ b/searx/engines/peertube.py @@ -14,7 +14,7 @@ from json import loads from datetime import datetime -from searx.url_utils import urlencode +from urllib.parse import urlencode from searx.utils import html_to_text # engine dependent config diff --git a/searx/engines/photon.py b/searx/engines/photon.py index 15236f680..9201fc168 100644 --- a/searx/engines/photon.py +++ b/searx/engines/photon.py @@ -11,8 +11,8 @@ """ from json import loads +from urllib.parse 
import urlencode from searx.utils import searx_useragent -from searx.url_utils import urlencode # engine dependent config categories = ['map'] diff --git a/searx/engines/piratebay.py b/searx/engines/piratebay.py index 0122d6daa..42866d058 100644 --- a/searx/engines/piratebay.py +++ b/searx/engines/piratebay.py @@ -11,7 +11,9 @@ from json import loads from datetime import datetime from operator import itemgetter -from searx.url_utils import quote + +from urllib.parse import quote, urljoin +from searx.engines.xpath import extract_text from searx.utils import get_torrent_size # engine dependent config @@ -62,8 +64,8 @@ def response(resp): # parse results for result in search_res: link = url + "description.php?id=" + result["id"] - magnetlink = "magnet:?xt=urn:btih:" + result["info_hash"] + \ - "&dn=" + result["name"] + "&tr=" + "&tr=".join(trackers) + magnetlink = "magnet:?xt=urn:btih:" + result["info_hash"] + "&dn=" + result["name"]\ + + "&tr=" + "&tr=".join(trackers) params = { "url": link, diff --git a/searx/engines/pubmed.py b/searx/engines/pubmed.py index 055f09226..7eb2e92f9 100644 --- a/searx/engines/pubmed.py +++ b/searx/engines/pubmed.py @@ -14,7 +14,7 @@ from flask_babel import gettext from lxml import etree from datetime import datetime -from searx.url_utils import urlencode +from urllib.parse import urlencode from searx.poolrequests import get diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py index 54e9dafad..ac918b905 100644 --- a/searx/engines/qwant.py +++ b/searx/engines/qwant.py @@ -12,9 +12,9 @@ from datetime import datetime from json import loads -from searx.utils import html_to_text -from searx.url_utils import urlencode -from searx.utils import match_language +from urllib.parse import urlencode +from searx.utils import html_to_text, match_language + # engine dependent config categories = None diff --git a/searx/engines/reddit.py b/searx/engines/reddit.py index d19724906..e732875cb 100644 --- a/searx/engines/reddit.py +++ 
b/searx/engines/reddit.py @@ -12,7 +12,7 @@ import json from datetime import datetime -from searx.url_utils import urlencode, urljoin, urlparse +from urllib.parse import urlencode, urljoin, urlparse # engine dependent config categories = ['general', 'images', 'news', 'social media'] diff --git a/searx/engines/scanr_structures.py b/searx/engines/scanr_structures.py index 7208dcb70..6dbbf4fd9 100644 --- a/searx/engines/scanr_structures.py +++ b/searx/engines/scanr_structures.py @@ -11,7 +11,7 @@ """ from json import loads, dumps -from searx.utils import html_to_text +from searx.utils import html_to_text # engine dependent config categories = ['science'] @@ -29,7 +29,7 @@ def request(query, params): params['url'] = search_url params['method'] = 'POST' params['headers']['Content-type'] = "application/json" - params['data'] = dumps({"query": query.decode('utf-8'), + params['data'] = dumps({"query": query, "searchField": "ALL", "sortDirection": "ASC", "sortOrder": "RELEVANCY", diff --git a/searx/engines/searchcode_code.py b/searx/engines/searchcode_code.py index 789e8e7a9..706285814 100644 --- a/searx/engines/searchcode_code.py +++ b/searx/engines/searchcode_code.py @@ -11,7 +11,7 @@ """ from json import loads -from searx.url_utils import urlencode +from urllib.parse import urlencode # engine dependent config diff --git a/searx/engines/searchcode_doc.py b/searx/engines/searchcode_doc.py index 4b8e9a84a..878d2e792 100644 --- a/searx/engines/searchcode_doc.py +++ b/searx/engines/searchcode_doc.py @@ -11,7 +11,7 @@ """ from json import loads -from searx.url_utils import urlencode +from urllib.parse import urlencode # engine dependent config categories = ['it'] diff --git a/searx/engines/seedpeer.py b/searx/engines/seedpeer.py index f9b1f99c8..3778abe7b 100644 --- a/searx/engines/seedpeer.py +++ b/searx/engines/seedpeer.py @@ -11,7 +11,7 @@ from lxml import html from json import loads from operator import itemgetter -from searx.url_utils import quote, urljoin +from 
urllib.parse import quote, urljoin from searx.engines.xpath import extract_text diff --git a/searx/engines/soundcloud.py b/searx/engines/soundcloud.py index 284689bf6..5165ea3ea 100644 --- a/searx/engines/soundcloud.py +++ b/searx/engines/soundcloud.py @@ -14,14 +14,11 @@ import re from json import loads from lxml import html from dateutil import parser +from io import StringIO +from urllib.parse import quote_plus, urlencode from searx import logger from searx.poolrequests import get as http_get -from searx.url_utils import quote_plus, urlencode -try: - from cStringIO import StringIO -except: - from io import StringIO # engine dependent config categories = ['music'] @@ -61,7 +58,7 @@ def get_client_id(): # gets app_js and searches for the clientid response = http_get(app_js_url) if response.ok: - cids = cid_re.search(response.content.decode("utf-8")) + cids = cid_re.search(response.content.decode()) if cids is not None and len(cids.groups()): return cids.groups()[0] logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!") diff --git a/searx/engines/spotify.py b/searx/engines/spotify.py index 00c395706..74942326e 100644 --- a/searx/engines/spotify.py +++ b/searx/engines/spotify.py @@ -11,7 +11,7 @@ """ from json import loads -from searx.url_utils import urlencode +from urllib.parse import urlencode import requests import base64 @@ -39,8 +39,8 @@ def request(query, params): 'https://accounts.spotify.com/api/token', data={'grant_type': 'client_credentials'}, headers={'Authorization': 'Basic ' + base64.b64encode( - "{}:{}".format(api_client_id, api_client_secret).encode('utf-8') - ).decode('utf-8')} + "{}:{}".format(api_client_id, api_client_secret).encode() + ).decode()} ) j = loads(r.text) params['headers'] = {'Authorization': 'Bearer {}'.format(j.get('access_token'))} @@ -59,7 +59,7 @@ def response(resp): if result['type'] == 'track': title = result['name'] url = result['external_urls']['spotify'] - content = u'{} - {} - {}'.format( + content 
= '{} - {} - {}'.format( result['artists'][0]['name'], result['album']['name'], result['name']) diff --git a/searx/engines/stackoverflow.py b/searx/engines/stackoverflow.py index 25875aa15..90e4543d7 100644 --- a/searx/engines/stackoverflow.py +++ b/searx/engines/stackoverflow.py @@ -10,9 +10,9 @@ @parse url, title, content """ +from urllib.parse import urlencode, urljoin from lxml import html from searx.engines.xpath import extract_text -from searx.url_utils import urlencode, urljoin # engine dependent config categories = ['it'] diff --git a/searx/engines/tokyotoshokan.py b/searx/engines/tokyotoshokan.py index 773212043..9c8774d7c 100644 --- a/searx/engines/tokyotoshokan.py +++ b/searx/engines/tokyotoshokan.py @@ -11,10 +11,10 @@ """ import re +from urllib.parse import urlencode from lxml import html from searx.engines.xpath import extract_text from datetime import datetime -from searx.url_utils import urlencode from searx.utils import get_torrent_size, int_or_zero # engine dependent config diff --git a/searx/engines/torrentz.py b/searx/engines/torrentz.py index c5e515acf..fcc8c042c 100644 --- a/searx/engines/torrentz.py +++ b/searx/engines/torrentz.py @@ -12,10 +12,10 @@ """ import re +from urllib.parse import urlencode from lxml import html from datetime import datetime from searx.engines.xpath import extract_text -from searx.url_utils import urlencode from searx.utils import get_torrent_size # engine dependent config diff --git a/searx/engines/translated.py b/searx/engines/translated.py index 6cb18ff39..a50e7c830 100644 --- a/searx/engines/translated.py +++ b/searx/engines/translated.py @@ -12,11 +12,11 @@ import re from searx.utils import is_valid_lang categories = ['general'] -url = u'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}' -web_url = u'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}' +url = 'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}' +web_url = 
'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}' weight = 100 -parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) (.{2,})$', re.I) +parser_re = re.compile('.*?([a-z]+)-([a-z]+) (.{2,})$', re.I) api_key = '' @@ -39,9 +39,9 @@ def request(query, params): key_form = '' params['url'] = url.format(from_lang=from_lang[1], to_lang=to_lang[1], - query=query.decode('utf-8'), + query=query, key=key_form) - params['query'] = query.decode('utf-8') + params['query'] = query params['from_lang'] = from_lang params['to_lang'] = to_lang diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py index d2a8d2088..549b14e96 100644 --- a/searx/engines/twitter.py +++ b/searx/engines/twitter.py @@ -12,10 +12,10 @@ @todo publishedDate """ +from urllib.parse import urlencode, urljoin from lxml import html from datetime import datetime from searx.engines.xpath import extract_text -from searx.url_utils import urlencode, urljoin # engine dependent config categories = ['social media'] diff --git a/searx/engines/unsplash.py b/searx/engines/unsplash.py index 2e8d6fdfc..45c6b30da 100644 --- a/searx/engines/unsplash.py +++ b/searx/engines/unsplash.py @@ -10,7 +10,7 @@ @parse url, title, img_src, thumbnail_src """ -from searx.url_utils import urlencode, urlparse, urlunparse, parse_qsl +from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl from json import loads url = 'https://unsplash.com/' diff --git a/searx/engines/vimeo.py b/searx/engines/vimeo.py index a92271019..fd3abc858 100644 --- a/searx/engines/vimeo.py +++ b/searx/engines/vimeo.py @@ -12,9 +12,9 @@ # @todo rewrite to api # @todo set content-parameter with correct data +from urllib.parse import urlencode from json import loads from dateutil import parser -from searx.url_utils import urlencode # engine dependent config categories = ['videos'] diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index eb7e1dc71..ffa3724fd 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ 
-15,9 +15,9 @@ from searx import logger from searx.poolrequests import get from searx.engines.xpath import extract_text from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url -from searx.url_utils import urlencode from searx.utils import match_language, eval_xpath +from urllib.parse import urlencode from json import loads from lxml.html import fromstring from lxml import etree @@ -76,7 +76,7 @@ def request(query, params): def response(resp): results = [] htmlparser = etree.HTMLParser() - html = fromstring(resp.content.decode("utf-8"), parser=htmlparser) + html = fromstring(resp.content.decode(), parser=htmlparser) search_results = eval_xpath(html, wikidata_ids_xpath) if resp.search_params['language'].split('-')[0] == 'all': @@ -89,7 +89,7 @@ def response(resp): wikidata_id = search_result.split('/')[-1] url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language})) htmlresponse = get(url) - jsonresponse = loads(htmlresponse.content.decode("utf-8")) + jsonresponse = loads(htmlresponse.content.decode()) results += getDetail(jsonresponse, wikidata_id, language, resp.search_params['language'], htmlparser) return results @@ -453,16 +453,16 @@ def get_geolink(result): latitude, longitude = coordinates.split(',') # convert to decimal - lat = int(latitude[:latitude.find(u'°')]) + lat = int(latitude[:latitude.find('°')]) if latitude.find('\'') >= 0: - lat += int(latitude[latitude.find(u'°') + 1:latitude.find('\'')] or 0) / 60.0 + lat += int(latitude[latitude.find('°') + 1:latitude.find('\'')] or 0) / 60.0 if latitude.find('"') >= 0: lat += float(latitude[latitude.find('\'') + 1:latitude.find('"')] or 0) / 3600.0 if latitude.find('S') >= 0: lat *= -1 - lon = int(longitude[:longitude.find(u'°')]) + lon = int(longitude[:longitude.find('°')]) if longitude.find('\'') >= 0: - lon += int(longitude[longitude.find(u'°') + 1:longitude.find('\'')] or 0) / 60.0 + lon += int(longitude[longitude.find('°') + 1:longitude.find('\'')] or 
0) / 60.0 if longitude.find('"') >= 0: lon += float(longitude[longitude.find('\'') + 1:longitude.find('"')] or 0) / 3600.0 if longitude.find('W') >= 0: diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py index bff24d16b..620ec3c14 100644 --- a/searx/engines/wikipedia.py +++ b/searx/engines/wikipedia.py @@ -10,13 +10,13 @@ @parse url, infobox """ +from urllib.parse import quote from json import loads from lxml.html import fromstring -from searx.url_utils import quote from searx.utils import match_language, searx_useragent # search-url -search_url = u'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}' +search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}' supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias' diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py index 1c58c4a9b..520eaa209 100644 --- a/searx/engines/wolframalpha_api.py +++ b/searx/engines/wolframalpha_api.py @@ -9,7 +9,7 @@ # @parse url, infobox from lxml import etree -from searx.url_utils import urlencode +from urllib.parse import urlencode # search-url search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}' @@ -45,15 +45,15 @@ def request(query, params): # replace private user area characters to make text legible def replace_pua_chars(text): - pua_chars = {u'\uf522': u'\u2192', # rigth arrow - u'\uf7b1': u'\u2115', # set of natural numbers - u'\uf7b4': u'\u211a', # set of rational numbers - u'\uf7b5': u'\u211d', # set of real numbers - u'\uf7bd': u'\u2124', # set of integer numbers - u'\uf74c': 'd', # differential - u'\uf74d': u'\u212f', # euler's number - u'\uf74e': 'i', # imaginary number - u'\uf7d9': '='} # equals sign + pua_chars = {'\uf522': '\u2192', # rigth arrow + '\uf7b1': '\u2115', # set of natural numbers + '\uf7b4': '\u211a', # set of rational numbers + '\uf7b5': '\u211d', # set of real numbers + '\uf7bd': '\u2124', # set of integer numbers + '\uf74c': 
'd', # differential + '\uf74d': '\u212f', # euler's number + '\uf74e': 'i', # imaginary number + '\uf7d9': '='} # equals sign for k, v in pua_chars.items(): text = text.replace(k, v) diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py index 387c9fa17..943d4f3fb 100644 --- a/searx/engines/wolframalpha_noapi.py +++ b/searx/engines/wolframalpha_noapi.py @@ -10,9 +10,9 @@ from json import loads from time import time +from urllib.parse import urlencode from searx.poolrequests import get as http_get -from searx.url_utils import urlencode # search-url url = 'https://www.wolframalpha.com/' diff --git a/searx/engines/www1x.py b/searx/engines/www1x.py index f1154b16d..1cb74dbad 100644 --- a/searx/engines/www1x.py +++ b/searx/engines/www1x.py @@ -11,7 +11,7 @@ """ from lxml import html -from searx.url_utils import urlencode, urljoin +from urllib.parse import urlencode, urljoin from searx.engines.xpath import extract_text # engine dependent config diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index a9f3e4bdd..bd97a93a5 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -1,7 +1,7 @@ +from urllib.parse import unquote, urlencode, urljoin, urlparse from lxml import html from lxml.etree import _ElementStringResult, _ElementUnicodeResult from searx.utils import html_to_text, eval_xpath -from searx.url_utils import unquote, urlencode, urljoin, urlparse search_url = None url_xpath = None @@ -56,7 +56,7 @@ def extract_url(xpath_results, search_url): if url.startswith('//'): # add http or https to this kind of url //example.com/ parsed_search_url = urlparse(search_url) - url = u'{0}:{1}'.format(parsed_search_url.scheme or 'http', url) + url = '{0}:{1}'.format(parsed_search_url.scheme or 'http', url) elif url.startswith('/'): # fix relative url to the search engine url = urljoin(search_url, url) @@ -86,7 +86,7 @@ def normalize_url(url): p = parsed_url.path mark = p.find('/**') if mark != -1: - return unquote(p[mark + 
3:]).decode('utf-8') + return unquote(p[mark + 3:]) return url diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py index f1d4c6abe..daa151082 100644 --- a/searx/engines/yacy.py +++ b/searx/engines/yacy.py @@ -14,7 +14,7 @@ from json import loads from dateutil import parser -from searx.url_utils import urlencode +from urllib.parse import urlencode from searx.utils import html_to_text diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py index a6b4aeb9f..0133b57b5 100644 --- a/searx/engines/yahoo.py +++ b/searx/engines/yahoo.py @@ -11,9 +11,9 @@ @parse url, title, content, suggestion """ +from urllib.parse import unquote, urlencode from lxml import html from searx.engines.xpath import extract_text, extract_url -from searx.url_utils import unquote, urlencode from searx.utils import match_language, eval_xpath # engine dependent config diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py index 9f6a4159b..345e4d91f 100644 --- a/searx/engines/yahoo_news.py +++ b/searx/engines/yahoo_news.py @@ -11,13 +11,13 @@ import re from datetime import datetime, timedelta +from urllib.parse import urlencode from lxml import html from searx.engines.xpath import extract_text, extract_url from searx.engines.yahoo import ( parse_url, _fetch_supported_languages, supported_languages_url, language_aliases ) from dateutil import parser -from searx.url_utils import urlencode from searx.utils import match_language # engine dependent config @@ -58,7 +58,7 @@ def request(query, params): def sanitize_url(url): if ".yahoo.com/" in url: - return re.sub(u"\\;\\_ylt\\=.+$", "", url) + return re.sub("\\;\\_ylt\\=.+$", "", url) else: return url diff --git a/searx/engines/yandex.py b/searx/engines/yandex.py index 1c789f6cb..ff1ef5a26 100644 --- a/searx/engines/yandex.py +++ b/searx/engines/yandex.py @@ -9,9 +9,9 @@ @parse url, title, content """ +from urllib.parse import urlencode from lxml import html from searx import logger -from searx.url_utils import 
urlencode logger = logger.getChild('yandex engine') diff --git a/searx/engines/yggtorrent.py b/searx/engines/yggtorrent.py index 739574e8d..37bf3b1d9 100644 --- a/searx/engines/yggtorrent.py +++ b/searx/engines/yggtorrent.py @@ -11,8 +11,8 @@ from lxml import html from operator import itemgetter from datetime import datetime +from urllib.parse import quote from searx.engines.xpath import extract_text -from searx.url_utils import quote from searx.utils import get_torrent_size from searx.poolrequests import get as http_get diff --git a/searx/engines/youtube_api.py b/searx/engines/youtube_api.py index bc4c0d58e..2542169a6 100644 --- a/searx/engines/youtube_api.py +++ b/searx/engines/youtube_api.py @@ -10,7 +10,7 @@ from json import loads from dateutil import parser -from searx.url_utils import urlencode +from urllib.parse import urlencode # engine dependent config categories = ['videos', 'music'] diff --git a/searx/engines/youtube_noapi.py b/searx/engines/youtube_noapi.py index 68a3739a2..fef501458 100644 --- a/searx/engines/youtube_noapi.py +++ b/searx/engines/youtube_noapi.py @@ -10,9 +10,9 @@ from functools import reduce from json import loads +from urllib.parse import quote_plus from searx.engines.xpath import extract_text from searx.utils import list_get -from searx.url_utils import quote_plus # engine dependent config categories = ['videos', 'music'] diff --git a/searx/exceptions.py b/searx/exceptions.py index 0175acfa3..4af816272 100644 --- a/searx/exceptions.py +++ b/searx/exceptions.py @@ -27,7 +27,7 @@ class SearxParameterException(SearxException): message = 'Empty ' + name + ' parameter' else: message = 'Invalid value "' + value + '" for parameter ' + name - super(SearxParameterException, self).__init__(message) + super().__init__(message) self.message = message self.parameter_name = name self.parameter_value = value diff --git a/searx/external_bang.py b/searx/external_bang.py index 0b4c4ae16..92b6e6a09 100644 --- a/searx/external_bang.py +++ 
b/searx/external_bang.py @@ -23,7 +23,7 @@ def get_bang_url(search_query): """ if search_query.external_bang: - query = search_query.query.decode('utf-8', 'ignore') + query = search_query.query bang = _get_bang(search_query.external_bang) if bang and query: diff --git a/searx/languages.py b/searx/languages.py index 72e1a735e..7fd96ab1e 100644 --- a/searx/languages.py +++ b/searx/languages.py @@ -3,73 +3,73 @@ # this file is generated automatically by utils/update_search_languages.py language_codes = ( - (u"af-NA", u"Afrikaans", u"", u"Afrikaans"), - (u"ar-SA", u"العربية", u"", u"Arabic"), - (u"be-BY", u"Беларуская", u"", u"Belarusian"), - (u"bg-BG", u"Български", u"", u"Bulgarian"), - (u"ca-AD", u"Català", u"", u"Catalan"), - (u"cs-CZ", u"Čeština", u"", u"Czech"), - (u"da-DK", u"Dansk", u"", u"Danish"), - (u"de", u"Deutsch", u"", u"German"), - (u"de-AT", u"Deutsch", u"Österreich", u"German"), - (u"de-CH", u"Deutsch", u"Schweiz", u"German"), - (u"de-DE", u"Deutsch", u"Deutschland", u"German"), - (u"el-GR", u"Ελληνικά", u"", u"Greek"), - (u"en", u"English", u"", u"English"), - (u"en-AU", u"English", u"Australia", u"English"), - (u"en-CA", u"English", u"Canada", u"English"), - (u"en-GB", u"English", u"United Kingdom", u"English"), - (u"en-IE", u"English", u"Ireland", u"English"), - (u"en-IN", u"English", u"India", u"English"), - (u"en-NZ", u"English", u"New Zealand", u"English"), - (u"en-PH", u"English", u"Philippines", u"English"), - (u"en-SG", u"English", u"Singapore", u"English"), - (u"en-US", u"English", u"United States", u"English"), - (u"es", u"Español", u"", u"Spanish"), - (u"es-AR", u"Español", u"Argentina", u"Spanish"), - (u"es-CL", u"Español", u"Chile", u"Spanish"), - (u"es-ES", u"Español", u"España", u"Spanish"), - (u"es-MX", u"Español", u"México", u"Spanish"), - (u"et-EE", u"Eesti", u"", u"Estonian"), - (u"fa-IR", u"فارسی", u"", u"Persian"), - (u"fi-FI", u"Suomi", u"", u"Finnish"), - (u"fr", u"Français", u"", u"French"), - (u"fr-BE", u"Français", 
u"Belgique", u"French"), - (u"fr-CA", u"Français", u"Canada", u"French"), - (u"fr-CH", u"Français", u"Suisse", u"French"), - (u"fr-FR", u"Français", u"France", u"French"), - (u"he-IL", u"עברית", u"", u"Hebrew"), - (u"hr-HR", u"Hrvatski", u"", u"Croatian"), - (u"hu-HU", u"Magyar", u"", u"Hungarian"), - (u"hy-AM", u"Հայերեն", u"", u"Armenian"), - (u"id-ID", u"Indonesia", u"", u"Indonesian"), - (u"is-IS", u"Íslenska", u"", u"Icelandic"), - (u"it-IT", u"Italiano", u"", u"Italian"), - (u"ja-JP", u"日本語", u"", u"Japanese"), - (u"ko-KR", u"한국어", u"", u"Korean"), - (u"lt-LT", u"Lietuvių", u"", u"Lithuanian"), - (u"lv-LV", u"Latviešu", u"", u"Latvian"), - (u"ms-MY", u"Melayu", u"", u"Malay"), - (u"nb-NO", u"Norsk Bokmål", u"", u"Norwegian Bokmål"), - (u"nl", u"Nederlands", u"", u"Dutch"), - (u"nl-BE", u"Nederlands", u"België", u"Dutch"), - (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"), - (u"pl-PL", u"Polski", u"", u"Polish"), - (u"pt", u"Português", u"", u"Portuguese"), - (u"pt-BR", u"Português", u"Brasil", u"Portuguese"), - (u"pt-PT", u"Português", u"Portugal", u"Portuguese"), - (u"ro-RO", u"Română", u"", u"Romanian"), - (u"ru-RU", u"Русский", u"", u"Russian"), - (u"sk-SK", u"Slovenčina", u"", u"Slovak"), - (u"sl-SI", u"Slovenščina", u"", u"Slovenian"), - (u"sr-RS", u"Srpski", u"", u"Serbian"), - (u"sv-SE", u"Svenska", u"", u"Swedish"), - (u"sw-KE", u"Kiswahili", u"", u"Swahili"), - (u"th-TH", u"ไทย", u"", u"Thai"), - (u"tr-TR", u"Türkçe", u"", u"Turkish"), - (u"uk-UA", u"Українська", u"", u"Ukrainian"), - (u"vi-VN", u"Tiếng Việt", u"", u"Vietnamese"), - (u"zh", u"中文", u"", u"Chinese"), - (u"zh-CN", u"中文", u"中国", u"Chinese"), - (u"zh-TW", u"中文", u"台灣", u"Chinese") + ("af-NA", "Afrikaans", "", "Afrikaans"), + ("ar-SA", "العربية", "", "Arabic"), + ("be-BY", "Беларуская", "", "Belarusian"), + ("bg-BG", "Български", "", "Bulgarian"), + ("ca-AD", "Català", "", "Catalan"), + ("cs-CZ", "Čeština", "", "Czech"), + ("da-DK", "Dansk", "", "Danish"), + ("de", "Deutsch", "", 
"German"), + ("de-AT", "Deutsch", "Österreich", "German"), + ("de-CH", "Deutsch", "Schweiz", "German"), + ("de-DE", "Deutsch", "Deutschland", "German"), + ("el-GR", "Ελληνικά", "", "Greek"), + ("en", "English", "", "English"), + ("en-AU", "English", "Australia", "English"), + ("en-CA", "English", "Canada", "English"), + ("en-GB", "English", "United Kingdom", "English"), + ("en-IE", "English", "Ireland", "English"), + ("en-IN", "English", "India", "English"), + ("en-NZ", "English", "New Zealand", "English"), + ("en-PH", "English", "Philippines", "English"), + ("en-SG", "English", "Singapore", "English"), + ("en-US", "English", "United States", "English"), + ("es", "Español", "", "Spanish"), + ("es-AR", "Español", "Argentina", "Spanish"), + ("es-CL", "Español", "Chile", "Spanish"), + ("es-ES", "Español", "España", "Spanish"), + ("es-MX", "Español", "México", "Spanish"), + ("et-EE", "Eesti", "", "Estonian"), + ("fa-IR", "فارسی", "", "Persian"), + ("fi-FI", "Suomi", "", "Finnish"), + ("fr", "Français", "", "French"), + ("fr-BE", "Français", "Belgique", "French"), + ("fr-CA", "Français", "Canada", "French"), + ("fr-CH", "Français", "Suisse", "French"), + ("fr-FR", "Français", "France", "French"), + ("he-IL", "עברית", "", "Hebrew"), + ("hr-HR", "Hrvatski", "", "Croatian"), + ("hu-HU", "Magyar", "", "Hungarian"), + ("hy-AM", "Հայերեն", "", "Armenian"), + ("id-ID", "Indonesia", "", "Indonesian"), + ("is-IS", "Íslenska", "", "Icelandic"), + ("it-IT", "Italiano", "", "Italian"), + ("ja-JP", "日本語", "", "Japanese"), + ("ko-KR", "한국어", "", "Korean"), + ("lt-LT", "Lietuvių", "", "Lithuanian"), + ("lv-LV", "Latviešu", "", "Latvian"), + ("ms-MY", "Melayu", "", "Malay"), + ("nb-NO", "Norsk Bokmål", "", "Norwegian Bokmål"), + ("nl", "Nederlands", "", "Dutch"), + ("nl-BE", "Nederlands", "België", "Dutch"), + ("nl-NL", "Nederlands", "Nederland", "Dutch"), + ("pl-PL", "Polski", "", "Polish"), + ("pt", "Português", "", "Portuguese"), + ("pt-BR", "Português", "Brasil", "Portuguese"), + 
("pt-PT", "Português", "Portugal", "Portuguese"), + ("ro-RO", "Română", "", "Romanian"), + ("ru-RU", "Русский", "", "Russian"), + ("sk-SK", "Slovenčina", "", "Slovak"), + ("sl-SI", "Slovenščina", "", "Slovenian"), + ("sr-RS", "Srpski", "", "Serbian"), + ("sv-SE", "Svenska", "", "Swedish"), + ("sw-KE", "Kiswahili", "", "Swahili"), + ("th-TH", "ไทย", "", "Thai"), + ("tr-TR", "Türkçe", "", "Turkish"), + ("uk-UA", "Українська", "", "Ukrainian"), + ("vi-VN", "Tiếng Việt", "", "Vietnamese"), + ("zh", "中文", "", "Chinese"), + ("zh-CN", "中文", "中国", "Chinese"), + ("zh-TW", "中文", "台灣", "Chinese") ) diff --git a/searx/plugins/__init__.py b/searx/plugins/__init__.py index 791c40c21..51f6981a2 100644 --- a/searx/plugins/__init__.py +++ b/searx/plugins/__init__.py @@ -20,13 +20,10 @@ from importlib import import_module from os import listdir, makedirs, remove, stat, utime from os.path import abspath, basename, dirname, exists, join from shutil import copyfile -from sys import version_info from traceback import print_exc from searx import logger, settings, static_path -if version_info[0] == 3: - unicode = str logger = logger.getChild('plugins') @@ -38,8 +35,8 @@ from searx.plugins import (oa_doi_rewrite, tracker_url_remover, vim_hotkeys) -required_attrs = (('name', (str, unicode)), - ('description', (str, unicode)), +required_attrs = (('name', str), + ('description', str), ('default_on', bool)) optional_attrs = (('js_dependencies', tuple), diff --git a/searx/plugins/https_rewrite.py b/searx/plugins/https_rewrite.py index 82556017e..aeb42495e 100644 --- a/searx/plugins/https_rewrite.py +++ b/searx/plugins/https_rewrite.py @@ -16,17 +16,14 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. 
''' import re -import sys +from urllib.parse import urlparse from lxml import etree from os import listdir, environ from os.path import isfile, isdir, join from searx.plugins import logger from flask_babel import gettext from searx import searx_dir -from searx.url_utils import urlparse -if sys.version_info[0] == 3: - unicode = str name = "HTTPS rewrite" description = gettext('Rewrite HTTP links to HTTPS if possible') diff --git a/searx/plugins/oa_doi_rewrite.py b/searx/plugins/oa_doi_rewrite.py index be80beb26..eef29f103 100644 --- a/searx/plugins/oa_doi_rewrite.py +++ b/searx/plugins/oa_doi_rewrite.py @@ -1,6 +1,6 @@ +from urllib.parse import urlparse, parse_qsl from flask_babel import gettext import re -from searx.url_utils import urlparse, parse_qsl from searx import settings diff --git a/searx/plugins/self_info.py b/searx/plugins/self_info.py index cdd3e9a6e..4fdfb4288 100644 --- a/searx/plugins/self_info.py +++ b/searx/plugins/self_info.py @@ -22,7 +22,7 @@ default_on = True # Self User Agent regex -p = re.compile(b'.*user[ -]agent.*', re.IGNORECASE) +p = re.compile('.*user[ -]agent.*', re.IGNORECASE) # attach callback to the post search hook @@ -31,7 +31,7 @@ p = re.compile(b'.*user[ -]agent.*', re.IGNORECASE) def post_search(request, search): if search.search_query.pageno > 1: return True - if search.search_query.query == b'ip': + if search.search_query.query == 'ip': x_forwarded_for = request.headers.getlist("X-Forwarded-For") if x_forwarded_for: ip = x_forwarded_for[0] diff --git a/searx/plugins/tracker_url_remover.py b/searx/plugins/tracker_url_remover.py index 33dd621e1..742f39013 100644 --- a/searx/plugins/tracker_url_remover.py +++ b/searx/plugins/tracker_url_remover.py @@ -17,7 +17,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. 
from flask_babel import gettext import re -from searx.url_utils import urlunparse, parse_qsl, urlencode +from urllib.parse import urlunparse, parse_qsl, urlencode regexes = {re.compile(r'utm_[^&]+'), re.compile(r'(wkey|wemail)[^&]*'), diff --git a/searx/poolrequests.py b/searx/poolrequests.py index 9f0ee8736..51b6219c3 100644 --- a/searx/poolrequests.py +++ b/searx/poolrequests.py @@ -20,7 +20,7 @@ class HTTPAdapterWithConnParams(requests.adapters.HTTPAdapter): self.config = {} self.proxy_manager = {} - super(requests.adapters.HTTPAdapter, self).__init__() + super().__init__() self._pool_connections = pool_connections self._pool_maxsize = pool_maxsize @@ -60,7 +60,7 @@ else: class SessionSinglePool(requests.Session): def __init__(self): - super(SessionSinglePool, self).__init__() + super().__init__() # reuse the same adapters with RLock(): @@ -71,7 +71,7 @@ class SessionSinglePool(requests.Session): def close(self): """Call super, but clear adapters since there are managed globaly""" self.adapters.clear() - super(SessionSinglePool, self).close() + super().close() def set_timeout_for_thread(timeout, start_time=None): diff --git a/searx/preferences.py b/searx/preferences.py index 82b8f5224..3042636a6 100644 --- a/searx/preferences.py +++ b/searx/preferences.py @@ -6,16 +6,11 @@ from base64 import urlsafe_b64encode, urlsafe_b64decode from zlib import compress, decompress -from sys import version +from urllib.parse import parse_qs, urlencode from searx import settings, autocomplete from searx.languages import language_codes as languages from searx.utils import match_language -from searx.url_utils import parse_qs, urlencode - -if version[0] == '3': - # pylint: disable=invalid-name - unicode = str COOKIE_MAX_AGE = 60 * 60 * 24 * 365 * 5 # 5 years @@ -37,7 +32,7 @@ class ValidationException(Exception): """ -class Setting(object): +class Setting: """Base class of user settings""" def __init__(self, default_value, **kwargs): @@ -315,7 +310,7 @@ class 
PluginsSetting(SwitchableSetting): return [item[len('plugin_'):] for item in items] -class Preferences(object): +class Preferences: """Validates and saves preferences to cookies""" def __init__(self, themes, categories, engines, plugins): @@ -402,14 +397,14 @@ class Preferences(object): settings_kv['tokens'] = ','.join(self.tokens.values) - return urlsafe_b64encode(compress(urlencode(settings_kv).encode('utf-8'))).decode('utf-8') + return urlsafe_b64encode(compress(urlencode(settings_kv).encode())).decode() def parse_encoded_data(self, input_data): """parse (base64) preferences from request (``flask.request.form['preferences']``)""" - decoded_data = decompress(urlsafe_b64decode(input_data.encode('utf-8'))) + decoded_data = decompress(urlsafe_b64decode(input_data.encode())) dict_data = {} for x, y in parse_qs(decoded_data).items(): - dict_data[x.decode('utf8')] = y[0].decode('utf8') + dict_data[x.decode()] = y[0].decode() self.parse_dict(dict_data) def parse_dict(self, input_data): diff --git a/searx/query.py b/searx/query.py index e8b57d4ca..ef323af7a 100644 --- a/searx/query.py +++ b/searx/query.py @@ -17,23 +17,22 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. 
(C) 2014 by Thomas Pointhuber, ''' +import re + from searx.languages import language_codes from searx.engines import ( categories, engines, engine_shortcuts ) -import re -import sys -if sys.version_info[0] == 3: - unicode = str VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$') -class RawTextQuery(object): +class RawTextQuery: """parse raw text query (the value from the html input)""" def __init__(self, query, disabled_engines): + assert isinstance(query, str) self.query = query self.disabled_engines = [] @@ -53,7 +52,7 @@ class RawTextQuery(object): self.query_parts = [] # split query, including whitespaces - raw_query_parts = re.split(r'(\s+)' if isinstance(self.query, str) else b'(\s+)', self.query) + raw_query_parts = re.split(r'(\s+)', self.query) parse_next = True @@ -93,7 +92,7 @@ class RawTextQuery(object): # check if any language-code is equal with # declared language-codes for lc in language_codes: - lang_id, lang_name, country, english_name = map(unicode.lower, lc) + lang_id, lang_name, country, english_name = map(str.lower, lc) # if correct language-code is found # set it as new search-language @@ -177,15 +176,15 @@ class RawTextQuery(object): def getFullQuery(self): # get full querry including whitespaces - return u''.join(self.query_parts) + return ''.join(self.query_parts) -class SearchQuery(object): +class SearchQuery: """container for all the search parameters (query, language, etc...)""" def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range, timeout_limit=None, preferences=None, external_bang=None): - self.query = query.encode('utf-8') + self.query = query self.engines = engines self.categories = categories self.lang = lang @@ -197,4 +196,4 @@ class SearchQuery(object): self.external_bang = external_bang def __str__(self): - return str(self.query) + ";" + str(self.engines) + return self.query + ";" + str(self.engines) diff --git a/searx/results.py b/searx/results.py index df2e3e78d..e4cad2e24 100644 --- 
a/searx/results.py +++ b/searx/results.py @@ -1,14 +1,11 @@ import re -import sys from collections import defaultdict from operator import itemgetter from threading import RLock +from urllib.parse import urlparse, unquote from searx import logger from searx.engines import engines -from searx.url_utils import urlparse, unquote -if sys.version_info[0] == 3: - basestring = str CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U) WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U) @@ -16,7 +13,7 @@ WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U) # return the meaningful length of the content for a result def result_content_len(content): - if isinstance(content, basestring): + if isinstance(content, str): return len(CONTENT_LEN_IGNORED_CHARS_REGEX.sub('', content)) else: return 0 @@ -125,14 +122,14 @@ def result_score(result): return sum((occurences * weight) / position for position in result['positions']) -class ResultContainer(object): +class ResultContainer: """docstring for ResultContainer""" __slots__ = '_merged_results', 'infoboxes', 'suggestions', 'answers', 'corrections', '_number_of_results',\ '_ordered', 'paging', 'unresponsive_engines', 'timings', 'redirect_url' def __init__(self): - super(ResultContainer, self).__init__() + super().__init__() self._merged_results = [] self.infoboxes = [] self.suggestions = set() @@ -161,11 +158,11 @@ class ResultContainer(object): self._number_of_results.append(result['number_of_results']) else: # standard result (url, title, content) - if 'url' in result and not isinstance(result['url'], basestring): + if 'url' in result and not isinstance(result['url'], str): logger.debug('result: invalid URL: %s', str(result)) - elif 'title' in result and not isinstance(result['title'], basestring): + elif 'title' in result and not isinstance(result['title'], str): logger.debug('result: invalid title: %s', str(result)) - elif 'content' in result and not isinstance(result['content'], basestring): + 
elif 'content' in result and not isinstance(result['content'], str): logger.debug('result: invalid content: %s', str(result)) else: self._merge_result(result, standard_result_count + 1) diff --git a/searx/search.py b/searx/search.py index 79896e5e1..3695128ab 100644 --- a/searx/search.py +++ b/searx/search.py @@ -20,8 +20,8 @@ import sys import threading from time import time from uuid import uuid4 +from _thread import start_new_thread -import six from flask_babel import gettext import requests.exceptions import searx.poolrequests as requests_lib @@ -37,13 +37,6 @@ from searx import logger from searx.plugins import plugins from searx.exceptions import SearxParameterException -try: - from thread import start_new_thread -except: - from _thread import start_new_thread - -if sys.version_info[0] == 3: - unicode = str logger = logger.getChild('search') @@ -355,11 +348,11 @@ def get_search_query_from_webapp(preferences, form): load_default_categories = True for pd_name, pd in form.items(): if pd_name == 'categories': - query_categories.extend(categ for categ in map(unicode.strip, pd.split(',')) if categ in categories) + query_categories.extend(categ for categ in map(str.strip, pd.split(',')) if categ in categories) elif pd_name == 'engines': pd_engines = [{'category': engines[engine].categories[0], 'name': engine} - for engine in map(unicode.strip, pd.split(',')) if engine in engines] + for engine in map(str.strip, pd.split(',')) if engine in engines] if pd_engines: query_engines.extend(pd_engines) load_default_categories = False @@ -414,12 +407,12 @@ def get_search_query_from_webapp(preferences, form): raw_text_query) -class Search(object): +class Search: """Search information container""" def __init__(self, search_query): # init vars - super(Search, self).__init__() + super().__init__() self.search_query = search_query self.result_container = ResultContainer() self.actual_timeout = None @@ -434,7 +427,7 @@ class Search(object): # This means there was a valid bang and 
the # rest of the search does not need to be continued - if isinstance(self.result_container.redirect_url, six.string_types): + if isinstance(self.result_container.redirect_url, str): return self.result_container # start time start_time = time() @@ -541,13 +534,13 @@ class SearchWithPlugins(Search): """Similar to the Search class but call the plugins.""" def __init__(self, search_query, ordered_plugin_list, request): - super(SearchWithPlugins, self).__init__(search_query) + super().__init__(search_query) self.ordered_plugin_list = ordered_plugin_list self.request = request def search(self): if plugins.call(self.ordered_plugin_list, 'pre_search', self.request, self): - super(SearchWithPlugins, self).search() + super().search() plugins.call(self.ordered_plugin_list, 'post_search', self.request, self) diff --git a/searx/templates/courgette/404.html b/searx/templates/courgette/404.html index 9e3b8ac29..7a317f023 100644 --- a/searx/templates/courgette/404.html +++ b/searx/templates/courgette/404.html @@ -3,7 +3,7 @@

{{ _('Page not found') }}

{% autoescape false %} -

{{ _('Go to %(search_page)s.', search_page=unicode('{}').format(url_for('index'), _('search page'))) }}

+

{{ _('Go to %(search_page)s.', search_page='{}'.format(url_for('index'), _('search page'))) }}

{% endautoescape %}
{% endblock %} diff --git a/searx/templates/legacy/404.html b/searx/templates/legacy/404.html index 3e889dd21..c0fa62b00 100644 --- a/searx/templates/legacy/404.html +++ b/searx/templates/legacy/404.html @@ -3,7 +3,7 @@

{{ _('Page not found') }}

{% autoescape false %} -

{{ _('Go to %(search_page)s.', search_page=unicode('{}').format(url_for('index'), _('search page'))) }}

+

{{ _('Go to %(search_page)s.', search_page='{}'.format(url_for('index'), _('search page'))) }}

{% endautoescape %}
{% endblock %} diff --git a/searx/templates/oscar/404.html b/searx/templates/oscar/404.html index 5a50880a9..cdb31db73 100644 --- a/searx/templates/oscar/404.html +++ b/searx/templates/oscar/404.html @@ -3,7 +3,7 @@

{{ _('Page not found') }}

{% autoescape false %} -

{{ _('Go to %(search_page)s.', search_page=unicode('{}').format(url_for('index'), _('search page'))) }}

+

{{ _('Go to %(search_page)s.', search_page='{}'.format(url_for('index'), _('search page'))) }}

{% endautoescape %}
{% endblock %} diff --git a/searx/templates/simple/404.html b/searx/templates/simple/404.html index 11d604313..1a10514cc 100644 --- a/searx/templates/simple/404.html +++ b/searx/templates/simple/404.html @@ -3,7 +3,7 @@

{{ _('Page not found') }}

{% autoescape false %} -

{{ _('Go to %(search_page)s.', search_page=unicode('{}').format(url_for('index'), _('search page'))) }}

+

{{ _('Go to %(search_page)s.', search_page='{}'.format(url_for('index'), _('search page'))) }}

{% endautoescape %}
{% endblock %} diff --git a/searx/testing.py b/searx/testing.py index f0e303e13..c52974961 100644 --- a/searx/testing.py +++ b/searx/testing.py @@ -17,7 +17,7 @@ from unittest2 import TestCase class SearxTestLayer: """Base layer for non-robot tests.""" - __name__ = u'SearxTestLayer' + __name__ = 'SearxTestLayer' @classmethod def setUp(cls): @@ -66,7 +66,7 @@ class SearxRobotLayer(): stderr=subprocess.STDOUT ) if hasattr(self.server.stdout, 'read1'): - print(self.server.stdout.read1(1024).decode('utf-8')) + print(self.server.stdout.read1(1024).decode()) def tearDown(self): os.kill(self.server.pid, 9) diff --git a/searx/url_utils.py b/searx/url_utils.py deleted file mode 100644 index dcafc3ba8..000000000 --- a/searx/url_utils.py +++ /dev/null @@ -1,30 +0,0 @@ -from sys import version_info - -if version_info[0] == 2: - from urllib import quote, quote_plus, unquote, urlencode - from urlparse import parse_qs, parse_qsl, urljoin, urlparse, urlunparse, ParseResult -else: - from urllib.parse import ( - parse_qs, - parse_qsl, - quote, - quote_plus, - unquote, - urlencode, - urljoin, - urlparse, - urlunparse, - ParseResult - ) - - -__export__ = (parse_qs, - parse_qsl, - quote, - quote_plus, - unquote, - urlencode, - urljoin, - urlparse, - urlunparse, - ParseResult) diff --git a/searx/utils.py b/searx/utils.py index 5ea9dc89c..d8842c65f 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -1,21 +1,22 @@ # -*- coding: utf-8 -*- +import os +import sys import csv import hashlib import hmac -import os import re +import json -from babel.core import get_global -from babel.dates import format_date from codecs import getincrementalencoder from imp import load_source from numbers import Number from os.path import splitext, join -from io import open +from io import open, StringIO from random import choice +from html.parser import HTMLParser from lxml.etree import XPath -import sys -import json +from babel.core import get_global +from babel.dates import format_date from searx import 
settings from searx.version import VERSION_STRING @@ -23,23 +24,6 @@ from searx.languages import language_codes from searx import settings from searx import logger -try: - from cStringIO import StringIO -except: - from io import StringIO - -try: - from HTMLParser import HTMLParser -except: - from html.parser import HTMLParser - -if sys.version_info[0] == 3: - unichr = chr - unicode = str - IS_PY2 = False - basestring = str -else: - IS_PY2 = True logger = logger.getChild('utils') @@ -75,19 +59,18 @@ def highlight_content(content, query): if content.find('<') != -1: return content - query = query.decode('utf-8') if content.lower().find(query.lower()) > -1: - query_regex = u'({0})'.format(re.escape(query)) + query_regex = '({0})'.format(re.escape(query)) content = re.sub(query_regex, '\\1', content, flags=re.I | re.U) else: regex_parts = [] for chunk in query.split(): if len(chunk) == 1: - regex_parts.append(u'\\W+{0}\\W+'.format(re.escape(chunk))) + regex_parts.append('\\W+{0}\\W+'.format(re.escape(chunk))) else: - regex_parts.append(u'{0}'.format(re.escape(chunk))) - query_regex = u'({0})'.format('|'.join(regex_parts)) + regex_parts.append('{0}'.format(re.escape(chunk))) + query_regex = '({0})'.format('|'.join(regex_parts)) content = re.sub(query_regex, '\\1', content, flags=re.I | re.U) @@ -124,21 +107,21 @@ class HTMLTextExtractor(HTMLParser): def handle_charref(self, number): if not self.is_valid_tag(): return - if number[0] in (u'x', u'X'): + if number[0] in ('x', 'X'): codepoint = int(number[1:], 16) else: codepoint = int(number) - self.result.append(unichr(codepoint)) + self.result.append(chr(codepoint)) def handle_entityref(self, name): if not self.is_valid_tag(): return # codepoint = htmlentitydefs.name2codepoint[name] - # self.result.append(unichr(codepoint)) + # self.result.append(chr(codepoint)) self.result.append(name) def get_text(self): - return u''.join(self.result).strip() + return ''.join(self.result).strip() def html_to_text(html): @@ -163,22 
+146,14 @@ class UnicodeWriter: self.encoder = getincrementalencoder(encoding)() def writerow(self, row): - if IS_PY2: - row = [s.encode("utf-8") if hasattr(s, 'encode') else s for s in row] self.writer.writerow(row) # Fetch UTF-8 output from the queue ... data = self.queue.getvalue() - if IS_PY2: - data = data.decode("utf-8") - else: - data = data.strip('\x00') + data = data.strip('\x00') # ... and reencode it into the target encoding data = self.encoder.encode(data) # write to the target stream - if IS_PY2: - self.stream.write(data) - else: - self.stream.write(data.decode("utf-8")) + self.stream.write(data.decode()) # empty queue self.queue.truncate(0) @@ -253,7 +228,7 @@ def dict_subset(d, properties): def prettify_url(url, max_length=74): if len(url) > max_length: chunk_len = int(max_length / 2 + 1) - return u'{0}[...]{1}'.format(url[:chunk_len], url[-chunk_len:]) + return '{0}[...]{1}'.format(url[:chunk_len], url[-chunk_len:]) else: return url @@ -309,8 +284,10 @@ def int_or_zero(num): def is_valid_lang(lang): + if isinstance(lang, bytes): + lang = lang.decode() is_abbr = (len(lang) == 2) - lang = lang.lower().decode('utf-8') + lang = lang.lower() if is_abbr: for l in language_codes: if l[0][:2] == lang: @@ -407,17 +384,14 @@ def new_hmac(secret_key, url): secret_key_bytes = secret_key else: raise err - if sys.version_info[0] == 2: - return hmac.new(bytes(secret_key), url, hashlib.sha256).hexdigest() - else: - return hmac.new(secret_key_bytes, url, hashlib.sha256).hexdigest() + return hmac.new(secret_key_bytes, url, hashlib.sha256).hexdigest() def to_string(obj): - if isinstance(obj, basestring): + if isinstance(obj, str): return obj if isinstance(obj, Number): - return unicode(obj) + return str(obj) if hasattr(obj, '__str__'): return obj.__str__() if hasattr(obj, '__repr__'): @@ -433,9 +407,9 @@ def ecma_unescape(s): """ # s = unicode(s) # "%u5409" becomes "吉" - s = ecma_unescape4_re.sub(lambda e: unichr(int(e.group(1), 16)), s) + s = 
ecma_unescape4_re.sub(lambda e: chr(int(e.group(1), 16)), s) # "%20" becomes " ", "%F3" becomes "ó" - s = ecma_unescape2_re.sub(lambda e: unichr(int(e.group(1), 16)), s) + s = ecma_unescape2_re.sub(lambda e: chr(int(e.group(1), 16)), s) return s diff --git a/searx/webapp.py b/searx/webapp.py index 4c0eceaaf..a1b0413aa 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -17,37 +17,35 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. (C) 2013- by Adam Tauber, ''' +import sys +if sys.version_info[0] < 3: + print('\033[1;31m Python2 is no longer supported\033[0m') + exit(1) + if __name__ == '__main__': - from sys import path from os.path import realpath, dirname - path.append(realpath(dirname(realpath(__file__)) + '/../')) + sys.path.append(realpath(dirname(realpath(__file__)) + '/../')) import hashlib import hmac import json import os -import sys import requests from searx import logger logger = logger.getChild('webapp') -try: - from pygments import highlight - from pygments.lexers import get_lexer_by_name - from pygments.formatters import HtmlFormatter -except: - logger.critical("cannot import dependency: pygments") - from sys import exit - exit(1) -try: - from cgi import escape -except: - from html import escape -from six import next from datetime import datetime, timedelta from time import time +from html import escape +from io import StringIO +from urllib.parse import urlencode, urlparse, urljoin + +from pygments import highlight +from pygments.lexers import get_lexer_by_name +from pygments.formatters import HtmlFormatter + from werkzeug.middleware.proxy_fix import ProxyFix from flask import ( Flask, request, render_template, url_for, Response, make_response, @@ -78,7 +76,6 @@ from searx.plugins import plugins from searx.plugins.oa_doi_rewrite import get_doi_resolver from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES from searx.answerers import answerers -from searx.url_utils import urlencode, urlparse, urljoin from 
searx.utils import new_hmac # check if the pyopenssl package is installed. @@ -89,19 +86,6 @@ except ImportError: logger.critical("The pyopenssl package has to be installed.\n" "Some HTTPS connections will fail") -try: - from cStringIO import StringIO -except: - from io import StringIO - - -if sys.version_info[0] == 3: - unicode = str - PY3 = True -else: - logger.warning('\033[1;31m Python2 is no longer supported\033[0m') - exit(1) - # serve pages with HTTP/1.1 from werkzeug.serving import WSGIRequestHandler WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server'].get('http_protocol_version', '1.0')) @@ -315,11 +299,11 @@ def proxify(url): if not settings.get('result_proxy'): return url - url_params = dict(mortyurl=url.encode('utf-8')) + url_params = dict(mortyurl=url.encode()) if settings['result_proxy'].get('key'): url_params['mortyhash'] = hmac.new(settings['result_proxy']['key'], - url.encode('utf-8'), + url.encode(), hashlib.sha256).hexdigest() return '{0}?{1}'.format(settings['result_proxy']['url'], @@ -347,10 +331,10 @@ def image_proxify(url): if settings.get('result_proxy'): return proxify(url) - h = new_hmac(settings['server']['secret_key'], url.encode('utf-8')) + h = new_hmac(settings['server']['secret_key'], url.encode()) return '{0}?{1}'.format(url_for('image_proxy'), - urlencode(dict(url=url.encode('utf-8'), h=h))) + urlencode(dict(url=url.encode(), h=h))) def render(template_name, override_theme=None, **kwargs): @@ -424,8 +408,6 @@ def render(template_name, override_theme=None, **kwargs): kwargs['results_on_new_tab'] = request.preferences.get_value('results_on_new_tab') - kwargs['unicode'] = unicode - kwargs['preferences'] = request.preferences kwargs['brand'] = brand @@ -612,7 +594,7 @@ def index(): if 'content' in result and result['content']: result['content'] = highlight_content(escape(result['content'][:1024]), search_query.query) if 'title' in result and result['title']: - result['title'] = 
highlight_content(escape(result['title'] or u''), search_query.query) + result['title'] = highlight_content(escape(result['title'] or ''), search_query.query) else: if result.get('content'): result['content'] = html_to_text(result['content']).strip() @@ -634,14 +616,14 @@ def index(): minutes = int((timedifference.seconds / 60) % 60) hours = int(timedifference.seconds / 60 / 60) if hours == 0: - result['publishedDate'] = gettext(u'{minutes} minute(s) ago').format(minutes=minutes) + result['publishedDate'] = gettext('{minutes} minute(s) ago').format(minutes=minutes) else: - result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes) # noqa + result['publishedDate'] = gettext('{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes) # noqa else: result['publishedDate'] = format_date(result['publishedDate']) if output_format == 'json': - return Response(json.dumps({'query': search_query.query.decode('utf-8'), + return Response(json.dumps({'query': search_query.query, 'number_of_results': number_of_results, 'results': results, 'answers': list(result_container.answers), @@ -670,7 +652,7 @@ def index(): csv.writerow([row.get(key, '') for key in keys]) csv.stream.seek(0) response = Response(csv.stream.read(), mimetype='application/csv') - cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query.decode('utf-8')) + cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query) response.headers.add('Content-Disposition', cont_disp) return response @@ -754,10 +736,7 @@ def autocompleter(): disabled_engines = request.preferences.engines.get_disabled() # parse query - if PY3: - raw_text_query = RawTextQuery(request.form.get('q', b''), disabled_engines) - else: - raw_text_query = RawTextQuery(request.form.get('q', u'').encode('utf-8'), disabled_engines) + raw_text_query = RawTextQuery(str(request.form.get('q', b'')), disabled_engines) raw_text_query.parse_query() # 
check if search query is set @@ -879,7 +858,7 @@ def _is_selected_language_supported(engine, preferences): @app.route('/image_proxy', methods=['GET']) def image_proxy(): - url = request.args.get('url').encode('utf-8') + url = request.args.get('url').encode() if not url: return '', 400 @@ -1061,7 +1040,7 @@ def run(): ) -class ReverseProxyPathFix(object): +class ReverseProxyPathFix: '''Wrap the application in this middleware and configure the front-end server to add these headers, to let you quietly bind this to a URL other than / and to an HTTP scheme that is diff --git a/tests/unit/test_answerers.py b/tests/unit/test_answerers.py index bd8789a7e..73d8d26f2 100644 --- a/tests/unit/test_answerers.py +++ b/tests/unit/test_answerers.py @@ -10,7 +10,7 @@ class AnswererTest(SearxTestCase): def test_unicode_input(self): query = Mock() - unicode_payload = u'árvíztűrő tükörfúrógép' + unicode_payload = 'árvíztűrő tükörfúrógép' for answerer in answerers: - query.query = u'{} {}'.format(answerer.keywords[0], unicode_payload) + query.query = '{} {}'.format(answerer.keywords[0], unicode_payload) self.assertTrue(isinstance(answerer.answer(query), list)) diff --git a/tests/unit/test_plugins.py b/tests/unit/test_plugins.py index 10de8475a..838c1d574 100644 --- a/tests/unit/test_plugins.py +++ b/tests/unit/test_plugins.py @@ -48,11 +48,11 @@ class SelfIPTest(SearxTestCase): # IP test request = Mock(remote_addr='127.0.0.1') request.headers.getlist.return_value = [] - search = get_search_mock(query=b'ip', pageno=1) + search = get_search_mock(query='ip', pageno=1) store.call(store.plugins, 'post_search', request, search) self.assertTrue('127.0.0.1' in search.result_container.answers["ip"]["answer"]) - search = get_search_mock(query=b'ip', pageno=2) + search = get_search_mock(query='ip', pageno=2) store.call(store.plugins, 'post_search', request, search) self.assertFalse('ip' in search.result_container.answers) @@ -60,26 +60,26 @@ class SelfIPTest(SearxTestCase): request = 
Mock(user_agent='Mock') request.headers.getlist.return_value = [] - search = get_search_mock(query=b'user-agent', pageno=1) + search = get_search_mock(query='user-agent', pageno=1) store.call(store.plugins, 'post_search', request, search) self.assertTrue('Mock' in search.result_container.answers["user-agent"]["answer"]) - search = get_search_mock(query=b'user-agent', pageno=2) + search = get_search_mock(query='user-agent', pageno=2) store.call(store.plugins, 'post_search', request, search) self.assertFalse('user-agent' in search.result_container.answers) - search = get_search_mock(query=b'user-agent', pageno=1) + search = get_search_mock(query='user-agent', pageno=1) store.call(store.plugins, 'post_search', request, search) self.assertTrue('Mock' in search.result_container.answers["user-agent"]["answer"]) - search = get_search_mock(query=b'user-agent', pageno=2) + search = get_search_mock(query='user-agent', pageno=2) store.call(store.plugins, 'post_search', request, search) self.assertFalse('user-agent' in search.result_container.answers) - search = get_search_mock(query=b'What is my User-Agent?', pageno=1) + search = get_search_mock(query='What is my User-Agent?', pageno=1) store.call(store.plugins, 'post_search', request, search) self.assertTrue('Mock' in search.result_container.answers["user-agent"]["answer"]) - search = get_search_mock(query=b'What is my User-Agent?', pageno=2) + search = get_search_mock(query='What is my User-Agent?', pageno=2) store.call(store.plugins, 'post_search', request, search) self.assertFalse('user-agent' in search.result_container.answers) diff --git a/tests/unit/test_preferences.py b/tests/unit/test_preferences.py index 61ac0e8e4..32f50c60b 100644 --- a/tests/unit/test_preferences.py +++ b/tests/unit/test_preferences.py @@ -3,7 +3,7 @@ from searx.preferences import (EnumStringSetting, MapSetting, MissingArgumentExc from searx.testing import SearxTestCase -class PluginStub(object): +class PluginStub: def __init__(self, id, 
default_on): self.id = id @@ -28,13 +28,13 @@ class TestSettings(SearxTestCase): def test_map_setting_valid_default(self): setting = MapSetting(3, map={'dog': 1, 'bat': 2, 'cat': 3}) - self.assertEquals(setting.get_value(), 3) + self.assertEqual(setting.get_value(), 3) def test_map_setting_valid_choice(self): setting = MapSetting(3, map={'dog': 1, 'bat': 2, 'cat': 3}) - self.assertEquals(setting.get_value(), 3) + self.assertEqual(setting.get_value(), 3) setting.parse('bat') - self.assertEquals(setting.get_value(), 2) + self.assertEqual(setting.get_value(), 2) def test_enum_setting_invalid_initialization(self): with self.assertRaises(MissingArgumentException): @@ -56,13 +56,13 @@ class TestSettings(SearxTestCase): def test_enum_setting_valid_default(self): setting = EnumStringSetting(3, choices=[1, 2, 3]) - self.assertEquals(setting.get_value(), 3) + self.assertEqual(setting.get_value(), 3) def test_enum_setting_valid_choice(self): setting = EnumStringSetting(3, choices=[1, 2, 3]) - self.assertEquals(setting.get_value(), 3) + self.assertEqual(setting.get_value(), 3) setting.parse(2) - self.assertEquals(setting.get_value(), 2) + self.assertEqual(setting.get_value(), 2) # multiple choice settings def test_multiple_setting_invalid_initialization(self): @@ -80,48 +80,48 @@ class TestSettings(SearxTestCase): def test_multiple_setting_valid_default(self): setting = MultipleChoiceSetting(['3'], choices=['1', '2', '3']) - self.assertEquals(setting.get_value(), ['3']) + self.assertEqual(setting.get_value(), ['3']) def test_multiple_setting_valid_choice(self): setting = MultipleChoiceSetting(['3'], choices=['1', '2', '3']) - self.assertEquals(setting.get_value(), ['3']) + self.assertEqual(setting.get_value(), ['3']) setting.parse('2') - self.assertEquals(setting.get_value(), ['2']) + self.assertEqual(setting.get_value(), ['2']) # search language settings def test_lang_setting_valid_choice(self): setting = SearchLanguageSetting('all', choices=['all', 'de', 'en']) 
setting.parse('de') - self.assertEquals(setting.get_value(), 'de') + self.assertEqual(setting.get_value(), 'de') def test_lang_setting_invalid_choice(self): setting = SearchLanguageSetting('all', choices=['all', 'de', 'en']) setting.parse('xx') - self.assertEquals(setting.get_value(), 'all') + self.assertEqual(setting.get_value(), 'all') def test_lang_setting_old_cookie_choice(self): setting = SearchLanguageSetting('all', choices=['all', 'es', 'es-ES']) setting.parse('es_XA') - self.assertEquals(setting.get_value(), 'es') + self.assertEqual(setting.get_value(), 'es') def test_lang_setting_old_cookie_format(self): setting = SearchLanguageSetting('all', choices=['all', 'es', 'es-ES']) setting.parse('es_ES') - self.assertEquals(setting.get_value(), 'es-ES') + self.assertEqual(setting.get_value(), 'es-ES') # plugins settings def test_plugins_setting_all_default_enabled(self): plugin1 = PluginStub('plugin1', True) plugin2 = PluginStub('plugin2', True) setting = PluginsSetting(['3'], choices=[plugin1, plugin2]) - self.assertEquals(setting.get_enabled(), set(['plugin1', 'plugin2'])) + self.assertEqual(setting.get_enabled(), set(['plugin1', 'plugin2'])) def test_plugins_setting_few_default_enabled(self): plugin1 = PluginStub('plugin1', True) plugin2 = PluginStub('plugin2', False) plugin3 = PluginStub('plugin3', True) setting = PluginsSetting('name', choices=[plugin1, plugin2, plugin3]) - self.assertEquals(setting.get_enabled(), set(['plugin1', 'plugin3'])) + self.assertEqual(setting.get_enabled(), set(['plugin1', 'plugin3'])) class TestPreferences(SearxTestCase): diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index e4c0bdeed..86bf445e3 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -9,9 +9,9 @@ class TestQuery(SearxTestCase): query = RawTextQuery(query_text, []) query.parse_query() - self.assertEquals(query.getFullQuery(), query_text) - self.assertEquals(len(query.query_parts), 1) - self.assertEquals(len(query.languages), 0) + 
self.assertEqual(query.getFullQuery(), query_text) + self.assertEqual(len(query.query_parts), 1) + self.assertEqual(len(query.languages), 0) self.assertFalse(query.specific) def test_language_code(self): @@ -21,9 +21,9 @@ class TestQuery(SearxTestCase): query = RawTextQuery(full_query, []) query.parse_query() - self.assertEquals(query.getFullQuery(), full_query) - self.assertEquals(len(query.query_parts), 3) - self.assertEquals(len(query.languages), 1) + self.assertEqual(query.getFullQuery(), full_query) + self.assertEqual(len(query.query_parts), 3) + self.assertEqual(len(query.languages), 1) self.assertIn(language, query.languages) self.assertFalse(query.specific) @@ -34,8 +34,8 @@ class TestQuery(SearxTestCase): query = RawTextQuery(full_query, []) query.parse_query() - self.assertEquals(query.getFullQuery(), full_query) - self.assertEquals(len(query.query_parts), 3) + self.assertEqual(query.getFullQuery(), full_query) + self.assertEqual(len(query.query_parts), 3) self.assertIn('en', query.languages) self.assertFalse(query.specific) @@ -46,8 +46,8 @@ class TestQuery(SearxTestCase): query = RawTextQuery(full_query, []) query.parse_query() - self.assertEquals(query.getFullQuery(), full_query) - self.assertEquals(len(query.query_parts), 3) + self.assertEqual(query.getFullQuery(), full_query) + self.assertEqual(len(query.query_parts), 3) self.assertIn('all', query.languages) self.assertFalse(query.specific) @@ -58,9 +58,9 @@ class TestQuery(SearxTestCase): query = RawTextQuery(full_query, []) query.parse_query() - self.assertEquals(query.getFullQuery(), full_query) - self.assertEquals(len(query.query_parts), 1) - self.assertEquals(len(query.languages), 0) + self.assertEqual(query.getFullQuery(), full_query) + self.assertEqual(len(query.query_parts), 1) + self.assertEqual(len(query.languages), 0) self.assertFalse(query.specific) def test_timeout_below100(self): @@ -68,9 +68,9 @@ class TestQuery(SearxTestCase): query = RawTextQuery(query_text, []) query.parse_query() - 
self.assertEquals(query.getFullQuery(), query_text) - self.assertEquals(len(query.query_parts), 3) - self.assertEquals(query.timeout_limit, 3) + self.assertEqual(query.getFullQuery(), query_text) + self.assertEqual(len(query.query_parts), 3) + self.assertEqual(query.timeout_limit, 3) self.assertFalse(query.specific) def test_timeout_above100(self): @@ -78,9 +78,9 @@ class TestQuery(SearxTestCase): query = RawTextQuery(query_text, []) query.parse_query() - self.assertEquals(query.getFullQuery(), query_text) - self.assertEquals(len(query.query_parts), 3) - self.assertEquals(query.timeout_limit, 0.35) + self.assertEqual(query.getFullQuery(), query_text) + self.assertEqual(len(query.query_parts), 3) + self.assertEqual(query.timeout_limit, 0.35) self.assertFalse(query.specific) def test_timeout_above1000(self): @@ -88,9 +88,9 @@ class TestQuery(SearxTestCase): query = RawTextQuery(query_text, []) query.parse_query() - self.assertEquals(query.getFullQuery(), query_text) - self.assertEquals(len(query.query_parts), 3) - self.assertEquals(query.timeout_limit, 3.5) + self.assertEqual(query.getFullQuery(), query_text) + self.assertEqual(len(query.query_parts), 3) + self.assertEqual(query.timeout_limit, 3.5) self.assertFalse(query.specific) def test_timeout_invalid(self): @@ -99,8 +99,8 @@ class TestQuery(SearxTestCase): query = RawTextQuery(query_text, []) query.parse_query() - self.assertEquals(query.getFullQuery(), query_text) - self.assertEquals(len(query.query_parts), 1) - self.assertEquals(query.query_parts[0], query_text) - self.assertEquals(query.timeout_limit, None) + self.assertEqual(query.getFullQuery(), query_text) + self.assertEqual(len(query.query_parts), 1) + self.assertEqual(query.query_parts[0], query_text) + self.assertEqual(query.timeout_limit, None) self.assertFalse(query.specific) diff --git a/tests/unit/test_search.py b/tests/unit/test_search.py index ca9fe862a..a15d2c899 100644 --- a/tests/unit/test_search.py +++ b/tests/unit/test_search.py @@ -45,7 
+45,7 @@ class SearchTestCase(SearxTestCase): preferences=Preferences(['oscar'], ['general'], engines, [])) search = searx.search.Search(search_query) search.search() - self.assertEquals(search.actual_timeout, 3.0) + self.assertEqual(search.actual_timeout, 3.0) def test_timeout_query_above_default_nomax(self): searx.search.max_request_timeout = None @@ -54,7 +54,7 @@ class SearchTestCase(SearxTestCase): preferences=Preferences(['oscar'], ['general'], engines, [])) search = searx.search.Search(search_query) search.search() - self.assertEquals(search.actual_timeout, 3.0) + self.assertEqual(search.actual_timeout, 3.0) def test_timeout_query_below_default_nomax(self): searx.search.max_request_timeout = None @@ -63,7 +63,7 @@ class SearchTestCase(SearxTestCase): preferences=Preferences(['oscar'], ['general'], engines, [])) search = searx.search.Search(search_query) search.search() - self.assertEquals(search.actual_timeout, 1.0) + self.assertEqual(search.actual_timeout, 1.0) def test_timeout_query_below_max(self): searx.search.max_request_timeout = 10.0 @@ -72,7 +72,7 @@ class SearchTestCase(SearxTestCase): preferences=Preferences(['oscar'], ['general'], engines, [])) search = searx.search.Search(search_query) search.search() - self.assertEquals(search.actual_timeout, 5.0) + self.assertEqual(search.actual_timeout, 5.0) def test_timeout_query_above_max(self): searx.search.max_request_timeout = 10.0 @@ -81,7 +81,7 @@ class SearchTestCase(SearxTestCase): preferences=Preferences(['oscar'], ['general'], engines, [])) search = searx.search.Search(search_query) search.search() - self.assertEquals(search.actual_timeout, 10.0) + self.assertEqual(search.actual_timeout, 10.0) def test_query_private_engine_without_token(self): search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': PRIVATE_ENGINE_NAME}], @@ -89,7 +89,7 @@ class SearchTestCase(SearxTestCase): preferences=Preferences(['oscar'], ['general'], engines, [])) search = 
searx.search.Search(search_query) results = search.search() - self.assertEquals(results.results_length(), 0) + self.assertEqual(results.results_length(), 0) def test_query_private_engine_with_incorrect_token(self): preferences_with_tokens = Preferences(['oscar'], ['general'], engines, []) @@ -99,7 +99,7 @@ class SearchTestCase(SearxTestCase): preferences=preferences_with_tokens) search = searx.search.Search(search_query) results = search.search() - self.assertEquals(results.results_length(), 0) + self.assertEqual(results.results_length(), 0) def test_query_private_engine_with_correct_token(self): preferences_with_tokens = Preferences(['oscar'], ['general'], engines, []) @@ -109,7 +109,7 @@ class SearchTestCase(SearxTestCase): preferences=preferences_with_tokens) search = searx.search.Search(search_query) results = search.search() - self.assertEquals(results.results_length(), 1) + self.assertEqual(results.results_length(), 1) def test_external_bang(self): search_query = searx.query.SearchQuery('yes yes', diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index b09b9d414..5f98511c3 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -1,12 +1,8 @@ # -*- coding: utf-8 -*- import mock -import sys from searx.testing import SearxTestCase from searx import utils -if sys.version_info[0] == 3: - unicode = str - class TestUtils(SearxTestCase): @@ -34,9 +30,9 @@ class TestUtils(SearxTestCase): self.assertEqual(utils.highlight_content(content, None), content) content = 'a' - query = b'test' + query = 'test' self.assertEqual(utils.highlight_content(content, query), content) - query = b'a test' + query = 'a test' self.assertEqual(utils.highlight_content(content, query), content) def test_html_to_text(self): @@ -52,15 +48,15 @@ class TestUtils(SearxTestCase): """ - self.assertIsInstance(utils.html_to_text(html), unicode) + self.assertIsInstance(utils.html_to_text(html), str) self.assertIsNotNone(utils.html_to_text(html)) 
self.assertEqual(utils.html_to_text(html), "Test text") def test_prettify_url(self): data = (('https://searx.me/', 'https://searx.me/'), - (u'https://searx.me/ű', u'https://searx.me/ű'), + ('https://searx.me/ű', 'https://searx.me/ű'), ('https://searx.me/' + (100 * 'a'), 'https://searx.me/[...]aaaaaaaaaaaaaaaaa'), - (u'https://searx.me/' + (100 * u'ű'), u'https://searx.me/[...]űűűűűűűűűűűűűűűűű')) + ('https://searx.me/' + (100 * 'ű'), 'https://searx.me/[...]űűűűűűűűűűűűűűűűű')) for test_url, expected in data: self.assertEqual(utils.prettify_url(test_url, max_length=32), expected) @@ -108,12 +104,12 @@ class TestHTMLTextExtractor(SearxTestCase): def test_handle_charref(self): self.html_text_extractor.handle_charref('xF') - self.assertIn(u'\x0f', self.html_text_extractor.result) + self.assertIn('\x0f', self.html_text_extractor.result) self.html_text_extractor.handle_charref('XF') - self.assertIn(u'\x0f', self.html_text_extractor.result) + self.assertIn('\x0f', self.html_text_extractor.result) self.html_text_extractor.handle_charref('97') - self.assertIn(u'a', self.html_text_extractor.result) + self.assertIn('a', self.html_text_extractor.result) def test_handle_entityref(self): entity = 'test' diff --git a/tests/unit/test_webapp.py b/tests/unit/test_webapp.py index 8eed607e3..7dd465898 100644 --- a/tests/unit/test_webapp.py +++ b/tests/unit/test_webapp.py @@ -1,11 +1,11 @@ # -*- coding: utf-8 -*- import json +from urllib.parse import ParseResult from mock import Mock from searx import webapp from searx.testing import SearxTestCase from searx.search import Search -from searx.url_utils import ParseResult class ViewsTestCase(SearxTestCase): @@ -89,7 +89,7 @@ class ViewsTestCase(SearxTestCase): def test_index_json(self): result = self.app.post('/', data={'q': 'test', 'format': 'json'}) - result_dict = json.loads(result.data.decode('utf-8')) + result_dict = json.loads(result.data.decode()) self.assertEqual('test', result_dict['query']) 
self.assertEqual(len(result_dict['results']), 2) diff --git a/utils/fabfile.py b/utils/fabfile.py index 559e2ab6c..93f7fc536 100644 --- a/utils/fabfile.py +++ b/utils/fabfile.py @@ -1,5 +1,5 @@ from fabric.api import cd, run, sudo, put -from cStringIO import StringIO +from io import StringIO base_dir = '/usr/local' diff --git a/utils/fetch_currencies.py b/utils/fetch_currencies.py index 5605fb387..437c375db 100644 --- a/utils/fetch_currencies.py +++ b/utils/fetch_currencies.py @@ -1,11 +1,11 @@ # -*- coding: utf-8 -*- -from __future__ import print_function + import json import re import unicodedata import string -from urllib import urlencode +from urllib.parse import urlencode from requests import get languages = {'de', 'en', 'es', 'fr', 'hu', 'it', 'nl', 'jp'} @@ -39,7 +39,7 @@ def add_currency_name(name, iso4217): db_names = db['names'] - if not isinstance(iso4217, basestring): + if not isinstance(iso4217, str): print("problem", name, iso4217) return @@ -126,7 +126,7 @@ def wdq_query(query): url = url_wmflabs_template + query htmlresponse = get(url) jsonresponse = json.loads(htmlresponse.content) - qlist = map(add_q, jsonresponse.get('items', {})) + qlist = list(map(add_q, jsonresponse.get('items', {}))) error = jsonresponse.get('status', {}).get('error', None) if error is not None and error != 'OK': print("error for query '" + query + "' :" + error) @@ -150,12 +150,12 @@ for q in wmflabs_queries: wdq_query(q) # static -add_currency_name(u"euro", 'EUR') -add_currency_name(u"euros", 'EUR') -add_currency_name(u"dollar", 'USD') -add_currency_name(u"dollars", 'USD') -add_currency_name(u"peso", 'MXN') -add_currency_name(u"pesos", 'MXN') +add_currency_name("euro", 'EUR') +add_currency_name("euros", 'EUR') +add_currency_name("dollar", 'USD') +add_currency_name("dollars", 'USD') +add_currency_name("peso", 'MXN') +add_currency_name("pesos", 'MXN') # write f = open("currencies.json", "wb") diff --git a/utils/fetch_firefox_version.py b/utils/fetch_firefox_version.py index 
722c48229..997a752b3 100755 --- a/utils/fetch_firefox_version.py +++ b/utils/fetch_firefox_version.py @@ -9,9 +9,9 @@ path.append(realpath(dirname(realpath(__file__)) + '/../')) import json import requests import re +from urllib.parse import urlparse, urljoin from distutils.version import LooseVersion, StrictVersion from lxml import html -from searx.url_utils import urlparse, urljoin from searx import searx_dir URL = 'https://ftp.mozilla.org/pub/firefox/releases/' diff --git a/utils/fetch_languages.py b/utils/fetch_languages.py index 77ec0bf20..0fb9d9c18 100644 --- a/utils/fetch_languages.py +++ b/utils/fetch_languages.py @@ -174,14 +174,17 @@ def write_languages_file(languages): + '# this file is generated automatically by utils/update_search_languages.py\n'\ + '\nlanguage_codes = (' for code in sorted(languages): - file_content += '\n (u"' + code + '"'\ - + ', u"' + languages[code]['name'].split(' (')[0] + '"'\ - + ', u"' + languages[code].get('country', '') + '"'\ - + ', u"' + languages[code].get('english_name', '').split(' (')[0] + '"),' + if 'name' in languages[code]: + file_content += '\n ("' + code + '"'\ + + ', "' + languages[code]['name'].split(' (')[0] + '"'\ + + ', "' + languages[code].get('country', '') + '"'\ + + ', "' + languages[code].get('english_name', '').split(' (')[0] + '"),' + else: + print('ignore ',languages[code]) # remove last comma file_content = file_content[:-1] file_content += '\n)\n' - new_file.write(file_content.encode('utf8')) + new_file.write(file_content.encode()) new_file.close() diff --git a/utils/makefile.python b/utils/makefile.python index df16acbbf..6c6696964 100644 --- a/utils/makefile.python +++ b/utils/makefile.python @@ -69,11 +69,11 @@ python-help:: @echo ' py[un]install - [un]install python objects in editable mode' @echo ' upload-pypi - upload $(PYDIST)/* files to PyPi' @echo 'options:' - @echo ' make PY=2 [targets] => to eval targets with python 2 ($(PY))' - @echo ' make PIP_INST= => to set/unset pip install options 
($(PIP_INST))' - @echo ' make TEST=. => choose test from $(TEST_FOLDER) (default "." runs all)' - @echo ' make DEBUG= => target "debug": do not invoke PDB on errors' - @echo ' make PY_SETUP_EXTRAS => also install extras_require from setup.py \[develop,test\]' + @echo ' make PY=3.7 [targets] => to eval targets with python 3.7 ($(PY))' + @echo ' make PIP_INST= => to set/unset pip install options ($(PIP_INST))' + @echo ' make TEST=. => choose test from $(TEST_FOLDER) (default "." runs all)' + @echo ' make DEBUG= => target "debug": do not invoke PDB on errors' + @echo ' make PY_SETUP_EXTRAS => also install extras_require from setup.py \[develop,test\]' @echo ' when using target "pydebug", set breakpoints within py-source by adding::' @echo ' DEBUG()' diff --git a/utils/standalone_searx.py b/utils/standalone_searx.py index 7bc1d32ed..d43b474d7 100755 --- a/utils/standalone_searx.py +++ b/utils/standalone_searx.py @@ -56,7 +56,7 @@ args = parser.parse_args() # search results for the query form = { "q":args.query, - "categories":args.category.decode('utf-8'), + "categories":args.category.decode(), "pageno":str(args.pageno), "language":args.lang, "time_range":args.timerange @@ -101,4 +101,3 @@ result_container_json = { } sys.stdout = codecs.getwriter("UTF-8")(sys.stdout) sys.stdout.write(dumps(result_container_json, sort_keys=True, indent=4, ensure_ascii=False, encoding="utf-8", default=json_serial)) -