mirror of https://github.com/searxng/searxng.git
Drop Python 2 (1/n): remove unicode string and url_utils
commit 1022228d95
parent 272158944b
Makefile | 5
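The pattern repeated throughout this diff: the Python 2 compatibility layer is dropped in favour of the Python 3 standard library. `searx.url_utils` imports become `urllib.parse` imports, `unicode`/`u''` literals become plain `str`, and explicit `'utf-8'` arguments to `encode()`/`decode()` disappear because UTF-8 is the default codec in Python 3. A minimal before/after sketch of that idea (illustrative only; the names `raw` and the query string are hypothetical, not taken from this commit):

    # Before (Python 2 era, removed by this commit):
    #   from searx.url_utils import urlencode
    #   if sys.version_info[0] == 3:
    #       unicode = str
    #   query = u'{}'.format(raw.decode('utf-8')).encode('utf-8')

    # After (Python 3 only): urllib.parse replaces searx.url_utils,
    # str is already unicode, and UTF-8 is the default codec.
    from urllib.parse import urlencode

    raw = b'time zone berlin'                   # hypothetical bytes query
    query = '{}'.format(raw.decode()).encode()  # decode()/encode() default to UTF-8
    params = urlencode({'q': query})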
@@ -213,10 +213,6 @@ gecko.driver:
 PHONY += test test.sh test.pylint test.pep8 test.unit test.coverage test.robot
 test: buildenv test.pylint test.pep8 test.unit gecko.driver test.robot
 
-ifeq ($(PY),2)
-test.pylint:
-	@echo "LINT skip liniting py2"
-else
 # TODO: balance linting with pylint
 
 test.pylint: pyenvinstall
@@ -225,7 +221,6 @@ test.pylint: pyenvinstall
 		searx/testing.py \
 		searx/engines/gigablast.py \
 	)
-endif
 
 # ignored rules:
 # E402 module level import not at top of file
@@ -39,7 +39,7 @@ install_geckodriver() {
         return
     fi
     GECKODRIVER_VERSION="v0.24.0"
-    PLATFORM="`python -c "import six; import platform; six.print_(platform.system().lower(), platform.architecture()[0])"`"
+    PLATFORM="`python3 -c "import platform; print(platform.system().lower(), platform.architecture()[0])"`"
     case "$PLATFORM" in
         "linux 32bit" | "linux2 32bit") ARCH="linux32";;
         "linux 64bit" | "linux2 64bit") ARCH="linux64";;

@@ -136,7 +136,7 @@ docker_build() {
     # Check consistency between the git tag and the searx/version.py file
     # /!\ HACK : parse Python file with bash /!\
    # otherwise it is not possible build the docker image without all Python dependencies ( version.py loads __init__.py )
-    # SEARX_PYTHON_VERSION=$(python -c "import six; import searx.version; six.print_(searx.version.VERSION_STRING)")
+    # SEARX_PYTHON_VERSION=$(python3 -c "import six; import searx.version; six.print_(searx.version.VERSION_STRING)")
     SEARX_PYTHON_VERSION=$(cat searx/version.py | grep "\(VERSION_MAJOR\|VERSION_MINOR\|VERSION_BUILD\) =" | cut -d\= -f2 | sed -e 's/^[[:space:]]*//' | paste -sd "." -)
     if [ $(echo "$SEARX_GIT_VERSION" | cut -d- -f1) != "$SEARX_PYTHON_VERSION" ]; then
         echo "Inconsistency between the last git tag and the searx/version.py file"
@@ -21,12 +21,8 @@ from os import environ
 from os.path import realpath, dirname, join, abspath, isfile
 from io import open
 from ssl import OPENSSL_VERSION_INFO, OPENSSL_VERSION
-try:
-    from yaml import safe_load
-except:
-    from sys import exit, stderr
-    stderr.write('[E] install pyyaml\n')
-    exit(2)
+from yaml import safe_load
 
 searx_dir = abspath(dirname(__file__))
 engine_dir = dirname(realpath(__file__))
@@ -1,12 +1,8 @@
 from os import listdir
 from os.path import realpath, dirname, join, isdir
-from sys import version_info
 from searx.utils import load_module
 from collections import defaultdict
 
-if version_info[0] == 3:
-    unicode = str
-
 
 answerers_dir = dirname(realpath(__file__))

@@ -36,10 +32,10 @@ def ask(query):
     results = []
     query_parts = list(filter(None, query.query.split()))
 
-    if query_parts[0].decode('utf-8') not in answerers_by_keywords:
+    if query_parts[0].decode() not in answerers_by_keywords:
         return results
 
-    for answerer in answerers_by_keywords[query_parts[0].decode('utf-8')]:
+    for answerer in answerers_by_keywords[query_parts[0].decode()]:
         result = answerer(query)
         if result:
             results.append(result)
@@ -1,7 +1,6 @@
 import hashlib
 import random
 import string
-import sys
 import uuid
 from flask_babel import gettext
 

@@ -10,12 +9,7 @@ from flask_babel import gettext
 keywords = ('random',)
 
 random_int_max = 2**31
-
-if sys.version_info[0] == 2:
-    random_string_letters = string.lowercase + string.digits + string.uppercase
-else:
-    unicode = str
-    random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase
+random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase
 
 
 def random_characters():
@@ -24,25 +18,25 @@ def random_characters():
 
 
 def random_string():
-    return u''.join(random_characters())
+    return ''.join(random_characters())
 
 
 def random_float():
-    return unicode(random.random())
+    return str(random.random())
 
 
 def random_int():
-    return unicode(random.randint(-random_int_max, random_int_max))
+    return str(random.randint(-random_int_max, random_int_max))
 
 
 def random_sha256():
     m = hashlib.sha256()
     m.update(''.join(random_characters()).encode())
-    return unicode(m.hexdigest())
+    return str(m.hexdigest())
 
 
 def random_uuid():
-    return unicode(uuid.uuid4())
+    return str(uuid.uuid4())
 
 
 random_types = {b'string': random_string,

@@ -70,4 +64,4 @@ def answer(query):
 def self_info():
     return {'name': gettext('Random value generator'),
             'description': gettext('Generate different random values'),
-            'examples': [u'random {}'.format(x.decode('utf-8')) for x in random_types]}
+            'examples': ['random {}'.format(x.decode()) for x in random_types]}
@@ -1,11 +1,8 @@
-from sys import version_info
 from functools import reduce
 from operator import mul
 
 from flask_babel import gettext
 
-if version_info[0] == 3:
-    unicode = str
 
 keywords = ('min',
             'max',

@@ -44,7 +41,7 @@ def answer(query):
     if answer is None:
         return []
 
-    return [{'answer': unicode(answer)}]
+    return [{'answer': str(answer)}]
 
 
 # required answerer function
@@ -16,19 +16,16 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 '''
 
 
-import sys
 from lxml import etree
 from json import loads
+from urllib.parse import urlencode
 
 from searx import settings
 from searx.languages import language_codes
 from searx.engines import (
     categories, engines, engine_shortcuts
 )
 from searx.poolrequests import get as http_get
-from searx.url_utils import urlencode
-
-if sys.version_info[0] == 3:
-    unicode = str
 
 
 def get(*args, **kwargs):

@@ -85,22 +82,22 @@ def searx_bang(full_query):
     engine_query = full_query.getSearchQuery()[1:]
 
     for lc in language_codes:
-        lang_id, lang_name, country, english_name = map(unicode.lower, lc)
+        lang_id, lang_name, country, english_name = map(str.lower, lc)
 
         # check if query starts with language-id
         if lang_id.startswith(engine_query):
             if len(engine_query) <= 2:
-                results.append(u':{lang_id}'.format(lang_id=lang_id.split('-')[0]))
+                results.append(':{lang_id}'.format(lang_id=lang_id.split('-')[0]))
             else:
-                results.append(u':{lang_id}'.format(lang_id=lang_id))
+                results.append(':{lang_id}'.format(lang_id=lang_id))
 
         # check if query starts with language name
         if lang_name.startswith(engine_query) or english_name.startswith(engine_query):
-            results.append(u':{lang_name}'.format(lang_name=lang_name))
+            results.append(':{lang_name}'.format(lang_name=lang_name))
 
         # check if query starts with country
         if country.startswith(engine_query.replace('_', ' ')):
-            results.append(u':{country}'.format(country=country.replace(' ', '_')))
+            results.append(':{country}'.format(country=country.replace(' ', '_')))
 
     # remove duplicates
     result_set = set(results)
@@ -1,7 +1,8 @@
+from urllib.parse import quote, urljoin
 from lxml import html
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size
-from searx.url_utils import quote, urljoin
 
 url = 'https://1337x.to/'
 search_url = url + 'search/{search_term}/{pageno}/'

@@ -9,9 +9,9 @@
 @parse url, title, content, seed, leech, torrentfile
 """
 
+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 from searx.utils import get_torrent_size, int_or_zero
 
 # engine dependent config

@@ -63,7 +63,7 @@ def response(resp):
         except:
             pass
         # I didn't add download/seed/leech count since as I figured out they are generated randomly everytime
-        content = u'Category: "{category}".'
+        content = 'Category: "{category}".'
         content = content.format(category=category)
 
         results.append({'url': href,
@@ -9,9 +9,10 @@
 @parse url, title, thumbnail_src
 """
 
+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['it']

@@ -11,9 +11,9 @@
 @parse url, title
 """
 
+from urllib.parse import urlencode, urljoin
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode, urljoin
 
 # engine dependent config
 categories = ['it']
@@ -11,9 +11,9 @@
 More info on api: https://arxiv.org/help/api/user-manual
 """
 
+from urllib.parse import urlencode
 from lxml import html
 from datetime import datetime
-from searx.url_utils import urlencode
 
 
 categories = ['science']

@@ -30,7 +30,7 @@ def request(query, params):
     # basic search
     offset = (params['pageno'] - 1) * number_of_results
 
-    string_args = dict(query=query.decode('utf-8'),
+    string_args = dict(query=query.decode(),
                        offset=offset,
                        number_of_results=number_of_results)
 
@@ -13,10 +13,10 @@
 More info on api: http://base-search.net/about/download/base_interface.pdf
 """
 
+from urllib.parse import urlencode
 from lxml import etree
 from datetime import datetime
 import re
-from searx.url_utils import urlencode
 from searx.utils import searx_useragent
 
 
@@ -14,10 +14,10 @@
 """
 
 import re
+from urllib.parse import urlencode
 from lxml import html
 from searx import logger, utils
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 from searx.utils import match_language, gen_useragent, eval_xpath
 
 logger = logger.getChild('bing engine')

@@ -47,7 +47,7 @@ def request(query, params):
     else:
         lang = match_language(params['language'], supported_languages, language_aliases)
 
-    query = u'language:{} {}'.format(lang.split('-')[0].upper(), query.decode('utf-8')).encode('utf-8')
+    query = 'language:{} {}'.format(lang.split('-')[0].upper(), query.decode()).encode()
 
     search_path = search_string.format(
         query=urlencode({'q': query}),
@@ -12,10 +12,10 @@
 
 """
 
+from urllib.parse import urlencode
 from lxml import html
 from json import loads
 import re
-from searx.url_utils import urlencode
 from searx.utils import match_language
 
 from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases

@@ -91,7 +91,7 @@ def response(resp):
 
         # strip 'Unicode private use area' highlighting, they render to Tux
         # the Linux penguin and a standing diamond on my machine...
-        title = m.get('t', '').replace(u'\ue000', '').replace(u'\ue001', '')
+        title = m.get('t', '').replace('\ue000', '').replace('\ue001', '')
         results.append({'template': 'images.html',
                         'url': m['purl'],
                         'thumbnail_src': m['turl'],
@@ -13,10 +13,9 @@
 
 from datetime import datetime
 from dateutil import parser
+from urllib.parse import urlencode, urlparse, parse_qsl
 from lxml import etree
 from searx.utils import list_get, match_language
-from searx.url_utils import urlencode, urlparse, parse_qsl
-
 from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases
 
 # engine dependent config
@@ -12,7 +12,7 @@
 
 from json import loads
 from lxml import html
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.utils import match_language
 
 from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases

@@ -12,8 +12,8 @@
 
 from lxml import html
 from operator import itemgetter
+from urllib.parse import quote, urljoin
 from searx.engines.xpath import extract_text
-from searx.url_utils import quote, urljoin
 from searx.utils import get_torrent_size
 
 # engine dependent config
@@ -1,14 +1,11 @@
 import json
 import re
 import os
-import sys
 import unicodedata
 
 from io import open
 from datetime import datetime
 
-if sys.version_info[0] == 3:
-    unicode = str
-
 categories = []
 url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}'

@@ -20,7 +17,7 @@ db = 1
 
 
 def normalize_name(name):
-    name = name.decode('utf-8').lower().replace('-', ' ').rstrip('s')
+    name = name.decode().lower().replace('-', ' ').rstrip('s')
     name = re.sub(' +', ' ', name)
     return unicodedata.normalize('NFKD', name).lower()
 
@@ -14,7 +14,7 @@
 
 from json import loads
 from datetime import datetime
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.utils import match_language, html_to_text
 
 # engine dependent config

@@ -11,7 +11,7 @@
 """
 
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 
 # engine dependent config
 categories = ['music']
@@ -50,7 +50,7 @@ def response(resp):
         if url.startswith('http://'):
             url = 'https' + url[4:]
 
-        content = u'{} - {} - {}'.format(
+        content = '{} - {} - {}'.format(
             result['artist']['name'],
             result['album']['title'],
             result['title'])

@@ -14,8 +14,9 @@
 
 from lxml import html
 import re
+from urllib.parse import urlencode
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['images']
@@ -10,12 +10,12 @@
 """
 
 import re
+from urllib.parse import urljoin
 from lxml import html
 from searx.utils import is_valid_lang, eval_xpath
-from searx.url_utils import urljoin
 
 categories = ['general']
-url = u'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
+url = 'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
 weight = 100
 
 parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)

@@ -37,7 +37,7 @@ def request(query, params):
 
     params['url'] = url.format(from_lang=from_lang[2],
                                to_lang=to_lang[2],
-                               query=query.decode('utf-8'))
+                               query=query.decode())
 
     return params
 
@@ -10,14 +10,11 @@
 @parse url, title, content, magnetlink
 """
 
-from sys import version_info
+from urllib.parse import urljoin
 from lxml import html
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size
-from searx.url_utils import urljoin
-
-if version_info[0] == 3:
-    unicode = str
 
 categories = ['videos', 'music', 'files']
 paging = True

@@ -14,8 +14,8 @@ import random
 import string
 from dateutil import parser
 from json import loads
+from urllib.parse import urlencode
 from lxml import html
-from searx.url_utils import urlencode
 from datetime import datetime
 
 # engine dependent config
@@ -9,10 +9,10 @@
 # @stable yes
 # @parse (general) url, title, content
 
+from urllib.parse import urlencode
 from lxml.html import fromstring
 from searx.engines.xpath import extract_text
 from searx.utils import eval_xpath
-from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['general']  # TODO , 'images', 'music', 'videos', 'files'

@@ -15,9 +15,9 @@
 
 from lxml.html import fromstring
 from json import loads
+from urllib.parse import urlencode
 from searx.engines.xpath import extract_text
 from searx.poolrequests import get
-from searx.url_utils import urlencode
 from searx.utils import match_language, eval_xpath
 
 # engine dependent config
@@ -10,11 +10,11 @@ DuckDuckGo (definitions)
 """
 
 import json
+from urllib.parse import urlencode
 from lxml import html
 from re import compile
 from searx.engines.xpath import extract_text
 from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url, language_aliases
-from searx.url_utils import urlencode
 from searx.utils import html_to_text, match_language
 
 url = 'https://api.duckduckgo.com/'\

@@ -14,13 +14,13 @@
 """
 
 from json import loads
+from urllib.parse import urlencode
 from searx.engines.xpath import extract_text
 from searx.engines.duckduckgo import (
     _fetch_supported_languages, supported_languages_url,
     get_region_code, language_aliases
 )
 from searx.poolrequests import get
-from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['images']
@@ -10,9 +10,9 @@
 
 from lxml import html, etree
 import re
+from urllib.parse import quote, urljoin
 from searx.engines.xpath import extract_text
 from searx.utils import eval_xpath
-from searx.url_utils import quote, urljoin
 from searx import logger
 
 categories = ['general']

@@ -10,8 +10,8 @@
 """
 
 from lxml import html
+from urllib.parse import quote
 from searx.engines.xpath import extract_text
-from searx.url_utils import quote
 from searx.utils import eval_xpath
 
 categories = ['general']
@@ -9,9 +9,9 @@
 @parse url, title, content
 """
 
+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['files']

@@ -1,9 +1,6 @@
-from searx.url_utils import urlencode
-
-try:
-    from HTMLParser import HTMLParser
-except:
-    from html.parser import HTMLParser
+from html.parser import HTMLParser
+from urllib.parse import urlencode
 
 url = 'http://www.filecrop.com/'
 search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}'  # noqa
@@ -14,7 +14,7 @@
 """
 
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 
 categories = ['images']
 

@@ -15,8 +15,8 @@
 from json import loads
 from time import time
 import re
+from urllib.parse import urlencode
 from searx.engines import logger
-from searx.url_utils import urlencode
 from searx.utils import ecma_unescape, html_to_text
 
 logger = logger.getChild('flickr-noapi')
@@ -117,10 +117,10 @@ def response(resp):
             'img_format': img_format,
             'template': 'images.html'
         }
-        result['author'] = author.encode('utf-8', 'ignore').decode('utf-8')
-        result['source'] = source.encode('utf-8', 'ignore').decode('utf-8')
-        result['title'] = title.encode('utf-8', 'ignore').decode('utf-8')
-        result['content'] = content.encode('utf-8', 'ignore').decode('utf-8')
+        result['author'] = author.encode(errors='ignore').decode()
+        result['source'] = source.encode(errors='ignore').decode()
+        result['title'] = title.encode(errors='ignore').decode()
+        result['content'] = content.encode(errors='ignore').decode()
         results.append(result)
 
     return results
@@ -10,13 +10,10 @@
 @parse url, title, content, thumbnail, img_src
 """
 
-try:
-    from cgi import escape
-except:
-    from html import escape
+from html import escape
+from urllib.parse import urljoin, urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urljoin, urlencode
 
 # engine dependent config
 categories = ['it']

@@ -10,7 +10,7 @@ Frinkiac (Images)
 """
 
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 
 categories = ['images']
 
@@ -11,7 +11,7 @@ Genius
 """
 
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from datetime import datetime
 
 # engine dependent config

@@ -11,9 +11,9 @@
 @parse url, title
 """
 
+from urllib.parse import urlencode, urljoin
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode, urljoin
 
 # engine dependent config
 categories = ['it']
@@ -90,7 +90,7 @@ def request(query, params):
     # if our language is hosted on the main site, we need to add its name
     # to the query in order to narrow the results to that language
     if language in main_langs:
-        query += b' (' + (main_langs[language]).encode('utf-8') + b')'
+        query += b' (' + (main_langs[language]).encode() + b')'
 
     # prepare the request parameters
     query = urlencode({'search': query})

@@ -14,8 +14,8 @@
 
 import re
 from json import loads
+from urllib.parse import urlencode
 # from searx import logger
-from searx.url_utils import urlencode
 from searx.poolrequests import get
 
 # engine dependent config
@@ -11,7 +11,7 @@
 """
 
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 
 # engine dependent config
 categories = ['it']

@@ -18,11 +18,11 @@ Definitions`_.
 
 # pylint: disable=invalid-name, missing-function-docstring
 
+from urllib.parse import urlencode, urlparse
 from lxml import html
 from flask_babel import gettext
 from searx.engines.xpath import extract_text
 from searx import logger
-from searx.url_utils import urlencode, urlparse
 from searx.utils import match_language, eval_xpath
 
 logger = logger.getChild('google engine')
@@ -24,11 +24,10 @@ Definitions`_.
 
 """
 
-import urllib
+from urllib.parse import urlencode, urlparse, unquote
 from lxml import html
 from flask_babel import gettext
 from searx import logger
-from searx.url_utils import urlencode, urlparse
 from searx.utils import eval_xpath
 from searx.engines.xpath import extract_text
 

@@ -87,7 +86,7 @@ def scrap_img_by_id(script, data_id):
         if 'gstatic.com/images' in line and data_id in line:
             url_line = _script[i + 1]
             img_url = url_line.split('"')[1]
-            img_url = urllib.parse.unquote(img_url.replace(r'\u00', r'%'))
+            img_url = unquote(img_url.replace(r'\u00', r'%'))
             return img_url
 
 
@@ -10,9 +10,9 @@
 @parse url, title, content, publishedDate
 """
 
+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.google import _fetch_supported_languages, supported_languages_url
-from searx.url_utils import urlencode
 from searx.utils import match_language
 
 # search-url

@@ -12,9 +12,9 @@
 
 from datetime import date, timedelta
 from json import loads
+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 import re
 
 # engine dependent config
@@ -12,15 +12,12 @@
 # @todo embedded (needs some md5 from video page)
 
 from json import loads
+from urllib.parse import urlencode
 from lxml import html
 from dateutil import parser
+from html.parser import HTMLParser
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
-
-try:
-    from HTMLParser import HTMLParser
-except:
-    from html.parser import HTMLParser
 
 # engine dependent config
 categories = ['videos']

@@ -8,7 +8,7 @@
 # @stable yes
 # @parse url, title, content, publishedDate, thumbnail, embedded, author, length
 
-from searx.url_utils import quote_plus
+from urllib.parse import quote_plus
 from dateutil import parser
 import time
 
@@ -1,11 +1,8 @@
 from collections import Iterable
 from json import loads
-from sys import version_info
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.utils import to_string
 
-if version_info[0] == 3:
-    unicode = str
-
 search_url = None
 url_query = None

@@ -37,8 +34,6 @@ def iterate(iterable):
 def is_iterable(obj):
     if type(obj) == str:
         return False
-    if type(obj) == unicode:
-        return False
     return isinstance(obj, Iterable)
 
 
@@ -12,9 +12,9 @@
 
 from lxml import html
 from operator import itemgetter
+from urllib.parse import quote, urljoin
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size, convert_str_to_int
-from searx.url_utils import quote, urljoin
 
 # engine dependent config
 categories = ['videos', 'music', 'files']

@@ -14,7 +14,7 @@
 
 from json import loads
 from string import Formatter
-from searx.url_utils import urlencode, quote
+from urllib.parse import urlencode, quote
 
 # engine dependent config
 categories = ['general']
@@ -79,7 +79,7 @@ def response(resp):
         if result.get('snippet', '').startswith('#REDIRECT'):
             continue
         url = base_url.format(language=resp.search_params['language']) +\
-            'wiki/' + quote(result['title'].replace(' ', '_').encode('utf-8'))
+            'wiki/' + quote(result['title'].replace(' ', '_').encode())
 
         # append result
         results.append({'url': url,

@@ -12,8 +12,7 @@ Microsoft Academic (Science)
 from datetime import datetime
 from json import loads
 from uuid import uuid4
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.utils import html_to_text
 
 categories = ['images']
@@ -12,7 +12,7 @@
 
 from json import loads
 from dateutil import parser
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 
 # engine dependent config
 categories = ['music']

@@ -10,8 +10,8 @@
 """
 
 from lxml import html
+from urllib.parse import urlencode
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 from searx.utils import get_torrent_size, int_or_zero
 
 # engine dependent config
@@ -30,8 +30,8 @@ route_re = re.compile('(?:from )?(.+) to (.+)')
 # do search-request
 def request(query, params):
 
-    params['url'] = base_url + search_string.format(query=query.decode('utf-8'))
-    params['route'] = route_re.match(query.decode('utf-8'))
+    params['url'] = base_url + search_string.format(query=query.decode())
+    params['route'] = route_re.match(query.decode())
 
     return params
 

@@ -52,7 +52,7 @@ def response(resp):
         if 'display_name' not in r:
             continue
 
-        title = r['display_name'] or u''
+        title = r['display_name'] or ''
         osm_type = r.get('osm_type', r.get('type'))
         url = result_base_url.format(osm_type=osm_type,
                                      osm_id=r['osm_id'])

@@ -64,7 +64,7 @@ def response(resp):
 
         # if no geojson is found and osm_type is a node, add geojson Point
         if not geojson and osm_type == 'node':
-            geojson = {u'type': u'Point', u'coordinates': [r['lon'], r['lat']]}
+            geojson = {'type': 'Point', 'coordinates': [r['lon'], r['lat']]}
 
         address_raw = r.get('address')
         address = {}
@@ -14,7 +14,7 @@
 
 from json import loads
 from datetime import datetime
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.utils import html_to_text
 
 # engine dependent config

@@ -11,8 +11,8 @@
 """
 
 from json import loads
+from urllib.parse import urlencode
 from searx.utils import searx_useragent
-from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['map']
@@ -11,7 +11,9 @@
 from json import loads
 from datetime import datetime
 from operator import itemgetter
-from searx.url_utils import quote
+from urllib.parse import quote, urljoin
+from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size
 
 # engine dependent config

@@ -62,8 +64,8 @@ def response(resp):
     # parse results
     for result in search_res:
         link = url + "description.php?id=" + result["id"]
-        magnetlink = "magnet:?xt=urn:btih:" + result["info_hash"] + \
-            "&dn=" + result["name"] + "&tr=" + "&tr=".join(trackers)
+        magnetlink = "magnet:?xt=urn:btih:" + result["info_hash"] + "&dn=" + result["name"]\
+            + "&tr=" + "&tr=".join(trackers)
 
         params = {
             "url": link,
@@ -14,7 +14,7 @@
 from flask_babel import gettext
 from lxml import etree
 from datetime import datetime
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.poolrequests import get
 
 

@@ -12,9 +12,9 @@
 
 from datetime import datetime
 from json import loads
-from searx.utils import html_to_text
-from searx.url_utils import urlencode
-from searx.utils import match_language
+from urllib.parse import urlencode
+from searx.utils import html_to_text, match_language
 
 # engine dependent config
 categories = None
@@ -12,7 +12,7 @@
 
 import json
 from datetime import datetime
-from searx.url_utils import urlencode, urljoin, urlparse
+from urllib.parse import urlencode, urljoin, urlparse
 
 # engine dependent config
 categories = ['general', 'images', 'news', 'social media']

@@ -11,7 +11,7 @@
 """
 
 from json import loads, dumps
-from searx.utils import html_to_text
+from urllib.parse import html_to_text
 
 # engine dependent config
 categories = ['science']
@@ -29,7 +29,7 @@ def request(query, params):
     params['url'] = search_url
     params['method'] = 'POST'
     params['headers']['Content-type'] = "application/json"
-    params['data'] = dumps({"query": query.decode('utf-8'),
+    params['data'] = dumps({"query": query.decode(),
                             "searchField": "ALL",
                             "sortDirection": "ASC",
                             "sortOrder": "RELEVANCY",

@@ -11,7 +11,7 @@
 """
 
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 
 
 # engine dependent config

@@ -11,7 +11,7 @@
 """
 
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 
 # engine dependent config
 categories = ['it']
@@ -11,7 +11,7 @@
 from lxml import html
 from json import loads
 from operator import itemgetter
-from searx.url_utils import quote, urljoin
+from urllib.parse import quote, urljoin
 from searx.engines.xpath import extract_text
 
 

@@ -14,14 +14,11 @@ import re
 from json import loads
 from lxml import html
 from dateutil import parser
+from io import StringIO
+from urllib.parse import quote_plus, urlencode
 from searx import logger
 from searx.poolrequests import get as http_get
-from searx.url_utils import quote_plus, urlencode
-
-try:
-    from cStringIO import StringIO
-except:
-    from io import StringIO
 
 # engine dependent config
 categories = ['music']
@@ -61,7 +58,7 @@ def get_client_id():
     # gets app_js and searches for the clientid
     response = http_get(app_js_url)
     if response.ok:
-        cids = cid_re.search(response.content.decode("utf-8"))
+        cids = cid_re.search(response.content.decode())
         if cids is not None and len(cids.groups()):
             return cids.groups()[0]
     logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")

@@ -11,7 +11,7 @@
 """
 
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 import requests
 import base64
 
@@ -39,8 +39,8 @@ def request(query, params):
         'https://accounts.spotify.com/api/token',
         data={'grant_type': 'client_credentials'},
         headers={'Authorization': 'Basic ' + base64.b64encode(
-            "{}:{}".format(api_client_id, api_client_secret).encode('utf-8')
-        ).decode('utf-8')}
+            "{}:{}".format(api_client_id, api_client_secret).encode()
+        ).decode()}
     )
     j = loads(r.text)
     params['headers'] = {'Authorization': 'Bearer {}'.format(j.get('access_token'))}

@@ -59,7 +59,7 @@ def response(resp):
         if result['type'] == 'track':
             title = result['name']
             url = result['external_urls']['spotify']
-            content = u'{} - {} - {}'.format(
+            content = '{} - {} - {}'.format(
                 result['artists'][0]['name'],
                 result['album']['name'],
                 result['name'])
@@ -10,9 +10,9 @@
 @parse url, title, content
 """
 
+from urllib.parse import urlencode, urljoin
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode, urljoin
 
 # engine dependent config
 categories = ['it']

@@ -11,10 +11,10 @@
 """
 
 import re
+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
 from datetime import datetime
-from searx.url_utils import urlencode
 from searx.utils import get_torrent_size, int_or_zero
 
 # engine dependent config
@@ -12,10 +12,10 @@
 """
 
 import re
+from urllib.parse import urlencode
 from lxml import html
 from datetime import datetime
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 from searx.utils import get_torrent_size
 
 # engine dependent config

@@ -12,8 +12,8 @@ import re
 from searx.utils import is_valid_lang
 
 categories = ['general']
-url = u'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
-web_url = u'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'
+url = 'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
+web_url = 'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'
 weight = 100
 
 parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) (.{2,})$', re.I)
@ -39,9 +39,9 @@ def request(query, params):
|
||||||
key_form = ''
|
key_form = ''
|
||||||
params['url'] = url.format(from_lang=from_lang[1],
|
params['url'] = url.format(from_lang=from_lang[1],
|
||||||
to_lang=to_lang[1],
|
to_lang=to_lang[1],
|
||||||
query=query.decode('utf-8'),
|
query=query.decode(),
|
||||||
key=key_form)
|
key=key_form)
|
||||||
params['query'] = query.decode('utf-8')
|
params['query'] = query.decode()
|
||||||
params['from_lang'] = from_lang
|
params['from_lang'] = from_lang
|
||||||
params['to_lang'] = to_lang
|
params['to_lang'] = to_lang
|
||||||
|
|
||||||
|
|
|
@@ -12,10 +12,10 @@
+from urllib.parse import urlencode, urljoin
-from searx.url_utils import urlencode, urljoin

@@ -10,7 +10,7 @@
-from searx.url_utils import urlencode, urlparse, urlunparse, parse_qsl
+from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl

@@ -12,9 +12,9 @@
+from urllib.parse import urlencode
-from searx.url_utils import urlencode

@@ -15,9 +15,9 @@ from searx import logger
-from searx.url_utils import urlencode
+from urllib.parse import urlencode

@@ -76,7 +76,7 @@ def request(query, params):
-html = fromstring(resp.content.decode("utf-8"), parser=htmlparser)
+html = fromstring(resp.content.decode(), parser=htmlparser)

@@ -89,7 +89,7 @@ def response(resp):
-jsonresponse = loads(htmlresponse.content.decode("utf-8"))
+jsonresponse = loads(htmlresponse.content.decode())

@@ -453,16 +453,16 @@ def get_geolink(result):
-lat = int(latitude[:latitude.find(u'°')])
+lat = int(latitude[:latitude.find('°')])
-lat += int(latitude[latitude.find(u'°') + 1:latitude.find('\'')] or 0) / 60.0
+lat += int(latitude[latitude.find('°') + 1:latitude.find('\'')] or 0) / 60.0
-lon = int(longitude[:longitude.find(u'°')])
+lon = int(longitude[:longitude.find('°')])
-lon += int(longitude[longitude.find(u'°') + 1:longitude.find('\'')] or 0) / 60.0
+lon += int(longitude[longitude.find('°') + 1:longitude.find('\'')] or 0) / 60.0

@@ -10,13 +10,13 @@
+from urllib.parse import quote
-from searx.url_utils import quote
-search_url = u'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
+search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'

@@ -9,7 +9,7 @@
-from searx.url_utils import urlencode
+from urllib.parse import urlencode

@@ -45,15 +45,15 @@ def request(query, params):
-pua_chars = {u'\uf522': u'\u2192', # rigth arrow
+pua_chars = {'\uf522': '\u2192', # rigth arrow
-u'\uf7b1': u'\u2115', # set of natural numbers
+'\uf7b1': '\u2115', # set of natural numbers
-u'\uf7b4': u'\u211a', # set of rational numbers
+'\uf7b4': '\u211a', # set of rational numbers
-u'\uf7b5': u'\u211d', # set of real numbers
+'\uf7b5': '\u211d', # set of real numbers
-u'\uf7bd': u'\u2124', # set of integer numbers
+'\uf7bd': '\u2124', # set of integer numbers
-u'\uf74c': 'd', # differential
+'\uf74c': 'd', # differential
-u'\uf74d': u'\u212f', # euler's number
+'\uf74d': '\u212f', # euler's number
-u'\uf74e': 'i', # imaginary number
+'\uf74e': 'i', # imaginary number
-u'\uf7d9': '='} # equals sign
+'\uf7d9': '='} # equals sign

@@ -10,9 +10,9 @@
+from urllib.parse import urlencode
-from searx.url_utils import urlencode

@@ -11,7 +11,7 @@
-from searx.url_utils import urlencode, urljoin
+from urllib.parse import urlencode, urljoin

@@ -1,7 +1,7 @@
+from urllib.parse import unquote, urlencode, urljoin, urlparse
-from searx.url_utils import unquote, urlencode, urljoin, urlparse

@@ -56,7 +56,7 @@ def extract_url(xpath_results, search_url):
-url = u'{0}:{1}'.format(parsed_search_url.scheme or 'http', url)
+url = '{0}:{1}'.format(parsed_search_url.scheme or 'http', url)

@@ -86,7 +86,7 @@ def normalize_url(url):
-return unquote(p[mark + 3:]).decode('utf-8')
+return unquote(p[mark + 3:]).decode()

@@ -14,7 +14,7 @@
-from searx.url_utils import urlencode
+from urllib.parse import urlencode

@@ -11,9 +11,9 @@
+from urllib.parse import unquote, urlencode
-from searx.url_utils import unquote, urlencode

@@ -11,13 +11,13 @@
+from urllib.parse import urlencode
-from searx.url_utils import urlencode

@@ -58,7 +58,7 @@ def request(query, params):
-return re.sub(u"\\;\\_ylt\\=.+$", "", url)
+return re.sub("\\;\\_ylt\\=.+$", "", url)

@@ -9,9 +9,9 @@
+from urllib.parse import urlencode
-from searx.url_utils import urlencode

@@ -11,8 +11,8 @@
+from urllib.parse import quote
-from searx.url_utils import quote

@@ -10,7 +10,7 @@
-from searx.url_utils import urlencode
+from urllib.parse import urlencode

@@ -10,9 +10,9 @@
+from urllib.parse import quote_plus
-from searx.url_utils import quote_plus

@@ -23,7 +23,7 @@ def get_bang_url(search_query):
-query = search_query.query.decode('utf-8', 'ignore')
+query = search_query.query.decode(errors='ignore')

@@ -3,73 +3,73 @@
-(u"af-NA", u"Afrikaans", u"", u"Afrikaans"),
+("af-NA", "Afrikaans", "", "Afrikaans"),
-(u"ar-SA", u"العربية", u"", u"Arabic"),
+("ar-SA", "العربية", "", "Arabic"),
-(u"be-BY", u"Беларуская", u"", u"Belarusian"),
+("be-BY", "Беларуская", "", "Belarusian"),
-(u"bg-BG", u"Български", u"", u"Bulgarian"),
+("bg-BG", "Български", "", "Bulgarian"),
-(u"ca-AD", u"Català", u"", u"Catalan"),
+("ca-AD", "Català", "", "Catalan"),
-(u"cs-CZ", u"Čeština", u"", u"Czech"),
+("cs-CZ", "Čeština", "", "Czech"),
-(u"da-DK", u"Dansk", u"", u"Danish"),
+("da-DK", "Dansk", "", "Danish"),
-(u"de", u"Deutsch", u"", u"German"),
+("de", "Deutsch", "", "German"),
-(u"de-AT", u"Deutsch", u"Österreich", u"German"),
+("de-AT", "Deutsch", "Österreich", "German"),
-(u"de-CH", u"Deutsch", u"Schweiz", u"German"),
+("de-CH", "Deutsch", "Schweiz", "German"),
-(u"de-DE", u"Deutsch", u"Deutschland", u"German"),
+("de-DE", "Deutsch", "Deutschland", "German"),
-(u"el-GR", u"Ελληνικά", u"", u"Greek"),
+("el-GR", "Ελληνικά", "", "Greek"),
-(u"en", u"English", u"", u"English"),
+("en", "English", "", "English"),
-(u"en-AU", u"English", u"Australia", u"English"),
+("en-AU", "English", "Australia", "English"),
-(u"en-CA", u"English", u"Canada", u"English"),
+("en-CA", "English", "Canada", "English"),
-(u"en-GB", u"English", u"United Kingdom", u"English"),
+("en-GB", "English", "United Kingdom", "English"),
-(u"en-IE", u"English", u"Ireland", u"English"),
+("en-IE", "English", "Ireland", "English"),
-(u"en-IN", u"English", u"India", u"English"),
+("en-IN", "English", "India", "English"),
-(u"en-NZ", u"English", u"New Zealand", u"English"),
+("en-NZ", "English", "New Zealand", "English"),
-(u"en-PH", u"English", u"Philippines", u"English"),
+("en-PH", "English", "Philippines", "English"),
-(u"en-SG", u"English", u"Singapore", u"English"),
+("en-SG", "English", "Singapore", "English"),
-(u"en-US", u"English", u"United States", u"English"),
+("en-US", "English", "United States", "English"),
-(u"es", u"Español", u"", u"Spanish"),
+("es", "Español", "", "Spanish"),
-(u"es-AR", u"Español", u"Argentina", u"Spanish"),
+("es-AR", "Español", "Argentina", "Spanish"),
-(u"es-CL", u"Español", u"Chile", u"Spanish"),
+("es-CL", "Español", "Chile", "Spanish"),
-(u"es-ES", u"Español", u"España", u"Spanish"),
+("es-ES", "Español", "España", "Spanish"),
-(u"es-MX", u"Español", u"México", u"Spanish"),
+("es-MX", "Español", "México", "Spanish"),
-(u"et-EE", u"Eesti", u"", u"Estonian"),
+("et-EE", "Eesti", "", "Estonian"),
-(u"fa-IR", u"فارسی", u"", u"Persian"),
+("fa-IR", "فارسی", "", "Persian"),
-(u"fi-FI", u"Suomi", u"", u"Finnish"),
+("fi-FI", "Suomi", "", "Finnish"),
-(u"fr", u"Français", u"", u"French"),
+("fr", "Français", "", "French"),
-(u"fr-BE", u"Français", u"Belgique", u"French"),
+("fr-BE", "Français", "Belgique", "French"),
-(u"fr-CA", u"Français", u"Canada", u"French"),
+("fr-CA", "Français", "Canada", "French"),
-(u"fr-CH", u"Français", u"Suisse", u"French"),
+("fr-CH", "Français", "Suisse", "French"),
-(u"fr-FR", u"Français", u"France", u"French"),
+("fr-FR", "Français", "France", "French"),
-(u"he-IL", u"עברית", u"", u"Hebrew"),
+("he-IL", "עברית", "", "Hebrew"),
-(u"hr-HR", u"Hrvatski", u"", u"Croatian"),
+("hr-HR", "Hrvatski", "", "Croatian"),
-(u"hu-HU", u"Magyar", u"", u"Hungarian"),
+("hu-HU", "Magyar", "", "Hungarian"),
-(u"hy-AM", u"Հայերեն", u"", u"Armenian"),
+("hy-AM", "Հայերեն", "", "Armenian"),
-(u"id-ID", u"Indonesia", u"", u"Indonesian"),
+("id-ID", "Indonesia", "", "Indonesian"),
-(u"is-IS", u"Íslenska", u"", u"Icelandic"),
+("is-IS", "Íslenska", "", "Icelandic"),
-(u"it-IT", u"Italiano", u"", u"Italian"),
+("it-IT", "Italiano", "", "Italian"),
-(u"ja-JP", u"日本語", u"", u"Japanese"),
+("ja-JP", "日本語", "", "Japanese"),
-(u"ko-KR", u"한국어", u"", u"Korean"),
+("ko-KR", "한국어", "", "Korean"),
-(u"lt-LT", u"Lietuvių", u"", u"Lithuanian"),
+("lt-LT", "Lietuvių", "", "Lithuanian"),
-(u"lv-LV", u"Latviešu", u"", u"Latvian"),
+("lv-LV", "Latviešu", "", "Latvian"),
-(u"ms-MY", u"Melayu", u"", u"Malay"),
+("ms-MY", "Melayu", "", "Malay"),
-(u"nb-NO", u"Norsk Bokmål", u"", u"Norwegian Bokmål"),
+("nb-NO", "Norsk Bokmål", "", "Norwegian Bokmål"),
-(u"nl", u"Nederlands", u"", u"Dutch"),
+("nl", "Nederlands", "", "Dutch"),
-(u"nl-BE", u"Nederlands", u"België", u"Dutch"),
+("nl-BE", "Nederlands", "België", "Dutch"),
-(u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
+("nl-NL", "Nederlands", "Nederland", "Dutch"),
-(u"pl-PL", u"Polski", u"", u"Polish"),
+("pl-PL", "Polski", "", "Polish"),
-(u"pt", u"Português", u"", u"Portuguese"),
+("pt", "Português", "", "Portuguese"),
-(u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
+("pt-BR", "Português", "Brasil", "Portuguese"),
-(u"pt-PT", u"Português", u"Portugal", u"Portuguese"),
+("pt-PT", "Português", "Portugal", "Portuguese"),
-(u"ro-RO", u"Română", u"", u"Romanian"),
+("ro-RO", "Română", "", "Romanian"),
-(u"ru-RU", u"Русский", u"", u"Russian"),
+("ru-RU", "Русский", "", "Russian"),
-(u"sk-SK", u"Slovenčina", u"", u"Slovak"),
+("sk-SK", "Slovenčina", "", "Slovak"),
-(u"sl-SI", u"Slovenščina", u"", u"Slovenian"),
+("sl-SI", "Slovenščina", "", "Slovenian"),
-(u"sr-RS", u"Srpski", u"", u"Serbian"),
+("sr-RS", "Srpski", "", "Serbian"),
-(u"sv-SE", u"Svenska", u"", u"Swedish"),
+("sv-SE", "Svenska", "", "Swedish"),
-(u"sw-KE", u"Kiswahili", u"", u"Swahili"),
+("sw-KE", "Kiswahili", "", "Swahili"),
-(u"th-TH", u"ไทย", u"", u"Thai"),
+("th-TH", "ไทย", "", "Thai"),
-(u"tr-TR", u"Türkçe", u"", u"Turkish"),
+("tr-TR", "Türkçe", "", "Turkish"),
-(u"uk-UA", u"Українська", u"", u"Ukrainian"),
+("uk-UA", "Українська", "", "Ukrainian"),
-(u"vi-VN", u"Tiếng Việt", u"", u"Vietnamese"),
+("vi-VN", "Tiếng Việt", "", "Vietnamese"),
-(u"zh", u"中文", u"", u"Chinese"),
+("zh", "中文", "", "Chinese"),
-(u"zh-CN", u"中文", u"中国", u"Chinese"),
+("zh-CN", "中文", "中国", "Chinese"),
-(u"zh-TW", u"中文", u"台灣", u"Chinese")
+("zh-TW", "中文", "台灣", "Chinese")

@@ -20,13 +20,10 @@ from importlib import import_module
-from sys import version_info
-if version_info[0] == 3:
-unicode = str

@@ -38,8 +35,8 @@ from searx.plugins import (oa_doi_rewrite,
-required_attrs = (('name', (str, unicode)),
+required_attrs = (('name', str),
-('description', (str, unicode)),
+('description', str),

@@ -16,17 +16,14 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
-import sys
+from urllib.parse import urlparse
-from searx.url_utils import urlparse
-if sys.version_info[0] == 3:
-unicode = str

@@ -1,6 +1,6 @@
+from urllib.parse import urlparse, parse_qsl
-from searx.url_utils import urlparse, parse_qsl

@@ -17,7 +17,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
-from searx.url_utils import urlunparse, parse_qsl, urlencode
+from urllib.parse import urlunparse, parse_qsl, urlencode

@@ -6,16 +6,11 @@
-from sys import version
+from urllib.parse import parse_qs, urlencode
-from searx.url_utils import parse_qs, urlencode
-if version[0] == '3':
-# pylint: disable=invalid-name
-unicode = str

@@ -402,14 +397,14 @@ class Preferences(object):
-return urlsafe_b64encode(compress(urlencode(settings_kv).encode('utf-8'))).decode('utf-8')
+return urlsafe_b64encode(compress(urlencode(settings_kv).encode())).decode()
-decoded_data = decompress(urlsafe_b64decode(input_data.encode('utf-8')))
+decoded_data = decompress(urlsafe_b64decode(input_data.encode()))
-dict_data[x.decode('utf8')] = y[0].decode('utf8')
+dict_data[x.decode()] = y[0].decode()

@@ -17,15 +17,13 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
+import re
 from searx.languages import language_codes
-import re
-import sys
-if sys.version_info[0] == 3:
-unicode = str

@@ -93,7 +91,7 @@ class RawTextQuery(object):
-lang_id, lang_name, country, english_name = map(unicode.lower, lc)
+lang_id, lang_name, country, english_name = map(str.lower, lc)

@@ -177,7 +175,7 @@
-return u''.join(self.query_parts)
+return ''.join(self.query_parts)

@@ -185,7 +183,7 @@ class SearchQuery(object):
-self.query = query.encode('utf-8')
+self.query = query.encode()

@@ -1,14 +1,11 @@
-import sys
+from urllib.parse import urlparse, unquote
-from searx.url_utils import urlparse, unquote
-if sys.version_info[0] == 3:
-basestring = str

@@ -16,7 +13,7 @@ WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
-if isinstance(content, basestring):
+if isinstance(content, str):

@@ -161,11 +158,11 @@ class ResultContainer(object):
-if 'url' in result and not isinstance(result['url'], basestring):
+if 'url' in result and not isinstance(result['url'], str):
-elif 'title' in result and not isinstance(result['title'], basestring):
+elif 'title' in result and not isinstance(result['title'], str):
-elif 'content' in result and not isinstance(result['content'], basestring):
+elif 'content' in result and not isinstance(result['content'], str):

@@ -20,8 +20,8 @@ import sys
+from _thread import start_new_thread
-import six

@@ -37,13 +37,6 @@ from searx import logger
-try:
-from thread import start_new_thread
-except:
-from _thread import start_new_thread
-if sys.version_info[0] == 3:
-unicode = str

@@ -355,11 +348,11 @@ def get_search_query_from_webapp(preferences, form):
-query_categories.extend(categ for categ in map(unicode.strip, pd.split(',')) if categ in categories)
+query_categories.extend(categ for categ in map(str.strip, pd.split(',')) if categ in categories)
-for engine in map(unicode.strip, pd.split(',')) if engine in engines]
+for engine in map(str.strip, pd.split(',')) if engine in engines]

@@ -434,7 +427,7 @@ class Search(object):
-if isinstance(self.result_container.redirect_url, six.string_types):
+if isinstance(self.result_container.redirect_url, str):

@@ -17,7 +17,7 @@ from unittest2 import TestCase
-__name__ = u'SearxTestLayer'
+__name__ = 'SearxTestLayer'

@@ -66,7 +66,7 @@ class SearxRobotLayer():
-print(self.server.stdout.read1(1024).decode('utf-8'))
+print(self.server.stdout.read1(1024).decode())

@@ -1,30 +0,0 @@
-from sys import version_info
-
-if version_info[0] == 2:
-from urllib import quote, quote_plus, unquote, urlencode
-from urlparse import parse_qs, parse_qsl, urljoin, urlparse, urlunparse, ParseResult
-else:
-from urllib.parse import (
-parse_qs,
-parse_qsl,
-quote,
-quote_plus,
-unquote,
-urlencode,
-urljoin,
-urlparse,
-urlunparse,
-ParseResult
-)
-
-
-__export__ = (parse_qs,
-parse_qsl,
-quote,
-quote_plus,
-unquote,
-urlencode,
-urljoin,
-urlparse,
-urlunparse,
-ParseResult)

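Editorial note: with searx/url_utils.py deleted above, the modules touched by this commit import the helpers straight from the standard library, since only Python 3 is supported. A minimal sketch of the replacement pattern (illustrative only; the example URL and query values are made up):

    from urllib.parse import urlencode, urlparse, quote

    assert urlencode({'q': 'searx'}) == 'q=searx'
    assert urlparse('https://example.org/search?q=1').netloc == 'example.org'
    assert quote('a b') == 'a%20b'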
@@ -1,21 +1,22 @@
+import os
+import sys
-import os
+import json
-from babel.core import get_global
-from babel.dates import format_date
-from io import open
+from io import open, StringIO
+from html.parser import HTMLParser
-import sys
+from babel.core import get_global
-import json
+from babel.dates import format_date

@@ -23,23 +24,6 @@ from searx.languages import language_codes
-try:
-from cStringIO import StringIO
-except:
-from io import StringIO
-try:
-from HTMLParser import HTMLParser
-except:
-from html.parser import HTMLParser
-if sys.version_info[0] == 3:
-unichr = chr
-unicode = str
-IS_PY2 = False
-basestring = str
-else:
-IS_PY2 = True

@@ -75,19 +59,19 @@ def highlight_content(content, query):
-query = query.decode('utf-8')
+query = query.decode()
-query_regex = u'({0})'.format(re.escape(query))
+query_regex = '({0})'.format(re.escape(query))
-regex_parts.append(u'\\W+{0}\\W+'.format(re.escape(chunk)))
+regex_parts.append('\\W+{0}\\W+'.format(re.escape(chunk)))
-regex_parts.append(u'{0}'.format(re.escape(chunk)))
+regex_parts.append('{0}'.format(re.escape(chunk)))
-query_regex = u'({0})'.format('|'.join(regex_parts))
+query_regex = '({0})'.format('|'.join(regex_parts))

@@ -124,21 +108,21 @@ class HTMLTextExtractor(HTMLParser):
-if number[0] in (u'x', u'X'):
+if number[0] in ('x', 'X'):
-self.result.append(unichr(codepoint))
+self.result.append(chr(codepoint))
-# self.result.append(unichr(codepoint))
+# self.result.append(chr(codepoint))
-return u''.join(self.result).strip()
+return ''.join(self.result).strip()

@@ -163,22 +147,14 @@ class UnicodeWriter:
-if IS_PY2:
-row = [s.encode("utf-8") if hasattr(s, 'encode') else s for s in row]
-if IS_PY2:
-data = data.decode("utf-8")
-else:
-if IS_PY2:
+self.stream.write(data.decode())
-self.stream.write(data)
-else:
-self.stream.write(data.decode("utf-8"))

@@ -253,7 +229,7 @@ def dict_subset(d, properties):
-return u'{0}[...]{1}'.format(url[:chunk_len], url[-chunk_len:])
+return '{0}[...]{1}'.format(url[:chunk_len], url[-chunk_len:])

@@ -310,7 +286,7 @@ def int_or_zero(num):
-lang = lang.lower().decode('utf-8')
+lang = lang.lower().decode()

@@ -407,17 +383,14 @@ def new_hmac(secret_key, url):
-if sys.version_info[0] == 2:
-return hmac.new(bytes(secret_key), url, hashlib.sha256).hexdigest()
-else:
-if isinstance(obj, basestring):
+if isinstance(obj, str):
-return unicode(obj)
+return str(obj)

@@ -433,9 +406,9 @@ def ecma_unescape(s):
-s = ecma_unescape4_re.sub(lambda e: unichr(int(e.group(1), 16)), s)
+s = ecma_unescape4_re.sub(lambda e: chr(int(e.group(1), 16)), s)
-s = ecma_unescape2_re.sub(lambda e: unichr(int(e.group(1), 16)), s)
+s = ecma_unescape2_re.sub(lambda e: chr(int(e.group(1), 16)), s)

@@ -17,37 +17,35 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
+import sys
+if sys.version_info[0] < 3:
+print('\033[1;31m Python2 is no longer supported\033[0m')
+exit(1)
-from sys import path
-path.append(realpath(dirname(realpath(__file__)) + '/../'))
+sys.path.append(realpath(dirname(realpath(__file__)) + '/../'))
-import sys
-try:
-from pygments import highlight
-from pygments.lexers import get_lexer_by_name
-from pygments.formatters import HtmlFormatter
-except:
-logger.critical("cannot import dependency: pygments")
-from sys import exit
-exit(1)
-try:
-from cgi import escape
-except:
-from html import escape
-from six import next
+from html import escape
+from io import StringIO
+from urllib.parse import urlencode, urlparse, urljoin
+from pygments import highlight
+from pygments.lexers import get_lexer_by_name
+from pygments.formatters import HtmlFormatter

@@ -78,7 +76,6 @@ from searx.plugins import plugins
-from searx.url_utils import urlencode, urlparse, urljoin

@@ -89,19 +86,6 @@ except ImportError:
-try:
-from cStringIO import StringIO
-except:
-from io import StringIO
-if sys.version_info[0] == 3:
-unicode = str
-PY3 = True
-else:
-logger.warning('\033[1;31m Python2 is no longer supported\033[0m')
-exit(1)

@@ -315,11 +299,11 @@ def proxify(url):
-url_params = dict(mortyurl=url.encode('utf-8'))
+url_params = dict(mortyurl=url.encode())
-url.encode('utf-8'),
+url.encode(),

@@ -347,10 +331,10 @@ def image_proxify(url):
-h = new_hmac(settings['server']['secret_key'], url.encode('utf-8'))
+h = new_hmac(settings['server']['secret_key'], url.encode())
-urlencode(dict(url=url.encode('utf-8'), h=h)))
+urlencode(dict(url=url.encode(), h=h)))

@@ -424,7 +408,7 @@ def render(template_name, override_theme=None, **kwargs):
-kwargs['unicode'] = unicode
+kwargs['unicode'] = str

@@ -612,7 +596,7 @@ def index():
-result['title'] = highlight_content(escape(result['title'] or u''), search_query.query)
+result['title'] = highlight_content(escape(result['title'] or ''), search_query.query)

@@ -634,14 +618,14 @@ def index():
-result['publishedDate'] = gettext(u'{minutes} minute(s) ago').format(minutes=minutes)
+result['publishedDate'] = gettext('{minutes} minute(s) ago').format(minutes=minutes)
-result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes)  # noqa
+result['publishedDate'] = gettext('{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes)  # noqa
-return Response(json.dumps({'query': search_query.query.decode('utf-8'),
+return Response(json.dumps({'query': search_query.query.decode(),

@@ -670,7 +654,7 @@ def index():
-cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query.decode('utf-8'))
+cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query.decode())

@@ -754,10 +738,7 @@ def autocompleter():
-if PY3:
 raw_text_query = RawTextQuery(request.form.get('q', b''), disabled_engines)
-else:
-raw_text_query = RawTextQuery(request.form.get('q', u'').encode('utf-8'), disabled_engines)

@@ -879,7 +860,7 @@ def _is_selected_language_supported(engine, preferences):
-url = request.args.get('url').encode('utf-8')
+url = request.args.get('url').encode()
