forked from zaclys/searxng
[fix] pep8 part II.
This commit is contained in:
parent
b0fd71b7b3
commit
5740cfbf1c
|
@ -28,7 +28,8 @@ except:
|
||||||
searx_dir = abspath(dirname(__file__))
|
searx_dir = abspath(dirname(__file__))
|
||||||
engine_dir = dirname(realpath(__file__))
|
engine_dir = dirname(realpath(__file__))
|
||||||
|
|
||||||
# if possible set path to settings using the environment variable SEARX_SETTINGS_PATH
|
# if possible set path to settings using the
|
||||||
|
# environment variable SEARX_SETTINGS_PATH
|
||||||
if 'SEARX_SETTINGS_PATH' in environ:
|
if 'SEARX_SETTINGS_PATH' in environ:
|
||||||
settings_path = environ['SEARX_SETTINGS_PATH']
|
settings_path = environ['SEARX_SETTINGS_PATH']
|
||||||
# otherwise using default path
|
# otherwise using default path
|
||||||
|
|
|
@ -41,7 +41,7 @@ def load_module(filename):
|
||||||
module.name = modname
|
module.name = modname
|
||||||
return module
|
return module
|
||||||
|
|
||||||
if not 'engines' in settings or not settings['engines']:
|
if 'engines' not in settings or not settings['engines']:
|
||||||
print '[E] Error no engines found. Edit your settings.yml'
|
print '[E] Error no engines found. Edit your settings.yml'
|
||||||
exit(2)
|
exit(2)
|
||||||
|
|
||||||
|
@ -161,7 +161,8 @@ def get_engines_stats():
|
||||||
|
|
||||||
for engine in scores_per_result:
|
for engine in scores_per_result:
|
||||||
if max_score_per_result:
|
if max_score_per_result:
|
||||||
engine['percentage'] = int(engine['avg'] / max_score_per_result * 100)
|
engine['percentage'] = int(engine['avg']
|
||||||
|
/ max_score_per_result * 100)
|
||||||
else:
|
else:
|
||||||
engine['percentage'] = 0
|
engine['percentage'] = 0
|
||||||
|
|
||||||
|
|
|
@ -39,7 +39,8 @@ class Query(object):
|
||||||
self.engines = []
|
self.engines = []
|
||||||
self.languages = []
|
self.languages = []
|
||||||
|
|
||||||
# parse query, if tags are set, which change the search engine or search-language
|
# parse query, if tags are set, which
|
||||||
|
# change the search engine or search-language
|
||||||
def parse_query(self):
|
def parse_query(self):
|
||||||
self.query_parts = []
|
self.query_parts = []
|
||||||
|
|
||||||
|
@ -66,11 +67,13 @@ class Query(object):
|
||||||
if query_part[0] == ':':
|
if query_part[0] == ':':
|
||||||
lang = query_part[1:].lower()
|
lang = query_part[1:].lower()
|
||||||
|
|
||||||
# check if any language-code is equal with declared language-codes
|
# check if any language-code is equal with
|
||||||
|
# declared language-codes
|
||||||
for lc in language_codes:
|
for lc in language_codes:
|
||||||
lang_id, lang_name, country = map(str.lower, lc)
|
lang_id, lang_name, country = map(str.lower, lc)
|
||||||
|
|
||||||
# if correct language-code is found, set it as new search-language
|
# if correct language-code is found
|
||||||
|
# set it as new search-language
|
||||||
if lang == lang_id\
|
if lang == lang_id\
|
||||||
or lang_id.startswith(lang)\
|
or lang_id.startswith(lang)\
|
||||||
or lang == lang_name\
|
or lang == lang_name\
|
||||||
|
@ -92,19 +95,20 @@ class Query(object):
|
||||||
|
|
||||||
# check if prefix is equal with engine name
|
# check if prefix is equal with engine name
|
||||||
elif prefix in engines\
|
elif prefix in engines\
|
||||||
and not prefix in self.blocked_engines:
|
and prefix not in self.blocked_engines:
|
||||||
parse_next = True
|
parse_next = True
|
||||||
self.engines.append({'category': 'none',
|
self.engines.append({'category': 'none',
|
||||||
'name': prefix})
|
'name': prefix})
|
||||||
|
|
||||||
# check if prefix is equal with category name
|
# check if prefix is equal with category name
|
||||||
elif prefix in categories:
|
elif prefix in categories:
|
||||||
# using all engines for that search, which are declared under that category name
|
# using all engines for that search, which
|
||||||
|
# are declared under that category name
|
||||||
parse_next = True
|
parse_next = True
|
||||||
self.engines.extend({'category': prefix,
|
self.engines.extend({'category': prefix,
|
||||||
'name': engine.name}
|
'name': engine.name}
|
||||||
for engine in categories[prefix]
|
for engine in categories[prefix]
|
||||||
if not engine in self.blocked_engines)
|
if engine not in self.blocked_engines)
|
||||||
|
|
||||||
# append query part to query_part list
|
# append query part to query_part list
|
||||||
self.query_parts.append(query_part)
|
self.query_parts.append(query_part)
|
||||||
|
@ -124,4 +128,3 @@ class Query(object):
|
||||||
def getFullQuery(self):
|
def getFullQuery(self):
|
||||||
# get full query including whitespaces
|
# get full query including whitespaces
|
||||||
return string.join(self.query_parts, '')
|
return string.join(self.query_parts, '')
|
||||||
|
|
||||||
|
|
|
@ -22,7 +22,7 @@ from datetime import datetime
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
from urlparse import urlparse, unquote
|
from urlparse import urlparse, unquote
|
||||||
from searx.engines import (
|
from searx.engines import (
|
||||||
categories, engines, engine_shortcuts
|
categories, engines
|
||||||
)
|
)
|
||||||
from searx.languages import language_codes
|
from searx.languages import language_codes
|
||||||
from searx.utils import gen_useragent
|
from searx.utils import gen_useragent
|
||||||
|
@ -39,7 +39,13 @@ def default_request_params():
|
||||||
|
|
||||||
|
|
||||||
# create a callback wrapper for the search engine results
|
# create a callback wrapper for the search engine results
|
||||||
def make_callback(engine_name, results, suggestions, answers, infoboxes, callback, params):
|
def make_callback(engine_name,
|
||||||
|
results,
|
||||||
|
suggestions,
|
||||||
|
answers,
|
||||||
|
infoboxes,
|
||||||
|
callback,
|
||||||
|
params):
|
||||||
|
|
||||||
# creating a callback wrapper for the search engine results
|
# creating a callback wrapper for the search engine results
|
||||||
def process_callback(response, **kwargs):
|
def process_callback(response, **kwargs):
|
||||||
|
@ -126,7 +132,8 @@ def score_results(results):
|
||||||
|
|
||||||
# strip multiple spaces and carriage returns from content
|
# strip multiple spaces and carriage returns from content
|
||||||
if 'content' in res:
|
if 'content' in res:
|
||||||
res['content'] = re.sub(' +', ' ', res['content'].strip().replace('\n', ''))
|
res['content'] = re.sub(' +', ' ',
|
||||||
|
res['content'].strip().replace('\n', ''))
|
||||||
|
|
||||||
# get weight of this engine if possible
|
# get weight of this engine if possible
|
||||||
if hasattr(engines[res['engine']], 'weight'):
|
if hasattr(engines[res['engine']], 'weight'):
|
||||||
|
@ -139,8 +146,12 @@ def score_results(results):
|
||||||
duplicated = False
|
duplicated = False
|
||||||
for new_res in results:
|
for new_res in results:
|
||||||
# remove / from the end of the url if required
|
# remove / from the end of the url if required
|
||||||
p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path # noqa
|
p1 = res['parsed_url'].path[:-1]\
|
||||||
p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path # noqa
|
if res['parsed_url'].path.endswith('/')\
|
||||||
|
else res['parsed_url'].path
|
||||||
|
p2 = new_res['parsed_url'].path[:-1]\
|
||||||
|
if new_res['parsed_url'].path.endswith('/')\
|
||||||
|
else new_res['parsed_url'].path
|
||||||
|
|
||||||
# check if that result is a duplicate
|
# check if that result is a duplicate
|
||||||
if res['host'] == new_res['host'] and\
|
if res['host'] == new_res['host'] and\
|
||||||
|
@ -153,7 +164,8 @@ def score_results(results):
|
||||||
# merge duplicates together
|
# merge duplicates together
|
||||||
if duplicated:
|
if duplicated:
|
||||||
# using content with more text
|
# using content with more text
|
||||||
if content_result_len(res.get('content', '')) > content_result_len(duplicated.get('content', '')):
|
if content_result_len(res.get('content', '')) >\
|
||||||
|
content_result_len(duplicated.get('content', '')):
|
||||||
duplicated['content'] = res['content']
|
duplicated['content'] = res['content']
|
||||||
|
|
||||||
# increase result-score
|
# increase result-score
|
||||||
|
@ -182,17 +194,25 @@ def score_results(results):
|
||||||
|
|
||||||
for i, res in enumerate(results):
|
for i, res in enumerate(results):
|
||||||
# FIXME : handle more than one category per engine
|
# FIXME : handle more than one category per engine
|
||||||
category = engines[res['engine']].categories[0] + ':' + '' if 'template' not in res else res['template']
|
category = engines[res['engine']].categories[0] + ':' + ''\
|
||||||
|
if 'template' not in res\
|
||||||
|
else res['template']
|
||||||
|
|
||||||
current = None if category not in categoryPositions else categoryPositions[category]
|
current = None if category not in categoryPositions\
|
||||||
|
else categoryPositions[category]
|
||||||
|
|
||||||
# group with previous results using the same category if the group can accept more result and is not too far from the current position
|
# group with previous results using the same category
|
||||||
if current != None and (current['count'] > 0) and (len(gresults) - current['index'] < 20):
|
# if the group can accept more result and is not too far
|
||||||
# group with the previous results using the same category with this one
|
# from the current position
|
||||||
|
if current is not None and (current['count'] > 0)\
|
||||||
|
and (len(gresults) - current['index'] < 20):
|
||||||
|
# group with the previous results using
|
||||||
|
# the same category with this one
|
||||||
index = current['index']
|
index = current['index']
|
||||||
gresults.insert(index, res)
|
gresults.insert(index, res)
|
||||||
|
|
||||||
# update every index after the current one (including the current one)
|
# update every index after the current one
|
||||||
|
# (including the current one)
|
||||||
for k in categoryPositions:
|
for k in categoryPositions:
|
||||||
v = categoryPositions[k]['index']
|
v = categoryPositions[k]['index']
|
||||||
if v >= index:
|
if v >= index:
|
||||||
|
@ -215,7 +235,7 @@ def score_results(results):
|
||||||
def merge_two_infoboxes(infobox1, infobox2):
|
def merge_two_infoboxes(infobox1, infobox2):
|
||||||
if 'urls' in infobox2:
|
if 'urls' in infobox2:
|
||||||
urls1 = infobox1.get('urls', None)
|
urls1 = infobox1.get('urls', None)
|
||||||
if urls1 == None:
|
if urls1 is None:
|
||||||
urls1 = []
|
urls1 = []
|
||||||
infobox1.set('urls', urls1)
|
infobox1.set('urls', urls1)
|
||||||
|
|
||||||
|
@ -229,7 +249,7 @@ def merge_two_infoboxes(infobox1, infobox2):
|
||||||
|
|
||||||
if 'attributes' in infobox2:
|
if 'attributes' in infobox2:
|
||||||
attributes1 = infobox1.get('attributes', None)
|
attributes1 = infobox1.get('attributes', None)
|
||||||
if attributes1 == None:
|
if attributes1 is None:
|
||||||
attributes1 = []
|
attributes1 = []
|
||||||
infobox1.set('attributes', attributes1)
|
infobox1.set('attributes', attributes1)
|
||||||
|
|
||||||
|
@ -244,7 +264,7 @@ def merge_two_infoboxes(infobox1, infobox2):
|
||||||
if 'content' in infobox2:
|
if 'content' in infobox2:
|
||||||
content1 = infobox1.get('content', None)
|
content1 = infobox1.get('content', None)
|
||||||
content2 = infobox2.get('content', '')
|
content2 = infobox2.get('content', '')
|
||||||
if content1 != None:
|
if content1 is not None:
|
||||||
if content_result_len(content2) > content_result_len(content1):
|
if content_result_len(content2) > content_result_len(content1):
|
||||||
infobox1['content'] = content2
|
infobox1['content'] = content2
|
||||||
else:
|
else:
|
||||||
|
@ -257,9 +277,9 @@ def merge_infoboxes(infoboxes):
|
||||||
for infobox in infoboxes:
|
for infobox in infoboxes:
|
||||||
add_infobox = True
|
add_infobox = True
|
||||||
infobox_id = infobox.get('id', None)
|
infobox_id = infobox.get('id', None)
|
||||||
if infobox_id != None:
|
if infobox_id is not None:
|
||||||
existingIndex = infoboxes_id.get(infobox_id, None)
|
existingIndex = infoboxes_id.get(infobox_id, None)
|
||||||
if existingIndex != None:
|
if existingIndex is not None:
|
||||||
merge_two_infoboxes(results[existingIndex], infobox)
|
merge_two_infoboxes(results[existingIndex], infobox)
|
||||||
add_infobox = False
|
add_infobox = False
|
||||||
|
|
||||||
|
@ -318,7 +338,8 @@ class Search(object):
|
||||||
|
|
||||||
self.pageno = int(pageno_param)
|
self.pageno = int(pageno_param)
|
||||||
|
|
||||||
# parse query, if tags are set, which change the search engine or search-language
|
# parse query, if tags are set, which change
|
||||||
|
# the search engine or search-language
|
||||||
query_obj = Query(self.request_data['q'], self.blocked_engines)
|
query_obj = Query(self.request_data['q'], self.blocked_engines)
|
||||||
query_obj.parse_query()
|
query_obj.parse_query()
|
||||||
|
|
||||||
|
@ -334,25 +355,29 @@ class Search(object):
|
||||||
|
|
||||||
self.categories = []
|
self.categories = []
|
||||||
|
|
||||||
# if engines are calculated from query, set categories by using that information
|
# if engines are calculated from query,
|
||||||
|
# set categories by using that information
|
||||||
if self.engines:
|
if self.engines:
|
||||||
self.categories = list(set(engine['category']
|
self.categories = list(set(engine['category']
|
||||||
for engine in self.engines))
|
for engine in self.engines))
|
||||||
|
|
||||||
# otherwise, using defined categories to calculate which engines should be used
|
# otherwise, using defined categories to
|
||||||
|
# calculate which engines should be used
|
||||||
else:
|
else:
|
||||||
# set used categories
|
# set used categories
|
||||||
for pd_name, pd in self.request_data.items():
|
for pd_name, pd in self.request_data.items():
|
||||||
if pd_name.startswith('category_'):
|
if pd_name.startswith('category_'):
|
||||||
category = pd_name[9:]
|
category = pd_name[9:]
|
||||||
# if category is not found in list, skip
|
# if category is not found in list, skip
|
||||||
if not category in categories:
|
if category not in categories:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# add category to list
|
# add category to list
|
||||||
self.categories.append(category)
|
self.categories.append(category)
|
||||||
|
|
||||||
# if no category is specified for this search, using user-defined default-configuration which (is stored in cookie)
|
# if no category is specified for this search,
|
||||||
|
# using user-defined default-configuration which
|
||||||
|
# (is stored in cookie)
|
||||||
if not self.categories:
|
if not self.categories:
|
||||||
cookie_categories = request.cookies.get('categories', '')
|
cookie_categories = request.cookies.get('categories', '')
|
||||||
cookie_categories = cookie_categories.split(',')
|
cookie_categories = cookie_categories.split(',')
|
||||||
|
@ -360,16 +385,18 @@ class Search(object):
|
||||||
if ccateg in categories:
|
if ccateg in categories:
|
||||||
self.categories.append(ccateg)
|
self.categories.append(ccateg)
|
||||||
|
|
||||||
# if still no category is specified, using general as default-category
|
# if still no category is specified, using general
|
||||||
|
# as default-category
|
||||||
if not self.categories:
|
if not self.categories:
|
||||||
self.categories = ['general']
|
self.categories = ['general']
|
||||||
|
|
||||||
# using all engines for that search, which are declared under the specific categories
|
# using all engines for that search, which are
|
||||||
|
# declared under the specific categories
|
||||||
for categ in self.categories:
|
for categ in self.categories:
|
||||||
self.engines.extend({'category': categ,
|
self.engines.extend({'category': categ,
|
||||||
'name': x.name}
|
'name': x.name}
|
||||||
for x in categories[categ]
|
for x in categories[categ]
|
||||||
if not x.name in self.blocked_engines)
|
if x.name not in self.blocked_engines)
|
||||||
|
|
||||||
# do search-request
|
# do search-request
|
||||||
def search(self, request):
|
def search(self, request):
|
||||||
|
@ -400,7 +427,8 @@ class Search(object):
|
||||||
if self.pageno > 1 and not engine.paging:
|
if self.pageno > 1 and not engine.paging:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# if search-language is set and engine does not provide language-support, skip
|
# if search-language is set and engine does not
|
||||||
|
# provide language-support, skip
|
||||||
if self.lang != 'all' and not engine.language_support:
|
if self.lang != 'all' and not engine.language_support:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@ -412,7 +440,8 @@ class Search(object):
|
||||||
request_params['pageno'] = self.pageno
|
request_params['pageno'] = self.pageno
|
||||||
request_params['language'] = self.lang
|
request_params['language'] = self.lang
|
||||||
|
|
||||||
# update request parameters dependent on search-engine (contained in engines folder)
|
# update request parameters dependent on
|
||||||
|
# search-engine (contained in engines folder)
|
||||||
request_params = engine.request(self.query.encode('utf-8'),
|
request_params = engine.request(self.query.encode('utf-8'),
|
||||||
request_params)
|
request_params)
|
||||||
|
|
||||||
|
@ -431,7 +460,8 @@ class Search(object):
|
||||||
request_params
|
request_params
|
||||||
)
|
)
|
||||||
|
|
||||||
# create dictionary which contain all information about the request
|
# create dictionary which contain all
|
||||||
|
# information about the request
|
||||||
request_args = dict(
|
request_args = dict(
|
||||||
headers=request_params['headers'],
|
headers=request_params['headers'],
|
||||||
hooks=dict(response=callback),
|
hooks=dict(response=callback),
|
||||||
|
|
|
@ -23,6 +23,7 @@ def gen_useragent():
|
||||||
def searx_useragent():
|
def searx_useragent():
|
||||||
return 'searx'
|
return 'searx'
|
||||||
|
|
||||||
|
|
||||||
def highlight_content(content, query):
|
def highlight_content(content, query):
|
||||||
|
|
||||||
if not content:
|
if not content:
|
||||||
|
|
|
@ -146,14 +146,14 @@ def render(template_name, override_theme=None, **kwargs):
|
||||||
|
|
||||||
nonblocked_categories = set(chain.from_iterable(nonblocked_categories))
|
nonblocked_categories = set(chain.from_iterable(nonblocked_categories))
|
||||||
|
|
||||||
if not 'categories' in kwargs:
|
if 'categories' not in kwargs:
|
||||||
kwargs['categories'] = ['general']
|
kwargs['categories'] = ['general']
|
||||||
kwargs['categories'].extend(x for x in
|
kwargs['categories'].extend(x for x in
|
||||||
sorted(categories.keys())
|
sorted(categories.keys())
|
||||||
if x != 'general'
|
if x != 'general'
|
||||||
and x in nonblocked_categories)
|
and x in nonblocked_categories)
|
||||||
|
|
||||||
if not 'selected_categories' in kwargs:
|
if 'selected_categories' not in kwargs:
|
||||||
kwargs['selected_categories'] = []
|
kwargs['selected_categories'] = []
|
||||||
for arg in request.args:
|
for arg in request.args:
|
||||||
if arg.startswith('category_'):
|
if arg.startswith('category_'):
|
||||||
|
@ -168,7 +168,7 @@ def render(template_name, override_theme=None, **kwargs):
|
||||||
if not kwargs['selected_categories']:
|
if not kwargs['selected_categories']:
|
||||||
kwargs['selected_categories'] = ['general']
|
kwargs['selected_categories'] = ['general']
|
||||||
|
|
||||||
if not 'autocomplete' in kwargs:
|
if 'autocomplete' not in kwargs:
|
||||||
kwargs['autocomplete'] = autocomplete
|
kwargs['autocomplete'] = autocomplete
|
||||||
|
|
||||||
kwargs['method'] = request.cookies.get('method', 'POST')
|
kwargs['method'] = request.cookies.get('method', 'POST')
|
||||||
|
@ -202,7 +202,8 @@ def index():
|
||||||
'index.html',
|
'index.html',
|
||||||
)
|
)
|
||||||
|
|
||||||
search.results, search.suggestions, search.answers, search.infoboxes = search.search(request)
|
search.results, search.suggestions,\
|
||||||
|
search.answers, search.infoboxes = search.search(request)
|
||||||
|
|
||||||
for result in search.results:
|
for result in search.results:
|
||||||
|
|
||||||
|
@ -250,12 +251,16 @@ def index():
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# get domainname from result
|
# get domainname from result
|
||||||
# TODO, does only work correct with TLD's like asdf.com, not for asdf.com.de
|
# TODO, does only work correct with TLD's like
|
||||||
|
# asdf.com, not for asdf.com.de
|
||||||
# TODO, using publicsuffix instead of this rewrite rule
|
# TODO, using publicsuffix instead of this rewrite rule
|
||||||
old_result_domainname = '.'.join(result['parsed_url'].hostname.split('.')[-2:])
|
old_result_domainname = '.'.join(
|
||||||
new_result_domainname = '.'.join(new_parsed_url.hostname.split('.')[-2:])
|
result['parsed_url'].hostname.split('.')[-2:])
|
||||||
|
new_result_domainname = '.'.join(
|
||||||
|
new_parsed_url.hostname.split('.')[-2:])
|
||||||
|
|
||||||
# check if rewritten hostname is the same, to protect against wrong or malicious rewrite rules
|
# check if rewritten hostname is the same,
|
||||||
|
# to protect against wrong or malicious rewrite rules
|
||||||
if old_result_domainname == new_result_domainname:
|
if old_result_domainname == new_result_domainname:
|
||||||
# set new url
|
# set new url
|
||||||
result['url'] = new_result_url
|
result['url'] = new_result_url
|
||||||
|
@ -429,7 +434,7 @@ def preferences():
|
||||||
for pd_name, pd in request.form.items():
|
for pd_name, pd in request.form.items():
|
||||||
if pd_name.startswith('category_'):
|
if pd_name.startswith('category_'):
|
||||||
category = pd_name[9:]
|
category = pd_name[9:]
|
||||||
if not category in categories:
|
if category not in categories:
|
||||||
continue
|
continue
|
||||||
selected_categories.append(category)
|
selected_categories.append(category)
|
||||||
elif pd_name == 'locale' and pd in settings['locales']:
|
elif pd_name == 'locale' and pd in settings['locales']:
|
||||||
|
|
Loading…
Reference in New Issue