[fix] pep8 part II.

Adam Tauber 2014-10-19 12:41:04 +02:00
parent b0fd71b7b3
commit 5740cfbf1c
6 changed files with 119 additions and 78 deletions

View File

@@ -28,7 +28,8 @@ except:
 searx_dir = abspath(dirname(__file__))
 engine_dir = dirname(realpath(__file__))
 
-# if possible set path to settings using the enviroment variable SEARX_SETTINGS_PATH
+# if possible set path to settings using the
+# enviroment variable SEARX_SETTINGS_PATH
 if 'SEARX_SETTINGS_PATH' in environ:
     settings_path = environ['SEARX_SETTINGS_PATH']
 # otherwise using default path
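The hunk above shows the pattern this commit applies throughout: a comment longer than 79 characters is split across two '#' lines. A minimal sketch of the same wrapping done programmatically; wrap_comment is a hypothetical helper for illustration, not part of searx:

    import textwrap


    def wrap_comment(text, width=79, prefix='# '):
        # split an overlong comment into several '# ' lines so that
        # each one stays within the PEP 8 line-length limit
        body = textwrap.wrap(text, width - len(prefix))
        return '\n'.join(prefix + line for line in body)


    print(wrap_comment('if possible set path to settings using the '
                       'enviroment variable SEARX_SETTINGS_PATH', width=50))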

View File

@@ -41,7 +41,7 @@ def load_module(filename):
     module.name = modname
     return module
 
 
-if not 'engines' in settings or not settings['engines']:
+if 'engines' not in settings or not settings['engines']:
     print '[E] Error no engines found. Edit your settings.yml'
     exit(2)
@@ -68,15 +68,15 @@ for engine_data in settings['engines']:
         engine.categories = ['general']
 
     if not hasattr(engine, 'language_support'):
-        #engine.language_support = False
+        # engine.language_support = False
         engine.language_support = True
 
     if not hasattr(engine, 'timeout'):
-        #engine.language_support = False
+        # engine.language_support = False
         engine.timeout = settings['server']['request_timeout']
 
     if not hasattr(engine, 'shortcut'):
-        #engine.shortcut = '''
+        # engine.shortcut = '''
         engine.shortcut = ''
 
     # checking required variables
@@ -161,7 +161,8 @@ def get_engines_stats():
     for engine in scores_per_result:
         if max_score_per_result:
-            engine['percentage'] = int(engine['avg'] / max_score_per_result * 100)
+            engine['percentage'] = int(engine['avg']
+                                       / max_score_per_result * 100)
         else:
             engine['percentage'] = 0
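The percentage hunk is a standard PEP 8 wrap: the expression is broken at the binary operator and the continuation is aligned with the opening bracket (pycodestyle E127/E128 govern that alignment). A runnable sketch with invented values, not taken from the commit:

    # values are made up so the snippet runs standalone
    engine = {'avg': 4.2}
    max_score_per_result = 6.0

    # continuation aligned with the opening bracket, broken
    # before the operator as in the hunk above
    percentage = int(engine['avg']
                     / max_score_per_result * 100)
    assert percentage == 70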

View File

@@ -31,30 +31,31 @@ class Query(object):
     def __init__(self, query, blocked_engines):
         self.query = query
         self.blocked_engines = []
 
         if blocked_engines:
             self.blocked_engines = blocked_engines
 
         self.query_parts = []
         self.engines = []
         self.languages = []
 
-    # parse query, if tags are set, which change the serch engine or search-language
+    # parse query, if tags are set, which
+    # change the serch engine or search-language
     def parse_query(self):
         self.query_parts = []
 
         # split query, including whitespaces
         raw_query_parts = re.split(r'(\s+)', self.query)
 
         parse_next = True
         for query_part in raw_query_parts:
             if not parse_next:
                 self.query_parts[-1] += query_part
                 continue
 
             parse_next = False
 
             # part does only contain spaces, skip
             if query_part.isspace()\
                or query_part == '':
@@ -62,15 +63,17 @@ class Query(object):
                 self.query_parts.append(query_part)
                 continue
 
             # this force a language
             if query_part[0] == ':':
                 lang = query_part[1:].lower()
 
-                # check if any language-code is equal with declared language-codes
+                # check if any language-code is equal with
+                # declared language-codes
                 for lc in language_codes:
                     lang_id, lang_name, country = map(str.lower, lc)
 
-                    # if correct language-code is found, set it as new search-language
+                    # if correct language-code is found
+                    # set it as new search-language
                     if lang == lang_id\
                        or lang_id.startswith(lang)\
                        or lang == lang_name\
@@ -89,23 +92,24 @@ class Query(object):
                     parse_next = True
                     self.engines.append({'category': 'none',
                                          'name': engine_shortcuts[prefix]})
 
                 # check if prefix is equal with engine name
                 elif prefix in engines\
-                        and not prefix in self.blocked_engines:
+                        and prefix not in self.blocked_engines:
                     parse_next = True
                     self.engines.append({'category': 'none',
                                          'name': prefix})
 
                 # check if prefix is equal with categorie name
                 elif prefix in categories:
-                    # using all engines for that search, which are declared under that categorie name
+                    # using all engines for that search, which
+                    # are declared under that categorie name
                     parse_next = True
                     self.engines.extend({'category': prefix,
                                          'name': engine.name}
                                         for engine in categories[prefix]
-                                        if not engine in self.blocked_engines)
+                                        if engine not in self.blocked_engines)
 
             # append query part to query_part list
             self.query_parts.append(query_part)
@@ -114,14 +118,13 @@ class Query(object):
             self.query_parts[-1] = search_query
         else:
             self.query_parts.append(search_query)
 
     def getSearchQuery(self):
         if len(self.query_parts):
             return self.query_parts[-1]
         else:
             return ''
 
-
     def getFullQuery(self):
         # get full querry including whitespaces
         return string.join(self.query_parts, '')
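Several of the query.py changes replace 'not x in y' with 'x not in y' (pycodestyle E713). Both forms are equivalent, since 'not x in y' parses as 'not (x in y)'; PEP 8 simply prefers the dedicated operator. A short demonstration with invented names:

    blocked_engines = ['bing', 'yahoo']
    prefix = 'ddg'

    # E713: works, but reads worse and is flagged by the linter
    old_style = not prefix in blocked_engines
    # the form this commit rewrites to
    new_style = prefix not in blocked_engines
    assert old_style == new_style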

View File

@@ -22,7 +22,7 @@ from datetime import datetime
 from operator import itemgetter
 from urlparse import urlparse, unquote
 from searx.engines import (
-    categories, engines, engine_shortcuts
+    categories, engines
 )
 from searx.languages import language_codes
 from searx.utils import gen_useragent
@@ -39,7 +39,13 @@ def default_request_params():
 
 # create a callback wrapper for the search engine results
-def make_callback(engine_name, results, suggestions, answers, infoboxes, callback, params):
+def make_callback(engine_name,
+                  results,
+                  suggestions,
+                  answers,
+                  infoboxes,
+                  callback,
+                  params):
 
     # creating a callback wrapper for the search engine results
     def process_callback(response, **kwargs):
@@ -95,7 +101,7 @@ def make_callback(engine_name, results, suggestions, answers, infoboxes, callbac
 def content_result_len(content):
     if isinstance(content, basestring):
         content = re.sub('[,;:!?\./\\\\ ()-_]', '', content)
         return len(content)
     else:
         return 0
@@ -126,7 +132,8 @@ def score_results(results):
         # strip multiple spaces and cariage returns from content
         if 'content' in res:
-            res['content'] = re.sub(' +', ' ', res['content'].strip().replace('\n', ''))
+            res['content'] = re.sub(' +', ' ',
+                                    res['content'].strip().replace('\n', ''))
 
         # get weight of this engine if possible
         if hasattr(engines[res['engine']], 'weight'):
@@ -139,8 +146,12 @@ def score_results(results):
         duplicated = False
         for new_res in results:
             # remove / from the end of the url if required
-            p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path  # noqa
-            p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path  # noqa
+            p1 = res['parsed_url'].path[:-1]\
+                if res['parsed_url'].path.endswith('/')\
+                else res['parsed_url'].path
+            p2 = new_res['parsed_url'].path[:-1]\
+                if new_res['parsed_url'].path.endswith('/')\
+                else new_res['parsed_url'].path
 
             # check if that result is a duplicate
             if res['host'] == new_res['host'] and\
@@ -153,7 +164,8 @@ def score_results(results):
         # merge duplicates together
         if duplicated:
             # using content with more text
-            if content_result_len(res.get('content', '')) > content_result_len(duplicated.get('content', '')):
+            if content_result_len(res.get('content', '')) >\
+                    content_result_len(duplicated.get('content', '')):
                 duplicated['content'] = res['content']
 
             # increase result-score
@@ -182,17 +194,25 @@ def score_results(results):
     for i, res in enumerate(results):
         # FIXME : handle more than one category per engine
-        category = engines[res['engine']].categories[0] + ':' + '' if 'template' not in res else res['template']
+        category = engines[res['engine']].categories[0] + ':' + ''\
+            if 'template' not in res\
+            else res['template']
 
-        current = None if category not in categoryPositions else categoryPositions[category]
+        current = None if category not in categoryPositions\
+            else categoryPositions[category]
 
-        # group with previous results using the same category if the group can accept more result and is not too far from the current position
-        if current != None and (current['count'] > 0) and (len(gresults) - current['index'] < 20):
-            # group with the previous results using the same category with this one
+        # group with previous results using the same category
+        # if the group can accept more result and is not too far
+        # from the current position
+        if current is not None and (current['count'] > 0)\
+                and (len(gresults) - current['index'] < 20):
+            # group with the previous results using
+            # the same category with this one
             index = current['index']
             gresults.insert(index, res)
 
-            # update every index after the current one (including the current one)
+            # update every index after the current one
+            # (including the current one)
             for k in categoryPositions:
                 v = categoryPositions[k]['index']
                 if v >= index:
@@ -206,7 +226,7 @@ def score_results(results):
             gresults.append(res)
 
             # update categoryIndex
-            categoryPositions[category] = { 'index' : len(gresults), 'count' : 8 }
+            categoryPositions[category] = {'index': len(gresults), 'count': 8}
 
     # return gresults
     return gresults
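The make_callback rewrite earlier in this file is the signature-wrapping counterpart of these expression wraps: one parameter per line, aligned with the opening parenthesis. A self-contained sketch; the body is a stand-in, not the real searx implementation:

    def make_callback(engine_name,
                      results,
                      suggestions,
                      answers,
                      infoboxes,
                      callback,
                      params):
        # stand-in body; the real function builds and returns
        # a response-processing closure
        return engine_name, len(results)


    print(make_callback('ddg', [], [], [], [], None, {}))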
@@ -215,21 +235,21 @@ def score_results(results):
 def merge_two_infoboxes(infobox1, infobox2):
     if 'urls' in infobox2:
         urls1 = infobox1.get('urls', None)
-        if urls1 == None:
+        if urls1 is None:
             urls1 = []
             infobox1.set('urls', urls1)
 
         urlSet = set()
         for url in infobox1.get('urls', []):
             urlSet.add(url.get('url', None))
 
         for url in infobox2.get('urls', []):
             if url.get('url', None) not in urlSet:
                 urls1.append(url)
 
     if 'attributes' in infobox2:
         attributes1 = infobox1.get('attributes', None)
-        if attributes1 == None:
+        if attributes1 is None:
             attributes1 = []
             infobox1.set('attributes', attributes1)
@@ -237,14 +257,14 @@ def merge_two_infoboxes(infobox1, infobox2):
         for attribute in infobox1.get('attributes', []):
             if attribute.get('label', None) not in attributeSet:
                 attributeSet.add(attribute.get('label', None))
 
         for attribute in infobox2.get('attributes', []):
             attributes1.append(attribute)
 
     if 'content' in infobox2:
         content1 = infobox1.get('content', None)
         content2 = infobox2.get('content', '')
-        if content1 != None:
+        if content1 is not None:
             if content_result_len(content2) > content_result_len(content1):
                 infobox1['content'] = content2
         else:
@@ -257,12 +277,12 @@ def merge_infoboxes(infoboxes):
     for infobox in infoboxes:
         add_infobox = True
         infobox_id = infobox.get('id', None)
-        if infobox_id != None:
+        if infobox_id is not None:
             existingIndex = infoboxes_id.get(infobox_id, None)
-            if existingIndex != None:
+            if existingIndex is not None:
                 merge_two_infoboxes(results[existingIndex], infobox)
-                add_infobox=False
+                add_infobox = False
 
         if add_infobox:
             results.append(infobox)
             infoboxes_id[infobox_id] = len(results)-1
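The infobox hunks swap '== None' and '!= None' for 'is None' and 'is not None' (pycodestyle E711). The identity form is not just style: '==' dispatches to a type's __eq__ and can lie, while 'is' cannot. A small demonstration with a made-up class:

    class Infobox(object):
        # a deliberately permissive __eq__ shows why E711 matters:
        # equality can be overridden, identity cannot
        def __eq__(self, other):
            return True


    box = Infobox()
    assert box == None       # True only because of the custom __eq__
    assert box is not None   # the identity test is not fooled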
@@ -318,7 +338,8 @@ class Search(object):
             self.pageno = int(pageno_param)
 
-        # parse query, if tags are set, which change the serch engine or search-language
+        # parse query, if tags are set, which change
+        # the serch engine or search-language
         query_obj = Query(self.request_data['q'], self.blocked_engines)
         query_obj.parse_query()
@@ -334,25 +355,29 @@ class Search(object):
         self.categories = []
 
-        # if engines are calculated from query, set categories by using that informations
+        # if engines are calculated from query,
+        # set categories by using that informations
         if self.engines:
             self.categories = list(set(engine['category']
                                        for engine in self.engines))
 
-        # otherwise, using defined categories to calculate which engines should be used
+        # otherwise, using defined categories to
+        # calculate which engines should be used
         else:
             # set used categories
             for pd_name, pd in self.request_data.items():
                 if pd_name.startswith('category_'):
                     category = pd_name[9:]
                     # if category is not found in list, skip
-                    if not category in categories:
+                    if category not in categories:
                         continue
                     # add category to list
                     self.categories.append(category)
 
-            # if no category is specified for this search, using user-defined default-configuration which (is stored in cookie)
+            # if no category is specified for this search,
+            # using user-defined default-configuration which
+            # (is stored in cookie)
             if not self.categories:
                 cookie_categories = request.cookies.get('categories', '')
                 cookie_categories = cookie_categories.split(',')
@@ -360,16 +385,18 @@ class Search(object):
                     if ccateg in categories:
                         self.categories.append(ccateg)
 
-            # if still no category is specified, using general as default-category
+            # if still no category is specified, using general
+            # as default-category
             if not self.categories:
                 self.categories = ['general']
 
-        # using all engines for that search, which are declared under the specific categories
+        # using all engines for that search, which are
+        # declared under the specific categories
         for categ in self.categories:
             self.engines.extend({'category': categ,
                                  'name': x.name}
                                 for x in categories[categ]
-                                if not x.name in self.blocked_engines)
+                                if x.name not in self.blocked_engines)
 
     # do search-request
     def search(self, request):
@@ -386,7 +413,7 @@ class Search(object):
         number_of_searches += 1
 
         # set default useragent
-        #user_agent = request.headers.get('User-Agent', '')
+        # user_agent = request.headers.get('User-Agent', '')
         user_agent = gen_useragent()
 
         # start search-reqest for all selected engines
@@ -400,7 +427,8 @@ class Search(object):
             if self.pageno > 1 and not engine.paging:
                 continue
 
-            # if search-language is set and engine does not provide language-support, skip
+            # if search-language is set and engine does not
+            # provide language-support, skip
             if self.lang != 'all' and not engine.language_support:
                 continue
@@ -412,7 +440,8 @@ class Search(object):
             request_params['pageno'] = self.pageno
             request_params['language'] = self.lang
 
-            # update request parameters dependent on search-engine (contained in engines folder)
+            # update request parameters dependent on
+            # search-engine (contained in engines folder)
             request_params = engine.request(self.query.encode('utf-8'),
                                             request_params)
@@ -431,7 +460,8 @@ class Search(object):
                 request_params
             )
 
-            # create dictionary which contain all informations about the request
+            # create dictionary which contain all
+            # informations about the request
             request_args = dict(
                 headers=request_params['headers'],
                 hooks=dict(response=callback),
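Where a condition no longer fits on one line, the commit continues it with backslashes. Wrapping the whole condition in parentheses is the equivalent continuation PEP 8 generally prefers; both are sketched below with invented flag values:

    lang = 'all'
    language_support = True
    pageno = 2
    paging = False

    # backslash continuation, as used throughout this commit
    skip = (pageno > 1 and not paging) or \
        (lang != 'all' and not language_support)

    # equivalent parenthesized form
    skip_parens = ((pageno > 1 and not paging)
                   or (lang != 'all' and not language_support))

    assert skip and skip_parens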

View File

@@ -1,4 +1,4 @@
-#import htmlentitydefs
+# import htmlentitydefs
 from codecs import getincrementalencoder
 from HTMLParser import HTMLParser
 from random import choice
@@ -22,7 +22,8 @@ def gen_useragent():
 
 
 def searx_useragent():
     return 'searx'
 
+
 def highlight_content(content, query):
     if not content:
@@ -67,8 +68,8 @@ class HTMLTextExtractor(HTMLParser):
         self.result.append(unichr(codepoint))
 
     def handle_entityref(self, name):
-        #codepoint = htmlentitydefs.name2codepoint[name]
-        #self.result.append(unichr(codepoint))
+        # codepoint = htmlentitydefs.name2codepoint[name]
+        # self.result.append(unichr(codepoint))
         self.result.append(name)
 
     def get_text(self):
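Two pycodestyle rules drive the utils.py hunks: E265 (a block comment starts with '#' followed by one space) and E302 (two blank lines before a top-level def, which is what the added blank line provides). Sketched together with stand-in bodies:

    def searx_useragent():
        # block comments start with '#' plus one space (E265)
        return 'searx'


    def highlight_content(content, query):
        # two blank lines above this def satisfy E302
        if not content:
            return None
        return content


    print(highlight_content(searx_useragent(), 'searx'))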

View File

@@ -71,7 +71,7 @@ app.secret_key = settings['server']['secret_key']
 
 babel = Babel(app)
 
-#TODO configurable via settings.yml
+# TODO configurable via settings.yml
 favicons = ['wikipedia', 'youtube', 'vimeo', 'soundcloud',
             'twitter', 'stackoverflow', 'github']
@@ -146,14 +146,14 @@ def render(template_name, override_theme=None, **kwargs):
     nonblocked_categories = set(chain.from_iterable(nonblocked_categories))
 
-    if not 'categories' in kwargs:
+    if 'categories' not in kwargs:
         kwargs['categories'] = ['general']
         kwargs['categories'].extend(x for x in
                                     sorted(categories.keys())
                                     if x != 'general'
                                     and x in nonblocked_categories)
 
-    if not 'selected_categories' in kwargs:
+    if 'selected_categories' not in kwargs:
         kwargs['selected_categories'] = []
         for arg in request.args:
             if arg.startswith('category_'):
@@ -168,7 +168,7 @@ def render(template_name, override_theme=None, **kwargs):
     if not kwargs['selected_categories']:
         kwargs['selected_categories'] = ['general']
 
-    if not 'autocomplete' in kwargs:
+    if 'autocomplete' not in kwargs:
         kwargs['autocomplete'] = autocomplete
 
     kwargs['method'] = request.cookies.get('method', 'POST')
@@ -202,14 +202,15 @@ def index():
             'index.html',
         )
 
-    search.results, search.suggestions, search.answers, search.infoboxes = search.search(request)
+    search.results, search.suggestions,\
+        search.answers, search.infoboxes = search.search(request)
 
     for result in search.results:
 
         if not search.paging and engines[result['engine']].paging:
             search.paging = True
 
         # check if HTTPS rewrite is required
         if settings['server']['https_rewrite']\
            and result['parsed_url'].scheme == 'http':
@@ -236,7 +237,7 @@ def index():
                 try:
                     # TODO, precompile rule
                     p = re.compile(rule[0])
 
                     # rewrite url if possible
                     new_result_url = p.sub(rule[1], result['url'])
                 except:
@@ -250,17 +251,21 @@ def index():
                         continue
 
                     # get domainname from result
-                    # TODO, does only work correct with TLD's like asdf.com, not for asdf.com.de
+                    # TODO, does only work correct with TLD's like
+                    # asdf.com, not for asdf.com.de
                     # TODO, using publicsuffix instead of this rewrite rule
-                    old_result_domainname = '.'.join(result['parsed_url'].hostname.split('.')[-2:])
-                    new_result_domainname = '.'.join(new_parsed_url.hostname.split('.')[-2:])
+                    old_result_domainname = '.'.join(
+                        result['parsed_url'].hostname.split('.')[-2:])
+                    new_result_domainname = '.'.join(
+                        new_parsed_url.hostname.split('.')[-2:])
 
-                    # check if rewritten hostname is the same, to protect against wrong or malicious rewrite rules
+                    # check if rewritten hostname is the same,
+                    # to protect against wrong or malicious rewrite rules
                     if old_result_domainname == new_result_domainname:
                         # set new url
                         result['url'] = new_result_url
 
                     # target has matched, do not search over the other rules
                     break
 
     if search.request_data.get('format', 'html') == 'html':
@@ -429,7 +434,7 @@ def preferences():
     for pd_name, pd in request.form.items():
         if pd_name.startswith('category_'):
             category = pd_name[9:]
-            if not category in categories:
+            if category not in categories:
                 continue
             selected_categories.append(category)
         elif pd_name == 'locale' and pd in settings['locales']:
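The index() hunk higher up wraps a four-target unpacking with a backslash so the assignment fits the line limit. A parenthesized target list is the bracket-based alternative; both are shown with a stand-in for Search.search(request):

    def fake_search():
        # stand-in returning the same four values as Search.search()
        return ['results'], ['suggestions'], ['answers'], ['infoboxes']


    # backslash-wrapped unpacking, as in the hunk above
    results, suggestions,\
        answers, infoboxes = fake_search()

    # equivalent form with a parenthesized target list
    (results, suggestions,
     answers, infoboxes) = fake_search()

    assert results == ['results'] and infoboxes == ['infoboxes']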