From e39d9fe5423a0fceed1d15dc63c1f8aa30d72e44 Mon Sep 17 00:00:00 2001 From: Dalf Date: Mon, 22 Sep 2014 23:39:21 +0200 Subject: [PATCH] update comment --- searx/search.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/searx/search.py b/searx/search.py index 10916cc50..48f8012f1 100644 --- a/searx/search.py +++ b/searx/search.py @@ -106,8 +106,13 @@ def score_results(results): res['host'] = res['host'].replace('www.', '', 1) res['engines'] = [res['engine']] + weight = 1.0 + # strip multiple spaces and carriage returns from content + if 'content' in res: + res['content'] = re.sub(' +', ' ', res['content'].strip().replace('\n', '')) + # get weight of this engine if possible if hasattr(engines[res['engine']], 'weight'): weight = float(engines[res['engine']].weight) @@ -115,12 +120,8 @@ def score_results(results): # calculate score for that engine score = int((flat_len - i) / engines_len) * weight + 1 - duplicated = False - # check for duplicates - if 'content' in res: - res['content'] = re.sub(' +', ' ', res['content'].strip().replace('\n', '')) - + duplicated = False for new_res in results: # remove / from the end of the url if required p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path # noqa