forked from zaclys/searxng
[fix] www. domain duplications
This commit is contained in:
parent
78d42f094c
commit
b226e6462b
|
@@ -154,16 +154,24 @@ def score_results(results):
|
||||||
# deduplication + scoring
|
# deduplication + scoring
|
||||||
for i, res in enumerate(flat_res):
|
for i, res in enumerate(flat_res):
|
||||||
res['parsed_url'] = urlparse(res['url'])
|
res['parsed_url'] = urlparse(res['url'])
|
||||||
|
res['host'] = res['parsed_url'].netloc
|
||||||
|
|
||||||
|
if res['host'].startswith('www.'):
|
||||||
|
res['host'] = res['host'].replace('www.', '', 1)
|
||||||
|
|
||||||
res['engines'] = [res['engine']]
|
res['engines'] = [res['engine']]
|
||||||
weight = 1.0
|
weight = 1.0
|
||||||
|
|
||||||
if hasattr(engines[res['engine']], 'weight'):
|
if hasattr(engines[res['engine']], 'weight'):
|
||||||
weight = float(engines[res['engine']].weight)
|
weight = float(engines[res['engine']].weight)
|
||||||
|
|
||||||
score = int((flat_len - i) / engines_len) * weight + 1
|
score = int((flat_len - i) / engines_len) * weight + 1
|
||||||
duplicated = False
|
duplicated = False
|
||||||
|
|
||||||
for new_res in results:
|
for new_res in results:
|
||||||
p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path # noqa
|
p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path # noqa
|
||||||
p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path # noqa
|
p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path # noqa
|
||||||
if res['parsed_url'].netloc == new_res['parsed_url'].netloc and\
|
if res['host'] == new_res['host'] and\
|
||||||
p1 == p2 and\
|
p1 == p2 and\
|
||||||
res['parsed_url'].query == new_res['parsed_url'].query and\
|
res['parsed_url'].query == new_res['parsed_url'].query and\
|
||||||
res.get('template') == new_res.get('template'):
|
res.get('template') == new_res.get('template'):
|
||||||
|
|
Loading…
Reference in New Issue