From 5e7060053cc382723af5daa1b4af42fe228b5292 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Mon, 26 Oct 2020 20:40:24 +0100 Subject: [PATCH] [mod] ahmia_filter.py: minor changes - use result['parsed_url'] - load ahmia_blacklist.txt in searx.data --- searx/data/__init__.py | 7 ++++++- searx/plugins/ahmia_filter.py | 11 ++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/searx/data/__init__.py b/searx/data/__init__.py index 391947bff..1116e5d47 100644 --- a/searx/data/__init__.py +++ b/searx/data/__init__.py @@ -2,7 +2,7 @@ import json from pathlib import Path -__init__ = ['ENGINES_LANGUGAGES', 'CURRENCIES', 'USER_AGENTS', 'bangs_loader'] +__init__ = ['ENGINES_LANGUGAGES', 'CURRENCIES', 'USER_AGENTS', 'bangs_loader', 'ahmia_blacklist_loader'] data_dir = Path(__file__).parent @@ -16,6 +16,11 @@ def bangs_loader(): return load('bangs.json') +def ahmia_blacklist_loader(): + with open(str(data_dir / 'ahmia_blacklist.txt'), encoding='utf-8') as fd: + return fd.read().split() + + ENGINES_LANGUAGES = load('engines_languages.json') CURRENCIES = load('currencies.json') USER_AGENTS = load('useragents.json') diff --git a/searx/plugins/ahmia_filter.py b/searx/plugins/ahmia_filter.py index 8eb7f9413..83b05e4d2 100644 --- a/searx/plugins/ahmia_filter.py +++ b/searx/plugins/ahmia_filter.py @@ -3,9 +3,7 @@ ''' from hashlib import md5 -from os.path import join -from urllib.parse import urlparse -from searx import searx_dir +from searx.data import ahmia_blacklist_loader name = "Ahmia blacklist" description = "Filter out onion results that appear in Ahmia's blacklist.
(See https://ahmia.fi/blacklist)" @@ -18,15 +16,14 @@ ahmia_blacklist = None def get_ahmia_blacklist(): global ahmia_blacklist if not ahmia_blacklist: - with open(join(join(searx_dir, "data"), "ahmia_blacklist.txt"), 'r') as f: - ahmia_blacklist = f.read().split() + ahmia_blacklist = ahmia_blacklist_loader() return ahmia_blacklist def not_blacklisted(result): - if not result.get('is_onion'): + if not result.get('is_onion') or not result.get('parsed_url'): return True - result_hash = md5(urlparse(result.get('url')).hostname.encode()).hexdigest() + result_hash = md5(result['parsed_url'].hostname.encode()).hexdigest() return result_hash not in get_ahmia_blacklist()