From a9dc54bebc943000252975ef25ddcb51681fc284 Mon Sep 17 00:00:00 2001
From: Alexandre Flament
Date: Mon, 5 Oct 2020 13:50:33 +0200
Subject: [PATCH] [mod] Add searx.data module

Instead of loading the data/*.json files in different locations,
load these files in the new searx.data module.
---
 searx/data/__init__.py            | 24 ++++++++++++++++++++++++
 searx/engines/__init__.py         |  8 +++-----
 searx/engines/currency_convert.py | 28 +++++-----------------------
 searx/external_bang.py            | 12 ++++--------
 searx/utils.py                    |  9 ++-------
 5 files changed, 38 insertions(+), 43 deletions(-)
 create mode 100644 searx/data/__init__.py

diff --git a/searx/data/__init__.py b/searx/data/__init__.py
new file mode 100644
index 000000000..391947bff
--- /dev/null
+++ b/searx/data/__init__.py
@@ -0,0 +1,24 @@
+"""Static JSON data of searx, loaded from the files located next to this module."""
+import json
+from pathlib import Path
+
+
+__all__ = ['ENGINES_LANGUAGES', 'CURRENCIES', 'USER_AGENTS', 'bangs_loader']
+data_dir = Path(__file__).parent
+
+
+def load(filename):
+    """Return the decoded content of the UTF-8 JSON file ``filename`` from ``data_dir``."""
+    # add str(...) for Python 3.5
+    with open(str(data_dir / filename), encoding='utf-8') as fd:
+        return json.load(fd)
+
+
+def bangs_loader():
+    """Return the content of ``bangs.json``; read on each call, not at import time."""
+    return load('bangs.json')
+
+
+ENGINES_LANGUAGES = load('engines_languages.json')
+CURRENCIES = load('currencies.json')
+USER_AGENTS = load('useragents.json')
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index 5b91c08ce..9cdca47b7 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -19,13 +19,12 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 import sys
 import threading
 from os.path import realpath, dirname
-from io import open
 from babel.localedata import locale_identifiers
 from flask_babel import gettext
 from operator import itemgetter
-from json import loads
 from searx import settings
 from searx import logger
+from searx.data import ENGINES_LANGUAGES
 from searx.poolrequests import get
 from searx.utils import load_module, match_language, get_engine_from_settings
 
@@ -38,7 +37,6 @@ engines = {}
 
 categories = {'general': []}
 
-languages = loads(open(engine_dir + '/../data/engines_languages.json', 'r', encoding='utf-8').read())
 babel_langs = [lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0]
                for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())]
 
@@ -108,8 +106,8 @@ def load_engine(engine_data):
             sys.exit(1)
 
     # assign supported languages from json file
-    if engine_data['name'] in languages:
-        setattr(engine, 'supported_languages', languages[engine_data['name']])
+    if engine_data['name'] in ENGINES_LANGUAGES:
+        setattr(engine, 'supported_languages', ENGINES_LANGUAGES[engine_data['name']])
 
     # find custom aliases for non standard language codes
     if hasattr(engine, 'supported_languages'):
diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py
index c6067c4a8..4a82cfdca 100644
--- a/searx/engines/currency_convert.py
+++ b/searx/engines/currency_convert.py
@@ -1,11 +1,11 @@
 import json
 import re
-import os
 import unicodedata
 
-from io import open
 from datetime import datetime
 
+from searx.data import CURRENCIES
+
 
 categories = []
 url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}'
@@ -13,8 +13,6 @@ weight = 100
 
 parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
 
-db = 1
-
 
 def normalize_name(name):
     name = name.lower().replace('-', ' ').rstrip('s')
@@ -23,17 +21,13 @@ def normalize_name(name):
 
 
 def name_to_iso4217(name):
-    global db
-
     name = normalize_name(name)
-    currencies = db['names'].get(name, [name])
-    return currencies[0]
+    currency = CURRENCIES['names'].get(name, [name])
+    return currency[0]
 
 
 def iso4217_to_name(iso4217, language):
-    global db
-
-    return db['iso4217'].get(iso4217, {}).get(language, iso4217)
+    return CURRENCIES['iso4217'].get(iso4217, {}).get(language, iso4217)
 
 
 def request(query, params):
@@ -82,15 +76,3 @@ def response(resp):
     results.append({'answer': answer, 'url': url})
 
     return results
-
-
-def load():
-    global db
-
-    current_dir = os.path.dirname(os.path.realpath(__file__))
-    json_data = open(current_dir + "/../data/currencies.json", 'r', encoding='utf-8').read()
-
-    db = json.loads(json_data)
-
-
-load()
diff --git a/searx/external_bang.py b/searx/external_bang.py
index 92b6e6a09..104f85958 100644
--- a/searx/external_bang.py
+++ b/searx/external_bang.py
@@ -1,7 +1,4 @@
-import json
-from os.path import join
-
-from searx import searx_dir
+from searx.data import bangs_loader
 
 # bangs data coming from the following url convert to json with
 # https://raw.githubusercontent.com/jivesearch/jivesearch/master/bangs/bangs.toml
@@ -9,10 +6,9 @@ from searx import searx_dir
 # NOTE only use the get_bang_url
 
 bangs_data = {}
-with open(join(searx_dir, 'data/bangs.json'), encoding='utf-8') as json_file:
-    for bang in json.load(json_file)['bang']:
-        for trigger in bang["triggers"]:
-            bangs_data[trigger] = {x: y for x, y in bang.items() if x != "triggers"}
+for bang in bangs_loader()['bang']:
+    for trigger in bang["triggers"]:
+        bangs_data[trigger] = {x: y for x, y in bang.items() if x != "triggers"}
 
 
 def get_bang_url(search_query):
diff --git a/searx/utils.py b/searx/utils.py
index db17feba9..1c10585cf 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -1,13 +1,10 @@
 # -*- coding: utf-8 -*-
-import os
 import sys
 import re
-import json
 import importlib
 
 from numbers import Number
 from os.path import splitext, join
-from io import open
 from random import choice
 from html.parser import HTMLParser
 from urllib.parse import urljoin, urlparse, unquote
@@ -18,6 +15,7 @@ from babel.core import get_global
 
 
 from searx import settings
+from searx.data import USER_AGENTS
 from searx.version import VERSION_STRING
 from searx.languages import language_codes
 from searx import logger
@@ -31,9 +29,6 @@ blocked_tags = ('script',
 ecma_unescape4_re = re.compile(r'%u([0-9a-fA-F]{4})', re.UNICODE)
 ecma_unescape2_re = re.compile(r'%([0-9a-fA-F]{2})', re.UNICODE)
 
-useragents = json.loads(open(os.path.dirname(os.path.realpath(__file__))
-                             + "/data/useragents.json", 'r', encoding='utf-8').read())
-
 xpath_cache = dict()
 lang_to_lc_cache = dict()
 
@@ -50,7 +45,7 @@ def gen_useragent(os=None):
 
     See searx/data/useragents.json
     """
-    return str(useragents['ua'].format(os=os or choice(useragents['os']), version=choice(useragents['versions'])))
+    return str(USER_AGENTS['ua'].format(os=os or choice(USER_AGENTS['os']), version=choice(USER_AGENTS['versions'])))
 
 
 class HTMLTextExtractorException(Exception):