Merge pull request #2246 from dalf/mod-searx-data

[mod] Add searx.data module
This commit is contained in:
Alexandre Flament 2020-10-07 10:38:13 +02:00 committed by GitHub
commit 8b278cbfad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 37 additions and 41 deletions

21
searx/data/__init__.py Normal file
View File

@ -0,0 +1,21 @@
import json
from pathlib import Path
# Public API of this module: the names exported by ``from searx.data import *``.
# This list has to be bound to ``__all__`` (not ``__init__``) to have any effect,
# and every entry must match a name that is actually defined in this module.
__all__ = ['ENGINES_LANGUAGES', 'CURRENCIES', 'USER_AGENTS', 'bangs_loader']

# Directory containing this module; the JSON data files are looked up relative to it.
data_dir = Path(__file__).parent
def load(filename):
    """Parse a JSON file located in ``data_dir`` and return the decoded content.

    :param filename: name of the JSON file, relative to ``data_dir``
    :return: the Python object produced by ``json.load``
    """
    # str(...) is kept so that this also works on Python 3.5
    json_path = str(data_dir / filename)
    with open(json_path, encoding='utf-8') as json_file:
        content = json.load(json_file)
    return content
def bangs_loader():
    """Read ``bangs.json`` and return its parsed content.

    The file is read each time this function is called; nothing is cached here.
    """
    bangs = load('bangs.json')
    return bangs
# Data sets read eagerly, at import time; the files are loaded in the order listed.
ENGINES_LANGUAGES, CURRENCIES, USER_AGENTS = (
    load(json_name)
    for json_name in ('engines_languages.json', 'currencies.json', 'useragents.json')
)

View File

@ -19,13 +19,12 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
import sys import sys
import threading import threading
from os.path import realpath, dirname from os.path import realpath, dirname
from io import open
from babel.localedata import locale_identifiers from babel.localedata import locale_identifiers
from flask_babel import gettext from flask_babel import gettext
from operator import itemgetter from operator import itemgetter
from json import loads
from searx import settings from searx import settings
from searx import logger from searx import logger
from searx.data import ENGINES_LANGUAGES
from searx.poolrequests import get from searx.poolrequests import get
from searx.utils import load_module, match_language, get_engine_from_settings from searx.utils import load_module, match_language, get_engine_from_settings
@ -38,7 +37,6 @@ engines = {}
categories = {'general': []} categories = {'general': []}
languages = loads(open(engine_dir + '/../data/engines_languages.json', 'r', encoding='utf-8').read())
babel_langs = [lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0] babel_langs = [lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0]
for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())] for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())]
@ -108,8 +106,8 @@ def load_engine(engine_data):
sys.exit(1) sys.exit(1)
# assign supported languages from json file # assign supported languages from json file
if engine_data['name'] in languages: if engine_data['name'] in ENGINES_LANGUAGES:
setattr(engine, 'supported_languages', languages[engine_data['name']]) setattr(engine, 'supported_languages', ENGINES_LANGUAGES[engine_data['name']])
# find custom aliases for non standard language codes # find custom aliases for non standard language codes
if hasattr(engine, 'supported_languages'): if hasattr(engine, 'supported_languages'):

View File

@ -1,11 +1,11 @@
import json import json
import re import re
import os
import unicodedata import unicodedata
from io import open
from datetime import datetime from datetime import datetime
from searx.data import CURRENCIES
categories = [] categories = []
url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}' url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}'
@ -13,8 +13,6 @@ weight = 100
parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
db = 1
def normalize_name(name): def normalize_name(name):
name = name.lower().replace('-', ' ').rstrip('s') name = name.lower().replace('-', ' ').rstrip('s')
@ -23,17 +21,17 @@ def normalize_name(name):
def name_to_iso4217(name): def name_to_iso4217(name):
global db global CURRENCIES
name = normalize_name(name) name = normalize_name(name)
currencies = db['names'].get(name, [name]) currency = CURRENCIES['names'].get(name, [name])
return currencies[0] return currency[0]
def iso4217_to_name(iso4217, language): def iso4217_to_name(iso4217, language):
global db global CURRENCIES
return db['iso4217'].get(iso4217, {}).get(language, iso4217) return CURRENCIES['iso4217'].get(iso4217, {}).get(language, iso4217)
def request(query, params): def request(query, params):
@ -82,15 +80,3 @@ def response(resp):
results.append({'answer': answer, 'url': url}) results.append({'answer': answer, 'url': url})
return results return results
def load():
global db
current_dir = os.path.dirname(os.path.realpath(__file__))
json_data = open(current_dir + "/../data/currencies.json", 'r', encoding='utf-8').read()
db = json.loads(json_data)
load()

View File

@ -1,7 +1,4 @@
import json from searx.data import bangs_loader
from os.path import join
from searx import searx_dir
# bangs data coming from the following url convert to json with # bangs data coming from the following url convert to json with
# https://raw.githubusercontent.com/jivesearch/jivesearch/master/bangs/bangs.toml # https://raw.githubusercontent.com/jivesearch/jivesearch/master/bangs/bangs.toml
@ -9,10 +6,9 @@ from searx import searx_dir
# NOTE only use the get_bang_url # NOTE only use the get_bang_url
bangs_data = {} bangs_data = {}
with open(join(searx_dir, 'data/bangs.json'), encoding='utf-8') as json_file: for bang in bangs_loader()['bang']:
for bang in json.load(json_file)['bang']: for trigger in bang["triggers"]:
for trigger in bang["triggers"]: bangs_data[trigger] = {x: y for x, y in bang.items() if x != "triggers"}
bangs_data[trigger] = {x: y for x, y in bang.items() if x != "triggers"}
def get_bang_url(search_query): def get_bang_url(search_query):

View File

@ -1,13 +1,10 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import os
import sys import sys
import re import re
import json
import importlib import importlib
from numbers import Number from numbers import Number
from os.path import splitext, join from os.path import splitext, join
from io import open
from random import choice from random import choice
from html.parser import HTMLParser from html.parser import HTMLParser
from urllib.parse import urljoin, urlparse, unquote from urllib.parse import urljoin, urlparse, unquote
@ -18,6 +15,7 @@ from babel.core import get_global
from searx import settings from searx import settings
from searx.data import USER_AGENTS
from searx.version import VERSION_STRING from searx.version import VERSION_STRING
from searx.languages import language_codes from searx.languages import language_codes
from searx import logger from searx import logger
@ -31,9 +29,6 @@ blocked_tags = ('script',
ecma_unescape4_re = re.compile(r'%u([0-9a-fA-F]{4})', re.UNICODE) ecma_unescape4_re = re.compile(r'%u([0-9a-fA-F]{4})', re.UNICODE)
ecma_unescape2_re = re.compile(r'%([0-9a-fA-F]{2})', re.UNICODE) ecma_unescape2_re = re.compile(r'%([0-9a-fA-F]{2})', re.UNICODE)
useragents = json.loads(open(os.path.dirname(os.path.realpath(__file__))
+ "/data/useragents.json", 'r', encoding='utf-8').read())
xpath_cache = dict() xpath_cache = dict()
lang_to_lc_cache = dict() lang_to_lc_cache = dict()
@ -50,7 +45,7 @@ def gen_useragent(os=None):
See searx/data/useragents.json See searx/data/useragents.json
""" """
return str(useragents['ua'].format(os=os or choice(useragents['os']), version=choice(useragents['versions']))) return str(USER_AGENTS['ua'].format(os=os or choice(USER_AGENTS['os']), version=choice(USER_AGENTS['versions'])))
class HTMLTextExtractorException(Exception): class HTMLTextExtractorException(Exception):