forked from zaclys/searxng
Merge pull request #2246 from dalf/mod-searx-data
[mod] Add searx.data module
This commit is contained in:
commit
8b278cbfad
|
@ -0,0 +1,21 @@
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
# Public API of searx.data: the JSON datasets loaded at import time and the on-demand bangs loader.
__all__ = ['ENGINES_LANGUAGES', 'CURRENCIES', 'USER_AGENTS', 'bangs_loader']
|
||||||
|
data_dir = Path(__file__).parent
|
||||||
|
|
||||||
|
|
||||||
|
def load(filename):
|
||||||
|
# str(...) is required on Python 3.5, where open() does not accept pathlib.Path objects
|
||||||
|
with open(str(data_dir / filename), encoding='utf-8') as fd:
|
||||||
|
return json.load(fd)
|
||||||
|
|
||||||
|
|
||||||
|
def bangs_loader():
|
||||||
|
return load('bangs.json')
|
||||||
|
|
||||||
|
|
||||||
|
ENGINES_LANGUAGES = load('engines_languages.json')
|
||||||
|
CURRENCIES = load('currencies.json')
|
||||||
|
USER_AGENTS = load('useragents.json')
|
|
@ -19,13 +19,12 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
|
||||||
import sys
|
import sys
|
||||||
import threading
|
import threading
|
||||||
from os.path import realpath, dirname
|
from os.path import realpath, dirname
|
||||||
from io import open
|
|
||||||
from babel.localedata import locale_identifiers
|
from babel.localedata import locale_identifiers
|
||||||
from flask_babel import gettext
|
from flask_babel import gettext
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
from json import loads
|
|
||||||
from searx import settings
|
from searx import settings
|
||||||
from searx import logger
|
from searx import logger
|
||||||
|
from searx.data import ENGINES_LANGUAGES
|
||||||
from searx.poolrequests import get
|
from searx.poolrequests import get
|
||||||
from searx.utils import load_module, match_language, get_engine_from_settings
|
from searx.utils import load_module, match_language, get_engine_from_settings
|
||||||
|
|
||||||
|
@ -38,7 +37,6 @@ engines = {}
|
||||||
|
|
||||||
categories = {'general': []}
|
categories = {'general': []}
|
||||||
|
|
||||||
languages = loads(open(engine_dir + '/../data/engines_languages.json', 'r', encoding='utf-8').read())
|
|
||||||
babel_langs = [lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0]
|
babel_langs = [lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0]
|
||||||
for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())]
|
for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())]
|
||||||
|
|
||||||
|
@ -108,8 +106,8 @@ def load_engine(engine_data):
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
# assign supported languages from json file
|
# assign supported languages from json file
|
||||||
if engine_data['name'] in languages:
|
if engine_data['name'] in ENGINES_LANGUAGES:
|
||||||
setattr(engine, 'supported_languages', languages[engine_data['name']])
|
setattr(engine, 'supported_languages', ENGINES_LANGUAGES[engine_data['name']])
|
||||||
|
|
||||||
# find custom aliases for non standard language codes
|
# find custom aliases for non standard language codes
|
||||||
if hasattr(engine, 'supported_languages'):
|
if hasattr(engine, 'supported_languages'):
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import os
|
|
||||||
import unicodedata
|
import unicodedata
|
||||||
|
|
||||||
from io import open
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
|
from searx.data import CURRENCIES
|
||||||
|
|
||||||
|
|
||||||
categories = []
|
categories = []
|
||||||
url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}'
|
url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}'
|
||||||
|
@ -13,8 +13,6 @@ weight = 100
|
||||||
|
|
||||||
parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
|
parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
|
||||||
|
|
||||||
db = 1
|
|
||||||
|
|
||||||
|
|
||||||
def normalize_name(name):
|
def normalize_name(name):
|
||||||
name = name.lower().replace('-', ' ').rstrip('s')
|
name = name.lower().replace('-', ' ').rstrip('s')
|
||||||
|
@ -23,17 +21,17 @@ def normalize_name(name):
|
||||||
|
|
||||||
|
|
||||||
def name_to_iso4217(name):
|
def name_to_iso4217(name):
|
||||||
global db
|
global CURRENCIES
|
||||||
|
|
||||||
name = normalize_name(name)
|
name = normalize_name(name)
|
||||||
currencies = db['names'].get(name, [name])
|
currency = CURRENCIES['names'].get(name, [name])
|
||||||
return currencies[0]
|
return currency[0]
|
||||||
|
|
||||||
|
|
||||||
def iso4217_to_name(iso4217, language):
|
def iso4217_to_name(iso4217, language):
|
||||||
global db
|
global CURRENCIES
|
||||||
|
|
||||||
return db['iso4217'].get(iso4217, {}).get(language, iso4217)
|
return CURRENCIES['iso4217'].get(iso4217, {}).get(language, iso4217)
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
|
@ -82,15 +80,3 @@ def response(resp):
|
||||||
results.append({'answer': answer, 'url': url})
|
results.append({'answer': answer, 'url': url})
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
def load():
|
|
||||||
global db
|
|
||||||
|
|
||||||
current_dir = os.path.dirname(os.path.realpath(__file__))
|
|
||||||
json_data = open(current_dir + "/../data/currencies.json", 'r', encoding='utf-8').read()
|
|
||||||
|
|
||||||
db = json.loads(json_data)
|
|
||||||
|
|
||||||
|
|
||||||
load()
|
|
||||||
|
|
|
@ -1,7 +1,4 @@
|
||||||
import json
|
from searx.data import bangs_loader
|
||||||
from os.path import join
|
|
||||||
|
|
||||||
from searx import searx_dir
|
|
||||||
|
|
||||||
# bangs data coming from the following url convert to json with
|
# bangs data coming from the following url convert to json with
|
||||||
# https://raw.githubusercontent.com/jivesearch/jivesearch/master/bangs/bangs.toml
|
# https://raw.githubusercontent.com/jivesearch/jivesearch/master/bangs/bangs.toml
|
||||||
|
@ -9,8 +6,7 @@ from searx import searx_dir
|
||||||
# NOTE only use the get_bang_url
|
# NOTE only use the get_bang_url
|
||||||
|
|
||||||
bangs_data = {}
|
bangs_data = {}
|
||||||
with open(join(searx_dir, 'data/bangs.json'), encoding='utf-8') as json_file:
|
for bang in bangs_loader()['bang']:
|
||||||
for bang in json.load(json_file)['bang']:
|
|
||||||
for trigger in bang["triggers"]:
|
for trigger in bang["triggers"]:
|
||||||
bangs_data[trigger] = {x: y for x, y in bang.items() if x != "triggers"}
|
bangs_data[trigger] = {x: y for x, y in bang.items() if x != "triggers"}
|
||||||
|
|
||||||
|
|
|
@ -1,13 +1,10 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
import os
|
|
||||||
import sys
|
import sys
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
import importlib
|
import importlib
|
||||||
|
|
||||||
from numbers import Number
|
from numbers import Number
|
||||||
from os.path import splitext, join
|
from os.path import splitext, join
|
||||||
from io import open
|
|
||||||
from random import choice
|
from random import choice
|
||||||
from html.parser import HTMLParser
|
from html.parser import HTMLParser
|
||||||
from urllib.parse import urljoin, urlparse, unquote
|
from urllib.parse import urljoin, urlparse, unquote
|
||||||
|
@ -18,6 +15,7 @@ from babel.core import get_global
|
||||||
|
|
||||||
|
|
||||||
from searx import settings
|
from searx import settings
|
||||||
|
from searx.data import USER_AGENTS
|
||||||
from searx.version import VERSION_STRING
|
from searx.version import VERSION_STRING
|
||||||
from searx.languages import language_codes
|
from searx.languages import language_codes
|
||||||
from searx import logger
|
from searx import logger
|
||||||
|
@ -31,9 +29,6 @@ blocked_tags = ('script',
|
||||||
ecma_unescape4_re = re.compile(r'%u([0-9a-fA-F]{4})', re.UNICODE)
|
ecma_unescape4_re = re.compile(r'%u([0-9a-fA-F]{4})', re.UNICODE)
|
||||||
ecma_unescape2_re = re.compile(r'%([0-9a-fA-F]{2})', re.UNICODE)
|
ecma_unescape2_re = re.compile(r'%([0-9a-fA-F]{2})', re.UNICODE)
|
||||||
|
|
||||||
useragents = json.loads(open(os.path.dirname(os.path.realpath(__file__))
|
|
||||||
+ "/data/useragents.json", 'r', encoding='utf-8').read())
|
|
||||||
|
|
||||||
xpath_cache = dict()
|
xpath_cache = dict()
|
||||||
lang_to_lc_cache = dict()
|
lang_to_lc_cache = dict()
|
||||||
|
|
||||||
|
@ -50,7 +45,7 @@ def gen_useragent(os=None):
|
||||||
|
|
||||||
See searx/data/useragents.json
|
See searx/data/useragents.json
|
||||||
"""
|
"""
|
||||||
return str(useragents['ua'].format(os=os or choice(useragents['os']), version=choice(useragents['versions'])))
|
return str(USER_AGENTS['ua'].format(os=os or choice(USER_AGENTS['os']), version=choice(USER_AGENTS['versions'])))
|
||||||
|
|
||||||
|
|
||||||
class HTMLTextExtractorException(Exception):
|
class HTMLTextExtractorException(Exception):
|
||||||
|
|
Loading…
Reference in New Issue