[mod] lower memory footprint by lazy loading JSON data

This patch implements lazy loading of the JSON data.

Motivation: in most requests not all JSON data is needed, yet all of it is
loaded.  For example, these four JSON files:

- currencies.json ~550KB
- engine_descriptions.json ~1,3MB
- external_bangs.json ~1,3MB
- osm_keys_tags.json ~ 2,2MB

are most often not used but consume a lot of memory, and BTW they also extend
the time required to instantiate a worker.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2024-04-29 18:36:26 +02:00
parent e45a7cc063
commit 82fd0dac60
15 changed files with 73 additions and 48 deletions

View file

@ -20,13 +20,20 @@ __all__ = [
import json
from pathlib import Path
from searx import logger
# Directory that contains this module file; data files are opened relative to it.
data_dir = Path(__file__).parent
# Child logger named 'data'; note that this rebinds the imported ``logger`` name.
logger = logger.getChild('data')
def _load(filename):
    """Return the decoded content of the JSON file ``filename``.

    ``filename`` is resolved relative to ``data_dir`` and the file is read
    as UTF-8 text.
    """
    json_path = data_dir / filename
    with open(json_path, encoding='utf-8') as stream:
        content = json.load(stream)
    return content
# Type declarations for the JSON-backed module globals.  These are
# annotation-only statements: they record the type but do not bind a value.
CURRENCIES: dict
USER_AGENTS: dict
EXTERNAL_URLS: dict
WIKIDATA_UNITS: dict
EXTERNAL_BANGS: dict
OSM_KEYS_TAGS: dict
ENGINE_DESCRIPTIONS: dict
ENGINE_TRAITS: dict
LOCALES: dict
def ahmia_blacklist_loader():
@ -42,12 +49,27 @@ def ahmia_blacklist_loader():
return f.read().split()
# Eagerly parse each JSON data file and bind the result to a module-level
# name; every file listed here is read when these statements execute.
CURRENCIES = _load('currencies.json')
USER_AGENTS = _load('useragents.json')
EXTERNAL_URLS = _load('external_urls.json')
WIKIDATA_UNITS = _load('wikidata_units.json')
EXTERNAL_BANGS = _load('external_bangs.json')
OSM_KEYS_TAGS = _load('osm_keys_tags.json')
ENGINE_DESCRIPTIONS = _load('engine_descriptions.json')
ENGINE_TRAITS = _load('engine_traits.json')
LOCALES = _load('locales.json')
# Maps the name of each lazily loaded module global to the JSON file
# (relative to ``data_dir``) that holds its content.
NAME_TO_JSON_FILE = {
    'CURRENCIES': 'currencies.json',
    'USER_AGENTS': 'useragents.json',
    'EXTERNAL_URLS': 'external_urls.json',
    'WIKIDATA_UNITS': 'wikidata_units.json',
    'EXTERNAL_BANGS': 'external_bangs.json',
    'OSM_KEYS_TAGS': 'osm_keys_tags.json',
    'ENGINE_DESCRIPTIONS': 'engine_descriptions.json',
    'ENGINE_TRAITS': 'engine_traits.json',
    'LOCALES': 'locales.json',
}
def __getattr__(name: str):
    """Load a JSON-backed module global on first access.

    If ``name`` is a key of ``NAME_TO_JSON_FILE``, the mapped JSON file is
    read from ``data_dir``, the decoded content is stored in the module
    globals under ``name`` and returned.

    Raises:
        AttributeError: if ``name`` has no entry in ``NAME_TO_JSON_FILE``.
    """
    json_name = NAME_TO_JSON_FILE.get(name)
    if not json_name:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    json_path = data_dir / json_name
    logger.debug("init global %s from JSON file %s", name, json_path)
    with open(json_path, encoding='utf-8') as stream:
        # Store the decoded data in the module namespace, so later lookups of
        # this name are satisfied from globals().
        globals()[name] = json.load(stream)
    return globals()[name]