utils/build_env.py run without external dependencies

This should make it possible to create utils/brand.env on the fly, even without a virtualenv. The ultimate goal of the commit is to remove utils/brand.env from the git repository. The code includes a tiny YAML parser that **should** be good enough. The code reads searx/settings.yml and searx/version.py directly (and ignores the environment variables).
2024-01-01 19:24:07 +01:00 · 2024-01-06 17:56:52 +00:00 · 2024-01-06 17:56:52 +00:00 · c0917e3725
commit c0917e3725
parent 2f7c800f6f
1 changed files with 126 additions and 38 deletions
--- a/utils/build_env.py
+++ b/utils/build_env.py
@ -4,58 +4,146 @@

 # set path
 import sys
-import os
-from os.path import realpath, dirname, join, sep, abspath
+import importlib.util
+import re

-repo_root = realpath(dirname(realpath(__file__)) + sep + '..')
-sys.path.insert(0, repo_root)
+from pathlib import Path

-# Assure that the settings file from repository's working tree is used to
-# generate the build_env, not from /etc/searxng/settings.yml.
-os.environ['SEARXNG_SETTINGS_PATH'] = join(repo_root, 'etc', 'settings.yml')
+repo_root = Path(__file__).resolve().parent.parent

-def _env(*arg, **kwargs):
-    val = get_setting(*arg, **kwargs)
-    if val is True:
-        val = '1'
-    elif val is False:
-        val = ''
-    return val

 # If you add or remove variables here, do not forget to update:
 # - ./docs/admin/engines/settings.rst
 # - ./docs/dev/makefile.rst (section make buildenv)

 name_val = [
-
-    ('SEARXNG_URL'              , 'server.base_url'),
-    ('SEARXNG_PORT'             , 'server.port'),
-    ('SEARXNG_BIND_ADDRESS'     , 'server.bind_address'),
-
+    ("SEARXNG_URL", "server.base_url"),
+    ("SEARXNG_PORT", "server.port"),
+    ("SEARXNG_BIND_ADDRESS", "server.bind_address"),
 ]

-brand_env = 'utils' + sep + 'brand.env'

-# Some defaults in the settings.yml are taken from the environment,
-# e.g. SEARXNG_BIND_ADDRESS (:py:obj:`searx.settings_defaults.SHEMA`).  When the
-# 'brand.env' file is created these environment variables should be unset first::
def main():
    """Generate ``utils/brand.env`` from files in the repository working tree.

    Reads ``searx/settings.yml`` (parsed by ``parse_yaml``) and loads
    ``searx/version.py`` (via ``load_module_from_path``), then writes one
    ``export NAME='value'`` line per entry of ``name_val``, followed by
    ``GIT_URL`` and ``GIT_BRANCH`` taken from the loaded version module.
    """
    # import searx/version.py dynamically, so the SearXNG dependencies are not required
    # note: searx/version_frozen.py is ignored
    searx_version = load_module_from_path(repo_root / "searx" / "version.py")

    settings_path = repo_root / "searx" / "settings.yml"
    # Decode explicitly as UTF-8: without ``encoding`` open() uses the locale's
    # preferred encoding, which may differ from the encoding of the file and
    # from the UTF-8 used for the output below.
    with open(settings_path, encoding="utf-8") as f:
        settings = parse_yaml(f.read())

    brand_env = repo_root / "utils" / "brand.env"
    print("build %s (settings from: %s)" % (brand_env, settings_path))
    with open(brand_env, "w", encoding="utf-8") as f:
        for name, setting_name in name_val:
            print("export %s='%s'" % (name, get_setting_value(settings, setting_name)), file=f)
        print(f"export GIT_URL='{searx_version.GIT_URL}'", file=f)
        print(f"export GIT_BRANCH='{searx_version.GIT_BRANCH}'", file=f)

-from searx.version import GIT_URL, GIT_BRANCH
-from searx import get_setting

-print('build %s (settings from: %s)' % (brand_env, os.environ['SEARXNG_SETTINGS_PATH']))
-sys.path.insert(0, repo_root)
def load_module_from_path(path, name="module.name"):
    """Import the Python source file at *path* and return the module object.

    The file is executed directly, without going through the regular package
    import, so the package that contains it does not have to be importable.

    :param path: location of the source file (``str`` or path-like).
    :param name: name given to the module and used as its ``sys.modules`` key;
        the default keeps the name this helper has always used.
    :returns: the executed module.
    :raises ImportError: when no loader can be determined for *path*.
    """
    spec = importlib.util.spec_from_file_location(name, path)
    if spec is None or spec.loader is None:
        # spec_from_file_location() returns None when it cannot pick a loader
        # (e.g. the file has no recognised Python suffix).
        raise ImportError(f"cannot load a Python module from {path}")

    module = importlib.util.module_from_spec(spec)
    previous = sys.modules.get(spec.name)
    # Register before executing, as the module may look itself up while it runs.
    sys.modules[spec.name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a partially initialised module behind.
        if previous is None:
            sys.modules.pop(spec.name, None)
        else:
            sys.modules[spec.name] = previous
        raise
    return module

-with open(repo_root + sep + brand_env, 'w', encoding='utf-8') as f:
-    for name, option in name_val:
-        print("export %s='%s'" % (name, _env(option)), file=f)
-    print(f"export GIT_URL='{GIT_URL}'", file=f)
-    print(f"export GIT_BRANCH='{GIT_BRANCH}'", file=f)
+
def get_setting_value(settings, name):
    """Look up a dotted setting name in a nested mapping.

    :param settings: nested dictionaries, e.g. the result of ``parse_yaml``.
    :param name: dotted path such as ``"server.port"``; each segment is used
        as a key of the mapping reached so far.
    :returns: the value found; ``True`` is returned as ``"1"`` and ``False``
        as ``""``, every other value is returned unchanged.
    :raises KeyError: when a segment is missing or when the path descends
        into a value that is not a mapping; the message carries the full
        dotted name and the failing segment.
    """
    value = settings
    for segment in name.split("."):
        try:
            value = value[segment]
        except (KeyError, TypeError) as exc:
            # TypeError: an intermediate value is a scalar (str, int, bool, ...)
            raise KeyError(f"setting {name!r} not found (failed at {segment!r})") from exc
    if value is True:
        return "1"
    if value is False:
        return ""
    return value
+
+
def parse_yaml(yaml_str):
    """
    A simple YAML parser that converts a YAML string to a Python dictionary.
    This parser can handle nested dictionaries, but does not handle list or JSON like structures.

    Good enough parser to get the values of server.base_url, server.port and server.bind_address

    Supported subset:

    * ``key: value`` lines and ``key:`` lines that open a nested mapping,
      nested by 2 spaces per level;
    * full-line comments and trailing `` # comments`` (a ``#`` only starts a
      comment at the beginning of a value or after whitespace);
    * quoted values are returned as ``str`` without the surrounding quotes
      (escape sequences are kept verbatim, not decoded);
    * unquoted values are converted to ``bool``, ``int`` or ``float`` when they
      look like one, otherwise returned as stripped ``str``.

    Lines that are neither of the two forms above (e.g. plain list items) are
    ignored.  A key without a value is stored as an empty ``dict``.

    :param yaml_str: the YAML document as text.
    :returns: nested ``dict`` of the parsed mapping.
    """
    # fmt: off
    true_values = ("y", "Y", "yes", "Yes", "YES", "true", "True", "TRUE", "on", "On", "ON",)
    false_values = ("n", "N", "no", "No", "NO", "false", "False", "FALSE", "off", "Off", "OFF",)
    # fmt: on

    # A quoted scalar ends at the first unescaped closing quote, so quotes that
    # appear later on the line (e.g. inside a trailing comment) are not swallowed.
    double_quoted = re.compile(r'"((?:[^"\\]|\\.)*)"')
    single_quoted = re.compile(r"'((?:[^']|'')*)'")
    # '#' opens a comment only at the start of the text or after whitespace,
    # which keeps values such as "http://host/#fragment" intact.
    comment_start = re.compile(r"(?:^|\s)#")

    def split_scalar(text):
        """Return ``(is_quoted, value)`` for the text that follows ``key:``."""
        text = text.strip()
        for pattern in (double_quoted, single_quoted):
            match = pattern.match(text)
            if match:
                return True, match.group(1)
        match = comment_start.search(text)
        if match:
            text = text[: match.start()]
        return False, text.strip()

    def convert(value):
        """Convert an unquoted scalar to bool / int / float when it looks like one."""
        if value in true_values:
            return True
        if value in false_values:
            return False
        if value.replace(".", "").isdigit():
            for cast in (int, float):
                try:
                    return cast(value)
                except ValueError:
                    continue
        return value

    yaml_dict = {}
    # stack[i] is the mapping that receives keys found at nesting level i
    stack = [yaml_dict]

    for raw_line in yaml_str.split("\n"):
        content = raw_line.strip()  # also drops the "\r" of CRLF line endings
        if not content or content.startswith("#"):
            # Blank and comment-only lines carry no structure: skip them before
            # touching the stack so their indentation cannot reset the nesting.
            continue

        indentation = len(raw_line) - len(raw_line.lstrip())
        # Assuming 2 spaces per indentation level
        # see .yamllint.yml
        current_level = indentation // 2
        del stack[current_level + 1 :]

        key, separator, rest = content.partition(": ")
        if not separator:
            if not content.endswith(":"):
                continue  # not a mapping entry (e.g. a plain list item)
            key, rest = content[:-1], ""
        key = key.strip()
        if re.search(r"\s#", key):
            # the colon belongs to a trailing comment, e.g. "- item  # note: x"
            continue

        is_quoted, value = split_scalar(rest)
        if not is_quoted and value == "":
            # "key:" with nothing (or only a comment) after it opens a nested mapping
            child = {}
            stack[-1][key] = child
            stack.append(child)
        elif is_quoted:
            stack[-1][key] = value
        else:
            stack[-1][key] = convert(value)

    return yaml_dict
+
+
# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()