blog/makesite.py
Yax 3b7484fbe6 Replace JSON config with .env files
- Add python-dotenv dependency for environment variable loading
- Replace params.json and params-local.json with .env files
- Add --local and --local-stacosys flags to makesite.py
- Update Makefile to auto-detect stacosys availability
- Document configuration and usage in README.md
2026-01-10 16:03:53 +01:00

782 lines
24 KiB
Python
Executable file

#!/usr/bin/env python3
"""Make static website/blog with Python."""
import datetime
import os
import re
import shutil
import sys
import unicodedata
from pathlib import Path
from dotenv import load_dotenv
import requests
import mistune
from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters import html
# Abbreviated French weekday names, Monday first.
FRENCH_WEEKDAYS = ['lun.', 'mar.', 'mer.', 'jeu.', 'ven.', 'sam.', 'dim.']
# Abbreviated French month names, January first; _get_friendly_date indexes
# this list with (month - 1).
FRENCH_MONTHS = ['janv.', 'févr.', 'mars', 'avr.', 'mai', 'juin',
                 'juil.', 'août', 'sept.', 'oct.', 'nov.', 'déc.']
class HighlightRenderer(mistune.HTMLRenderer):
    """Mistune renderer that syntax-highlights code blocks with Pygments."""

    def block_code(self, code, info=None):
        """Render a code block, highlighted when its language is recognised.

        Args:
            code: The code content to render
            info: Optional fence info string; its first word is used as the
                language identifier for syntax highlighting

        Returns:
            str: Pygments-highlighted HTML, or plain escaped pre/code tags
                when no language is given or Pygments has no lexer for it
        """
        # Local import: keeps this block independent of the file's import list.
        from pygments.util import ClassNotFound

        # Only the first word names the language ("python title=x" -> "python").
        words = info.split(None, 1) if info else []
        if words:
            try:
                lexer = get_lexer_by_name(words[0], stripall=True)
            except ClassNotFound:
                # Unknown language: use the plain rendering below instead of
                # aborting the whole site build.
                lexer = None
            if lexer is not None:
                return highlight(code, lexer, html.HtmlFormatter())
        return '<pre><code>' + mistune.escape(code) + '</code></pre>'
# Module-wide Markdown-to-HTML converter; code blocks are rendered through
# HighlightRenderer.
markdown = mistune.create_markdown(renderer=HighlightRenderer())
def fread(filename):
    """Read a text file as UTF-8, close it and return its content.

    The encoding is explicit so that non-ASCII content is decoded the same
    way whatever the locale of the machine running the build.

    Args:
        filename: Path of the file to read

    Returns:
        str: Full content of the file
    """
    with open(filename, "r", encoding="utf-8") as f:
        return f.read()
def fwrite(filename, text):
    """Write text to a file as UTF-8, creating parent directories if needed.

    Args:
        filename: Path of the file to write (overwritten if it exists)
        text: Content to write
    """
    basedir = os.path.dirname(filename)
    # dirname is "" for a bare file name, and os.makedirs("") would raise.
    if basedir:
        os.makedirs(basedir, exist_ok=True)
    with open(filename, "w", encoding="utf-8") as f:
        f.write(text)
def log(msg, *log_args):
    """Format msg with log_args via str.format and emit one line on stderr."""
    line = msg.format(*log_args)
    print(line, file=sys.stderr)
def _strip_tags_and_truncate(text, words=25):
    """Replace every <...> span with a space and keep the first `words` words.

    The tag pattern is matched in DOTALL mode, so a tag may span several
    lines. The kept words are joined with single spaces.
    """
    without_tags = re.sub(r"(?s)<.*?>", " ", text)
    kept = without_tags.split()[:words]
    return " ".join(kept)
def _parse_headers(text):
    """Yield (key, value, end-index) for each leading `<!-- key: value -->`.

    The pattern's second alternative matches any other run of characters, so
    the first match that is not a header comment ends the iteration.
    end-index is the offset just past the header and its trailing whitespace.
    """
    header_or_other = re.compile(r"\s*<!--\s*(.+?)\s*:\s*(.+?)\s*-->\s*|.+")
    for found in header_or_other.finditer(text):
        key, value = found.group(1), found.group(2)
        if not key:
            # Not a header comment: the header section is over.
            return
        yield key, value, found.end()
def _rfc_2822_format(date_str):
    """Convert yyyy-mm-dd date string to RFC 2822 format date string.

    The date is taken as midnight UTC. Formatting goes through
    email.utils.format_datetime, which always emits English day and month
    names, whereas strftime's %a/%b follow the current locale.

    Args:
        date_str: Date string in YYYY-MM-DD format

    Returns:
        str: e.g. "Mon, 15 Jan 2024 00:00:00 +0000"

    Raises:
        ValueError: If date_str does not match YYYY-MM-DD
    """
    from email.utils import format_datetime

    midnight_utc = datetime.datetime.strptime(date_str, "%Y-%m-%d").replace(
        tzinfo=datetime.timezone.utc
    )
    return format_datetime(midnight_utc)
def slugify(value):
    """
    Build a URL slug from a text value.

    Transliterates to ASCII (characters without an ASCII decomposition are
    dropped), strips leading and trailing whitespace, removes every character
    that is not a word character (alphanumerics and underscores), whitespace
    or a hyphen, replaces each run of whitespace with a single hyphen and
    converts to lowercase.

    Whitespace is stripped before punctuation is removed, so the result for
    input that is already trimmed is unchanged: "Quoi ?" still gives "quoi-".
    """
    value = unicodedata.normalize("NFKD", value).encode("ascii", "ignore").decode("ascii")
    value = value.strip()  # Honour the documented leading/trailing strip
    value = re.sub(r"[^\w\s-]", "", value)  # Keep word chars, whitespace, hyphens
    value = re.sub(r"\s+", "-", value)  # Each whitespace run becomes one hyphen
    return value.lower()
def parse_post_file(filename, params):
    """Parse post file: read, extract metadata, convert markdown, and generate summary.

    Args:
        filename: Path of the post file; its base name may start with a
            yyyy-mm-dd- date prefix
        params: Global parameters dict (must have 'site_url')

    Returns:
        dict: 'date' and 'slug' (from the file name, possibly overridden by
            headers), one entry per `<!-- key: value -->` header, plus
            'content', 'content_rss', 'rfc_2822_date' and 'summary'
    """
    # Read file content.
    text = fread(filename)

    # Derive default date and slug from the file name.
    date_slug = os.path.basename(filename).split(".")[0]
    match = re.search(r"^(?:(\d\d\d\d-\d\d-\d\d)-)?(.+)$", date_slug)
    content = {"date": match.group(1) or "1970-01-01", "slug": match.group(2)}

    # Read headers; `end` ends up just past the last header.
    end = 0
    for key, val, end in _parse_headers(text):
        content[key] = val

    # Prefer a slug built from the title. A post without a title header keeps
    # the slug already in `content` instead of failing with KeyError.
    if "title" in content:
        content["slug"] = slugify(content["title"])

    # Separate content from headers.
    text = text[end:]

    # Convert Markdown content to HTML.
    if filename.endswith((".md", ".mkd", ".mkdn", ".mdown", ".markdown")):
        summary_index = text.find("<!-- more")
        if summary_index > 0:
            # Explicit summary: everything before the "more" marker.
            summary = markdown(_strip_html_tags(text[:summary_index]))
        else:
            summary = _strip_tags_and_truncate(markdown(_strip_html_tags(text)))
        clean_text = text.replace("<!-- more -->", "")
        text = markdown(clean_text)
    else:
        summary = _strip_tags_and_truncate(text)

    # Update the dictionary with content and RFC 2822 date.
    content.update(
        {
            "content": text,
            "content_rss": _make_links_absolute(params["site_url"], text),
            "rfc_2822_date": _rfc_2822_format(content["date"]),
            "summary": summary,
        }
    )
    return content
def _make_links_absolute(site_url, text):
    """Prefix site-relative image and post links with site_url (RSS feed).

    Only `src="/images/20...` and `href="/20...` are rewritten; every other
    link is left as it is.
    """
    # TODO externalize links replacement configuration
    rewrites = (
        ('src="', "/images/20"),
        ('href="', "/20"),
    )
    for attribute, path_start in rewrites:
        text = text.replace(attribute + path_start,
                            attribute + site_url + path_start)
    return text
def _strip_html_tags(text):
    """Remove opening and closing HTML tags until nothing more can be removed.

    Stripping is repeated because removing one tag can join the surrounding
    characters into a new tag. Spans such as `<!-- ... -->` do not start with
    a word character after `<` and are therefore kept.
    """
    opening_tag = re.compile(r"<\w+.*?>")
    closing_tag = re.compile(r"<\/\w+>")
    while True:
        stripped = closing_tag.sub("", opening_tag.sub("", text))
        if stripped == text:
            return stripped
        text = stripped
def render(template, **params):
    """Substitute each `{{ name }}` placeholder with str(params[name]).

    A placeholder whose name is not in params is left in the output as is.
    """
    def substitute(found):
        name = found.group(1)
        if name in params:
            return str(params[name])
        return found.group(0)

    return re.sub(r"{{\s*([^}\s]+)\s*}}", substitute, template)
def get_header_list_value(header_name, page_params):
    """Return the non-empty items of a space-separated header value.

    Args:
        header_name: Name of the header to extract (e.g., 'category', 'tag')
        page_params: Dict containing page parameters

    Returns:
        list: Stripped items in their original order; empty when the header
            is absent
    """
    if header_name not in page_params:
        return []
    pieces = page_params[header_name].split(" ")
    return [piece.strip() for piece in pieces if piece.strip()]
def _render_comment(comment, comment_detail_layout):
    """Render a single comment using the comment detail layout.

    Args:
        comment: Dict with keys: author, content, date, and optional: site, avatar
        comment_detail_layout: Template string for rendering a comment

    Returns:
        str: Rendered HTML for the comment
    """
    # Local import under its own name: the module-level name `html` is bound
    # to the Pygments formatter module in this file.
    from html import escape

    site = comment.get("site", "")
    if site:
        # The site URL is supplied by the commenter; escape it so a quote
        # cannot close the href attribute and inject markup.
        site_start = '<a href="' + escape(site, quote=True) + '">'
        site_end = '</a>'
    else:
        site_start = ''
        site_end = ''
    # NOTE(review): author and avatar are passed to the template unescaped;
    # confirm whether the comment service already sanitises them.
    return render(
        comment_detail_layout,
        author=comment["author"],
        avatar=comment.get("avatar", ""),
        site_start=site_start,
        site_end=site_end,
        date=comment["date"],
        content=markdown(comment["content"]),
    )
def _fetch_comments(post_url, stacosys_url, timeout=10):
    """Fetch comments from Stacosys API for a given post URL.

    Args:
        post_url: Relative URL of the post (e.g., "2024/my-post/")
        stacosys_url: Base URL of the Stacosys comment service
        timeout: Seconds to wait for the service before giving up, so an
            unresponsive server cannot hang the build

    Returns:
        list: Value of the "data" key of the JSON response

    Raises:
        requests.RequestException: On connection failure, timeout or an HTTP
            error status
    """
    req_url = stacosys_url + "/comments"
    query_params = {"url": "/" + post_url}
    resp = requests.get(url=req_url, params=query_params, timeout=timeout)
    # Report an HTTP error as such rather than as a JSON/KeyError further down.
    resp.raise_for_status()
    return resp.json()["data"]
def _process_comments(page_params, stacosys_url, comment_layout,
                      comment_detail_layout):
    """Fetch and render the comments of a post.

    Args:
        page_params: Dict of page parameters; 'comment' set to "no" disables
            comments for the page, 'post_url' is required otherwise
        stacosys_url: Base URL of Stacosys service (empty string disables comments)
        comment_layout: Template for the overall comment section
        comment_detail_layout: Template for individual comments

    Returns:
        tuple: (comment_count, comments_html, comment_section_html)
            - comment_count: Number of comments (int)
            - comments_html: Rendered HTML of all comments (str)
            - comment_section_html: comment_layout rendered with the page
              parameters plus 'comments' and 'comment_count' (str)
            (0, "", "") when comments are disabled
    """
    disabled_for_page = page_params.get("comment", "yes") == "no"
    if disabled_for_page or not stacosys_url:
        return 0, "", ""

    comments = _fetch_comments(page_params["post_url"], stacosys_url)
    count = len(comments)
    comments_html = "".join(
        _render_comment(entry, comment_detail_layout) for entry in comments
    )

    # Work on a copy: the caller's page_params must not be modified.
    section_params = {
        **page_params,
        "comments": comments_html,
        "comment_count": count,
    }
    return count, comments_html, render(comment_layout, **section_params)
def _get_friendly_date(date_str):
    """Format a YYYY-MM-DD date with a French abbreviated month name.

    Args:
        date_str: Date string in YYYY-MM-DD format

    Returns:
        str: Two-digit day, French month and year (e.g., "15 janv. 2024")
    """
    parsed = datetime.datetime.strptime(date_str, "%Y-%m-%d")
    month_label = FRENCH_MONTHS[parsed.month - 1]
    return "{:02d} {} {}".format(parsed.day, month_label, parsed.year)
def _process_categories(page_params, category_layout):
    """Extract the page categories and render their combined label.

    Args:
        page_params: Dict containing page parameters; categories come from
            its 'category' entry, if any
        category_layout: Template rendered once per category with `category`
            and `url` (the slugified category)

    Returns:
        tuple: (list of category strings, concatenated stripped renderings)
    """
    categories = get_header_list_value("category", page_params)
    labels = [
        render(category_layout, category=name, url=slugify(name)).strip()
        for name in categories
    ]
    return categories, "".join(labels)
def _setup_page_params(content, params):
    """Merge global and post parameters and add the derived page values.

    Args:
        content: Dict containing parsed content (must provide 'date' and
            'slug' unless params already does)
        params: Global parameters dict; content entries take precedence

    Returns:
        dict: New dict with empty 'header' and 'footer', plus 'date_path',
            'friendly_date', 'year' and 'post_url' ("<year>/<slug>/")
    """
    page_params = {**params, **content}
    date = page_params["date"]
    year = date.split("-")[0]
    page_params.update(
        header="",
        footer="",
        date_path=date.replace("-", "/"),
        friendly_date=_get_friendly_date(date),
        year=year,
        post_url=year + "/" + page_params["slug"] + "/",
    )
    return page_params
def make_posts(
    src, src_pattern, dst, layout, category_layout,
    comment_layout, comment_detail_layout, **params
):
    """Render every post found under src and return the post dicts.

    Args:
        src: Directory searched for post files
        src_pattern: Glob pattern applied inside src
        dst: Destination path template, rendered with the page parameters
        layout: Page template for a post
        category_layout: Template for one category label
        comment_layout: Template for the comment section
        comment_detail_layout: Template for one comment
        **params: Global site parameters

    Returns:
        list: Post dicts sorted by 'date', newest first
    """
    stacosys_url = params.get("stacosys_url", "")
    posts = []
    for source in Path(src).glob(src_pattern):
        src_path = str(source)
        content = parse_post_file(src_path, params)

        # Placeholders inside the post body and summary use the global params.
        for field in ("content", "summary"):
            content[field] = render(content[field], **params)

        page_params = _setup_page_params(content, params)

        categories, category_label = _process_categories(page_params, category_layout)
        page_params["categories"] = categories
        page_params["category_label"] = category_label
        page_params["tags"] = get_header_list_value("tag", page_params)

        # The 'comment' header is read here, then replaced by the rendered
        # comment section below.
        comment_count, comments_html, comment_section = _process_comments(
            page_params, stacosys_url, comment_layout, comment_detail_layout
        )
        page_params["comment_count"] = comment_count
        page_params["comments"] = comments_html
        page_params["comment"] = comment_section

        # Copy the derived values back so list pages can use them.
        for key in ("year", "post_url", "categories", "category_label",
                    "tags", "friendly_date", "comment_count"):
            content[key] = page_params[key]
        posts.append(content)

        dst_path = render(dst, **page_params)
        output = render(layout, **page_params)
        log("Rendering {} => {} ...", src_path, dst_path)
        fwrite(dst_path, output)

    return sorted(posts, key=lambda post: post["date"], reverse=True)
def make_notes(
    src, src_pattern, dst, layout, **params
):
    """Render every note found under src and return the note dicts.

    Notes are published under "notes/<slug>/" and carry an empty friendly
    date and category label.

    Returns:
        list: Note dicts sorted by 'date', newest first
    """
    notes = []
    for source in Path(src).glob(src_pattern):
        src_path = str(source)
        content = parse_post_file(src_path, params)

        # Placeholders inside the note body and summary use the global params.
        for field in ("content", "summary"):
            content[field] = render(content[field], **params)

        page_params = dict(params, **content)
        page_params.update(
            header="",
            footer="",
            friendly_date="",
            category_label="",
            post_url="notes/" + page_params["slug"] + "/",
        )

        # Copy the derived values back so the notes index can use them.
        for key in ("post_url", "friendly_date", "category_label"):
            content[key] = page_params[key]
        notes.append(content)

        dst_path = render(dst, **page_params)
        output = render(layout, **page_params)
        log("Rendering {} => {} ...", src_path, dst_path)
        fwrite(dst_path, output)

    return sorted(notes, key=lambda note: note["date"], reverse=True)
def make_list(
    posts, dst, list_layout, item_layout,
    header_layout, footer_layout, **params
):
    """Render a list page (index, archive, feed, ...) and write it to dst.

    Args:
        posts: List of post dictionaries to include in the list
        dst: Destination path template for the generated file
        list_layout: Template for the overall list page
        item_layout: Template for individual list items
        header_layout: Template for page header (None for an empty header)
        footer_layout: Template for page footer (None for an empty footer)
        **params: Additional parameters for template rendering
    """
    # The header is rendered first, so the footer template can refer to it.
    params["header"] = "" if header_layout is None else render(header_layout, **params)
    params["footer"] = "" if footer_layout is None else render(footer_layout, **params)

    rendered_items = []
    for post in posts:
        item_params = dict(params, **post)

        # French comment-count label; empty when there is no comment.
        count = item_params.get("comment_count")
        if not count:
            label = ""
        elif count == 1:
            label = "1 commentaire"
        else:
            label = str(count) + " commentaires"
        item_params["comment_label"] = label

        rendered_items.append(render(item_layout, **item_params))

    params["content"] = "".join(rendered_items)
    dst_path = render(dst, **params)
    output = render(list_layout, **params)
    log("Rendering list => {} ...", dst_path)
    fwrite(dst_path, output)
def create_blog(page_layout, list_in_page_layout, params):
    """Create blog posts and paginated index pages.

    The first index page is written to _site/index.html and page N (N > 1)
    to _site/pageN/index.html. The caller's params dict is not modified.

    Args:
        page_layout: Template for individual pages
        list_in_page_layout: Template for list pages wrapped in page layout
        params: Global site parameters

    Returns:
        list: Sorted list of all post dictionaries (newest first)
    """
    banner_layout = fread("layout/banner.html")
    paging_layout = fread("layout/paging.html")
    post_layout = fread("layout/post.html")
    post_layout = render(page_layout, content=post_layout)
    comment_layout = fread("layout/comment.html")
    comment_detail_layout = fread("layout/comment-detail.html")
    category_layout = fread("layout/category.html")
    item_layout = fread("layout/item.html")

    posts = make_posts(
        "posts",
        "**/*.md",
        "_site/{{ post_url }}/index.html",
        post_layout,
        category_layout,
        comment_layout,
        comment_detail_layout,
        **params
    )

    # Create blog list pages by 10.
    page_size = 10
    chunk_posts = [
        posts[i: i + page_size]
        for i in range(0, len(posts), page_size)
    ]
    last_page = len(chunk_posts)

    for page, chunk in enumerate(chunk_posts, start=1):
        if page == last_page:
            next_page = ""
        else:
            next_page = "/page" + str(page + 1) + "/"

        if page == 1:
            previous_page = ""
            dst = "_site/index.html"
        else:
            # Page 1 lives at the site root; no /page1/ directory is written.
            previous_page = "/" if page == 2 else "/page" + str(page - 1) + "/"
            dst = "_site/page" + str(page) + "/index.html"

        # Paging values go into a copy so they do not leak into the params
        # later reused for categories, archives, notes and feeds.
        list_params = dict(
            params,
            page=page,
            next_page=next_page,
            previous_page=previous_page,
        )
        make_list(
            chunk,
            dst,
            list_in_page_layout,
            item_layout,
            banner_layout,
            paging_layout,
            **list_params
        )
    return posts
def generate_categories(list_in_page_layout, item_nosummary_layout,
                        posts, params):
    """Generate category pages grouping posts by category.

    One list page is written to _site/<slugified category>/index.html per
    category. The caller's params dict is not modified.

    Args:
        list_in_page_layout: Template for list pages
        item_nosummary_layout: Template for list items without summaries
        posts: List of all blog posts (each must have a 'categories' list)
        params: Global site parameters
    """
    category_title_layout = fread("layout/category_title.html")

    # Group posts by category, keeping the incoming post order.
    cat_post = {}
    for post in posts:
        for cat in post["categories"]:
            cat_post.setdefault(cat, []).append(post)

    for cat, cat_posts in cat_post.items():
        # The current category goes into a copy so it does not leak into the
        # params reused for the pages generated afterwards.
        make_list(
            cat_posts,
            "_site/" + slugify(cat) + "/index.html",
            list_in_page_layout,
            item_nosummary_layout,
            category_title_layout,
            None,
            **dict(params, category=cat)
        )
def generate_archives(blog_posts, list_in_page_layout, item_nosummary_layout,
                      archive_title_layout, params):
    """Write the archives page listing all blog posts.

    Args:
        blog_posts: List of all blog posts
        list_in_page_layout: Template for list pages
        item_nosummary_layout: Template for list items without summaries
        archive_title_layout: Template for archive page header
        params: Global site parameters
    """
    archives_path = "_site/archives/index.html"
    no_footer = None
    make_list(
        blog_posts, archives_path,
        list_in_page_layout, item_nosummary_layout,
        archive_title_layout, no_footer,
        **params
    )
def generate_notes(page_layout, archive_title_layout,
                   list_in_page_layout, params):
    """Write one page per note and the notes index.

    Args:
        page_layout: Template for individual pages
        archive_title_layout: Template for notes index header
        list_in_page_layout: Template for list pages
        params: Global site parameters
    """
    note_body_layout = fread("layout/note.html")
    item_note_layout = fread("layout/item_note.html")
    # Wrap the note template in the site-wide page template.
    note_page_layout = render(page_layout, content=note_body_layout)

    notes = make_notes(
        "notes", "**/*.md", "_site/{{ post_url }}/index.html",
        note_page_layout,
        **params
    )

    # Notes index: header taken from the archive title, no footer.
    make_list(
        notes, "_site/notes/index.html",
        list_in_page_layout, item_note_layout,
        archive_title_layout, None,
        **params
    )
def generate_rss_feeds(posts, params):
    """Generate RSS feeds: main feed and per-tag feeds.

    The main feed holds the first 10 posts and is written to both
    _site/rss.xml and _site/index.xml. Each tag gets
    _site/rss.<slugified tag>.xml with all posts carrying that tag.
    The caller's params dict is not modified.

    Args:
        posts: List of all blog posts (each must have a 'tags' list)
        params: Global site parameters
    """
    rss_xml = fread("layout/rss.xml")
    rss_item_xml = fread("layout/rss_item.xml")

    # Create main RSS feed for 10 last entries
    for filename in ("_site/rss.xml", "_site/index.xml"):
        make_list(
            posts[:10],
            filename,
            rss_xml,
            rss_item_xml,
            None,
            None,
            **params
        )

    # Group posts by tag, keeping the incoming post order.
    tag_post = {}
    for post in posts:
        for tag in post["tags"]:
            tag_post.setdefault(tag, []).append(post)

    for tag, tagged_posts in tag_post.items():
        # The current tag goes into a copy so it does not leak into the
        # params reused for the files generated afterwards.
        make_list(
            tagged_posts,
            "_site/rss." + slugify(tag) + ".xml",
            rss_xml,
            rss_item_xml,
            None,
            None,
            **dict(params, tag=tag)
        )
def generate_sitemap(posts, params):
    """Write _site/sitemap.xml with one entry per post.

    Args:
        posts: List of all blog posts
        params: Global site parameters
    """
    sitemap_layout = fread("layout/sitemap.xml")
    sitemap_item_layout = fread("layout/sitemap_item.xml")
    # A sitemap has neither header nor footer.
    make_list(
        posts, "_site/sitemap.xml",
        sitemap_layout, sitemap_item_layout,
        None, None,
        **params
    )
def get_params(env_file=None):
    """Load site parameters from .env files.

    Args:
        env_file: Optional .env file to load and override .env values

    Returns:
        dict: Site parameters with defaults and loaded values
    """
    # Load .env file first
    load_dotenv(".env")

    # Load override file if specified
    if env_file:
        load_dotenv(env_file, override=True)
        # log() applies str.format to its message, so the path is passed as
        # an argument: braces in it must not be read as placeholders.
        log("use params from {}", env_file)
    else:
        log("use params from .env")

    # Build params from environment variables
    params = {
        "title": os.getenv("TITLE", "Blog"),
        "subtitle": os.getenv("SUBTITLE", "Lorem Ipsum"),
        "author": os.getenv("AUTHOR", "Admin"),
        "site_url": os.getenv("SITE_URL", "http://localhost:8000"),
        "current_year": datetime.datetime.now().year,
        "stacosys_url": os.getenv("STACOSYS_URL", ""),
        "external_check": os.getenv("EXTERNAL_CHECK", ""),
    }
    return params
def rebuild_site_directory():
    """Delete any existing _site directory, then copy static/ to _site."""
    site_dir = "_site"
    if os.path.isdir(site_dir):
        shutil.rmtree(site_dir)
    shutil.copytree("static", site_dir)
def main(env_file=None):
    """Generate the whole static site into _site.

    Args:
        env_file: Optional .env file to override .env values
    """
    params = get_params(env_file)

    # Start from a fresh copy of the static files.
    rebuild_site_directory()

    # Layouts shared by several generation steps.
    page_layout = fread("layout/page.html")
    list_layout = fread("layout/list.html")
    list_in_page_layout = render(page_layout, content=list_layout)
    archive_title_layout = fread("layout/archives.html")
    item_nosummary_layout = fread("layout/item_nosummary.html")

    # Posts come first: the following steps work on the returned list.
    blog_posts = create_blog(page_layout, list_in_page_layout, params)

    generate_categories(
        list_in_page_layout, item_nosummary_layout, blog_posts, params
    )
    generate_archives(
        blog_posts, list_in_page_layout, item_nosummary_layout,
        archive_title_layout, params
    )
    generate_notes(
        page_layout, archive_title_layout, list_in_page_layout, params
    )
    generate_rss_feeds(blog_posts, params)
    generate_sitemap(blog_posts, params)
if __name__ == "__main__":
    # Command-line flags in priority order, each mapped to its override file;
    # the first flag present on the command line wins, None otherwise.
    env_file = next(
        (
            override
            for flag, override in (
                ("--local-stacosys", ".env.local-stacosys"),
                ("--local", ".env.local"),
            )
            if flag in sys.argv
        ),
        None,
    )
    main(env_file)