#!/usr/bin/env python3
"""Fetch the Reticulum manual at build time and stage it for bundling.

The downloaded archive is extracted into
``meshchatx/public/reticulum-docs-bundled/current`` so that the application
ships with an offline copy of the manual. At runtime the backend will serve
those files for any ``/reticulum-docs/`` request that does not have a
user-uploaded version overriding it.

Usage::

    python scripts/build/fetch_reticulum_manual.py [--source URL] [--dest DIR] [--force] [--include-pdf]

By default the upstream PDF/EPUB copies of the manual are excluded from the
bundle because the in-app viewer only renders the HTML version. Pass
``--include-pdf`` (or set ``MESHCHATX_DOCS_INCLUDE_PDF=1``) to keep them.

Environment variables::

    MESHCHATX_RETICULUM_DOCS_URL    Override the default source URL (single value).
    MESHCHATX_RETICULUM_DOCS_DEST   Override the destination directory.
    MESHCHATX_SKIP_DOCS_FETCH       If set to ``1``/``true``, exit without fetching.
    MESHCHATX_DOCS_INCLUDE_PDF      If set to ``1``/``true``, include PDF/EPUB.
"""

from __future__ import annotations

import argparse
import http.client
import io
import logging
import os
import shutil
import sys
import urllib.error
import urllib.request
import zipfile
from pathlib import Path

DEFAULT_SOURCES = (
    "https://github.com/markqvist/reticulum_website/archive/refs/heads/main.zip",
    "https://git.quad4.io/Reticulum/reticulum_website/archive/main.zip",
)

DEFAULT_DEST = (
    Path(__file__).resolve().parent.parent.parent
    / "meshchatx"
    / "public"
    / "reticulum-docs-bundled"
    / "current"
)

EXTRA_BINARY_SUFFIXES = (".pdf", ".epub")
"""File suffixes for large alternate-format manuals that are excluded from the
bundled copy by default. The HTML viewer does not use them, so dropping these
saves roughly 9 MB on disk per build artifact."""


def _is_truthy(value: str | None) -> bool:
    """Return True when ``value`` is one of ``1``/``true``/``yes``/``on``.

    Surrounding whitespace and letter case are ignored; ``None`` is false.
    """
    return value is not None and value.strip().lower() in {"1", "true", "yes", "on"}


def _download(url: str, timeout: float) -> bytes:
    """Download ``url`` and return the complete response body.

    The request carries a fixed ``User-Agent`` header. Errors raised by
    ``urllib`` while opening or reading the response are not handled here;
    the caller decides whether to try another source.
    """
    logging.info("Downloading Reticulum manual from %s", url)
    req = urllib.request.Request(
        url,
        headers={"User-Agent": "meshchatx-build-script"},
    )
    with urllib.request.urlopen(req, timeout=timeout) as response:
        return response.read()


def _resolve_docs_root(zip_bytes: bytes) -> tuple[zipfile.ZipFile, str]:
    """Return the open zip and the path prefix that contains the docs/ tree.

    The first path component of the first archive entry is taken as the
    archive root, and the docs are expected under ``<root>/docs/``.

    On success the caller owns the returned archive and must close it.

    Raises:
        zipfile.BadZipFile: if ``zip_bytes`` is not a valid zip archive.
        ValueError: if the archive is empty or has no ``<root>/docs/`` entry.
    """
    archive = zipfile.ZipFile(io.BytesIO(zip_bytes))
    try:
        names = archive.namelist()
        if not names:
            raise ValueError("downloaded archive is empty")
        root = names[0].split("/", 1)[0]
        docs_prefix = f"{root}/docs/"
        if not any(name.startswith(docs_prefix) for name in names):
            raise ValueError(
                f"archive does not contain expected docs/ folder under {root}/",
            )
    except Exception:
        # Do not leak the archive handle when validation fails.
        archive.close()
        raise
    return archive, docs_prefix


def _extract(
    archive: zipfile.ZipFile,
    docs_prefix: str,
    dest: Path,
    include_pdf: bool = False,
) -> tuple[int, int]:
    """Extract docs/ tree from ``archive`` into ``dest``.

    Returns ``(extracted_count, skipped_binary_count)``. When ``include_pdf``
    is false, large alternate-format manuals listed in
    :data:`EXTRA_BINARY_SUFFIXES` are skipped to keep shipped artifacts small.

    Entries outside ``docs_prefix``, directory entries, entries containing a
    ``..`` path component and entries whose target would not be located under
    ``dest`` are silently ignored.
    """
    extracted = 0
    skipped_binary = 0
    for member in archive.infolist():
        name = member.filename
        if not name.startswith(docs_prefix):
            continue
        rel = name[len(docs_prefix):]
        if not rel or rel.endswith("/"):
            # The docs/ folder itself or a nested directory entry.
            continue
        if ".." in rel.split("/"):
            # Refuse entries that try to climb out of the destination.
            continue
        if not include_pdf and rel.lower().endswith(EXTRA_BINARY_SUFFIXES):
            skipped_binary += 1
            continue
        target = dest / rel
        try:
            # Joining an absolute ``rel`` discards ``dest``; reject those.
            target.relative_to(dest)
        except ValueError:
            continue
        target.parent.mkdir(parents=True, exist_ok=True)
        with archive.open(member) as src, open(target, "wb") as fh:
            shutil.copyfileobj(src, fh)
        extracted += 1
    return extracted, skipped_binary


def fetch_manual(
    sources: list[str],
    dest: Path,
    timeout: float = 120.0,
    force: bool = False,
    include_pdf: bool = False,
) -> int:
    """Download the manual from the first working source and install it in ``dest``.

    ``sources`` are tried in order until one downloads and contains a
    ``docs/`` tree. The docs are first extracted into a staging directory
    next to ``dest`` and only swapped into place once at least one file was
    extracted, so a failed (re-)fetch never destroys an existing copy.

    Args:
        sources: Candidate ZIP URLs, in priority order.
        dest: Directory that receives the extracted docs.
        timeout: HTTP timeout in seconds for each download.
        force: Re-fetch even if ``dest`` already contains entries.
        include_pdf: Keep files matching :data:`EXTRA_BINARY_SUFFIXES`.

    Returns:
        The number of extracted files, or ``0`` when the fetch was skipped
        because ``dest`` is already populated and ``force`` is false.

    Raises:
        SystemExit: if no source could be used, or the chosen archive
            yielded no files to extract.
    """
    # ``is_dir`` (rather than ``exists``) keeps a stray regular file at
    # ``dest`` from crashing ``iterdir``; it is simply replaced below.
    if not force and dest.is_dir() and any(dest.iterdir()):
        logging.info(
            "Reticulum manual already present at %s (%d entries); skipping fetch.",
            dest,
            sum(1 for _ in dest.rglob("*")),
        )
        return 0

    last_error: Exception | None = None
    archive: zipfile.ZipFile | None = None
    docs_prefix: str | None = None
    for url in sources:
        try:
            data = _download(url, timeout)
            archive, docs_prefix = _resolve_docs_root(data)
            break
        except (
            urllib.error.URLError,
            # e.g. IncompleteRead on a truncated body; not an OSError subclass.
            http.client.HTTPException,
            OSError,
            ValueError,
            zipfile.BadZipFile,
        ) as exc:
            logging.warning("Failed to fetch %s: %s", url, exc)
            last_error = exc
            archive = None
            docs_prefix = None

    if archive is None or docs_prefix is None:
        raise SystemExit(
            f"Could not download Reticulum manual from any source: {last_error}",
        )

    final_dest = dest.resolve()
    # Sibling directory so the final swap is a same-filesystem rename.
    staging = final_dest.parent / f".{final_dest.name}.partial"
    promoted = False
    try:
        try:
            if staging.exists():
                # Left over from an earlier interrupted run.
                shutil.rmtree(staging)
            staging.mkdir(parents=True)
            extracted, skipped_binary = _extract(
                archive,
                docs_prefix,
                staging,
                include_pdf=include_pdf,
            )
        finally:
            archive.close()

        if extracted == 0:
            raise SystemExit("Archive contained no docs/ files to extract")

        # Only now is it safe to drop the previous copy.
        if final_dest.is_dir():
            shutil.rmtree(final_dest)
        elif final_dest.exists():
            final_dest.unlink()
        staging.replace(final_dest)
        promoted = True
    finally:
        if not promoted:
            shutil.rmtree(staging, ignore_errors=True)

    if skipped_binary:
        logging.info(
            "Skipped %d alternate-format manual file(s) (%s); pass --include-pdf to keep them.",
            skipped_binary,
            ", ".join(EXTRA_BINARY_SUFFIXES),
        )
    logging.info("Extracted %d files to %s", extracted, dest)
    return extracted


def _parse_args(argv: list[str]) -> argparse.Namespace:
    """Parse the command-line arguments in ``argv`` (without the program name)."""
    parser = argparse.ArgumentParser(
        description=__doc__,
        # Keep the usage / environment tables of the module docstring intact.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--source",
        action="append",
        default=None,
        help=(
            "URL of a Reticulum website ZIP. May be passed multiple times to provide "
            "fallbacks. Defaults to the canonical upstream sources."
        ),
    )
    parser.add_argument(
        "--dest",
        type=Path,
        default=None,
        help="Output directory for the extracted manual.",
    )
    parser.add_argument(
        "--timeout",
        type=float,
        default=120.0,
        help="HTTP timeout in seconds.",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="Re-fetch even if the destination already exists.",
    )
    parser.add_argument(
        "--include-pdf",
        action="store_true",
        # The environment variable only supplies the default; the flag wins.
        default=_is_truthy(os.environ.get("MESHCHATX_DOCS_INCLUDE_PDF")),
        help=(
            "Include the upstream PDF/EPUB manuals in the bundle. They are "
            "skipped by default to keep build artifacts smaller because the "
            "in-app viewer only uses the HTML version."
        ),
    )
    parser.add_argument(
        "--quiet",
        action="store_true",
        help="Reduce log output.",
    )
    return parser.parse_args(argv)


def main(argv: list[str] | None = None) -> int:
    """Script entry point; returns the process exit code.

    Sources given with ``--source`` come first, followed by
    ``MESHCHATX_RETICULUM_DOCS_URL`` when set; :data:`DEFAULT_SOURCES` is used
    only when neither is provided. The destination is ``--dest``, else
    ``MESHCHATX_RETICULUM_DOCS_DEST``, else :data:`DEFAULT_DEST`.
    """
    args = _parse_args(argv if argv is not None else sys.argv[1:])
    logging.basicConfig(
        level=logging.WARNING if args.quiet else logging.INFO,
        format="%(message)s",
    )

    if _is_truthy(os.environ.get("MESHCHATX_SKIP_DOCS_FETCH")):
        logging.info(
            "MESHCHATX_SKIP_DOCS_FETCH is set; skipping Reticulum manual fetch."
        )
        return 0

    sources: list[str] = []
    if args.source:
        sources.extend(args.source)
    env_url = os.environ.get("MESHCHATX_RETICULUM_DOCS_URL")
    if env_url:
        sources.append(env_url)
    if not sources:
        sources = list(DEFAULT_SOURCES)

    env_dest = os.environ.get("MESHCHATX_RETICULUM_DOCS_DEST")
    dest = args.dest or (Path(env_dest) if env_dest else DEFAULT_DEST)

    fetch_manual(
        sources=sources,
        dest=dest.resolve(),
        timeout=args.timeout,
        force=args.force,
        include_pdf=args.include_pdf,
    )
    return 0


if __name__ == "__main__":
    raise SystemExit(main())