#!/usr/bin/env python3
# Calibre-Web Automated – fork of Calibre-Web
# Copyright (C) 2018-2025 Calibre-Web contributors
# Copyright (C) 2024-2025 Calibre-Web Automated contributors
# SPDX-License-Identifier: GPL-3.0-or-later
# See CONTRIBUTORS for full list of authors.

"""
update_spdx_headers.py

Purpose:
  Normalize / insert concise GPL license + attribution headers with SPDX into
  source files. Designed for Calibre-Web Automated (fork of
  janeczku/calibre-web).

Behavior:
  * Scans files (default: **/*.py) excluding common vendor / build /
    translation dirs (excluded directories are pruned, never descended into).
  * Detects existing legacy Calibre-Web header blocks and replaces them with
    the new short form.
  * Inserts header if none present.
  * Respects existing shebang (kept as first line) and coding line.
  * Avoids duplicating if header already normalized.
  * Updates year range dynamically (current year) and fork start year (2024).
  * DEFAULT ROOT: When --paths is omitted the repository root is inferred as
    the parent of the directory containing this script (scripts/../).

Resulting header (example):
  # Calibre-Web Automated – fork of Calibre-Web
  # Copyright (C) 2018-2025 Calibre-Web contributors
  # Copyright (C) 2024-2025 Calibre-Web Automated contributors
  # SPDX-License-Identifier: GPL-3.0-or-later
  # See CONTRIBUTORS for full list of authors.

CLI:
  --apply                  Write changes (default dry-run).
  --paths PATH [PATH]      Limit to specific paths/files.
  --ext .py,.sh            Comma separated list of extensions (default .py)
  --exclude PAT            Extra exclude; any path containing PAT as a
                           substring is skipped (can repeat)
  --print                  Print updated content to stdout (only valid when
                           single file & --apply not set)
  --verbose                Verbose logging
  --quiet                  Only errors
  --license-only           Only add SPDX line if missing (preserve existing
                           header text otherwise)
  --force-year-start YEAR  Override upstream start year (default 2018)

Exit codes: 0 success, 1 errors.

NOTE: Commit the CONTRIBUTORS file separately; headers reference it.
"""

from __future__ import annotations

import argparse
import datetime
import os
import pathlib
import re
import sys
from typing import Iterable, Iterator, List, Sequence

UPSTREAM_START_DEFAULT = 2018
FORK_START = 2024
CURRENT_YEAR = datetime.date.today().year

# Phrases whose presence near the top of a file indicates a legacy header.
LEGACY_PATTERNS = [
    re.compile(r"This file is part of the Calibre-Web", re.IGNORECASE),
    re.compile(r"GNU General Public License", re.IGNORECASE),
]
SPDX_RE = re.compile(r"SPDX-License-Identifier:\s*GPL-3\.0-or-later")
HEADER_ALREADY_RE = re.compile(
    r"Calibre-Web Automated .*?SPDX-License-Identifier: GPL-3\.0-or-later",
    re.DOTALL,
)
SHEBANG_RE = re.compile(r"^#!.*\n")
CODING_RE = re.compile(r"^#.*coding[:=].*\n", re.IGNORECASE)

# Directory names (or "a/b" relative sub-paths) that are never scanned.
EXCLUDE_DIRS = {
    ".git", "__pycache__", "build", "dist", "venv", ".venv", "env",
    "node_modules", "translations", "locale", "docs/_build", "htmlcov",
}
DEFAULT_EXTS = [".py"]

HEADER_TEMPLATE = (
    "# Calibre-Web Automated – fork of Calibre-Web\n"
    "# Copyright (C) {upstream_start}-{year} Calibre-Web contributors\n"
    "# Copyright (C) {fork_start}-{year} Calibre-Web Automated contributors\n"
    "# SPDX-License-Identifier: GPL-3.0-or-later\n"
    "# See CONTRIBUTORS for full list of authors.\n"
    "\n"
)

# Legacy block: a comment line mentioning Calibre-Web, up to 40 further
# comment lines, ending on the comment line holding the gnu.org licenses URL.
LEGACY_BLOCK_RE = re.compile(
    r"^(?:#.*Calibre-Web.*\n)(?:#.*\n){0,40}#.*http.*gnu.*licenses.*\n",
    re.IGNORECASE | re.MULTILINE,
)

# An already-normalized header: only the header's own comment lines (plus at
# most one following blank line) are matched, so a refresh can never swallow
# code that directly follows the header. MULTILINE lets it match after a
# shebang / coding line.
NORMALIZED_BLOCK_RE = re.compile(
    r"^# Calibre-Web Automated [^\n]*\n"
    r"(?:#[^\n]*\n)*?"
    r"# SPDX-License-Identifier: GPL-3\.0-or-later[^\n]*\n"
    r"(?:# See CONTRIBUTORS[^\n]*\n)?"
    r"\n?",
    re.MULTILINE,
)


def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
    """Parse command-line options.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Returns:
        The populated ``argparse.Namespace``.
    """
    p = argparse.ArgumentParser(description="Normalize SPDX headers")
    p.add_argument("--apply", action="store_true", help="Write changes (default dry-run)")
    p.add_argument("--paths", nargs="*", help="Optional list of files/dirs to process")
    p.add_argument("--ext", default=",".join(DEFAULT_EXTS), help="Comma separated extensions")
    p.add_argument("--exclude", action="append", default=[],
                   help="Additional directory/file exclude (glob contains)")
    p.add_argument("--print", action="store_true",
                   help="Print updated content (only single file dry-run)")
    p.add_argument("--verbose", action="store_true")
    p.add_argument("--quiet", action="store_true")
    p.add_argument("--license-only", action="store_true", help="Only inject SPDX if missing")
    p.add_argument("--force-year-start", type=int, default=UPSTREAM_START_DEFAULT)
    return p.parse_args(argv)


def log(msg: str, *, verbose=False, quiet=False):
    """Print *msg* only when ``verbose`` is set and ``quiet`` is not."""
    if quiet:
        return
    if verbose:
        print(msg)


def _matches_extra(path: pathlib.Path, extra_excludes: Iterable[str]) -> bool:
    """Return True if any non-empty extra exclude is a substring of *path*."""
    text = str(path)
    return any(x and x in text for x in extra_excludes)


def _is_excluded_dir(rel_dir: pathlib.PurePath) -> bool:
    """Return True if *rel_dir* (relative to the scan root) is in EXCLUDE_DIRS.

    Single-name entries are compared with the directory's own name; entries
    containing "/" (e.g. ``docs/_build``) are compared with the tail of the
    relative path.
    """
    if rel_dir.name in EXCLUDE_DIRS:
        return True
    rel_posix = f"/{rel_dir.as_posix()}"
    return any(
        "/" in entry and rel_posix.endswith(f"/{entry}")
        for entry in EXCLUDE_DIRS
    )


def _walk_files(root: pathlib.Path, extra_excludes: List[str]) -> Iterator[pathlib.Path]:
    """Yield every file below *root*, pruning excluded directories."""
    for dirpath, dirnames, filenames in os.walk(root):
        here = pathlib.Path(dirpath)
        # Assigning to dirnames[:] tells os.walk not to descend into the
        # removed directories, so nothing beneath them is ever visited.
        dirnames[:] = sorted(
            name
            for name in dirnames
            if not _is_excluded_dir((here / name).relative_to(root))
            and not _matches_extra(here / name, extra_excludes)
        )
        for name in sorted(filenames):
            yield here / name


def collect_paths(paths: List[str] | None, exts: List[str],
                  extra_excludes: List[str]) -> Iterable[pathlib.Path]:
    """Yield the files that should be processed.

    Args:
        paths: Files and/or directories to scan; when empty or ``None`` the
            current working directory is scanned.
        exts: Accepted file suffixes (with leading dot).
        extra_excludes: Substrings; any path containing one of them is skipped.

    Yields:
        Matching file paths. A file passed explicitly is only checked against
        *exts*; directories are walked recursively, skipping everything under
        a directory listed in ``EXCLUDE_DIRS`` or matched by *extra_excludes*.
    """
    if not paths:
        roots = [pathlib.Path.cwd()]
    else:
        roots = [pathlib.Path(p).resolve() for p in paths]

    for root in roots:
        if root.is_file():
            if root.suffix in exts:
                yield root
            continue
        for candidate in _walk_files(root, extra_excludes):
            if candidate.suffix not in exts:
                continue
            if _matches_extra(candidate, extra_excludes):
                continue
            yield candidate


def has_legacy(text: str) -> bool:
    """Return True if a legacy marker phrase occurs in the first 1000 chars."""
    return any(p.search(text[:1000]) for p in LEGACY_PATTERNS)


def already_normalized(text: str) -> bool:
    """Return True if the new-style header appears in the first 400 chars."""
    return HEADER_ALREADY_RE.search(text[:400]) is not None


def build_header(upstream_start: int) -> str:
    """Render the header for *upstream_start*, FORK_START and CURRENT_YEAR."""
    return HEADER_TEMPLATE.format(
        upstream_start=upstream_start,
        fork_start=FORK_START,
        year=CURRENT_YEAR,
    )


def extract_preamble(text: str):
    """Split *text* into ``(shebang, coding, rest)``.

    ``shebang`` and ``coding`` are the leading ``#!`` line and the following
    encoding-declaration line (each including its newline), or ``""`` when
    absent; ``rest`` is everything after them.
    """
    shebang = ""
    coding = ""
    rest = text
    m = SHEBANG_RE.match(rest)
    if m:
        shebang = m.group(0)
        rest = rest[len(shebang):]
    m2 = CODING_RE.match(rest)
    if m2:
        coding = m2.group(0)
        rest = rest[len(coding):]
    return shebang, coding, rest


def normalize(text: str, upstream_start: int, license_only: bool) -> str:
    """Return *text* with its license header normalized.

    Args:
        text: Full file content.
        upstream_start: First year of the upstream copyright range.
        license_only: When True, only insert a bare SPDX line (after any
            shebang / coding line) if no SPDX identifier is present, and
            leave everything else untouched.

    Returns:
        The updated content; identical to *text* when nothing needs changing.
    """
    if license_only:
        if SPDX_RE.search(text):
            return text  # already carries an SPDX identifier
        shebang, coding, rest = extract_preamble(text)
        return f"{shebang}{coding}# SPDX-License-Identifier: GPL-3.0-or-later\n{rest}"

    if already_normalized(text):
        # Refresh the year ranges in place. A callable replacement avoids any
        # backslash/group interpretation of the header text, and the pattern
        # only spans the header's own lines so following code is preserved.
        fresh = build_header(upstream_start)
        return NORMALIZED_BLOCK_RE.sub(lambda _m: fresh, text, count=1)

    shebang, coding, rest = extract_preamble(text)
    # Drop the legacy block (first occurrence only) when legacy markers exist.
    new_rest = LEGACY_BLOCK_RE.sub("", rest, count=1) if has_legacy(rest) else rest
    new_header = build_header(upstream_start)
    return f"{shebang}{coding}{new_header}{new_rest.lstrip()}"


def process_file(path: pathlib.Path, upstream_start: int, license_only: bool,
                 apply: bool, verbose: bool, quiet: bool) -> bool:
    """Normalize one file, writing it back only when *apply* is set.

    Returns:
        True on success (whether or not the file changed), False when the
        file could not be read or written. Errors are reported on stderr
        unless *quiet* is set.
    """
    try:
        original = path.read_text(encoding="utf-8")
    except (OSError, UnicodeError) as e:
        if not quiet:
            print(f"ERROR: Cannot read {path}: {e}", file=sys.stderr)
        return False

    updated = normalize(original, upstream_start, license_only)
    changed = updated != original
    if changed and apply:
        try:
            path.write_text(updated, encoding="utf-8")
        except (OSError, UnicodeError) as e:
            if not quiet:
                print(f"ERROR: Cannot write {path}: {e}", file=sys.stderr)
            return False
    if changed:
        # In dry-run nothing was written, so do not claim the file was updated.
        verb = "Updated" if apply else "Would update"
        log(f"{verb}: {path}", verbose=verbose, quiet=quiet)
    return True


def main(argv: Sequence[str] | None = None) -> int:
    """Command-line entry point.

    Args:
        argv: Argument list; ``None`` means ``sys.argv[1:]``.

    Returns:
        Process exit code: 0 on success, 1 if any file failed.
    """
    args = parse_args(argv)

    # Strip whitespace so "--ext '.py, .sh'" works, and add a missing dot.
    exts = []
    for raw in args.ext.split(","):
        ext = raw.strip()
        if ext:
            exts.append(ext if ext.startswith(".") else f".{ext}")

    # Anchor the default scan root to the repository root (the parent of the
    # script's directory) if --paths is omitted.
    if args.paths:
        scan_paths = args.paths
    else:
        scan_paths = [str(pathlib.Path(__file__).resolve().parent.parent)]

    targets = list(collect_paths(scan_paths, exts, args.exclude))
    if not targets:
        if not args.quiet:
            print("No matching files.")
        return 0

    ok = True
    for p in targets:
        if not process_file(p, args.force_year_start, args.license_only,
                            args.apply, args.verbose, args.quiet):
            ok = False

    # Optional print (single file, dry run). Skipped when the file already
    # failed to process, since it would just fail again here.
    if ok and args.print and not args.apply and len(targets) == 1:
        path = targets[0]
        try:
            text = path.read_text(encoding="utf-8")
        except (OSError, UnicodeError) as e:
            if not args.quiet:
                print(f"ERROR: Cannot read {path}: {e}", file=sys.stderr)
            return 1
        print(normalize(text, args.force_year_start, args.license_only))

    if not ok:
        return 1
    if not args.apply and not args.quiet:
        print("(dry-run) Use --apply to write changes.")
    return 0


if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())