Files
docling/scripts/check_tach_module_coverage.py
geoHeil 5b1df788ef ci: tighten pre-commit guardrails (#3346)
* ci: tighten pre-commit guardrails

Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com>

* ci: validate pre-commit guardrail changes

Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com>

* ci: switch hook validation to prek

Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com>

* ci: exempt active slim plan from max-lines

Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com>

* ci: move max-lines config under github

Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com>

* ci: fail on uncovered tach modules

Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com>

* ci: ignore generated docs in max-lines check

Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com>

* ci: clarify local validation tasks

Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com>

* docs: refine agent instructions

Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com>

* ci: replace mypy with ty

(cherry picked from commit 382afbde8f00abfaeba95ea9c8e9cc603f27a2d9)
Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com>

* ci: replace justfile with makefile

Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com>

---------

Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com>
2026-05-08 15:07:11 +02:00

116 lines
3.5 KiB
Python

#!/usr/bin/env python3
"""Fail when Python modules are not covered by a Tach module."""
from __future__ import annotations
import sys
from pathlib import Path
from typing import Any
# Top-level package directory scanned for Python modules; the bare package
# name itself is excluded from the list of modules that need coverage.
PACKAGE_ROOT = "docling"
# Tach configuration file; a relative path, so it is resolved against the
# current working directory of the process.
TACH_CONFIG = Path("tach.toml")
# Any file whose path contains one of these directory names is ignored.
SKIP_DIRS = {"__pycache__"}
def load_toml(path: Path) -> dict[str, Any]:
    """Parse the TOML file at *path* and return its contents as a dict.

    The standard-library ``tomllib`` is preferred, then the ``tomli``
    backport. When neither can be imported, only the ``[[modules]]`` path
    entries are recovered via ``parse_module_paths`` and returned under the
    ``"modules"`` key.
    """
    try:
        import tomllib as toml_parser
    except ModuleNotFoundError:
        try:
            import tomli as toml_parser
        except ModuleNotFoundError:
            # No TOML parser at all: fall back to the line-based scanner.
            recovered = [
                {"path": module_path} for module_path in parse_module_paths(path)
            ]
            return {"modules": recovered}

    with path.open("rb") as handle:
        parsed = toml_parser.load(handle)
    return parsed
def parse_module_paths(path: Path) -> list[str]:
    """Extract ``[[modules]].path`` values when no TOML parser is available.

    This is a deliberately small line-based scanner, not a TOML parser: it
    only understands table headers and single-line ``path = "..."`` entries.

    Args:
        path: Location of the Tach configuration file, read as UTF-8.

    Returns:
        The ``path`` values found inside ``[[modules]]`` tables, in file
        order (duplicates are preserved).
    """
    module_paths: list[str] = []
    in_module = False
    for raw_line in path.read_text(encoding="utf-8").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue

        if line.startswith("["):
            header = line.split("#", maxsplit=1)[0].strip()
            if header.endswith("]"):
                # Every table header -- ``[table]`` as well as ``[[array]]`` --
                # closes the current entry; only ``[[modules]]`` opens a new one.
                in_module = (
                    header.startswith("[[")
                    and header.endswith("]]")
                    and header[2:-2].strip() == "modules"
                )
                continue

        if not in_module:
            continue

        key, separator, value = line.partition("=")
        # Require the key to be exactly ``path`` so that keys which merely
        # start with it (``path_alias``, ``paths``) are not picked up.
        if not separator or key.strip() != "path":
            continue

        value = value.strip()
        quote = value[:1]
        closing = value.find(quote, 1) if quote in ('"', "'") else -1
        if closing != -1:
            # Quoted string: take what is between the quotes, which also
            # drops any trailing ``# comment``.
            module_paths.append(value[1:closing])
        else:
            # Unquoted or unterminated value: best effort, minus any comment.
            module_paths.append(value.split("#", maxsplit=1)[0].strip())
    return module_paths
def module_name(path: Path) -> str:
    """Turn a Python source path into its dotted module name.

    The file suffix is dropped and a trailing ``__init__`` component is
    removed, so a package's ``__init__.py`` maps to the package itself.
    """
    *parents, stem = path.with_suffix("").parts
    components = parents if stem == "__init__" else [*parents, stem]
    return ".".join(components)
def iter_python_modules(package_root: Path) -> list[str]:
    """Return the sorted dotted names of all modules below *package_root*.

    Files with a path component listed in ``SKIP_DIRS`` are ignored, and the
    bare ``PACKAGE_ROOT`` name is left out of the result.
    """
    discovered = {
        module_name(source_file)
        for source_file in package_root.rglob("*.py")
        if SKIP_DIRS.isdisjoint(source_file.parts)
    }
    # The root package itself is never reported as a module needing coverage.
    discovered.discard(PACKAGE_ROOT)
    return sorted(discovered)
def is_covered(module: str, tach_modules: list[str]) -> bool:
    """Tell whether *module* equals, or is nested under, any Tach module.

    Nesting is checked on a dotted-name boundary, so ``pkg.ab`` is not
    considered to be inside ``pkg.a``.
    """
    for candidate in tach_modules:
        if module == candidate:
            return True
        if module.startswith(candidate + "."):
            return True
    return False
def module_exists(module: str) -> bool:
    """Check that a dotted module name maps to a ``.py`` file or a package.

    The name is converted to a relative path, so the lookup happens relative
    to the current working directory. A package counts only when it contains
    an ``__init__.py``.
    """
    base = Path(*module.split("."))
    if base.with_suffix(".py").is_file():
        return True
    return base.joinpath("__init__.py").is_file()
def _print_section(heading: str, modules: list[str]) -> None:
    """Print *heading* followed by one bullet line per module name."""
    print(heading)
    for module in modules:
        print(f" - {module}")


def main() -> int:
    """Validate the Tach module list against the Python package on disk.

    Three problems are reported: ``[[modules]]`` paths listed more than once,
    configured paths with no matching module or package, and Python modules
    under ``PACKAGE_ROOT`` that no configured path covers.

    Returns:
        ``0`` when no problem is found, ``1`` otherwise (after printing a
        report to standard output).
    """
    config = load_toml(TACH_CONFIG)
    tach_modules = sorted(module["path"] for module in config.get("modules", []))

    # The list is sorted, so repeated entries sit next to each other; one pass
    # over adjacent pairs finds them without counting every element again.
    duplicate_modules = sorted(
        {
            current
            for current, following in zip(tach_modules, tach_modules[1:])
            if current == following
        }
    )
    stale_modules = [module for module in tach_modules if not module_exists(module)]
    uncovered_modules = [
        module
        for module in iter_python_modules(Path(PACKAGE_ROOT))
        if not is_covered(module, tach_modules)
    ]

    if not (duplicate_modules or stale_modules or uncovered_modules):
        return 0

    print("Tach module coverage check failed.")
    if duplicate_modules:
        _print_section("\nDuplicate [[modules]] entries:", duplicate_modules)
    if stale_modules:
        _print_section(
            "\nConfigured Tach modules without a matching Python module/package:",
            stale_modules,
        )
    if uncovered_modules:
        _print_section(
            "\nPython modules not covered by any Tach [[modules]].path prefix:",
            uncovered_modules,
        )
        # This hint only applies to uncovered modules, so it is printed as
        # part of that section.
        print(
            "\nAdd a Tach module entry in tach.toml, or move the code under an "
            "existing Tach module deliberately."
        )
    return 1
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())