mirror of
https://github.com/docling-project/docling.git
synced 2026-05-17 13:10:38 +00:00
5b1df788ef
* ci: tighten pre-commit guardrails Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com> * ci: validate pre-commit guardrail changes Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com> * ci: switch hook validation to prek Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com> * ci: exempt active slim plan from max-lines Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com> * ci: move max-lines config under github Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com> * ci: fail on uncovered tach modules Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com> * ci: ignore generated docs in max-lines check Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com> * ci: clarify local validation tasks Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com> * docs: refine agent instructions Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com> * ci: replace mypy with ty (cherry picked from commit 382afbde8f00abfaeba95ea9c8e9cc603f27a2d9) Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com> * ci: replace justfile with makefile Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com> --------- Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com>
116 lines
3.5 KiB
Python
116 lines
3.5 KiB
Python
#!/usr/bin/env python3
|
|
"""Fail when Python modules are not covered by a Tach module."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
# Top-level package directory that is scanned for Python modules.
PACKAGE_ROOT = "docling"
# Tach configuration file; a relative path, so it is resolved against the
# current working directory.
TACH_CONFIG = Path("tach.toml")
# Files with any path component named in this set are skipped by the scan.
SKIP_DIRS = {"__pycache__"}
|
|
|
|
|
|
def load_toml(path: Path) -> dict[str, Any]:
    """Parse the TOML file at *path*, degrading gracefully on old interpreters.

    The standard-library ``tomllib`` is preferred, then the ``tomli``
    backport. When neither can be imported, only the ``[[modules]].path``
    values are recovered via ``parse_module_paths`` and returned in the same
    ``{"modules": [{"path": ...}, ...]}`` shape a real parser would produce.
    """
    try:
        import tomllib as toml_parser
    except ModuleNotFoundError:
        toml_parser = None

    if toml_parser is None:
        try:
            import tomli as toml_parser
        except ModuleNotFoundError:
            # No TOML parser at all: fall back to the line-based scanner.
            recovered_paths = parse_module_paths(path)
            return {"modules": [{"path": p} for p in recovered_paths]}

    with path.open("rb") as handle:
        parsed = toml_parser.load(handle)
    return parsed
|
|
|
|
|
|
def parse_module_paths(path: Path) -> list[str]:
    """Extract ``[[modules]].path`` values when no TOML parser is available.

    This is a small line-based scanner, not a TOML parser. It only
    understands ``[[modules]]`` headers and single-line ``path = "..."``
    assignments that belong to such an entry.

    Args:
        path: Tach configuration file, read as UTF-8.

    Returns:
        The ``path`` values in file order, duplicates included.
    """
    module_paths: list[str] = []
    in_module = False
    for raw_line in path.read_text(encoding="utf-8").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue

        if line.startswith("["):
            # Every table header, ``[table]`` as well as ``[[array]]``, ends
            # the current entry; only ``[[modules]]`` opens a new one. Inner
            # whitespace and a trailing comment on the header are tolerated.
            header = "".join(line.partition("#")[0].split())
            in_module = header == "[[modules]]"
            continue

        if not in_module:
            continue

        # Require the key to be exactly ``path`` so that keys which merely
        # start with it (``path_alias``, ``paths``) are not picked up, and
        # lines without ``=`` are skipped instead of raising.
        key, separator, value = line.partition("=")
        if not separator or key.strip() != "path":
            continue

        value = value.strip()
        quote = value[:1]
        # Take the text between the opening quote and its match, so a
        # trailing ``# comment`` is dropped and single quotes work too.
        closing = value.find(quote, 1) if quote in ('"', "'") else -1
        if closing != -1:
            module_paths.append(value[1:closing])
        else:
            # Unquoted or unterminated value: keep the previous lenient
            # behaviour of stripping stray double quotes.
            module_paths.append(value.strip('"'))
    return module_paths
|
|
|
|
|
|
def module_name(path: Path) -> str:
    """Convert a Python file path into its dotted module name.

    The file suffix is dropped and the path components are joined with
    dots. A trailing ``__init__`` component is omitted, so a package's
    ``__init__.py`` maps to the package name itself.
    """
    *package_parts, leaf = path.with_suffix("").parts
    if leaf != "__init__":
        package_parts.append(leaf)
    return ".".join(package_parts)
|
|
|
|
|
|
def iter_python_modules(package_root: Path) -> list[str]:
    """Return the sorted, de-duplicated module names found under *package_root*.

    Every ``*.py`` file is visited recursively. Files with a path component
    listed in ``SKIP_DIRS`` are ignored, and the name equal to
    ``PACKAGE_ROOT`` (the root package itself) is left out of the result.
    """
    discovered = {
        module_name(source_file)
        for source_file in package_root.rglob("*.py")
        if SKIP_DIRS.isdisjoint(source_file.parts)
    }
    discovered.discard(PACKAGE_ROOT)
    return sorted(discovered)
|
|
|
|
|
|
def is_covered(module: str, tach_modules: list[str]) -> bool:
    """Tell whether *module* is one of *tach_modules* or nested under one.

    Nesting is decided on whole dotted components: ``pkg.ab`` is not
    covered by ``pkg.a``.
    """
    for candidate in tach_modules:
        if module == candidate:
            return True
        if module.startswith(candidate + "."):
            return True
    return False
|
|
|
|
|
|
def module_exists(module: str) -> bool:
    """Check whether dotted *module* maps to a ``.py`` file or a package.

    The dotted name is turned into a relative path, so the lookup happens
    relative to the current working directory. A package counts only when
    its directory contains an ``__init__.py``.
    """
    base = Path(*module.split("."))
    if base.with_suffix(".py").is_file():
        return True
    return (base / "__init__.py").is_file()
|
|
|
|
|
|
def main() -> int:
    """Run the Tach coverage check and print any problems to stdout.

    Three kinds of problems are collected: duplicated ``[[modules]]``
    paths, configured paths with no matching Python module or package, and
    Python modules under ``PACKAGE_ROOT`` that no configured path covers.

    Returns:
        ``0`` when nothing was found, ``1`` otherwise.
    """
    tach_config = load_toml(TACH_CONFIG)
    tach_modules = sorted(entry["path"] for entry in tach_config.get("modules", []))

    # The list is sorted, so equal paths sit next to each other and a
    # neighbour comparison finds every repeated entry.
    duplicate_modules = sorted(
        {left for left, right in zip(tach_modules, tach_modules[1:]) if left == right}
    )
    stale_modules = [name for name in tach_modules if not module_exists(name)]
    uncovered_modules = [
        name
        for name in iter_python_modules(Path(PACKAGE_ROOT))
        if not is_covered(name, tach_modules)
    ]

    if not (duplicate_modules or stale_modules or uncovered_modules):
        return 0

    def print_section(heading: str, names: list[str]) -> None:
        # One heading line followed by one bullet per module name.
        print(heading)
        for name in names:
            print(f" - {name}")

    print("Tach module coverage check failed.")
    if duplicate_modules:
        print_section("\nDuplicate [[modules]] entries:", duplicate_modules)
    if stale_modules:
        print_section(
            "\nConfigured Tach modules without a matching Python module/package:",
            stale_modules,
        )
    if uncovered_modules:
        print_section(
            "\nPython modules not covered by any Tach [[modules]].path prefix:",
            uncovered_modules,
        )
        print(
            "\nAdd a Tach module entry in tach.toml, or move the code under an "
            "existing Tach module deliberately."
        )
    return 1
|
|
|
|
|
|
# Script entry point: main()'s integer result is handed to sys.exit() and
# becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|