Files
docling/.github/workflows/checks.yml
geoHeil 5b1df788ef ci: tighten pre-commit guardrails (#3346)
* ci: tighten pre-commit guardrails

Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com>

* ci: validate pre-commit guardrail changes

Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com>

* ci: switch hook validation to prek

Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com>

* ci: exempt active slim plan from max-lines

Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com>

* ci: move max-lines config under github

Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com>

* ci: fail on uncovered tach modules

Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com>

* ci: ignore generated docs in max-lines check

Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com>

* ci: clarify local validation tasks

Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com>

* docs: refine agent instructions

Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com>

* ci: replace mypy with ty

(cherry picked from commit 382afbde8f00abfaeba95ea9c8e9cc603f27a2d9)
Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com>

* ci: replace justfile with makefile

Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com>

---------

Signed-off-by: Georg Heiler <georg.kf.heiler@gmail.com>
2026-05-08 15:07:11 +02:00

641 lines
24 KiB
YAML

# Reusable workflow: the only trigger is `workflow_call`, so the caller picks
# which lanes run by passing the inputs declared here.
on:
  workflow_call:
    inputs:
      # Gates the "Upload coverage to Codecov" steps of the test lanes.
      push_coverage:
        type: boolean
        description: "If true, the coverage results are pushed to codecov.io."
        default: true
      # Gates the `lint` job.
      run_lint:
        type: boolean
        description: "If true, run the full lint job in this workflow."
        default: true
      # Makes the `changes` job emit hard-coded "run everything" outputs
      # instead of consulting the changed-path filter.
      force_all_checks:
        type: boolean
        description: "If true, run all test/example/package lanes regardless of changed-path filters."
        default: false
      # Required, in addition to the `run_package` filter result, for the two
      # `test-pip-install-*` jobs.
      run_package_compat:
        type: boolean
        description: "If true, run the cross-version package compatibility lanes."
        default: false
      # Gates `run-tests-windows`.
      run_windows:
        type: boolean
        description: "If true, run the Windows smoke test lane on GitHub-hosted runners."
        default: false
      # Gates `run-tests-macos`.
      run_macos:
        type: boolean
        description: "If true, run the macOS smoke test lane on GitHub-hosted runners."
        default: false
      # When true (and `tach_base_ref` is usable) the pytest lanes receive
      # `--tach-base`.
      use_tach:
        type: boolean
        description: "If true, enable Tach-aware test skipping in the cheap CI lanes."
        default: false
      # Value handed to `--tach-base`; an empty string or the all-zero SHA is
      # treated as "no base".
      tach_base_ref:
        type: string
        description: "Base git ref or commit used for Tach impact analysis."
        default: ""
      # Decoded with fromJSON() to build the test matrices and the
      # pip-install loops.
      python_versions:
        type: string
        description: 'JSON array of Python versions to use for multi-version jobs, e.g. ["3.10", "3.12"].'
        default: '["3.10", "3.11", "3.12", "3.13", "3.14"]'
    secrets:
      # Optional; forwarded as `token` to the Codecov upload action.
      CODECOV_TOKEN:
        required: false
# Workflow-level environment, visible to every job below.
env:
  # NOTE(review): presumably opts JavaScript actions into the Node 24
  # runtime — confirm against the GitHub runner documentation.
  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
  # Hugging Face Hub timeouts; presumably in seconds — confirm against the
  # huggingface_hub environment-variable reference.
  HF_HUB_DOWNLOAD_TIMEOUT: "90"
  HF_HUB_ETAG_TIMEOUT: "90"
  # NOTE(review): presumably makes every `uv` command behave as if `--frozen`
  # had been passed — confirm against the uv documentation.
  UV_FROZEN: "1"
  # File-name regexes. The light examples lane joins both with `|` and passes
  # the result as `--exclude-pattern` to run_selected_examples.py.
  EXAMPLES_HEAVY: '^(batch_convert|chart_extraction|granite_vision_table_structure|minimal_asr_pipeline|minimal_vlm_pipeline)\.py$'
  EXAMPLES_UNSUPPORTED_IN_CI: '^(compare_vlm_models|custom_convert|demo_layout_vlm|develop_picture_enrichment|export_multimodal|gpu_standard_pipeline|gpu_vlm_pipeline|granitedocling_repetition_stopping|minimal|mlx_whisper_example|offline_convert|pictures_description|pictures_description_api|post_process_ocr_with_vlm|rapidocr_with_custom_models|run_with_formats_html_rendered|run_with_formats_html_rendered_mp|suryaocr_with_custom_models|vlm_pipeline_api_model)\.py$|xbrl_conversion\.ipynb$'
jobs:
# Decides which downstream lanes run. Either the `force` step emits fixed
# "run everything" outputs, or the path filter plus the ML matrix script
# compute them from the changed files. Each job output prefers the forced
# value and falls back to the computed one.
changes:
  runs-on: ubuntu-latest
  timeout-minutes: 5
  permissions:
    contents: read
    pull-requests: read
  outputs:
    run_tests: ${{ steps.force.outputs.run_tests || steps.filter.outputs.run_tests }}
    ml_suites: ${{ steps.force.outputs.ml_suites || steps.ml-matrix.outputs.ml_suites || '[]' }}
    run_examples_all: ${{ steps.force.outputs.run_examples_all || steps.filter.outputs.run_examples_all }}
    changed_example_scripts: ${{ steps.force.outputs.changed_example_scripts || steps.filter.outputs.changed_example_scripts }}
    changed_example_scripts_files: ${{ steps.force.outputs.changed_example_scripts_files || steps.filter.outputs.changed_example_scripts_files || '[]' }}
    run_package: ${{ steps.force.outputs.run_package || steps.filter.outputs.run_package }}
    run_tach: ${{ steps.force.outputs.run_tach || steps.filter.outputs.run_tach }}
  steps:
    # Forced path: no checkout and no filtering, every lane is switched on.
    - name: Force all lanes
      id: force
      if: ${{ inputs.force_all_checks }}
      run: |
        {
          echo "run_tests=true"
          echo 'ml_suites=["ocr","pdf-model","vlm","asr"]'
          echo "run_examples_all=true"
          echo "changed_example_scripts=true"
          echo "changed_example_scripts_files=[]"
          echo "run_package=true"
          echo "run_tach=true"
        } >> "$GITHUB_OUTPUT"
    # Filtered path: the three steps below only run when nothing is forced.
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      if: ${{ !inputs.force_all_checks }}
      with:
        fetch-depth: 0
    - name: Detect changed paths
      id: filter
      if: ${{ !inputs.force_all_checks }}
      uses: dorny/paths-filter@fbd0ab8f3e69293af611ebaee6363fc25e6d187d # v4.0.1
      with:
        list-files: json
        filters: |
          # Default catch-all for regular tests. The core pytest command
          # ignores modules marked with ML pytest markers; newly added tests
          # still run here unless intentionally marked for an ML lane.
          run_tests:
            - ".github/workflows/**"
            - ".github/scripts/**"
            - ".github/actions/**"
            - "pyproject.toml"
            - "uv.lock"
            - "Dockerfile"
            - "docling/**"
            - "tests/**"
          run_examples_all:
            - ".github/workflows/**"
            - ".github/scripts/**"
            - ".github/actions/**"
            - "pyproject.toml"
            - "uv.lock"
            - "Dockerfile"
            - "docling/**"
          changed_example_scripts:
            - added|modified: "docs/examples/*.py"
          run_package:
            - ".github/workflows/**"
            - ".github/scripts/**"
            - ".github/actions/**"
            - "pyproject.toml"
            - "uv.lock"
            - "README.md"
            - "LICENSE"
            - "docling/**"
          run_tach:
            - ".github/workflows/**"
            - ".github/scripts/**"
            - ".github/actions/**"
            - ".pre-commit-config.yaml"
            - "pyproject.toml"
            - "scripts/check_tach_module_coverage.py"
            - "tach.toml"
            - "uv.lock"
            - "docling/**"
    - name: Build ML suite matrix
      id: ml-matrix
      if: ${{ !inputs.force_all_checks }}
      env:
        # The ML lanes reuse the regular test trigger; `run_tach` additionally
        # covers Tach and pre-commit config edits that can change which tests
        # get selected.
        RUN_ML: ${{ steps.filter.outputs.run_tests == 'true' || steps.filter.outputs.run_tach == 'true' }}
      run: |
        python3 .github/scripts/pytest_marker_selection.py matrix \
          --run-all-ml "$RUN_ML"
# Style/lint lane: runs every prek hook over the whole tree. Skipped when the
# caller passes `run_lint: false`.
lint:
  if: ${{ inputs.run_lint }}
  runs-on: ubuntu-latest
  timeout-minutes: 20
  strategy:
    fail-fast: false
    matrix:
      python-version:
        - "3.12"
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
    - uses: ./.github/actions/setup-ubuntu-ci
      with:
        python_version: ${{ matrix.python-version }}
        uv_sync_args: --frozen --group dev --all-extras --all-packages --no-group docs --no-group examples
    # Exports PY = sha256 of the verbose interpreter version string; it is
    # part of the cache key below, so a different Python build gets a
    # different cache.
    - name: Set prek cache key
      run: echo "PY=$(python -VV | sha256sum | cut -d' ' -f1)" >> "$GITHUB_ENV"
    # The key also hashes the hook configuration files; `restore-keys` lets a
    # run fall back to any cache created for the same interpreter.
    - name: Cache prek environments
      uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
      with:
        path: ~/.cache/prek
        key: prek|${{ env.PY }}|${{ hashFiles('.pre-commit-config.yaml', '.github/dprint.json') }}
        restore-keys: |
          prek|${{ env.PY }}|
    - name: Check style
      run: |
        echo "--- Running prek style checks ---"
        uv run prek run --all-files
# Tach lane: runs the module-coverage script and then `tach check`. Only runs
# when the `changes` job reports `run_tach == 'true'`.
tach:
  needs: changes
  if: ${{ needs.changes.outputs.run_tach == 'true' }}
  runs-on: ubuntu-latest
  timeout-minutes: 10
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
    # Only the dev dependency group is synced; the project itself is not
    # installed (`--no-install-project`).
    - uses: ./.github/actions/setup-ubuntu-ci
      with:
        python_version: "3.12"
        uv_sync_args: --frozen --only-group dev --no-install-project
    - name: Check module coverage
      run: python3 scripts/check_tach_module_coverage.py
    # `--no-sync` keeps uv from re-syncing the environment prepared above.
    - name: Check module boundaries
      run: uv run --no-sync tach check
# Core (non-ML) pytest lane, one run per requested Python version. Only runs
# when the `changes` job reports `run_tests == 'true'`.
run-tests-core:
  needs: changes
  if: ${{ needs.changes.outputs.run_tests == 'true' }}
  runs-on: ubuntu-latest
  timeout-minutes: 45
  strategy:
    fail-fast: false
    matrix:
      python-version: ${{ fromJSON(inputs.python_versions) }}
  steps:
    # Full history so the Tach base ref can be resolved locally.
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        fetch-depth: 0
    - uses: ./.github/actions/setup-ubuntu-ci
      with:
        python_version: ${{ matrix.python-version }}
        uv_sync_args: --frozen --group dev --all-extras --all-packages --no-group docs --no-group examples
        install_system_deps: "true"
        cache_models: "true"
    - name: Run core test suite
      env:
        # Workflow inputs reach the script through the environment rather
        # than `${{ }}` interpolation, so their content is never parsed as
        # shell source.
        TACH_BASE_REF: ${{ inputs.tach_base_ref }}
        USE_TACH: ${{ inputs.use_tach }}
      run: |
        echo "--- Running core tests ---"
        # Capture the selection output first: a plain assignment lets
        # `bash -e` abort the step if the script fails, whereas
        # `mapfile < <(...)` would hide the failure and run the ML-marked
        # modules in this lane.
        ignore_args_output=$(uv run --no-sync python .github/scripts/pytest_marker_selection.py core-ignore-args)
        ML_IGNORE_ARGS=()
        if [ -n "$ignore_args_output" ]; then
          mapfile -t ML_IGNORE_ARGS <<< "$ignore_args_output"
        fi
        # Array instead of a string so the base ref is passed as exactly one
        # argument, without word splitting or glob expansion.
        TACH_ARGS=()
        if [ "$USE_TACH" = "true" ] && [ -n "$TACH_BASE_REF" ] && [ "$TACH_BASE_REF" != "0000000000000000000000000000000000000000" ]; then
          TACH_ARGS=(--tach-base "$TACH_BASE_REF")
        fi
        echo "Running core test suite"
        uv run pytest -v --durations=0 --cov=docling --cov-report=xml --cov-context=test "${TACH_ARGS[@]}" "${ML_IGNORE_ARGS[@]}"
    - name: Upload coverage to Codecov
      if: inputs.push_coverage
      uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2 # v6.0.0
      with:
        token: ${{ secrets.CODECOV_TOKEN }}
        files: ./coverage.xml
        flags: run-tests-core
# ML pytest lanes: one run per (Python version, suite) pair. The suite list
# comes from the `changes` job; an empty list ('[]') skips the job.
run-tests-ml:
  needs: changes
  if: ${{ needs.changes.outputs.ml_suites != '[]' }}
  runs-on: ubuntu-latest
  timeout-minutes: 60
  strategy:
    fail-fast: false
    matrix:
      python-version: ${{ fromJSON(inputs.python_versions) }}
      suite: ${{ fromJSON(needs.changes.outputs.ml_suites) }}
  steps:
    # Full history so the Tach base ref can be resolved locally.
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        fetch-depth: 0
    - uses: ./.github/actions/setup-ubuntu-ci
      with:
        python_version: ${{ matrix.python-version }}
        uv_sync_args: --frozen --group dev --all-extras --all-packages --no-group docs --no-group examples
        install_system_deps: "true"
        cache_models: "true"
    # Constructing the reader ahead of the OCR suite; presumably this pulls
    # the EasyOCR model files before the tests need them — confirm.
    - name: Pre-download OCR models
      if: ${{ matrix.suite == 'ocr' }}
      run: uv run python -c "import easyocr; reader = easyocr.Reader(['en', 'fr', 'de', 'es'])"
    - name: Run ML test suite
      env:
        # Matrix and input values reach the script through the environment
        # rather than `${{ }}` interpolation, so their content is never
        # parsed as shell source.
        SUITE: ${{ matrix.suite }}
        TACH_BASE_REF: ${{ inputs.tach_base_ref }}
        USE_TACH: ${{ inputs.use_tach }}
      run: |
        echo "--- Running ML suite: $SUITE ---"
        SUITE_MARKER=$(uv run --no-sync python .github/scripts/pytest_marker_selection.py suite-marker "$SUITE")
        mapfile -t SUITE_ARGS < <(uv run --no-sync python .github/scripts/pytest_marker_selection.py suite-args "$SUITE")
        # An empty selection is an error: the lane would otherwise pass
        # without running anything.
        if [ ${#SUITE_ARGS[@]} -eq 0 ]; then
          echo "No tests are marked for $SUITE" >&2
          exit 1
        fi
        # Array instead of a string so the base ref is passed as exactly one
        # argument, without word splitting or glob expansion.
        TACH_ARGS=()
        if [ "$USE_TACH" = "true" ] && [ -n "$TACH_BASE_REF" ] && [ "$TACH_BASE_REF" != "0000000000000000000000000000000000000000" ]; then
          TACH_ARGS=(--tach-base "$TACH_BASE_REF")
        fi
        uv run pytest -v --durations=0 --cov=docling --cov-report=xml --cov-context=test "${TACH_ARGS[@]}" -m "$SUITE_MARKER" "${SUITE_ARGS[@]}"
    - name: Upload coverage to Codecov
      if: inputs.push_coverage
      uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2 # v6.0.0
      with:
        token: ${{ secrets.CODECOV_TOKEN }}
        files: ./coverage.xml
        flags: run-tests-ml-${{ matrix.suite }}
# Runs the example scripts that are neither "heavy" nor unsupported in CI.
# Triggered when either all examples must run or at least one example script
# was added/modified.
run-examples-light:
  needs: changes
  if: >-
    ${{
    needs.changes.outputs.run_examples_all == 'true' ||
    needs.changes.outputs.changed_example_scripts == 'true'
    }}
  runs-on: ubuntu-latest
  timeout-minutes: 60
  strategy:
    fail-fast: false
    matrix:
      python-version:
        - "3.10"
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
    - uses: ./.github/actions/setup-ubuntu-ci
      with:
        python_version: ${{ matrix.python-version }}
        uv_sync_args: --frozen --group examples --all-extras --all-packages --no-group docs --no-group dev
        install_system_deps: "true"
        cache_models: "true"
    # Deletes large preinstalled toolchains from the runner image and prints
    # disk usage before and after.
    - name: Free up disk space
      run: |
        df -h
        sudo rm -rf /usr/share/dotnet
        sudo rm -rf /usr/local/lib/android
        sudo rm -rf /opt/ghc
        sudo apt-get clean
        df -h
    - name: Run selected light examples
      env:
        RUN_ALL_EXAMPLES: ${{ needs.changes.outputs.run_examples_all }}
        CHANGED_EXAMPLE_FILES: ${{ needs.changes.outputs.changed_example_scripts_files }}
      run: |
        # The exclusion regex is the union of the two workflow-level patterns.
        args=(
          --examples-dir docs/examples
          --changed-files-json "$CHANGED_EXAMPLE_FILES"
          --exclude-pattern "${EXAMPLES_HEAVY}|${EXAMPLES_UNSUPPORTED_IN_CI}"
        )
        if [ "$RUN_ALL_EXAMPLES" = "true" ]; then
          args+=(--run-all)
        fi
        uv run --no-sync python .github/scripts/run_selected_examples.py "${args[@]}"
# Windows smoke lane (opt-in through `run_windows`): import/CLI check plus the
# tests marked `cross_platform`. Every script step pins `shell: bash`.
run-tests-windows:
  needs: changes
  if: ${{ inputs.run_windows }}
  runs-on: windows-latest
  timeout-minutes: 30
  env:
    PYTHONUTF8: "1"
    UV_PYTHON_PREFERENCE: only-system
  strategy:
    fail-fast: false
    matrix:
      python-version:
        - "3.12"
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
      with:
        python-version: ${{ matrix.python-version }}
    # NOTE(review): this action is referenced by the mutable `v7` tag, while
    # the other actions in this job are pinned to a commit SHA — consider
    # pinning it the same way.
    - name: Install uv
      uses: astral-sh/setup-uv@v7
      with:
        enable-cache: true
    - name: Install dependencies
      shell: bash
      run: |
        uv python pin "${{ matrix.python-version }}"
        uv sync --frozen --group dev --all-packages --no-group docs --no-group examples
    # Constructs a DocumentConverter and invokes the CLI help as a minimal
    # wiring check.
    - name: Check import and CLI wiring
      shell: bash
      run: |
        uv run python -c "import docling; from docling.document_converter import DocumentConverter; DocumentConverter(); print('Docling import smoke passed')"
        uv run docling --help
    - name: Run cross-platform smoke tests
      shell: bash
      run: |
        mapfile -t cross_platform_args < <(uv run --no-sync python .github/scripts/pytest_marker_selection.py marker-args cross_platform)
        # Fail loudly instead of passing with zero selected tests.
        if [ "${#cross_platform_args[@]}" -eq 0 ]; then
          echo "No tests are marked with pytest.mark.cross_platform" >&2
          exit 1
        fi
        uv run pytest -p no:tach -m cross_platform "${cross_platform_args[@]}" --durations=10
# macOS smoke lane (opt-in through `run_macos`): import/CLI check plus the
# tests marked `cross_platform`.
run-tests-macos:
  needs: changes
  if: ${{ inputs.run_macos }}
  runs-on: macos-latest
  timeout-minutes: 30
  env:
    UV_PYTHON_PREFERENCE: only-system
  strategy:
    fail-fast: false
    matrix:
      python-version:
        - "3.12"
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
      with:
        python-version: ${{ matrix.python-version }}
    # NOTE(review): this action is referenced by the mutable `v7` tag, while
    # the other actions in this job are pinned to a commit SHA — consider
    # pinning it the same way.
    - name: Install uv
      uses: astral-sh/setup-uv@v7
      with:
        enable-cache: true
    - name: Install dependencies
      shell: bash
      run: |
        uv python pin "${{ matrix.python-version }}"
        uv sync --frozen --group dev --all-packages --no-group docs --no-group examples
    # Constructs a DocumentConverter and invokes the CLI help as a minimal
    # wiring check.
    - name: Check import and CLI wiring
      shell: bash
      run: |
        uv run python -c "import docling; from docling.document_converter import DocumentConverter; DocumentConverter(); print('Docling import smoke passed')"
        uv run docling --help
    - name: Run cross-platform smoke tests
      shell: bash
      run: |
        mapfile -t cross_platform_args < <(uv run --no-sync python .github/scripts/pytest_marker_selection.py marker-args cross_platform)
        # Fail loudly instead of passing with zero selected tests.
        if [ "${#cross_platform_args[@]}" -eq 0 ]; then
          echo "No tests are marked with pytest.mark.cross_platform" >&2
          exit 1
        fi
        uv run pytest -p no:tach -m cross_platform "${cross_platform_args[@]}" --durations=10
# Package-compat lane: for each requested Python version, install the project
# editable with all extras into a fresh venv (no lock file) and import it.
# Requires both `run_package_compat` and the `run_package` filter result.
test-pip-install-no-lock:
  needs: changes
  if: ${{ inputs.run_package_compat && needs.changes.outputs.run_package == 'true' }}
  runs-on: ubuntu-latest
  timeout-minutes: 30
  env:
    # Space-separated list derived from the JSON `python_versions` input.
    SELECTED_PYTHON_VERSIONS: ${{ join(fromJSON(inputs.python_versions), ' ') }}
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
    # NOTE(review): referenced by the mutable `v7` tag, while checkout above
    # is pinned to a commit SHA — consider pinning it the same way.
    - name: Install uv
      uses: astral-sh/setup-uv@v7
    - name: Test pip install across selected Python versions
      run: |
        # $SELECTED_PYTHON_VERSIONS is left unquoted on purpose: word
        # splitting turns the space-separated list into loop items.
        for py_version in $SELECTED_PYTHON_VERSIONS; do
          echo "=========================================="
          echo "Testing Python $py_version"
          echo "=========================================="
          venv_dir="/tmp/venv-${py_version}"
          # Create virtual environment with uv
          uv venv "$venv_dir" --python="${py_version}"
          source "$venv_dir/bin/activate"
          # Install docling-slim with pip (no lock file). The requirement is
          # quoted because `[all]` is a glob bracket expression and would be
          # expanded if a file named `.a` or `.l` existed in the checkout.
          uv pip install --torch-backend=cpu -e ".[all]"
          # Run basic import test
          python -c "import docling; print('Import successful for Python ${py_version}')"
          # Cleanup
          deactivate
          rm -rf "$venv_dir"
          echo "Python $py_version: PASSED"
          echo ""
        done
# Package-compat lane: for each requested Python version, delete the
# interpreter's C headers, prove that a source build now fails, then check
# that the project still installs and imports (i.e. needs no compilation).
# Requires both `run_package_compat` and the `run_package` filter result.
test-pip-install-no-dev-headers:
  needs: changes
  if: ${{ inputs.run_package_compat && needs.changes.outputs.run_package == 'true' }}
  runs-on: ubuntu-latest
  timeout-minutes: 30
  env:
    # Space-separated list derived from the JSON `python_versions` input.
    SELECTED_PYTHON_VERSIONS: ${{ join(fromJSON(inputs.python_versions), ' ') }}
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
    # NOTE(review): referenced by the mutable `v7` tag, while checkout above
    # is pinned to a commit SHA — consider pinning it the same way.
    - name: Install uv
      uses: astral-sh/setup-uv@v7
    - name: Test pip install without dev headers across selected Python versions
      run: |
        for py_version in $SELECTED_PYTHON_VERSIONS; do
          echo "=========================================="
          echo "Testing Python $py_version (no dev headers)"
          echo "=========================================="
          # Create virtual environment with uv
          uv venv /tmp/venv-nodev-${py_version} --python=${py_version}
          source /tmp/venv-nodev-${py_version}/bin/activate
          # Find and remove Python.h from the Python installation
          echo "Removing Python development headers from Python installation..."
          python_include_dir=$(python -c "import sysconfig; print(sysconfig.get_path('include'))")
          echo "Python include directory: $python_include_dir"
          if [ -f "$python_include_dir/Python.h" ]; then
            echo "Found Python.h, removing it and other headers..."
            # Use sudo if the directory is system-owned
            if [ -w "$python_include_dir" ]; then
              rm -rf "$python_include_dir"/*
            else
              sudo rm -rf "$python_include_dir"/*
            fi
            echo "✓ Headers removed"
          else
            echo "Warning: Python.h not found at expected location"
          fi
          # Verify that compilation fails without dev headers
          # Try to install numpy from source (sdist) - this should fail
          echo "Verifying compilation fails without dev headers..."
          set +e # Temporarily allow command to fail
          uv pip install --no-binary=:all: numpy==1.26.4 > /tmp/numpy-install-${py_version}.log 2>&1
          numpy_exit_code=$?
          set -e # Re-enable exit on error
          if [ $numpy_exit_code -eq 0 ]; then
            echo "ERROR: numpy installation from source succeeded, but it should have failed without dev headers!"
            cat /tmp/numpy-install-${py_version}.log
            exit 1
          else
            echo "✓ Compilation correctly failed without dev headers (expected behavior)"
            # Check that the error mentions missing Python.h or similar
            if grep -qi "Python.h\|fatal error.*\.h" /tmp/numpy-install-${py_version}.log; then
              echo "✓ Error message confirms missing development headers"
            else
              echo "Warning: Error message doesn't explicitly mention missing headers, but compilation failed as expected"
            fi
          fi
          # Install docling-slim with pip (no lock file, no compilation)
          echo "grpcio>=1.71.0" > override-grpcio.txt
          uv pip install --torch-backend=cpu -e ".[all]" --overrides override-grpcio.txt
          # Run basic import test
          python -c "import docling; print('Import successful for Python ${py_version} without dev headers')"
          # Cleanup
          deactivate
          rm -rf /tmp/venv-nodev-${py_version}
          echo "Python $py_version (no dev headers): PASSED"
          echo ""
        done
# Builds the distributions, lists the contents of every wheel, and uploads
# `dist/` as the `python-package-distributions` artifact.
build-package:
  needs: changes
  if: ${{ needs.changes.outputs.run_package == 'true' }}
  runs-on: ubuntu-latest
  timeout-minutes: 15
  strategy:
    matrix:
      python-version:
        - "3.12"
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
    # NOTE(review): referenced by the mutable `v7` tag, while the other
    # actions in this job are pinned to a commit SHA — consider pinning it
    # the same way.
    - name: Install uv and set the python version
      uses: astral-sh/setup-uv@v7
      with:
        python-version: ${{ matrix.python-version }}
        enable-cache: true
    - name: Install dependencies
      run: uv sync --all-extras
    - name: Build packages
      run: bash .github/scripts/build-packages.sh
    # Wheels are expected one directory level below dist/ (dist/<pkg>/*.whl).
    - name: Check content of wheels
      run: |
        for whl in dist/*/*.whl; do
          echo "=== $whl ==="
          unzip -l "$whl"
        done
    - name: Store the distribution packages
      uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
      with:
        name: python-package-distributions
        path: dist/
# Installs the wheel produced by `build-package` into a clean environment
# (no repository checkout) and checks that the CLI starts.
test-package:
  needs:
    - changes
    - build-package
  if: ${{ needs.changes.outputs.run_package == 'true' }}
  runs-on: ubuntu-latest
  timeout-minutes: 15
  strategy:
    matrix:
      python-version:
        - "3.12"
  steps:
    - name: Download all the dists
      uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
      with:
        name: python-package-distributions
        path: dist/
    # NOTE(review): referenced by the mutable `v7` tag, while the download
    # action above is pinned to a commit SHA — consider pinning it the same
    # way.
    - name: Install uv and set the python version
      uses: astral-sh/setup-uv@v7
      with:
        python-version: ${{ matrix.python-version }}
        activate-environment: true
        enable-cache: false
    # `--find-links` points at the docling-slim output directory so that
    # package can be taken from the local build.
    - name: Install package
      run: |
        uv pip install --find-links dist/docling-slim/ dist/docling/docling-*.whl
    - name: Run docling
      run: uv run docling --help
# Aggregation gate: runs even when upstream jobs failed or were skipped
# (`always()`), and hands the results of every needed job to
# check_needs_results.py together with the list of jobs that may be skipped.
check:
  if: ${{ always() }}
  needs:
    - changes
    - lint
    - tach
    - run-tests-core
    - run-tests-ml
    - run-examples-light
    - run-tests-windows
    - run-tests-macos
    - test-pip-install-no-lock
    - test-pip-install-no-dev-headers
    - build-package
    - test-package
  runs-on: ubuntu-latest
  timeout-minutes: 5
  permissions:
    contents: read
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
    - name: All Green
      env:
        # Serialized `needs` context with the result of each upstream job.
        NEEDS_JSON: ${{ toJSON(needs) }}
        # Every job except `changes`; folded into one space-separated string.
        ALLOWED_SKIPS: >-
          lint tach run-tests-core run-tests-ml run-examples-light run-tests-windows
          run-tests-macos test-pip-install-no-lock test-pip-install-no-dev-headers
          build-package test-package
      # Folded scalar: the three lines below form a single command line.
      run: >-
        python3 .github/scripts/check_needs_results.py
        --needs-json "$NEEDS_JSON"
        --allowed-skips "$ALLOWED_SKIPS"