mirror of
https://github.com/docling-project/docling.git
synced 2026-05-17 13:10:38 +00:00
feat: Introduce modular docling-slim package (#3285)
* plans folder structure Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * initial plan Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * updated plan Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * restructure repo for docling and docling-slim Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * transpose package structures Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * add all-packages Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * updated lock and deps Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * align deps Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * more lock like main Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * more locked pinning Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * rename extras Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * add simple README for docling-slim Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * fix scikit-image issue Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * add readme placeholder Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * add all extras in package test Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * cli in docling-slim Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * apply formatting Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * fix testing package Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * override grpcio in no-header test Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * update lock Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * update package description Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * updated extras Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * fix publish scripts Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * update package test Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> --------- Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Executable
+27
@@ -0,0 +1,27 @@
|
||||
#!/bin/bash

set -e  # trigger failure on error - do not remove!
set -x  # display command on output

# Build each package into its own dist subdirectory so the PyPI publish
# action can upload them independently (otherwise a single `dist/` causes
# the second publish step to re-upload files and fail on `skip-existing: false`).

# Build docling-slim package (from repo root — source co-located)
echo "Building docling-slim package..."
uv build --out-dir dist/docling-slim

# Build docling package (meta-package, dependency-only wheel)
echo "Building docling package..."

DOCLING_README="packages/docling/README.md"
DOCLING_README_BACKUP="${DOCLING_README}.placeholder"

# Put the placeholder README back if a backup exists. Registered as an EXIT
# trap so the working tree is restored even when `uv build` fails and `set -e`
# aborts the script before the explicit restore below is reached.
restore_placeholder_readme() {
    if [ -f "${DOCLING_README_BACKUP}" ]; then
        mv -f "${DOCLING_README_BACKUP}" "${DOCLING_README}"
    fi
}
trap restore_placeholder_readme EXIT

# Backup placeholder README and copy root README for build
mv "${DOCLING_README}" "${DOCLING_README_BACKUP}"
cp README.md "${DOCLING_README}"
(cd packages/docling && uv build --out-dir ../../dist/docling)

# Restore placeholder README (the EXIT trap becomes a no-op afterwards)
restore_placeholder_readme

echo "Build complete."
echo "docling-slim artifacts:"
ls -lh dist/docling-slim/
echo "docling artifacts:"
ls -lh dist/docling/
|
||||
@@ -9,9 +9,21 @@ if [ -z "${TARGET_VERSION}" ]; then
|
||||
fi
|
||||
CHGLOG_FILE="${CHGLOG_FILE:-CHANGELOG.md}"
|
||||
|
||||
# update package version
|
||||
# update package versions:
|
||||
# - root pyproject.toml = docling-slim
|
||||
# - packages/docling/pyproject.toml = docling (meta-package)
|
||||
uvx --from=toml-cli toml set --toml-path=pyproject.toml project.version "${TARGET_VERSION}"
|
||||
UV_FROZEN=0 uv lock --upgrade-package docling
|
||||
uvx --from=toml-cli toml set --toml-path=packages/docling/pyproject.toml project.version "${TARGET_VERSION}"
|
||||
|
||||
# update docling-slim dependency version in docling package
|
||||
uvx --from=toml-cli toml set --toml-path=packages/docling/pyproject.toml "project.dependencies[0]" "docling-slim[standard]==${TARGET_VERSION}"
|
||||
|
||||
# update all re-exported extras in docling package
|
||||
# Re-pin every `docling-slim[<extras>]==<version>` requirement in place.
# Only the version after `==` is rewritten; the extras name inside the
# brackets is kept as written in the file, so the list of re-exported extras
# does not have to be duplicated (and kept in sync) here.
# NOTE(review): uses GNU `sed -i`; assumes this script runs on a Linux runner.
sed -i -E "s/(docling-slim\[[^]]+\])==[^'\"]+/\1==${TARGET_VERSION}/" packages/docling/pyproject.toml
|
||||
|
||||
UV_FROZEN=0 uv lock --upgrade-package docling --upgrade-package docling-slim
|
||||
|
||||
# collect release notes
|
||||
REL_NOTES=$(mktemp)
|
||||
@@ -31,7 +43,7 @@ mv "${TMP_CHGLOG}" "${CHGLOG_FILE}"
|
||||
# push changes
|
||||
git config --global user.name 'github-actions[bot]'
|
||||
git config --global user.email 'github-actions[bot]@users.noreply.github.com'
|
||||
git add pyproject.toml uv.lock "${CHGLOG_FILE}"
|
||||
git add pyproject.toml packages/docling/pyproject.toml uv.lock "${CHGLOG_FILE}"
|
||||
COMMIT_MSG="chore: bump version to ${TARGET_VERSION} [skip ci]"
|
||||
git commit -m "${COMMIT_MSG}"
|
||||
git push origin main
|
||||
|
||||
@@ -50,7 +50,7 @@ jobs:
|
||||
pre-commit|${{ env.PY }}|
|
||||
|
||||
- name: Install Python Dependencies
|
||||
run: uv sync --frozen --all-extras
|
||||
run: uv sync --frozen --all-extras --all-packages
|
||||
|
||||
- name: Check style
|
||||
run: |
|
||||
@@ -92,7 +92,7 @@ jobs:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Install Python Dependencies
|
||||
run: uv sync --frozen --all-extras
|
||||
run: uv sync --frozen --all-extras --all-packages
|
||||
|
||||
- name: Cache Models
|
||||
uses: actions/cache@v5
|
||||
@@ -159,7 +159,7 @@ jobs:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Install Python Dependencies
|
||||
run: uv sync --frozen --all-extras
|
||||
run: uv sync --frozen --all-extras --all-packages
|
||||
|
||||
- name: Cache Models
|
||||
uses: actions/cache@v5
|
||||
@@ -231,7 +231,7 @@ jobs:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Install Python Dependencies
|
||||
run: uv sync --frozen --all-extras
|
||||
run: uv sync --frozen --all-extras --all-packages
|
||||
|
||||
- name: Cache Models
|
||||
uses: actions/cache@v5
|
||||
@@ -301,21 +301,21 @@ jobs:
|
||||
echo "=========================================="
|
||||
echo "Testing Python $py_version"
|
||||
echo "=========================================="
|
||||
|
||||
|
||||
# Create virtual environment with uv
|
||||
uv venv /tmp/venv-${py_version} --python=${py_version}
|
||||
source /tmp/venv-${py_version}/bin/activate
|
||||
|
||||
# Install package with pip (no lock file)
|
||||
uv pip install --torch-backend=cpu -e .[easyocr,tesserocr,vlm,rapidocr,asr]
|
||||
|
||||
|
||||
# Install docling-slim with pip (no lock file)
|
||||
uv pip install --torch-backend=cpu -e .[all]
|
||||
|
||||
# Run basic import test
|
||||
python -c "import docling; from docling.document_converter import DocumentConverter; print('Import successful for Python ${py_version}')"
|
||||
|
||||
python -c "import docling; print('Import successful for Python ${py_version}')"
|
||||
|
||||
# Cleanup
|
||||
deactivate
|
||||
rm -rf /tmp/venv-${py_version}
|
||||
|
||||
|
||||
echo "Python $py_version: PASSED"
|
||||
echo ""
|
||||
done
|
||||
@@ -334,16 +334,16 @@ jobs:
|
||||
echo "=========================================="
|
||||
echo "Testing Python $py_version (no dev headers)"
|
||||
echo "=========================================="
|
||||
|
||||
|
||||
# Create virtual environment with uv
|
||||
uv venv /tmp/venv-nodev-${py_version} --python=${py_version}
|
||||
source /tmp/venv-nodev-${py_version}/bin/activate
|
||||
|
||||
|
||||
# Find and remove Python.h from the Python installation
|
||||
echo "Removing Python development headers from Python installation..."
|
||||
python_include_dir=$(python -c "import sysconfig; print(sysconfig.get_path('include'))")
|
||||
echo "Python include directory: $python_include_dir"
|
||||
|
||||
|
||||
if [ -f "$python_include_dir/Python.h" ]; then
|
||||
echo "Found Python.h, removing it and other headers..."
|
||||
# Use sudo if the directory is system-owned
|
||||
@@ -356,7 +356,7 @@ jobs:
|
||||
else
|
||||
echo "Warning: Python.h not found at expected location"
|
||||
fi
|
||||
|
||||
|
||||
# Verify that compilation fails without dev headers
|
||||
# Try to install numpy from source (sdist) - this should fail
|
||||
echo "Verifying compilation fails without dev headers..."
|
||||
@@ -364,7 +364,7 @@ jobs:
|
||||
uv pip install --no-binary=:all: numpy==1.26.4 > /tmp/numpy-install-${py_version}.log 2>&1
|
||||
numpy_exit_code=$?
|
||||
set -e # Re-enable exit on error
|
||||
|
||||
|
||||
if [ $numpy_exit_code -eq 0 ]; then
|
||||
echo "ERROR: numpy installation from source succeeded, but it should have failed without dev headers!"
|
||||
cat /tmp/numpy-install-${py_version}.log
|
||||
@@ -378,19 +378,18 @@ jobs:
|
||||
echo "Warning: Error message doesn't explicitly mention missing headers, but compilation failed as expected"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Install package with pip (no lock file, no compilation)
|
||||
# Install without extras that require compilation (tesserocr requires dev headers)
|
||||
# Note: Not using --only-binary since some packages are sdist-only but don't require compilation
|
||||
uv pip install --torch-backend=cpu -e .[easyocr,vlm,rapidocr,asr]
|
||||
|
||||
|
||||
# Install docling-slim with pip (no lock file, no compilation)
|
||||
echo "grpcio>=1.71.0" > override-grpcio.txt
|
||||
uv pip install --torch-backend=cpu -e ".[all]" --overrides override-grpcio.txt
|
||||
|
||||
# Run basic import test
|
||||
python -c "import docling; from docling.document_converter import DocumentConverter; print('Import successful for Python ${py_version} without dev headers')"
|
||||
|
||||
python -c "import docling; print('Import successful for Python ${py_version} without dev headers')"
|
||||
|
||||
# Cleanup
|
||||
deactivate
|
||||
rm -rf /tmp/venv-nodev-${py_version}
|
||||
|
||||
|
||||
echo "Python $py_version (no dev headers): PASSED"
|
||||
echo ""
|
||||
done
|
||||
@@ -412,11 +411,15 @@ jobs:
|
||||
- name: Install dependencies
|
||||
run: uv sync --all-extras
|
||||
|
||||
- name: Build package
|
||||
run: uv build
|
||||
- name: Build packages
|
||||
run: bash .github/scripts/build-packages.sh
|
||||
|
||||
- name: Check content of wheel
|
||||
run: unzip -l dist/*.whl
|
||||
- name: Check content of wheels
|
||||
run: |
|
||||
for whl in dist/*/*.whl; do
|
||||
echo "=== $whl ==="
|
||||
unzip -l "$whl"
|
||||
done
|
||||
|
||||
- name: Store the distribution packages
|
||||
uses: actions/upload-artifact@v6
|
||||
@@ -447,7 +450,7 @@ jobs:
|
||||
|
||||
- name: Install package
|
||||
run: |
|
||||
uv pip install dist/*.whl
|
||||
uv pip install --find-links dist/docling-slim/ dist/docling/docling-*.whl
|
||||
|
||||
- name: Run docling
|
||||
run: uv run docling --help
|
||||
|
||||
@@ -19,8 +19,10 @@ jobs:
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
enable-cache: true
|
||||
- name: Install Python Dependencies
|
||||
run: uv sync --frozen --all-extras --all-packages
|
||||
- name: Build docs
|
||||
run: uv run mkdocs build --verbose --clean
|
||||
run: uv run --no-sync mkdocs build --verbose --clean
|
||||
- name: Build and push docs
|
||||
if: inputs.deploy
|
||||
run: uv run --no-sync mkdocs gh-deploy --force
|
||||
|
||||
+54
-10
@@ -1,4 +1,4 @@
|
||||
name: "Build and publish package"
|
||||
name: "Build and publish packages"
|
||||
|
||||
on:
|
||||
release:
|
||||
@@ -11,16 +11,11 @@ permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
build-and-publish:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ['3.12']
|
||||
environment:
|
||||
name: pypi
|
||||
url: https://pypi.org/p/docling
|
||||
permissions:
|
||||
id-token: write # IMPORTANT: mandatory for trusted publishing
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- name: Install uv and set the python version
|
||||
@@ -30,9 +25,58 @@ jobs:
|
||||
enable-cache: true
|
||||
- name: Install dependencies
|
||||
run: uv sync --all-extras
|
||||
- name: Build package
|
||||
run: uv build
|
||||
- name: Publish distribution 📦 to PyPI
|
||||
- name: Build packages
|
||||
run: bash .github/scripts/build-packages.sh
|
||||
- name: Upload build artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: dist
|
||||
path: dist/
|
||||
|
||||
publish-docling-slim:
|
||||
needs: build
|
||||
runs-on: ubuntu-latest
|
||||
environment:
|
||||
name: pypi
|
||||
url: https://pypi.org/p/docling-slim
|
||||
permissions:
|
||||
id-token: write # IMPORTANT: mandatory for trusted publishing
|
||||
steps:
|
||||
- name: Download build artifacts
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: dist
|
||||
path: dist/
|
||||
- name: Publish docling-slim to PyPI
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
attestations: true
|
||||
packages-dir: dist/docling-slim/
|
||||
skip-existing: false
|
||||
|
||||
publish-docling:
|
||||
# docling is a meta-package that depends on docling-slim, so publish it
|
||||
# after docling-slim is available on PyPI.
|
||||
needs: publish-docling-slim
|
||||
runs-on: ubuntu-latest
|
||||
environment:
|
||||
name: pypi
|
||||
url: https://pypi.org/p/docling
|
||||
permissions:
|
||||
id-token: write # IMPORTANT: mandatory for trusted publishing
|
||||
steps:
|
||||
- name: Download build artifacts
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: dist
|
||||
path: dist/
|
||||
- name: Wait for docling-slim to be available on PyPI
|
||||
run: |
|
||||
echo "Waiting 60 seconds for docling-slim to propagate on PyPI..."
|
||||
sleep 60
|
||||
- name: Publish docling to PyPI
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
attestations: true
|
||||
packages-dir: dist/docling/
|
||||
skip-existing: false
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
+20
-2
@@ -1,6 +1,7 @@
|
||||
import datetime
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
import warnings
|
||||
@@ -8,8 +9,25 @@ from collections.abc import Iterable
|
||||
from pathlib import Path
|
||||
from typing import Annotated, Type
|
||||
|
||||
import rich.table
|
||||
import typer
|
||||
# Check for CLI dependencies
|
||||
try:
|
||||
import rich.table
|
||||
import typer
|
||||
except ImportError as e:
|
||||
missing_package = str(e).split("'")[1] if "'" in str(e) else "typer or rich"
|
||||
print(
|
||||
f"Error: Missing required CLI dependency '{missing_package}'", file=sys.stderr
|
||||
)
|
||||
print("\nThe docling CLI requires additional dependencies.", file=sys.stderr)
|
||||
print("Please install them using one of the following options:\n", file=sys.stderr)
|
||||
print(" 1. Install the full docling package (recommended):", file=sys.stderr)
|
||||
print(" pip install docling\n", file=sys.stderr)
|
||||
print(" 2. Install docling-slim with CLI support:", file=sys.stderr)
|
||||
print(" pip install docling-slim[cli]\n", file=sys.stderr)
|
||||
print(" 3. Install just the missing dependencies:", file=sys.stderr)
|
||||
print(" pip install typer rich\n", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
from docling_core.transforms.serializer.html import (
|
||||
HTMLDocSerializer,
|
||||
HTMLOutputStyle,
|
||||
|
||||
+20
-3
@@ -1,12 +1,29 @@
|
||||
import logging
|
||||
import sys
|
||||
import warnings
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Annotated, Optional
|
||||
|
||||
import typer
|
||||
from rich.console import Console
|
||||
from rich.logging import RichHandler
|
||||
# Check for CLI dependencies
|
||||
try:
|
||||
import typer
|
||||
from rich.console import Console
|
||||
from rich.logging import RichHandler
|
||||
except ImportError as e:
|
||||
missing_package = str(e).split("'")[1] if "'" in str(e) else "typer or rich"
|
||||
print(
|
||||
f"Error: Missing required CLI dependency '{missing_package}'", file=sys.stderr
|
||||
)
|
||||
print("\nThe docling-tools CLI requires additional dependencies.", file=sys.stderr)
|
||||
print("Please install them using one of the following options:\n", file=sys.stderr)
|
||||
print(" 1. Install the full docling package (recommended):", file=sys.stderr)
|
||||
print(" pip install docling\n", file=sys.stderr)
|
||||
print(" 2. Install docling-slim with CLI support:", file=sys.stderr)
|
||||
print(" pip install docling-slim[cli]\n", file=sys.stderr)
|
||||
print(" 3. Install just the missing dependencies:", file=sys.stderr)
|
||||
print(" pip install typer rich\n", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
from docling.datamodel.settings import settings
|
||||
from docling.models.utils.hf_model_download import download_hf_model
|
||||
|
||||
+19
-1
@@ -1,4 +1,22 @@
|
||||
import typer
|
||||
import sys
|
||||
|
||||
# Check for CLI dependencies
|
||||
try:
|
||||
import typer
|
||||
except ImportError as e:
|
||||
missing_package = str(e).split("'")[1] if "'" in str(e) else "typer"
|
||||
print(
|
||||
f"Error: Missing required CLI dependency '{missing_package}'", file=sys.stderr
|
||||
)
|
||||
print("\nThe docling-tools CLI requires additional dependencies.", file=sys.stderr)
|
||||
print("Please install them using one of the following options:\n", file=sys.stderr)
|
||||
print(" 1. Install the full docling package (recommended):", file=sys.stderr)
|
||||
print(" pip install docling\n", file=sys.stderr)
|
||||
print(" 2. Install docling-slim with CLI support:", file=sys.stderr)
|
||||
print(" pip install docling-slim[cli]\n", file=sys.stderr)
|
||||
print(" 3. Install just the missing dependencies:", file=sys.stderr)
|
||||
print(" pip install typer rich\n", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
from docling.cli.models import app as models_app
|
||||
|
||||
|
||||
@@ -0,0 +1,126 @@
|
||||
# Docling Slim
|
||||
|
||||
**Lightweight SDK for parsing documents with minimal dependencies and opt-in extras**
|
||||
|
||||
Docling Slim is a minimal-dependency version of Docling that allows you to install only the components you need. It provides the core document processing functionality with ~50MB of base dependencies, and you can add specific features through optional extras.
|
||||
|
||||
## When to Use Docling Slim
|
||||
|
||||
- **Use `docling`** (recommended): If you want the full-featured experience with all standard capabilities
|
||||
- **Use `docling-slim`**: If you need fine-grained control over dependencies or want to minimize installation size
|
||||
|
||||
## For Most Users: Use the Main Docling Package
|
||||
|
||||
We recommend most users install the full-featured `docling` package instead:
|
||||
|
||||
```bash
|
||||
pip install docling
|
||||
```
|
||||
|
||||
The `docling` package includes all standard features, the CLI tools, and is the easiest way to get started. Visit the [main Docling documentation](https://docling-project.github.io/docling/) for complete guides and examples.
|
||||
|
||||
## Installation
|
||||
|
||||
### With Specific Features
|
||||
```bash
|
||||
# PDF support with local models
|
||||
pip install "docling-slim[format-pdf,models-local]"
|
||||
|
||||
# Office formats only
|
||||
pip install "docling-slim[format-office]"
|
||||
|
||||
# PDF + CLI
|
||||
pip install "docling-slim[format-pdf,cli]"
|
||||
|
||||
# Docling service client for using the Docling Serve API
|
||||
pip install "docling-slim[service-client]"
|
||||
```
|
||||
|
||||
## Available Extras
|
||||
|
||||
### Convenience Bundles
|
||||
|
||||
| Extra | Description | Use Case |
|
||||
|-------|-------------|----------|
|
||||
| `standard` | All standard features (same as `docling` package) | Full-featured usage |
|
||||
| `all` | All available extras | Complete installation |
|
||||
|
||||
### CLI
|
||||
|
||||
| Extra | Description | Use Case |
|
||||
|-------|-------------|----------|
|
||||
| `cli` | Command-line interface (typer, rich) | CLI tools (docling, docling-tools) |
|
||||
|
||||
### Core Components
|
||||
|
||||
| Extra | Description | Use Case |
|
||||
|-------|-------------|----------|
|
||||
| `convert-core` | Core conversion components (numpy, pillow, scipy) | Basic document conversion |
|
||||
| `extract-core` | Structured information extraction | Data extraction from documents |
|
||||
|
||||
### Format Support
|
||||
|
||||
#### PDF Formats
|
||||
|
||||
| Extra | Description | Use Case |
|
||||
|-------|-------------|----------|
|
||||
| `format-pdf` | PDF parsing (pypdfium2 + docling-parse) | PDF documents |
|
||||
| `format-pdf-pypdfium2` | PDF rendering only | Lightweight PDF support |
|
||||
| `format-pdf-docling` | Advanced PDF parsing | Complex PDF layouts |
|
||||
|
||||
#### Office Formats (office = docx + pptx + xlsx)
|
||||
|
||||
| Extra | Description | Use Case |
|
||||
|-------|-------------|----------|
|
||||
| `format-office` | All Office formats | Microsoft Office documents |
|
||||
| `format-docx` | Microsoft Word documents | .docx files |
|
||||
| `format-pptx` | Microsoft PowerPoint | .pptx files |
|
||||
| `format-xlsx` | Microsoft Excel | .xlsx files |
|
||||
|
||||
#### Web Formats (web = html + markdown)
|
||||
|
||||
| Extra | Description | Use Case |
|
||||
|-------|-------------|----------|
|
||||
| `format-web` | HTML and Markdown | Web content |
|
||||
| `format-html` | HTML parsing | Web pages and HTML files |
|
||||
| `format-markdown` | Markdown parsing | .md files |
|
||||
|
||||
#### Other Formats
|
||||
|
||||
| Extra | Description | Use Case |
|
||||
|-------|-------------|----------|
|
||||
| `format-latex` | LaTeX documents | .tex files |
|
||||
| `format-xml-xbrl` | XBRL financial reports | Financial documents |
|
||||
| `format-html-render` | HTML rendering with Playwright | Dynamic web content |
|
||||
| `format-audio` | Audio transcription (Whisper) | .wav, .mp3 files |
|
||||
|
||||
### OCR Engines
|
||||
|
||||
| Extra | Description | Use Case |
|
||||
|-------|-------------|----------|
|
||||
| `feat-ocr-rapidocr` | RapidOCR (lightweight) | Fast OCR |
|
||||
| `feat-ocr-rapidocr-onnx` | RapidOCR with ONNX runtime | Optimized OCR |
|
||||
| `feat-ocr-easyocr` | EasyOCR | Multi-language OCR |
|
||||
| `feat-ocr-tesserocr` | Tesseract OCR | High-accuracy OCR |
|
||||
| `feat-ocr-mac` | macOS native OCR | macOS only |
|
||||
|
||||
### Models
|
||||
|
||||
| Extra | Description | Use Case |
|
||||
|-------|-------------|----------|
|
||||
| `models-local` | Local PyTorch models | GPU/CPU inference |
|
||||
| `models-remote` | Remote model serving (Triton) | Production deployments |
|
||||
| `models-onnxruntime` | ONNX Runtime acceleration | Optimized inference |
|
||||
| `models-vlm-inline` | Vision Language Models | Image understanding, inline processing |
|
||||
|
||||
### Other features
|
||||
|
||||
| Extra | Description | Use Case |
|
||||
|-------|-------------|----------|
|
||||
| `feat-chunking` | Document chunking | RAG applications |
|
||||
| `service-client` | Docling service client | Remote processing |
|
||||
|
||||
|
||||
## License
|
||||
|
||||
MIT License - See [LICENSE](https://github.com/docling-project/docling/blob/main/LICENSE)
|
||||
@@ -0,0 +1,7 @@
|
||||
# Docling
|
||||
|
||||
This is a placeholder README for the `docling` meta-package.
|
||||
|
||||
For the full README, see the [root README.md](../../README.md) in the repository.
|
||||
|
||||
The actual README content is copied from the root during the build process.
|
||||
@@ -0,0 +1,92 @@
|
||||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[project]
|
||||
name = "docling"
|
||||
version = "2.91.0" # DO NOT EDIT, updated automatically
|
||||
description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
|
||||
license = "MIT"
|
||||
keywords = [
|
||||
"docling",
|
||||
"convert",
|
||||
"document",
|
||||
"pdf",
|
||||
"docx",
|
||||
"html",
|
||||
"markdown",
|
||||
"layout model",
|
||||
"segmentation",
|
||||
"table structure",
|
||||
"table former",
|
||||
]
|
||||
classifiers = [
|
||||
"Operating System :: MacOS :: MacOS X",
|
||||
"Operating System :: POSIX :: Linux",
|
||||
"Operating System :: Microsoft :: Windows",
|
||||
"Development Status :: 5 - Production/Stable",
|
||||
"Intended Audience :: Developers",
|
||||
"Intended Audience :: Science/Research",
|
||||
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Programming Language :: Python :: 3.13",
|
||||
"Programming Language :: Python :: 3.14",
|
||||
]
|
||||
readme = "README.md"
|
||||
authors = [
|
||||
{ name = "Christoph Auer", email = "cau@zurich.ibm.com" },
|
||||
{ name = "Michele Dolfi", email = "dol@zurich.ibm.com" },
|
||||
{ name = "Maxim Lysak", email = "mly@zurich.ibm.com" },
|
||||
{ name = "Nikos Livathinos", email = "nli@zurich.ibm.com" },
|
||||
{ name = "Ahmed Nassar", email = "ahn@zurich.ibm.com" },
|
||||
{ name = "Panos Vagenas", email = "pva@zurich.ibm.com" },
|
||||
{ name = "Peter Staar", email = "taa@zurich.ibm.com" },
|
||||
]
|
||||
requires-python = '>=3.10,<4.0'
|
||||
|
||||
# Meta-package: pulls in docling-slim with standard extras (includes CLI).
|
||||
# The `docling` Python module itself is provided by docling-slim.
|
||||
# CLI entry points are now defined in docling-slim's pyproject.toml.
|
||||
dependencies = [
|
||||
'docling-slim[standard]==2.91.0',
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
homepage = "https://github.com/docling-project/docling"
|
||||
repository = "https://github.com/docling-project/docling"
|
||||
issues = "https://github.com/docling-project/docling/issues"
|
||||
changelog = "https://github.com/docling-project/docling/blob/main/CHANGELOG.md"
|
||||
|
||||
# Re-export slim extras for convenience. Each extra pins docling-slim to the
# same version as this meta-package; the pins are rewritten at release time.
[project.optional-dependencies]
easyocr = ['docling-slim[feat-ocr-easyocr]==2.91.0']
tesserocr = ['docling-slim[feat-ocr-tesserocr]==2.91.0']
ocrmac = ['docling-slim[feat-ocr-mac]==2.91.0']
vlm = ['docling-slim[models-vlm-inline]==2.91.0']
rapidocr = ['docling-slim[feat-ocr-rapidocr-onnx]==2.91.0']
chunking = ['docling-slim[feat-chunking]==2.91.0']
format-audio = ['docling-slim[format-audio]==2.91.0']
format-html-render = ['docling-slim[format-html-render]==2.91.0']
models-remote = ['docling-slim[models-remote]==2.91.0']
models-onnxruntime = ['docling-slim[models-onnxruntime]==2.91.0']
format-xml-xbrl = ['docling-slim[format-xml-xbrl]==2.91.0']

# For local development: resolve docling-slim from the workspace member
# instead of PyPI. Kept after all [project.*] tables so tool configuration
# does not sit between project metadata tables.
[tool.uv.sources]
docling-slim = { workspace = true }
|
||||
|
||||
# Dependency-only wheel: no Python modules shipped here. All source lives in
|
||||
# the docling-slim wheel (built from the repo root). This avoids the prior
|
||||
# bug where both wheels shipped the same `docling/` module and collided on
|
||||
# install.
|
||||
[tool.hatch.build.targets.wheel]
|
||||
bypass-selection = true
|
||||
|
||||
[tool.hatch.build.targets.sdist]
|
||||
only-include = ["pyproject.toml", "README.md"]
|
||||
|
||||
[tool.uv]
|
||||
package = true
|
||||
+223
-118
@@ -1,7 +1,11 @@
|
||||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[project]
|
||||
name = "docling"
|
||||
name = "docling-slim"
|
||||
version = "2.91.0" # DO NOT EDIT, updated automatically
|
||||
description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
|
||||
description = "Modular version of the Docling package: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
|
||||
license = "MIT"
|
||||
keywords = [
|
||||
"docling",
|
||||
@@ -13,8 +17,6 @@ keywords = [
|
||||
"markdown",
|
||||
"layout model",
|
||||
"segmentation",
|
||||
"table structure",
|
||||
"table former",
|
||||
]
|
||||
classifiers = [
|
||||
"Operating System :: MacOS :: MacOS X",
|
||||
@@ -31,7 +33,7 @@ classifiers = [
|
||||
"Programming Language :: Python :: 3.13",
|
||||
"Programming Language :: Python :: 3.14",
|
||||
]
|
||||
readme = "README.md"
|
||||
readme = "packages/docling-slim/README.md"
|
||||
authors = [
|
||||
{ name = "Christoph Auer", email = "cau@zurich.ibm.com" },
|
||||
{ name = "Michele Dolfi", email = "dol@zurich.ibm.com" },
|
||||
@@ -42,40 +44,17 @@ authors = [
|
||||
{ name = "Peter Staar", email = "taa@zurich.ibm.com" },
|
||||
]
|
||||
requires-python = '>=3.10,<4.0'
|
||||
|
||||
# MINIMAL BASE (8 packages) - ~50MB
|
||||
dependencies = [
|
||||
'pydantic (>=2.0.0,<3.0.0)',
|
||||
'docling-core[chunking] (>=2.73.0,<3.0.0)',
|
||||
'docling-parse (>=5.3.2,<6.0.0)',
|
||||
'docling-ibm-models>=3.13.0,<4',
|
||||
'torch (>=2.2.2,<3.0.0)',
|
||||
'torchvision (>=0,<1)',
|
||||
'filetype (>=1.2.0,<2.0.0)',
|
||||
'pypdfium2 (>=4.30.0,!=4.30.1,<6.0.0)',
|
||||
'pydantic-settings (>=2.3.0,<3.0.0)',
|
||||
'huggingface_hub (>=0.23,<2)',
|
||||
'httpx (>=0.28,<1.0.0)',
|
||||
'requests (>=2.32.2,<3.0.0)',
|
||||
'ocrmac (>=1.0.0,<2.0.0) ; sys_platform == "darwin"',
|
||||
'rapidocr (>=3.8,<4.0.0)',
|
||||
'certifi (>=2024.7.4)',
|
||||
'rtree (>=1.3.0,<2.0.0)',
|
||||
'typer (>=0.12.5,<0.22.0)',
|
||||
'python-docx (>=1.1.2,<2.0.0)',
|
||||
'python-pptx (>=1.0.2,<2.0.0)',
|
||||
'beautifulsoup4 (>=4.12.3,<5.0.0)',
|
||||
'pandas (>=2.1.4,<4.0.0)',
|
||||
'marko (>=2.1.2,<3.0.0)',
|
||||
'openpyxl (>=3.1.5,<4.0.0)',
|
||||
'lxml (>=4.0.0,<7.0.0)',
|
||||
'pillow (>=10.0.0,<13.0.0)',
|
||||
'tqdm (>=4.65.0,<5.0.0)',
|
||||
'pluggy (>=1.0.0,<2.0.0)',
|
||||
'pylatexenc (>=2.10,<3.0)',
|
||||
'scipy (>=1.6.0,<2.0.0)',
|
||||
"accelerate>=1.0.0,<2",
|
||||
"polyfactory>=2.22.2",
|
||||
"defusedxml (>=0.7.1, <0.8.0)",
|
||||
"websockets (>=14.0,<17.0)",
|
||||
'pydantic>=2.0.0,<3.0.0',
|
||||
'docling-core>=2.73.0,<3.0.0',
|
||||
'pydantic-settings>=2.3.0,<3.0.0',
|
||||
'filetype>=1.2.0,<2.0.0',
|
||||
'requests>=2.32.2,<3.0.0',
|
||||
'certifi>=2024.7.4',
|
||||
'pluggy>=1.0.0,<2.0.0',
|
||||
'tqdm>=4.65.0,<5.0.0',
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
@@ -87,66 +66,207 @@ changelog = "https://github.com/docling-project/docling/blob/main/CHANGELOG.md"
|
||||
[project.entry-points.docling]
|
||||
"docling_defaults" = "docling.models.plugins.defaults"
|
||||
|
||||
# CLI scripts (require cli extra: pip install docling-slim[cli])
|
||||
[project.scripts]
|
||||
docling = "docling.cli.main:app"
|
||||
docling-tools = "docling.cli.tools:app"
|
||||
|
||||
[project.optional-dependencies]
|
||||
easyocr = ['easyocr (>=1.7,<2.0)']
|
||||
tesserocr = ['tesserocr (>=2.7.1,<3.0.0)']
|
||||
ocrmac = ['ocrmac (>=1.0.0,<2.0.0) ; sys_platform == "darwin"']
|
||||
htmlrender = ["playwright>=1.58.0"]
|
||||
vlm = [
|
||||
'transformers (>=4.42.0,<6.0.0,!=5.0.*,!=5.1.*,!=5.2.*,!=5.3.*)',
|
||||
'accelerate (>=1.2.1,<2.0.0)',
|
||||
'mlx-vlm (>=0.4.3,<1.0.0) ; python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64"',
|
||||
# 'vllm (>=0.10.0,<1.0.0) ; python_version >= "3.10" and python_version < "3.14" and sys_platform == "linux" and platform_machine == "x86_64"',
|
||||
"qwen-vl-utils>=0.0.11",
|
||||
"peft>=0.18.1",
|
||||
# ============================================================================
|
||||
# CORE COMPONENTS
|
||||
# ============================================================================
|
||||
convert-core = [
|
||||
'numpy>=1.24.0,<3.0.0',
|
||||
'pillow>=10.0.0,<13.0.0',
|
||||
'rtree>=1.3.0,<2.0.0',
|
||||
'scipy>=1.6.0,<2.0.0',
|
||||
]
|
||||
rapidocr = [
|
||||
'rapidocr (>=3.8,<4.0.0)',
|
||||
'onnxruntime (>=1.7.0,<2.0.0) ; python_version < "3.14"',
|
||||
|
||||
extract-core = [
|
||||
'docling-slim[convert-core]',
|
||||
'polyfactory>=2.22.2',
|
||||
]
|
||||
onnxruntime = [
|
||||
'onnxruntime (<1.24) ; python_version < "3.14" and sys_platform == "darwin"',
|
||||
'onnxruntime-gpu (<1.24) ; python_version < "3.14" and (sys_platform == "linux" or sys_platform == "win32")',
|
||||
|
||||
# ============================================================================
|
||||
# FORMAT SUPPORT
|
||||
# ============================================================================
|
||||
|
||||
# --- PDF Formats ---
|
||||
format-pdf-pypdfium2 = [
|
||||
'pypdfium2>=4.30.0,!=4.30.1,<6.0.0',
|
||||
]
|
||||
asr = [
|
||||
'mlx-whisper>=0.4.3 ; python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64"',
|
||||
'openai-whisper>=20250625',
|
||||
'numba>=0.63.0', # Ensure Python 3.11+ compatibility (llvmlite constraint)
|
||||
|
||||
format-pdf-docling = [
|
||||
'pypdfium2>=4.30.0,!=4.30.1,<6.0.0',
|
||||
'docling-parse>=5.3.2,<6.0.0',
|
||||
]
|
||||
xbrl = [
|
||||
"arelle-release (>=2.38.17,<3.0.0)",
|
||||
|
||||
format-pdf = [
|
||||
'docling-slim[format-pdf-pypdfium2,format-pdf-docling]',
|
||||
]
|
||||
remote-serving = [
|
||||
'tritonclient[grpc] (>=2.65.0,<3.0.0)',
|
||||
|
||||
# --- Office Formats (office = docx + pptx + xlsx) ---
|
||||
format-docx = [
|
||||
'python-docx>=1.1.2,<2.0.0',
|
||||
]
|
||||
|
||||
format-pptx = [
|
||||
'python-pptx>=1.0.2,<2.0.0',
|
||||
]
|
||||
|
||||
format-xlsx = [
|
||||
'openpyxl>=3.1.5,<4.0.0',
|
||||
]
|
||||
|
||||
format-office = [
|
||||
'docling-slim[format-docx,format-pptx,format-xlsx]',
|
||||
]
|
||||
|
||||
# --- Web Formats (web = html + markdown) ---
|
||||
format-html = [
|
||||
'beautifulsoup4>=4.12.3,<5.0.0',
|
||||
'lxml>=4.0.0,<7.0.0',
|
||||
]
|
||||
|
||||
format-markdown = [
|
||||
'marko>=2.1.2,<3.0.0',
|
||||
]
|
||||
|
||||
format-web = [
|
||||
'docling-slim[format-html,format-markdown]',
|
||||
]
|
||||
|
||||
# --- Other Formats ---
|
||||
format-latex = [
|
||||
'pylatexenc>=2.10,<3.0',
|
||||
]
|
||||
|
||||
format-xml-xbrl = [
|
||||
'arelle-release>=2.38.17,<3.0.0',
|
||||
]
|
||||
|
||||
format-html-render = [
|
||||
'playwright>=1.58.0',
|
||||
]
|
||||
|
||||
format-audio = [
|
||||
'mlx-whisper>=0.4.3 ; python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64"',
|
||||
'openai-whisper>=20250625',
|
||||
'numba>=0.63.0',
|
||||
]
|
||||
|
||||
# ============================================================================
|
||||
# OCR ENGINES (feat-ocr-*)
|
||||
# ============================================================================
|
||||
feat-ocr-rapidocr = [
|
||||
'rapidocr>=3.8,<4.0.0',
|
||||
]
|
||||
|
||||
feat-ocr-rapidocr-onnx = [
|
||||
'rapidocr>=3.8,<4.0.0',
|
||||
'onnxruntime>=1.7.0,<2.0.0 ; python_version < "3.14"',
|
||||
]
|
||||
|
||||
feat-ocr-easyocr = [
|
||||
'easyocr>=1.7,<2.0',
|
||||
# easyocr declares scikit-image with no lower bound; without this pin,
|
||||
# resolvers on Python 3.10 backtrack to 0.16.2 (2019), which has no
|
||||
# Py3.10 wheels and fails to build from source.
|
||||
'scikit-image>=0.19',
|
||||
]
|
||||
|
||||
feat-ocr-tesserocr = [
|
||||
'tesserocr>=2.7.1,<3.0.0',
|
||||
'pandas>=2.1.4,<4.0.0',
|
||||
]
|
||||
|
||||
feat-ocr-mac = [
|
||||
'ocrmac>=1.0.0,<2.0.0 ; sys_platform == "darwin"',
|
||||
]
|
||||
|
||||
# ============================================================================
|
||||
# MODELS
|
||||
# ============================================================================
|
||||
models-local = [
|
||||
'torch>=2.2.2,<3.0.0',
|
||||
'torchvision>=0,<1',
|
||||
'docling-ibm-models>=3.13.0,<4',
|
||||
'accelerate>=1.0.0,<2',
|
||||
'huggingface_hub>=0.23,<2',
|
||||
'defusedxml>=0.7.1,<0.8.0',
|
||||
]
|
||||
|
||||
models-remote = [
|
||||
'tritonclient[grpc]>=2.65.0,<3.0.0',
|
||||
]
|
||||
|
||||
models-onnxruntime = [
|
||||
'onnxruntime<1.24 ; python_version < "3.14" and sys_platform == "darwin"',
|
||||
'onnxruntime-gpu<1.24 ; python_version < "3.14" and (sys_platform == "linux" or sys_platform == "win32")',
|
||||
]
|
||||
|
||||
# Vision Language Models for inline processing
|
||||
models-vlm-inline = [
|
||||
'transformers>=4.42.0,<6.0.0,!=5.0.*,!=5.1.*,!=5.2.*,!=5.3.*',
|
||||
'accelerate>=1.2.1,<2.0.0',
|
||||
'mlx-vlm>=0.4.3,<1.0.0 ; python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64"',
|
||||
'qwen-vl-utils>=0.0.11',
|
||||
'peft>=0.18.1',
|
||||
]
|
||||
|
||||
# ============================================================================
|
||||
# FEATURES
|
||||
# ============================================================================
|
||||
# Document chunking for RAG applications
|
||||
feat-chunking = [
|
||||
'docling-core[chunking]>=2.73.0,<3.0.0',
|
||||
]
|
||||
|
||||
service-client = [
|
||||
'httpx>=0.28,<1.0.0',
|
||||
'websockets>=14.0,<17.0',
|
||||
]
|
||||
|
||||
cli = [
|
||||
'typer>=0.12.5,<0.22.0',
|
||||
'rich>=13.0.0',
|
||||
]
|
||||
|
||||
# ============================================================================
|
||||
# CONVENIENCE BUNDLES
|
||||
# ============================================================================
|
||||
standard = [
|
||||
'docling-slim[format-pdf,models-local,feat-ocr-rapidocr,format-office,format-web,format-latex,feat-chunking,extract-core,service-client,cli]',
|
||||
]
|
||||
|
||||
all = [
|
||||
'docling-slim[standard,models-vlm-inline,format-audio,format-html-render,format-xml-xbrl,models-remote,models-onnxruntime,feat-ocr-easyocr,feat-ocr-tesserocr,feat-ocr-mac]',
|
||||
]
|
||||
|
||||
[dependency-groups]
|
||||
dev = [
|
||||
"pre-commit~=3.7",
|
||||
"mypy~=1.10",
|
||||
"types-setuptools~=70.3",
|
||||
"pandas-stubs~=2.1",
|
||||
"types-openpyxl~=3.1",
|
||||
"types-requests~=2.31",
|
||||
"boto3-stubs~=1.37",
|
||||
"types-urllib3~=1.26",
|
||||
"types-tqdm~=4.67",
|
||||
"coverage~=7.6",
|
||||
"pytest~=8.3",
|
||||
"pytest-cov>=6.1.1",
|
||||
"pytest-dependency~=0.6",
|
||||
"pytest-durations~=1.6.1",
|
||||
"pytest-xdist~=3.3",
|
||||
"ipykernel~=6.29",
|
||||
"ipywidgets~=8.1",
|
||||
"nbqa~=1.9",
|
||||
"python-semantic-release~=7.32",
|
||||
"types-defusedxml (>=0.7.0.20250822, <0.8.0)",
|
||||
"pre-commit~=3.7",
|
||||
"mypy~=1.10",
|
||||
"types-setuptools~=70.3",
|
||||
"pandas-stubs~=2.1",
|
||||
"types-openpyxl~=3.1",
|
||||
"types-requests~=2.31",
|
||||
"boto3-stubs~=1.37",
|
||||
"types-urllib3~=1.26",
|
||||
"types-tqdm~=4.67",
|
||||
"coverage~=7.6",
|
||||
"pytest~=8.3",
|
||||
"pytest-cov>=6.1.1",
|
||||
"pytest-dependency~=0.6",
|
||||
"pytest-durations~=1.6.1",
|
||||
"pytest-xdist~=3.3",
|
||||
"ipykernel~=6.29",
|
||||
"ipywidgets~=8.1",
|
||||
"nbqa~=1.9",
|
||||
"python-semantic-release~=7.32",
|
||||
"types-defusedxml>=0.7.0.20250822,<0.8.0",
|
||||
]
|
||||
|
||||
docs = [
|
||||
"mkdocs-material~=9.5",
|
||||
"mkdocs-jupyter>=0.25,<0.26",
|
||||
@@ -155,6 +275,7 @@ docs = [
|
||||
"mkdocstrings[python]~=0.27",
|
||||
"griffe-pydantic~=1.1",
|
||||
]
|
||||
|
||||
examples = [
|
||||
"datasets~=2.21",
|
||||
"python-dotenv~=1.0",
|
||||
@@ -162,53 +283,52 @@ examples = [
|
||||
"langchain-milvus~=0.1",
|
||||
"langchain-text-splitters>=0.2",
|
||||
"modelscope>=1.29.0",
|
||||
'gliner>=0.2.21 ; python_version < "3.14"', # gliner depends on onnxruntime which is not available on py3.14
|
||||
]
|
||||
constraints = [
|
||||
'numba >=0.63.0',
|
||||
'langchain-core >=0.3.81',
|
||||
'pandas (>=2.1.4,<3.0.0); python_version < "3.11"',
|
||||
'pandas (>=2.1.4,<4.0.0); python_version >= "3.11"',
|
||||
'gliner>=0.2.21 ; python_version < "3.14"',
|
||||
]
|
||||
|
||||
constraints = [
|
||||
'numba>=0.63.0',
|
||||
'langchain-core>=0.3.81',
|
||||
'pandas>=2.1.4,<3.0.0 ; python_version < "3.11"',
|
||||
'pandas>=2.1.4,<4.0.0 ; python_version >= "3.11"',
|
||||
]
|
||||
|
||||
[tool.uv.workspace]
|
||||
members = ["packages/docling"]
|
||||
|
||||
[tool.uv.sources]
|
||||
docling = { workspace = true }
|
||||
|
||||
[tool.uv]
|
||||
package = true
|
||||
default-groups = "all"
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
include = ["docling*"]
|
||||
[tool.hatch.build.targets.wheel]
|
||||
packages = ["docling"]
|
||||
|
||||
[tool.hatch.build.targets.sdist]
|
||||
only-include = ["docling", "pyproject.toml", "README.md", "LICENSE"]
|
||||
|
||||
[tool.ruff]
|
||||
target-version = "py310"
|
||||
line-length = 88
|
||||
respect-gitignore = true
|
||||
|
||||
# extend-exclude = [
|
||||
# "tests",
|
||||
# ]
|
||||
|
||||
[tool.ruff.format]
|
||||
skip-magic-trailing-comma = false
|
||||
|
||||
[tool.ruff.lint]
|
||||
select = [
|
||||
# "B", # flake8-bugbear
|
||||
"C", # flake8-comprehensions
|
||||
"C9", # mccabe
|
||||
# "D", # flake8-docstrings
|
||||
"E", # pycodestyle errors (default)
|
||||
"F", # pyflakes (default)
|
||||
"I", # isort
|
||||
"PD", # pandas-vet
|
||||
"PIE", # pie
|
||||
# "PTH", # pathlib
|
||||
"Q", # flake8-quotes
|
||||
# "RET", # return
|
||||
"RUF", # Enable all ruff-specific checks
|
||||
# "SIM", # simplify
|
||||
"S307", # eval
|
||||
# "T20", # (disallow print statements) keep debugging statements out of the codebase
|
||||
"W", # pycodestyle warnings
|
||||
"ASYNC", # async
|
||||
"UP", # pyupgrade
|
||||
@@ -227,38 +347,23 @@ ignore = [
|
||||
"UP035", # `typing.Set` is deprecated, use `set` instead"
|
||||
]
|
||||
|
||||
#extend-select = []
|
||||
|
||||
[tool.ruff.lint.pep8-naming]
|
||||
classmethod-decorators = [
|
||||
# Allow Pydantic's `@validator` decorator to trigger class method treatment.
|
||||
"pydantic.validator",
|
||||
]
|
||||
|
||||
[tool.ruff.lint.per-file-ignores]
|
||||
"__init__.py" = ["E402", "F401"]
|
||||
"tests/*.py" = ["ASYNC"] # Disable ASYNC check for tests
|
||||
"tests/*.py" = ["ASYNC"]
|
||||
|
||||
[tool.ruff.lint.mccabe]
|
||||
max-complexity = 30
|
||||
|
||||
# [tool.ruff.lint.isort.sections]
|
||||
# "docling" = ["docling_core", "docling_ibm_models", "docling_parse"]
|
||||
|
||||
[tool.ruff.lint.isort]
|
||||
combine-as-imports = true
|
||||
# section-order = [
|
||||
# "future",
|
||||
# "standard-library",
|
||||
# "third-party",
|
||||
# "docling",
|
||||
# "first-party",
|
||||
# "local-folder",
|
||||
# ]
|
||||
|
||||
[tool.mypy]
|
||||
pretty = true
|
||||
# strict = true
|
||||
no_implicit_optional = true
|
||||
plugins = "pydantic.mypy"
|
||||
python_version = "3.10"
|
||||
|
||||
Reference in New Issue
Block a user