# Files
# 2026-05-12 14:54:25 +00:00

# 249 lines
# 7.2 KiB
# TOML

# Core metadata for the docling-core distribution.
[project]
name = "docling-core"
version = "2.75.0" # DO NOT EDIT, updated automatically
description = "A python library to define and validate data types in Docling."
license = "MIT"
license-files = ["LICENSE"]
keywords = [
    "docling",
    "discovery",
    "etl",
    "information retrieval",
    "analytics",
    "database",
    "database schema",
    "schema",
    "JSON",
]
readme = "README.md"
authors = [
    { name = "Cesar Berrospi Ramis", email = "ceb@zurich.ibm.com" },
    { name = "Panos Vagenas", email = "pva@zurich.ibm.com" },
    { name = "Michele Dolfi", email = "dol@zurich.ibm.com" },
    { name = "Christoph Auer", email = "cau@zurich.ibm.com" },
    { name = "Peter Staar", email = "taa@zurich.ibm.com" },
]
maintainers = [
    { name = "Panos Vagenas", email = "pva@zurich.ibm.com" },
    { name = "Michele Dolfi", email = "dol@zurich.ibm.com" },
    { name = "Christoph Auer", email = "cau@zurich.ibm.com" },
    { name = "Peter Staar", email = "taa@zurich.ibm.com" },
    { name = "Cesar Berrospi Ramis", email = "ceb@zurich.ibm.com" },
]
classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "Intended Audience :: Science/Research",
    "Natural Language :: English",
    "Operating System :: OS Independent",
    "Topic :: Database",
    "Topic :: Scientific/Engineering :: Information Analysis",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Typing :: Typed",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Programming Language :: Python :: 3.14",
]
# Lowest supported interpreter is 3.10, matching the first versioned classifier above.
requires-python = ">=3.10,<4.0"
# Runtime requirements. pydantic 2.10.0, 2.10.1 and 2.10.2 are explicitly excluded.
dependencies = [
    "jsonschema (>=4.16.0,<5.0.0)",
    "pydantic (>=2.6.0,<3.0.0,!=2.10.0,!=2.10.1,!=2.10.2)",
    "jsonref (>=1.1.0,<2.0.0)",
    "tabulate (>=0.9.0,<0.11.0)",
    "pandas (>=2.1.4,<4.0.0)",
    "pillow (>=10.0.0,<13.0.0)",
    "pyyaml (>=5.1,<7.0.0)",
    "typing-extensions (>=4.12.2,<5.0.0)",
    "typer (>=0.12.5,<0.25.0)",
    "latex2mathml (>=3.77.0,<4.0.0)",
    "defusedxml (>=0.7.1, <0.8.0)",
    "pydantic-settings>=2.14.0",
]
# Project links: the organisation page plus this repository's source, issue tracker and changelog.
[project.urls]
homepage = "https://github.com/docling-project"
repository = "https://github.com/docling-project/docling-core"
issues = "https://github.com/docling-project/docling-core/issues"
changelog = "https://github.com/docling-project/docling-core/blob/main/CHANGELOG.md"
# Command-line entry points, each mapped to a `module:object` target.
[project.scripts]
# The three entries below are intentionally disabled; as their trailing notes say,
# run those modules with `python -m <module>` instead.
# validate = "docling_core.utils.validate:main" # use python -m docling_core.utils.validate
# generate_jsonschema = "docling_core.utils.generate_jsonschema:main" # use python -m docling_core.utils.generate_jsonschema
# generate_docs = "docling_core.utils.generate_docs:main" # use python -m docling_core.utils.generate_docs
docling-view = "docling_core.cli.view:app"
docling-serialize = "docling_core.cli.serialize:app"
# Optional extras. `chunking` and `chunking-openai` list the same "common" requirements
# and differ only in their final, extra-specific entry; keep the common parts in sync.
[project.optional-dependencies]
chunking = [
    # common:
    "semchunk (>=2.2.0,<4.0.0)",
    "tree-sitter (>=0.25.0,<0.27.0)",
    "tree-sitter-python >=0.23.6",
    "tree-sitter-c >=0.23.4",
    "tree-sitter-javascript >=0.23.1",
    "tree-sitter-typescript >=0.23.2",
    # specific:
    "transformers (>=4.34.0,<6.0.0)",
]
chunking-openai = [
    # common:
    "semchunk (>=2.2.0,<4.0.0)",
    "tree-sitter (>=0.25.0,<0.27.0)",
    "tree-sitter-python >=0.23.6",
    "tree-sitter-c >=0.23.4",
    "tree-sitter-javascript >=0.23.1",
    "tree-sitter-typescript >=0.23.2",
    # specific:
    "tiktoken (>=0.9.0,<0.13.0)",
]
examples = [
    "datasets>=4.0.0",
    "matplotlib>=3.7.0",
    "openpyxl>=3.1.5",
]
# Development-only requirement groups.
[dependency-groups]
dev = [
    "pre-commit~=3.7",
    "mypy~=1.6",
    "black~=26.3.1",
    "isort~=5.10",
    "autoflake~=2.0",
    "flake8~=7.1",
    "pycodestyle~=2.10",
    "flake8-docstrings~=1.6",
    "pep8-naming~=0.13",
    "jsondiff~=2.0",
    "types-setuptools~=70.3",
    "pandas-stubs~=2.1",
    "ipykernel~=6.29",
    "coverage~=7.6",
    "pytest~=8.3",
    "pytest-cov>=6.1.1",
    "python-semantic-release~=7.32",
    # Literal (single-quoted) string because the environment marker contains double quotes.
    'tree-sitter-java-orchard (>=0.3.0,<1.0.0); python_version >= "3.10"',
    "ruff>=0.14.11",
    "types-defusedxml (>=0.7.0.20250822, <0.8.0)",
]
# pandas 3.x is only permitted on Python >= 3.11; older interpreters stay below 3.0.0.
# NOTE(review): how this group is applied during resolution is not visible here — confirm.
constraints = [
    'pandas (>=2.1.4,<3.0.0); python_version < "3.11"',
    'pandas (>=2.1.4,<4.0.0); python_version >= "3.11"',
]
# uv settings: treat this project as a package (built and installed), not just a set of dependencies.
[tool.uv]
package = true
# setuptools package discovery: search the repository root and the bundled schemas directory,
# keeping only packages whose name matches `docling_core*`.
[tool.setuptools.packages.find]
where = [".", "docling_core/resources/schemas"]
include = ["docling_core*"]
# Also discover directories that have no __init__.py (namespace packages).
namespaces = true
# Ship every *.json file found inside any discovered package ("*" = all packages).
[tool.setuptools.package-data]
"*" = ["*.json"]
# Global ruff settings shared by the linter and the formatter.
[tool.ruff]
# py310 matches the lowest interpreter targeted by this project.
target-version = "py310"
line-length = 120
respect-gitignore = true
# Test fixtures are data, not code to be linted or formatted.
exclude = ["test/data/**"]
# Formatter options.
[tool.ruff.format]
# Explicitly keep magic-trailing-comma handling enabled (do not skip it).
skip-magic-trailing-comma = false
# Lint rule selection: `select` enables rule families, `ignore` then switches off individual codes.
[tool.ruff.lint]
select = [
"C", # flake8-comprehensions; NOTE(review): the bare "C" prefix presumably also matches C90 (mccabe) — confirm
"C9", # mccabe
"E", # pycodestyle errors (default)
"F", # pyflakes (default)
"I", # isort
"PD", # pandas-vet
"PIE", # pie
"Q", # flake8-quotes
"RUF", # Enable all ruff-specific checks
"S307", # eval
"W", # pycodestyle warnings
"ASYNC", # async
"UP", # pyupgrade
]
# NOTE(review): "D107" and "PL" below belong to families that are not listed in `select`
# above, so those two entries look like no-ops — confirm before removing.
ignore = [
"C408", # Unnecessary `dict()` call (rewrite as a literal)
"E501", # Line too long, handled by ruff formatter
"E741", # Ambiguous variable name: `l`
"D107", # Missing docstring in `__init__`
"F401", # Imported but unused; consider using `importlib.util.find_spec` to test for availability
"PL", # Pylint
"RUF005", # Consider ... instead of concatenation
"RUF012", # Mutable Class Attributes
"UP007", # `Optional` and `Union` annotations (use `X | Y`)
"UP045", # Use `X | None` for type annotations
]
# Decorators that the pep8-naming rules should treat as marking a classmethod.
# NOTE(review): no `N` rule family is listed in the `select` array of [tool.ruff.lint] in this
# file, so this table may currently have no effect — confirm.
[tool.ruff.lint.pep8-naming]
classmethod-decorators = [
"classmethod",
"validator",
"pydantic.validator",
]
# Docstring convention used by the pydocstyle rules.
# NOTE(review): no `D` rule family is listed in the `select` array of [tool.ruff.lint] in this
# file, so this setting may currently have no effect — confirm.
[tool.ruff.lint.pydocstyle]
convention = "google"
# Rule codes switched off for files matching each glob pattern.
[tool.ruff.lint.per-file-ignores]
# Package __init__ files: allow imports that are not at the top (E402) and unused re-export imports (F401).
"__init__.py" = ["E402", "F401"]
"test/*.py" = ["ASYNC"] # Disable ASYNC check for tests
"*.ipynb" = ["I"] # Disable import sorting for notebooks
# Complexity threshold for the mccabe check: only functions above this value are reported.
[tool.ruff.lint.mccabe]
max-complexity = 30
# Import-sorting options.
[tool.ruff.lint.isort]
# Explicitly disabled: `import ... as ...` members are not combined onto a shared import line.
combine-as-imports = false
# TODO: move to schema-compatible location
# [tool.ruff.lint.autoflake]
# in-place = true
# ignore-init-module-imports = true
# remove-all-unused-imports = true
# remove-unused-variables = true
# expand-star-imports = true
# recursive = true
# Global mypy settings.
[tool.mypy]
pretty = true
no_implicit_optional = true
namespace_packages = true
show_error_codes = true
# Type-check against the lowest interpreter version the project targets.
python_version = "3.10"
plugins = ["pydantic.mypy"]
# Regex: skip everything under any `test/data/` directory.
exclude = "(^|/)test/data/.*"
# Modules listed here are exempt from mypy's missing-import errors.
[[tool.mypy.overrides]]
module = [
    "jsondiff.*",
    "jsonref.*",
    "jsonschema.*",
    "requests.*",
    "semchunk.*",
    "tabulate.*",
    "transformers.*",
    "tree_sitter_java_orchard.*",
    "yaml.*",
]
ignore_missing_imports = true
# Test modules may define functions without type annotations.
[[tool.mypy.overrides]]
module = ["test.*"]
disallow_untyped_defs = false
# Release automation settings for python-semantic-release.
[tool.semantic_release]
# for default values check:
# https://github.com/python-semantic-release/python-semantic-release/blob/v7.32.2/semantic_release/defaults.cfg
# NOTE(review): "tag_only" presumably means the current version is read from git tags — confirm.
version_source = "tag_only"
branch = "main"
# configure types which should trigger minor and patch version bumps respectively
# (note that they must be a subset of the configured allowed types):
parser_angular_allowed_types = "build,chore,ci,docs,feat,fix,perf,style,refactor,test"
parser_angular_minor_types = "feat"
parser_angular_patch_types = "fix,perf"