# Mirror of https://github.com/docling-project/docling-core.git
# Synced 2026-05-17 13:10:44 +00:00
# 249 lines, 7.2 KiB, TOML
# Core package metadata.
[project]
name = "docling-core"
version = "2.75.0" # DO NOT EDIT, updated automatically
description = "A python library to define and validate data types in Docling."
license = "MIT"
license-files = ["LICENSE"]
keywords = [
    "docling",
    "discovery",
    "etl",
    "information retrieval",
    "analytics",
    "database",
    "database schema",
    "schema",
    "JSON",
]
readme = "README.md"
authors = [
    { name = "Cesar Berrospi Ramis", email = "ceb@zurich.ibm.com" },
    { name = "Panos Vagenas", email = "pva@zurich.ibm.com" },
    { name = "Michele Dolfi", email = "dol@zurich.ibm.com" },
    { name = "Christoph Auer", email = "cau@zurich.ibm.com" },
    { name = "Peter Staar", email = "taa@zurich.ibm.com" },
]
maintainers = [
    { name = "Panos Vagenas", email = "pva@zurich.ibm.com" },
    { name = "Michele Dolfi", email = "dol@zurich.ibm.com" },
    { name = "Christoph Auer", email = "cau@zurich.ibm.com" },
    { name = "Peter Staar", email = "taa@zurich.ibm.com" },
    { name = "Cesar Berrospi Ramis", email = "ceb@zurich.ibm.com" },
]
classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "Intended Audience :: Science/Research",
    "Natural Language :: English",
    "Operating System :: OS Independent",
    "Topic :: Database",
    "Topic :: Scientific/Engineering :: Information Analysis",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Typing :: Typed",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Programming Language :: Python :: 3.14",
]
requires-python = ">=3.10,<4.0"
# Runtime requirements (PEP 508 strings).
dependencies = [
    "jsonschema (>=4.16.0,<5.0.0)",
    # pydantic 2.10.0 - 2.10.2 are explicitly excluded.
    "pydantic (>=2.6.0,<3.0.0,!=2.10.0,!=2.10.1,!=2.10.2)",
    "jsonref (>=1.1.0,<2.0.0)",
    "tabulate (>=0.9.0,<0.11.0)",
    "pandas (>=2.1.4,<4.0.0)",
    "pillow (>=10.0.0,<13.0.0)",
    "pyyaml (>=5.1,<7.0.0)",
    "typing-extensions (>=4.12.2,<5.0.0)",
    "typer (>=0.12.5,<0.25.0)",
    "latex2mathml (>=3.77.0,<4.0.0)",
    "defusedxml (>=0.7.1, <0.8.0)",
    "pydantic-settings>=2.14.0",
]

# Project links.
[project.urls]
homepage = "https://github.com/docling-project"
repository = "https://github.com/docling-project/docling-core"
issues = "https://github.com/docling-project/docling-core/issues"
changelog = "https://github.com/docling-project/docling-core/blob/main/CHANGELOG.md"

# Console entry points. The commented-out utilities are not installed as
# scripts; run them as modules instead, as noted on each line.
[project.scripts]
# validate = "docling_core.utils.validate:main" # use python -m docling_core.utils.validate
# generate_jsonschema = "docling_core.utils.generate_jsonschema:main" # use python -m docling_core.utils.generate_jsonschema
# generate_docs = "docling_core.utils.generate_docs:main" # use python -m docling_core.utils.generate_docs
docling-view = "docling_core.cli.view:app"
docling-serialize = "docling_core.cli.serialize:app"

# Optional extras. `chunking` and `chunking-openai` list the same "common"
# packages and differ only in their "specific" entry; TOML has no way to share
# a list between keys, so keep the two "common" sections in sync by hand.
[project.optional-dependencies]
chunking = [
    # common:
    "semchunk (>=2.2.0,<4.0.0)",
    "tree-sitter (>=0.25.0,<0.27.0)",
    "tree-sitter-python >=0.23.6",
    "tree-sitter-c >=0.23.4",
    "tree-sitter-javascript >=0.23.1",
    "tree-sitter-typescript >=0.23.2",

    # specific:
    "transformers (>=4.34.0,<6.0.0)",
]
chunking-openai = [
    # common:
    "semchunk (>=2.2.0,<4.0.0)",
    "tree-sitter (>=0.25.0,<0.27.0)",
    "tree-sitter-python >=0.23.6",
    "tree-sitter-c >=0.23.4",
    "tree-sitter-javascript >=0.23.1",
    "tree-sitter-typescript >=0.23.2",

    # specific:
    "tiktoken (>=0.9.0,<0.13.0)",
]
examples = [
    "datasets>=4.0.0",
    "matplotlib>=3.7.0",
    "openpyxl>=3.1.5",
]

# Development-only dependency groups (not part of the published metadata).
[dependency-groups]
dev = [
    "pre-commit~=3.7",
    "mypy~=1.6",
    "black~=26.3.1",
    "isort~=5.10",
    "autoflake~=2.0",
    "flake8~=7.1",
    "pycodestyle~=2.10",
    "flake8-docstrings~=1.6",
    "pep8-naming~=0.13",
    "jsondiff~=2.0",
    "types-setuptools~=70.3",
    "pandas-stubs~=2.1",
    "ipykernel~=6.29",
    "coverage~=7.6",
    "pytest~=8.3",
    "pytest-cov>=6.1.1",
    "python-semantic-release~=7.32",
    # Literal (single-quoted) string so the marker's double quotes need no escaping.
    'tree-sitter-java-orchard (>=0.3.0,<1.0.0); python_version >= "3.10"',
    "ruff>=0.14.11",
    "types-defusedxml (>=0.7.0.20250822, <0.8.0)",
]
# pandas is capped below 3.0.0 on Python < 3.11 and below 4.0.0 on Python >= 3.11.
constraints = [
    'pandas (>=2.1.4,<3.0.0); python_version < "3.11"',
    'pandas (>=2.1.4,<4.0.0); python_version >= "3.11"',
]

# Tell uv to treat this project as a package.
[tool.uv]
package = true

# Package discovery: search the repository root and the bundled schemas
# directory for `docling_core*` packages, namespace packages included.
[tool.setuptools.packages.find]
where = [".", "docling_core/resources/schemas"]
include = ["docling_core*"]
namespaces = true

# Ship `*.json` files with every discovered package.
[tool.setuptools.package-data]
"*" = ["*.json"]

# Ruff: global settings. Test fixtures under test/data are excluded.
[tool.ruff]
target-version = "py310"
line-length = 120
respect-gitignore = true
exclude = [
    "test/data/**",
]

# Ruff formatter settings.
[tool.ruff.format]
skip-magic-trailing-comma = false

# Ruff linter: enabled rule families and individually silenced rules.
[tool.ruff.lint]
select = [
    "C", # flake8-comprehensions
    "C9", # mccabe
    "E", # pycodestyle errors (default)
    "F", # pyflakes (default)
    "I", # isort
    "PD", # pandas-vet
    "PIE", # pie
    "Q", # flake8-quotes
    "RUF", # Enable all ruff-specific checks
    "S307", # eval
    "W", # pycodestyle warnings
    "ASYNC", # async
    "UP", # pyupgrade
]

# NOTE(review): "D107" and "PL" do not match any prefix in `select` above, so
# these two entries look like no-ops unless the rules are enabled elsewhere.
ignore = [
    "C408", # Unnecessary `dict()` call (rewrite as a literal)
    "E501", # Line too long, handled by ruff formatter
    "E741", # Ambiguous variable name: `l`
    "D107", # Missing docstring in `__init__`
    "F401", # imported but unused; consider using `importlib.util.find_spec` to test for availability
    "PL", # Pylint
    "RUF005", # Consider ... instead of concatenation
    "RUF012", # Mutable Class Attributes
    "UP007", # Optional and Union
    "UP045", # Use `X | None` for type annotations
]

# Decorators that the pep8-naming rules should treat as class methods.
# NOTE(review): no `N` rule appears in `select` under [tool.ruff.lint] in this
# file, so this setting may currently have no effect — confirm.
[tool.ruff.lint.pep8-naming]
classmethod-decorators = [
    "classmethod",
    "validator",
    "pydantic.validator",
]

# Docstring convention used by the pydocstyle rules.
[tool.ruff.lint.pydocstyle]
convention = "google"

# Rules silenced for specific file patterns.
[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["E402", "F401"]
"test/*.py" = ["ASYNC"] # Disable ASYNC check for tests
"*.ipynb" = ["I"] # Disable import sorting for notebooks

# Complexity threshold for the mccabe rule.
[tool.ruff.lint.mccabe]
max-complexity = 30

# Import-sorting settings.
[tool.ruff.lint.isort]
combine-as-imports = false

# TODO: move to a schema-compatible location (presumably `[tool.autoflake]` — confirm).
# [tool.ruff.lint.autoflake]
# in-place = true
# ignore-init-module-imports = true
# remove-all-unused-imports = true
# remove-unused-variables = true
# expand-star-imports = true
# recursive = true

# mypy: global type-checking settings. Test fixtures under test/data are excluded.
[tool.mypy]
pretty = true
no_implicit_optional = true
namespace_packages = true
show_error_codes = true
python_version = "3.10"
plugins = ["pydantic.mypy"]
exclude = "(^|/)test/data/.*"

# Modules for which mypy should not report missing imports.
[[tool.mypy.overrides]]
module = [
    "jsondiff.*",
    "jsonref.*",
    "jsonschema.*",
    "requests.*",
    "semchunk.*",
    "tabulate.*",
    "transformers.*",
    "tree_sitter_java_orchard.*",
    "yaml.*",
]
ignore_missing_imports = true

# Test modules may contain untyped function definitions.
[[tool.mypy.overrides]]
module = ["test.*"]
disallow_untyped_defs = false

[tool.semantic_release]
# for default values check:
# https://github.com/python-semantic-release/python-semantic-release/blob/v7.32.2/semantic_release/defaults.cfg

version_source = "tag_only"
branch = "main"

# configure types which should trigger minor and patch version bumps respectively
# (note that they must be a subset of the configured allowed types):
parser_angular_allowed_types = "build,chore,ci,docs,feat,fix,perf,style,refactor,test"
parser_angular_minor_types = "feat"
parser_angular_patch_types = "fix,perf"