mirror of
https://github.com/docling-project/docling-parse.git
synced 2026-05-17 13:10:49 +00:00
@@ -1,26 +0,0 @@
|
||||
name: 'Set up Poetry and install'
|
||||
description: 'Set up a specific version of Poetry and install dependencies using caching.'
|
||||
inputs:
|
||||
python-version:
|
||||
description: "Version range or exact version of Python or PyPy to use, using SemVer's version range syntax."
|
||||
default: '3.11'
|
||||
runs:
|
||||
using: 'composite'
|
||||
steps:
|
||||
- name: Install poetry
|
||||
run: pipx install poetry==1.8.4
|
||||
shell: bash
|
||||
- uses: actions/setup-python@v5
|
||||
id: py
|
||||
with:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
update-environment: false
|
||||
cache: 'poetry'
|
||||
- name: Setup poetry env with correct python
|
||||
run: |
|
||||
poetry env use ${{ steps.py.outputs.python-path }}
|
||||
poetry run python --version
|
||||
shell: bash
|
||||
- name: Install only dependencies and not the package itself
|
||||
run: poetry install --all-extras --no-root
|
||||
shell: bash
|
||||
@@ -3,8 +3,8 @@
|
||||
set -e # trigger failure on error - do not remove!
|
||||
set -x # display command on output
|
||||
|
||||
# Build the Python package with Poetry
|
||||
poetry build -f sdist
|
||||
# Build the Python package with uv
|
||||
uv build --sdist
|
||||
|
||||
sudo -E XDG_RUNTIME_DIR= podman build --progress=plain \
|
||||
--build-arg USE_SYSTEM_DEPS="$USE_SYSTEM_DEPS" \
|
||||
@@ -35,7 +35,8 @@ sudo -E XDG_RUNTIME_DIR= podman build --progress=plain \
|
||||
# pre-install build requirements + wheel for "--no-build-isolation"
|
||||
# build docling-parse wheel in an isolated network namespace (unshare -rn)
|
||||
# install the wheel and its dependencies
|
||||
RUN pip3.11 install poetry-core pybind11 wheel \
|
||||
RUN pip3.11 install --upgrade pip \
|
||||
&& pip3.11 install --upgrade --ignore-installed "setuptools>=77.0.3" "wheel>=0.43.0,<1.0.0" "pybind11>=2.13.6" \
|
||||
&& unshare -rn pip3.11 wheel \
|
||||
--no-deps --no-build-isolation -w /dist/ \
|
||||
/src/docling_parse*.tar.gz \
|
||||
@@ -46,4 +47,4 @@ sudo -E XDG_RUNTIME_DIR= podman build --progress=plain \
|
||||
|
||||
RUN pip3.11 install pytest \
|
||||
&& pytest
|
||||
EOF
|
||||
EOF
|
||||
@@ -10,11 +10,11 @@ fi
|
||||
CHGLOG_FILE="${CHGLOG_FILE:-CHANGELOG.md}"
|
||||
|
||||
# update package version
|
||||
poetry version "${TARGET_VERSION}"
|
||||
uv version "${TARGET_VERSION}"
|
||||
|
||||
# collect release notes
|
||||
REL_NOTES=$(mktemp)
|
||||
poetry run semantic-release changelog --unreleased >> "${REL_NOTES}"
|
||||
uv run semantic-release changelog --unreleased >> "${REL_NOTES}"
|
||||
|
||||
# update changelog
|
||||
TMP_CHGLOG=$(mktemp)
|
||||
|
||||
@@ -11,27 +11,28 @@ jobs:
|
||||
code-checks:
|
||||
uses: ./.github/workflows/checks.yml
|
||||
pre-release-check:
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-24.04
|
||||
outputs:
|
||||
TARGET_TAG_V: ${{ steps.version_check.outputs.TRGT_VERSION }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0 # for fetching tags, required for semantic-release
|
||||
- uses: ./.github/actions/setup-poetry
|
||||
- name: Install uv and set the python version
|
||||
# Reference the published action as owner/repo@ref. A leading "./" would be
# resolved as a directory inside the checked-out repository, and local path
# references cannot carry an "@ref" suffix.
uses: astral-sh/setup-uv@v5
|
||||
- name: Check version of potential release
|
||||
id: version_check
|
||||
run: |
|
||||
TRGT_VERSION=$(poetry run semantic-release print-version)
|
||||
TRGT_VERSION=$(uv run semantic-release print-version)
|
||||
echo "TRGT_VERSION=${TRGT_VERSION}" >> $GITHUB_OUTPUT
|
||||
echo "${TRGT_VERSION}"
|
||||
- name: Check notes of potential release
|
||||
run: poetry run semantic-release changelog --unreleased
|
||||
run: uv run semantic-release changelog --unreleased
|
||||
release:
|
||||
needs: [code-checks, pre-release-check]
|
||||
if: needs.pre-release-check.outputs.TARGET_TAG_V != ''
|
||||
environment: auto-release
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-24.04
|
||||
concurrency: release
|
||||
steps:
|
||||
- uses: actions/create-github-app-token@v1
|
||||
@@ -43,7 +44,8 @@ jobs:
|
||||
with:
|
||||
token: ${{ steps.app-token.outputs.token }}
|
||||
fetch-depth: 0 # for fetching tags, required for semantic-release
|
||||
- uses: ./.github/actions/setup-poetry
|
||||
- name: Install uv and set the python version
|
||||
# Reference the published action as owner/repo@ref. A leading "./" would be
# resolved as a directory inside the checked-out repository, and local path
# references cannot carry an "@ref" suffix.
uses: astral-sh/setup-uv@v5
|
||||
- name: Run release script
|
||||
env:
|
||||
GH_TOKEN: ${{ steps.app-token.outputs.token }}
|
||||
|
||||
@@ -6,24 +6,26 @@ env:
|
||||
|
||||
jobs:
|
||||
run-checks:
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-24.04
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: ./.github/actions/setup-poetry
|
||||
- name: Install uv and set the python version
|
||||
uses: astral-sh/setup-uv@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Run styling check
|
||||
run: poetry run pre-commit run --all-files
|
||||
- name: Install with poetry
|
||||
- name: Sync and install with uv
|
||||
run: |
|
||||
poetry install --all-extras
|
||||
uv sync --frozen --all-extras
|
||||
ls -l
|
||||
ls -l docling_parse
|
||||
- name: Run styling check
|
||||
run: |
|
||||
uv run pre-commit run --all-files
|
||||
- name: Testing
|
||||
run: |
|
||||
poetry run pytest -v tests
|
||||
- name: Build with poetry
|
||||
run: poetry build
|
||||
uv run pytest -v tests
|
||||
- name: Build with uv
|
||||
run: uv build
|
||||
|
||||
@@ -1,12 +1,28 @@
|
||||
on:
|
||||
workflow_call:
|
||||
|
||||
# jobs:
|
||||
# run-checks:
|
||||
# runs-on: ubuntu-latest
|
||||
# steps:
|
||||
# - uses: actions/checkout@v4
|
||||
# - name: Install uv and python
|
||||
# uses: astral-sh/setup-uv@v5
|
||||
# - name: Install podman
|
||||
# run: sudo apt-get update && sudo apt-get install -y podman
|
||||
# - name: Run build in docker
|
||||
# run: ./.github/scripts/build_rhel.sh
|
||||
# shell: bash
|
||||
|
||||
jobs:
|
||||
run-checks:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: ./.github/actions/setup-poetry
|
||||
- name: Setup uv with python 3.11
|
||||
uses: astral-sh/setup-uv@v5
|
||||
with:
|
||||
python-version: 3.11
|
||||
- name: Install podman
|
||||
run: sudo apt-get update && sudo apt-get install -y podman
|
||||
- name: Run build in docker
|
||||
|
||||
@@ -12,19 +12,18 @@ env:
|
||||
jobs:
|
||||
build_sdist:
|
||||
name: Build sdist artifacts
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-24.04
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v5
|
||||
- name: Install poetry
|
||||
- name: Install uv and set the python version
|
||||
uses: astral-sh/setup-uv@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Build sdist
|
||||
run: |
|
||||
pipx install poetry
|
||||
- name: Build sdit
|
||||
run: |
|
||||
poetry build -f sdist
|
||||
uv build
|
||||
ls ./dist
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
@@ -44,11 +43,11 @@ jobs:
|
||||
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
|
||||
|
||||
os:
|
||||
- name: "ubuntu-22.04"
|
||||
- name: "ubuntu-24.04"
|
||||
platform: "linux"
|
||||
platform_id: "manylinux_x86_64"
|
||||
|
||||
- name: "ubuntu-22.04-arm"
|
||||
- name: "ubuntu-24.04-arm"
|
||||
platform: "linux"
|
||||
platform_id: "manylinux_aarch64"
|
||||
|
||||
@@ -70,23 +69,11 @@ jobs:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v5
|
||||
- name: Install uv and set the python version
|
||||
uses: astral-sh/setup-uv@v5
|
||||
id: py
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
update-environment: false
|
||||
|
||||
- name: Install Poetry
|
||||
run: |
|
||||
which python
|
||||
python --version
|
||||
which python3
|
||||
python3 --version
|
||||
echo "pythonpath: ${{ steps.py.outputs.python-path }}"
|
||||
${{ steps.py.outputs.python-path }} --version
|
||||
pipx install poetry==1.8.4
|
||||
poetry env use ${{ steps.py.outputs.python-path }}
|
||||
|
||||
- name: Set up custom PATH and set py version to cpXYZ [windows]
|
||||
if: ${{matrix.os.platform_id == 'win_amd64'}}
|
||||
@@ -124,32 +111,33 @@ jobs:
|
||||
BUILD_THREADS: "4"
|
||||
PYTORCH_MPS_HIGH_WATERMARK_RATIO: "0.0"
|
||||
run: |
|
||||
PY_CACHE_TAG=$(poetry run python -c 'import sys;print(sys.implementation.cache_tag)')
|
||||
PY_CACHE_TAG=$(uv run python -c 'import sys;print(sys.implementation.cache_tag)')
|
||||
echo "Building wheel ${CIBW_BUILD} ${{ env.CIBW_BUILD }}"
|
||||
echo "Building cp: ${{ env.python_cp_version }}"
|
||||
echo "Building cache_tag: ${PY_CACHE_TAG}"
|
||||
echo "Building platform_id: ${{ matrix.os.platform_id }}"
|
||||
poetry run python --version
|
||||
poetry run python --version | grep ${{ matrix.python-version }}
|
||||
poetry install --no-root --only=build
|
||||
uv run python --version
|
||||
uv run python --version | grep ${{ matrix.python-version }}
|
||||
cat ./pyproject.toml
|
||||
poetry run python -m cibuildwheel --output-dir wheelhouse
|
||||
uv pip install --group build
|
||||
rm -rf ./build || true
|
||||
python -m cibuildwheel --output-dir ./wheelhouse
|
||||
echo "step 1"
|
||||
ls -l wheelhouse
|
||||
poetry run wheel tags --remove --platform-tag macosx_${{ matrix.os.min_macos_version }}_0_x86_64 ./wheelhouse/*.whl
|
||||
uv run wheel tags --remove --platform-tag macosx_${{ matrix.os.min_macos_version }}_0_x86_64 ./wheelhouse/*.whl
|
||||
rm -f ./wheelhouse/*arm64.whl
|
||||
echo "step 2"
|
||||
ls -l wheelhouse
|
||||
poetry run delocate-wheel --require-archs x86_64 -v ./wheelhouse/*.whl
|
||||
uv run delocate-wheel --require-archs x86_64 -v ./wheelhouse/*.whl
|
||||
echo "step 3"
|
||||
ls -l wheelhouse
|
||||
for file in ./wheelhouse/*.whl; do
|
||||
echo "Inspecting $file"
|
||||
poetry run python -m zipfile --list "$file"
|
||||
uv run python -m zipfile --list "$file"
|
||||
echo "Checking if .so is contained in the wheel"
|
||||
poetry run python -m zipfile --list "$file" | grep \\.so
|
||||
uv run python -m zipfile --list "$file" | grep \\.so
|
||||
echo "Checking if the correct python version is contained in the wheel"
|
||||
poetry run python -m zipfile --list "$file" | grep ${PY_CACHE_TAG}
|
||||
uv run python -m zipfile --list "$file" | grep ${PY_CACHE_TAG}
|
||||
done
|
||||
mkdir -p ./dist
|
||||
cp wheelhouse/*.whl ./dist/
|
||||
@@ -175,32 +163,33 @@ jobs:
|
||||
PYTORCH_MPS_HIGH_WATERMARK_RATIO: "0.0"
|
||||
CUDA_VISIBLE_DEVICES: "cpu"
|
||||
run: |
|
||||
PY_CACHE_TAG=$(poetry run python -c 'import sys;print(sys.implementation.cache_tag)')
|
||||
PY_CACHE_TAG=$(uv run python -c 'import sys;print(sys.implementation.cache_tag)')
|
||||
echo "Building wheel ${CIBW_BUILD} ${{ env.CIBW_BUILD }}"
|
||||
echo "Building cp: ${{ env.python_cp_version }}"
|
||||
echo "Building cache_tag: ${PY_CACHE_TAG}"
|
||||
echo "Building platform_id: ${{ matrix.os.platform_id }}"
|
||||
poetry run python --version
|
||||
poetry run python --version | grep ${{ matrix.python-version }}
|
||||
poetry install --no-root --only=build
|
||||
uv run python --version
|
||||
uv run python --version | grep ${{ matrix.python-version }}
|
||||
cat ./pyproject.toml
|
||||
poetry run python -m cibuildwheel --output-dir wheelhouse
|
||||
uv pip install --group build
|
||||
rm -rf ./build || true
|
||||
python -m cibuildwheel --output-dir ./wheelhouse
|
||||
echo "step 1"
|
||||
ls -l wheelhouse
|
||||
poetry run wheel tags --remove --platform-tag macosx_${{ matrix.os.min_macos_version }}_0_arm64 ./wheelhouse/*.whl
|
||||
uv run wheel tags --remove --platform-tag macosx_${{ matrix.os.min_macos_version }}_0_arm64 ./wheelhouse/*.whl
|
||||
rm -f ./wheelhouse/*x86_64.whl
|
||||
echo "step 2"
|
||||
ls -l wheelhouse
|
||||
poetry run delocate-wheel --require-archs arm64 -v ./wheelhouse/*.whl
|
||||
uv run delocate-wheel --require-archs arm64 -v ./wheelhouse/*.whl
|
||||
echo "step 3"
|
||||
ls -l wheelhouse
|
||||
for file in ./wheelhouse/*.whl; do
|
||||
echo "Inspecting $file"
|
||||
poetry run python -m zipfile --list "$file"
|
||||
uv run python -m zipfile --list "$file"
|
||||
echo "Checking if .so is contained in the wheel"
|
||||
poetry run python -m zipfile --list "$file" | grep \\.so
|
||||
uv run python -m zipfile --list "$file" | grep \\.so
|
||||
echo "Checking if the correct python version is contained in the wheel"
|
||||
poetry run python -m zipfile --list "$file" | grep ${PY_CACHE_TAG}
|
||||
uv run python -m zipfile --list "$file" | grep ${PY_CACHE_TAG}
|
||||
done
|
||||
mkdir -p ./dist
|
||||
cp wheelhouse/*.whl ./dist/
|
||||
@@ -222,23 +211,24 @@ jobs:
|
||||
CIBW_BUILD_VERBOSITY: 3
|
||||
BUILD_THREADS: "8"
|
||||
run: |
|
||||
PY_CACHE_TAG=$(poetry run python -c 'import sys;print(sys.implementation.cache_tag)')
|
||||
PY_CACHE_TAG=$(uv run python -c 'import sys;print(sys.implementation.cache_tag)')
|
||||
echo "Building cp: ${{ env.python_cp_version }}"
|
||||
echo "Building cache_tag: ${PY_CACHE_TAG}"
|
||||
echo "Building platform_id: ${{ matrix.os.platform_id }}"
|
||||
poetry run python --version
|
||||
poetry run python --version | grep ${{ matrix.python-version }}
|
||||
poetry install --no-root --only=build
|
||||
uv run python --version
|
||||
uv run python --version | grep ${{ matrix.python-version }}
|
||||
cat ./pyproject.toml
|
||||
poetry run python -m cibuildwheel --output-dir ./wheelhouse
|
||||
uv pip install --group build
|
||||
rm -rf ./build || true
|
||||
python -m cibuildwheel --output-dir ./wheelhouse
|
||||
ls -l ./wheelhouse
|
||||
for file in ./wheelhouse/*.whl; do
|
||||
echo "Inspecting $file"
|
||||
poetry run python -m zipfile --list "$file"
|
||||
uv run python -m zipfile --list "$file"
|
||||
echo "Checking if .so is contained in the wheel"
|
||||
poetry run python -m zipfile --list "$file" | grep \\.so
|
||||
uv run python -m zipfile --list "$file" | grep \\.so
|
||||
echo "Checking if the correct python version is contained in the wheel"
|
||||
poetry run python -m zipfile --list "$file" | grep ${PY_CACHE_TAG}
|
||||
uv run python -m zipfile --list "$file" | grep ${PY_CACHE_TAG}
|
||||
done
|
||||
mkdir -p ./dist
|
||||
cp wheelhouse/*.whl ./dist/
|
||||
@@ -293,15 +283,20 @@ jobs:
|
||||
ASM_NASM: "C:/nasm/nasm.exe"
|
||||
shell: pwsh
|
||||
run: |
|
||||
Remove-Item -Recurse -Force "C:\Strawberry\"
|
||||
if (Test-Path "C:\Strawberry\") {
|
||||
Remove-Item -Recurse -Force "C:\Strawberry\"
|
||||
}
|
||||
$env:CMAKE_ARGS = "-DZLIB_LIBRARY=C:/windows-libs/external-libs/lib-mingw64/libz.a -DZLIB_INCLUDE_DIR=C:/windows-libs/external-libs/include -DJPEG_LIBRARY=C:/windows-libs/external-libs/lib-mingw64/libjpeg.a -DJPEG_INCLUDE_DIR=C:/windows-libs/external-libs/include"
|
||||
poetry install --no-interaction --no-root --only=build
|
||||
poetry run python -m cibuildwheel --output-dir .\wheelhouse
|
||||
uv pip install --group build
|
||||
if (Test-Path ".\build") {
|
||||
Remove-Item -Recurse -Force ".\build"
|
||||
}
|
||||
uv run python -m cibuildwheel --output-dir .\wheelhouse
|
||||
Get-ChildItem -Path .\wheelhouse -Filter *.whl | Format-List
|
||||
Get-ChildItem -Path .\wheelhouse -Filter *.whl | ForEach-Object {
|
||||
$file = $_.FullName
|
||||
Write-Output "Inspecting $file"
|
||||
poetry run python -m zipfile --list "$file"
|
||||
uv run python -m zipfile --list "$file"
|
||||
}
|
||||
if (-not (Test-Path -Path .\dist)) {
|
||||
New-Item -Path .\dist -ItemType Directory
|
||||
@@ -321,7 +316,7 @@ jobs:
|
||||
needs:
|
||||
- build_sdist
|
||||
- build_wheels
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-24.04
|
||||
environment:
|
||||
name: pypi
|
||||
url: https://pypi.org/p/docling-parse
|
||||
|
||||
@@ -4,7 +4,7 @@ repos:
|
||||
hooks:
|
||||
- id: system
|
||||
name: Black
|
||||
entry: poetry run black docling_parse tests
|
||||
entry: uv run black docling_parse tests
|
||||
pass_filenames: false
|
||||
language: system
|
||||
files: '\.py$'
|
||||
@@ -12,7 +12,7 @@ repos:
|
||||
hooks:
|
||||
- id: system
|
||||
name: isort
|
||||
entry: poetry run isort docling_parse tests
|
||||
entry: uv run isort docling_parse tests
|
||||
pass_filenames: false
|
||||
language: system
|
||||
files: '\.py$'
|
||||
@@ -20,7 +20,7 @@ repos:
|
||||
hooks:
|
||||
- id: autoflake
|
||||
name: autoflake
|
||||
entry: poetry run autoflake docling_parse tests
|
||||
entry: uv run autoflake docling_parse tests
|
||||
pass_filenames: false
|
||||
language: system
|
||||
files: '\.py$'
|
||||
@@ -28,7 +28,7 @@ repos:
|
||||
hooks:
|
||||
- id: mypy
|
||||
name: MyPy
|
||||
entry: poetry run mypy docling_parse tests
|
||||
entry: uv run mypy docling_parse tests
|
||||
pass_filenames: false
|
||||
language: system
|
||||
files: '\.py$'
|
||||
@@ -36,14 +36,14 @@ repos:
|
||||
# hooks:
|
||||
# - id: pytest
|
||||
# name: Pytest
|
||||
# entry: poetry run pytest tests/
|
||||
# entry: uv run pytest tests/
|
||||
# pass_filenames: false
|
||||
# language: system
|
||||
# files: '\.py$'
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: system
|
||||
name: Poetry check
|
||||
entry: poetry check --lock
|
||||
name: uv check
|
||||
entry: uv lock --check
|
||||
pass_filenames: false
|
||||
language: system
|
||||
|
||||
+2
-1
@@ -1,4 +1,5 @@
|
||||
cmake_minimum_required(VERSION 3.12..3.26)
|
||||
# cmake_minimum_required(VERSION 3.12..3.26)
|
||||
cmake_minimum_required(VERSION 3.12)
|
||||
|
||||
project(docling_parse VERSION 1.0.0 LANGUAGES CXX C)
|
||||
#set(CMAKE_VERBOSE_MAKEFILE off)
|
||||
|
||||
+11
-24
@@ -80,46 +80,33 @@ Please feel free to connect with us using the [discussion section](https://githu
|
||||
|
||||
## Developing
|
||||
|
||||
### Usage of Poetry
|
||||
### Usage of uv
|
||||
|
||||
We use Poetry to manage dependencies.
|
||||
We use uv to manage dependencies.
|
||||
|
||||
|
||||
#### Install
|
||||
|
||||
To install, see the documentation here: https://python-poetry.org/docs/master/#installing-with-the-official-installer
|
||||
To install, see the documentation here: https://docs.astral.sh/uv/getting-started/installation/
|
||||
|
||||
1. Install the Poetry globally in your machine
|
||||
1. Install uv on your machine with the standalone installer
|
||||
```bash
|
||||
curl -sSL https://install.python-poetry.org | python3 -
|
||||
# On macOS and Linux.
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
```
|
||||
The installation script will print the installation bin folder `POETRY_BIN` which you need in the next steps.
|
||||
|
||||
2. Make sure Poetry is in your `$PATH`
|
||||
- for `zsh`
|
||||
```sh
|
||||
echo 'export PATH="POETRY_BIN:$PATH"' >> ~/.zshrc
|
||||
```
|
||||
- for `bash`
|
||||
```sh
|
||||
echo 'export PATH="POETRY_BIN:$PATH"' >> ~/.bashrc
|
||||
```
|
||||
|
||||
3. The official guidelines linked above include useful details on the configuration of autocomplete for most shell environments, e.g. Bash and Zsh.
|
||||
|
||||
|
||||
#### Create a Virtual Environment and Install Dependencies
|
||||
|
||||
To activate the Virtual Environment, run:
|
||||
|
||||
```bash
|
||||
poetry shell
|
||||
uv venv
source .venv/bin/activate
|
||||
```
|
||||
|
||||
To spawn a shell with the Virtual Environment activated. If the Virtual Environment doesn't exist, Poetry will create one for you. Then, to install dependencies, run:
|
||||
This gives you a shell with the Virtual Environment activated. If the Virtual Environment doesn't exist, uv will create one for you. Then, to install dependencies, run:
|
||||
|
||||
```bash
|
||||
poetry install
|
||||
uv sync
|
||||
```
|
||||
|
||||
**(Advanced) Use a Specific Python Version**
|
||||
@@ -127,7 +114,7 @@ poetry install
|
||||
If for whatever reason you need to work in a specific (older) version of Python, run:
|
||||
|
||||
```bash
|
||||
poetry env use $(which python3.9)
|
||||
uv venv --python $(which python3.9)
|
||||
```
|
||||
|
||||
This creates a Virtual Environment with Python 3.9. For other versions, replace `$(which python3.9)` by the path to the interpreter (e.g., `/usr/bin/python3.9`) or use `$(which pythonX.Y)`.
|
||||
@@ -136,7 +123,7 @@ This creates a Virtual Environment with Python 3.9. For other versions, replace
|
||||
#### Add a new dependency
|
||||
|
||||
```bash
|
||||
poetry add NAME
|
||||
uv add NAME
|
||||
```
|
||||
|
||||
## Coding style guidelines
|
||||
|
||||
+14
@@ -0,0 +1,14 @@
|
||||
include CMakeLists.txt
|
||||
include build.py
|
||||
include *.md
|
||||
include uv.lock
|
||||
include LICENSE
|
||||
|
||||
recursive-include app *
|
||||
recursive-include cmake *
|
||||
recursive-include src *
|
||||
recursive-include tests *
|
||||
|
||||
recursive-include docling_parse *.so *.pyd *.dll
|
||||
recursive-include docling_parse/pdf_resources *
|
||||
recursive-include docling_parse/pdf_resources_v2 *
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
[](https://pypi.org/project/docling-parse/)
|
||||
[](https://pypi.org/project/docling-parse/)
|
||||
[](https://python-poetry.org/)
|
||||
[](https://github.com/astral-sh/uv)
|
||||
[](https://github.com/pybind/pybind11/)
|
||||
[](https://github.com/docling-project/docling-parse/)
|
||||
[](https://opensource.org/licenses/MIT)
|
||||
@@ -12,7 +12,7 @@ Simple package to extract text, paths and bitmap images with coordinates from pr
|
||||
To do the visualizations yourself, simply run (change `word` into `char` or `line`),
|
||||
|
||||
```sh
|
||||
poetry run python ./docling_parse/visualize.py -i <path-to-pdf-file> -c word --interactive
|
||||
uv run python ./docling_parse/visualize.py -i <path-to-pdf-file> -c word --interactive
|
||||
```
|
||||
|
||||
<table>
|
||||
@@ -183,16 +183,16 @@ If you dont have an input file, then a template input file will be printed on th
|
||||
|
||||
### Python
|
||||
|
||||
To build the package, simply run (make sure [poetry](https://python-poetry.org/) is [installed](https://python-poetry.org/docs/#installing-with-the-official-installer)),
|
||||
To build the package, simply run (make sure [uv](https://docs.astral.sh/uv/) is [installed](https://docs.astral.sh/uv/getting-started/installation)),
|
||||
|
||||
```
|
||||
poetry install
|
||||
uv sync
|
||||
```
|
||||
|
||||
To test the package, run:
|
||||
|
||||
```
|
||||
poetry run pytest ./tests -v -s
|
||||
uv run pytest ./tests -v -s
|
||||
```
|
||||
|
||||
|
||||
|
||||
@@ -23,7 +23,8 @@ else()
|
||||
include(CMakeParseArguments)
|
||||
|
||||
set(CXXOPTS_URL https://github.com/jarro2783/cxxopts.git)
|
||||
set(CXXOPTS_TAG v3.2.0)
|
||||
# set(CXXOPTS_TAG v3.2.0)
|
||||
set(CXXOPTS_TAG v3.3.1)
|
||||
|
||||
ExternalProject_Add(extlib_cxxopts
|
||||
|
||||
|
||||
@@ -17,8 +17,8 @@ else()
|
||||
include(CMakeParseArguments)
|
||||
|
||||
set(JPEG_URL https://github.com/libjpeg-turbo/libjpeg-turbo.git)
|
||||
set(JPEG_TAG 3.0.3)
|
||||
|
||||
# set(JPEG_TAG 3.0.3)
|
||||
set(JPEG_TAG 3.1.1)
|
||||
ExternalProject_Add(extlib_jpeg
|
||||
|
||||
PREFIX extlib_jpeg
|
||||
|
||||
@@ -16,8 +16,8 @@ else()
|
||||
include(CMakeParseArguments)
|
||||
|
||||
set(JSON_URL https://github.com/nlohmann/json.git)
|
||||
set(JSON_TAG v3.11.3)
|
||||
|
||||
# set(JSON_TAG v3.11.3)
|
||||
set(JSON_TAG v3.12.0)
|
||||
ExternalProject_Add(extlib_json
|
||||
|
||||
PREFIX extlib_json
|
||||
|
||||
@@ -5,7 +5,8 @@ include(ExternalProject)
|
||||
include(CMakeParseArguments)
|
||||
|
||||
set(PYBIND11_URL https://github.com/pybind/pybind11.git)
|
||||
set(PYBIND11_TAG v2.13.5)
|
||||
# set(PYBIND11_TAG v2.13.5)
|
||||
set(PYBIND11_TAG v2.13.6)
|
||||
|
||||
ExternalProject_Add(extlib_pybind11
|
||||
PREFIX extlib_pybind11
|
||||
|
||||
@@ -18,8 +18,8 @@ else()
|
||||
include(CMakeParseArguments)
|
||||
|
||||
set(QPDF_URL https://github.com/qpdf/qpdf.git)
|
||||
set(QPDF_TAG v11.9.1 )
|
||||
|
||||
#set(QPDF_TAG v11.9.1 )
|
||||
set(QPDF_TAG v11.10.0 )
|
||||
set(QPDF_LIB ${EXTERNALS_PREFIX_PATH}/lib/libqpdf.a)
|
||||
set(JPEG_LIB ${EXTERNALS_PREFIX_PATH}/lib/libjpeg.a)
|
||||
|
||||
|
||||
@@ -19,8 +19,8 @@ else()
|
||||
include(CMakeParseArguments)
|
||||
|
||||
set(UTF8_URL https://github.com/nemtrif/utfcpp.git)
|
||||
set(UTF8_TAG v4.0.5)
|
||||
|
||||
# set(UTF8_TAG v4.0.5)
|
||||
set(UTF8_TAG v4.0.6)
|
||||
ExternalProject_Add(extlib_utf8
|
||||
PREFIX extlib_utf8
|
||||
|
||||
|
||||
Generated
-2780
File diff suppressed because it is too large
Load Diff
+73
-76
@@ -1,20 +1,10 @@
|
||||
[tool.poetry]
|
||||
[project]
|
||||
name = "docling-parse"
|
||||
version = "4.1.0"
|
||||
description = "Simple package to extract text with coordinates from programmatic PDFs"
|
||||
authors = ["Peter Staar <taa@zurich.ibm.com>"]
|
||||
maintainers = [
|
||||
"Peter Staar <taa@zurich.ibm.com>",
|
||||
"Christoph Auer <cau@zurich.ibm.com>",
|
||||
"Michele Dolfi <dol@zurich.ibm.com>",
|
||||
"Panos Vagenas <pva@zurich.ibm.com>",
|
||||
"Maxim Lysak <mly@zurich.ibm.com>",
|
||||
]
|
||||
repository = "https://github.com/docling-project/docling-parse"
|
||||
homepage = "https://github.com/docling-project/docling-parse"
|
||||
keywords= ["docling", "pdf", "parser"]
|
||||
license = "MIT"
|
||||
keywords = ["docling", "pdf", "parser"]
|
||||
classifiers = [
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Operating System :: MacOS :: MacOS X",
|
||||
"Operating System :: POSIX :: Linux",
|
||||
"Operating System :: Microsoft :: Windows",
|
||||
@@ -24,59 +14,78 @@ classifiers = [
|
||||
"Programming Language :: C++",
|
||||
"Programming Language :: Python :: 3"
|
||||
]
|
||||
license = "MIT"
|
||||
readme = "README.md"
|
||||
packages = [{include = "docling_parse"}]
|
||||
include = [
|
||||
{path = "docling_parse/*.so", format = "wheel"},
|
||||
{path = "docling_parse/*.pyd", format = "wheel"},
|
||||
{path = "docling_parse/*.dll", format = "wheel"},
|
||||
{path = "docling_parse/pdf_resources", format = ["sdist", "wheel"]},
|
||||
{path = "docling_parse/pdf_resources", format = ["sdist", "wheel"]},
|
||||
{path = "docling_parse/pdf_resources_v2", format = ["sdist", "wheel"]},
|
||||
{path = "CMakeLists.txt", format = "sdist"},
|
||||
{path = "build.py", format = "sdist"},
|
||||
{path = "*.md", format = "sdist"},
|
||||
{path = "poetry.lock", format = "sdist"},
|
||||
{path = "app/*.cpp", format = "sdist"},
|
||||
{path = "cmake/", format = "sdist"},
|
||||
{path = "app/", format = "sdist"},
|
||||
{path = "src/", format = "sdist"},
|
||||
{path = "tests/", format = "sdist"},
|
||||
authors = [
|
||||
{name = "Peter Staar", email = "taa@zurich.ibm.com"},
|
||||
{name = "Christoph Auer", email = "cau@zurich.ibm.com"},
|
||||
{name = "Michele Dolfi", email = "dol@zurich.ibm.com"},
|
||||
{name = "Panos Vagenas", email = "pva@zurich.ibm.com"},
|
||||
{name = "Maxim Lysak", email = "mly@zurich.ibm.com"},
|
||||
]
|
||||
build = "build.py"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.9"
|
||||
tabulate = ">=0.9.0,<1.0.0"
|
||||
pywin32 = { version = ">=305", markers = "sys_platform == 'win32'" }
|
||||
pillow = ">=10.0.0,<12.0.0"
|
||||
pydantic = "^2.0.0"
|
||||
docling-core = "^2.29.0"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
pytest = "^7.4.2"
|
||||
black = {extras = ["jupyter"], version = "^24.4.2"}
|
||||
python-semantic-release = "^7.32.2"
|
||||
pre-commit = "^3.7.1"
|
||||
isort = "^5.10.1"
|
||||
mypy = "^1.13.0"
|
||||
tqdm = "^4.67.0"
|
||||
boto = "^2.49.0"
|
||||
boto3 = "^1.35.67"
|
||||
autoflake = "^2.3.1"
|
||||
|
||||
[tool.poetry.group.build.dependencies]
|
||||
cibuildwheel = "^2.19.2"
|
||||
wheel = "^0.43.0"
|
||||
delocate = "^0.11.0"
|
||||
|
||||
[tool.poetry.group.test.dependencies]
|
||||
pytest = "^7.4.2"
|
||||
requires-python = ">=3.9"
|
||||
dependencies = [
|
||||
"tabulate>=0.9.0,<1.0.0",
|
||||
"pillow>=10.0.0,<12.0.0",
|
||||
"pydantic>=2.0.0",
|
||||
"docling-core>=2.29.0",
|
||||
"pywin32>=305; sys_platform == 'win32'",
|
||||
]
|
||||
[project.urls]
|
||||
Homepage = "https://github.com/docling-project/docling-parse"
|
||||
Repository = "https://github.com/docling-project/docling-parse"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core", "pybind11>=2.13.1"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
requires = [
|
||||
"setuptools>=77.0.3",
|
||||
"pybind11>=2.13.6",
|
||||
"cibuildwheel>=2.19.2,<3.0.0",
|
||||
"wheel>=0.43.0,<1.0.0",
|
||||
"delocate>=0.11.0,<1.0.0",
|
||||
"cmake>=3.27.0,<4.0.0"
|
||||
]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[dependency-groups]
|
||||
build = [
|
||||
"setuptools>=77.0.3",
|
||||
"pybind11>=2.13.6",
|
||||
"cibuildwheel>=2.19.2,<3.0.0",
|
||||
"wheel>=0.43.0,<1.0.0",
|
||||
"delocate>=0.11.0,<1.0.0",
|
||||
"cmake>=3.27.0,<4.0.0"
|
||||
]
|
||||
dev = [
|
||||
"pytest>=7.4.2,<8.0.0",
|
||||
"black[jupyter]>=24.4.2,<25.0.0",
|
||||
"python-semantic-release>=7.32.2,<8.0.0",
|
||||
"pre-commit>=3.7.1,<4.0.0",
|
||||
"isort>=5.10.1,<6.0.0",
|
||||
"mypy>=1.13.0,<2.0.0",
|
||||
"tqdm>=4.67.0,<5.0.0",
|
||||
"boto>=2.49.0,<3.0.0",
|
||||
"boto3>=1.35.67,<2.0.0",
|
||||
"autoflake>=2.3.1,<3.0.0"
|
||||
]
|
||||
[tool.uv]
|
||||
package = true
|
||||
default-groups = "all"
|
||||
|
||||
[tool.setuptools]
|
||||
include-package-data = true
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
# `where` lists the directories to search for packages; package-name globs
# such as "docling_parse*" belong in `include`, not here.
where = ["."]
|
||||
include = ["docling_parse*"]
|
||||
|
||||
[tool.setuptools.package-data]
|
||||
"docling_parse" = [
|
||||
"*.so", "*.pyd", "*.dll",
|
||||
"pdf_resources/*",
|
||||
"pdf_resources_v2/*"
|
||||
]
|
||||
|
||||
[tool.setuptools.exclude-package-data]
|
||||
"docling_parse" = ["*.pyc", "__pycache__"]
|
||||
|
||||
[tool.black]
|
||||
line-length = 88
|
||||
@@ -87,7 +96,7 @@ preview = true
|
||||
[tool.isort]
|
||||
profile = "black"
|
||||
line_length = 88
|
||||
py_version=310
|
||||
py_version = 310
|
||||
multi_line_output = 3
|
||||
include_trailing_comma = true
|
||||
|
||||
@@ -101,34 +110,22 @@ recursive = true
|
||||
|
||||
[tool.mypy]
|
||||
pretty = true
|
||||
# strict = true
|
||||
no_implicit_optional = true
|
||||
namespace_packages = true
|
||||
show_error_codes = true
|
||||
python_version = "3.9"
|
||||
# plugins = ["pydantic.mypy"]
|
||||
|
||||
#[mypy-docling_parse.*]
|
||||
#ignore_missing_imports = True
|
||||
|
||||
[[tool.mypy.overrides]]
|
||||
module = [
|
||||
"tabulate.*",
|
||||
"botocore.*",
|
||||
"boto3.*",
|
||||
"boto3.*"
|
||||
]
|
||||
ignore_missing_imports = true
|
||||
|
||||
|
||||
[tool.semantic_release]
|
||||
# for default values check:
|
||||
# https://github.com/python-semantic-release/python-semantic-release/blob/v7.32.2/semantic_release/defaults.cfg
|
||||
|
||||
version_source = "tag_only"
|
||||
branch = "main"
|
||||
|
||||
# configure types which should trigger minor and patch version bumps respectively
|
||||
# (note that they must be a subset of the configured allowed types):
|
||||
parser_angular_allowed_types = "build,chore,ci,docs,feat,fix,perf,style,refactor,test"
|
||||
parser_angular_minor_types = "feat"
|
||||
parser_angular_patch_types = "fix,perf"
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
from setuptools import setup, find_packages, Distribution
|
||||
from setuptools.command.build_py import build_py as _build_py
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
class CustomBuildPy(_build_py):
|
||||
def run(self):
|
||||
subprocess.check_call([sys.executable, "build.py"])
|
||||
super().run()
|
||||
|
||||
class BinaryDistribution(Distribution):
|
||||
def has_ext_modules(self):
|
||||
return True
|
||||
|
||||
setup(
|
||||
packages=find_packages(include=["docling_parse", "docling_parse.*"]),
|
||||
distclass=BinaryDistribution,
|
||||
cmdclass={"build_py": CustomBuildPy},
|
||||
zip_safe=False,
|
||||
include_package_data=True,
|
||||
package_data={
|
||||
"docling_parse": [
|
||||
"*.so", "*.pyd", "*.dll",
|
||||
"pdf_resources/*",
|
||||
"pdf_resources_v2/*",
|
||||
],
|
||||
},
|
||||
)
|
||||
Reference in New Issue
Block a user