Files
2026-05-08 12:14:22 +02:00

27 lines
753 B
Python

from __future__ import annotations
from pathlib import Path
from huggingface_hub import snapshot_download
# Hugging Face Hub dataset repository that the test-data helper in this
# module downloads from.
HF_DATASET_REPO_ID = "docling-project/regression-dataset-for-docling-parse"

# Revision passed to the Hub when downloading, so the fetched data stays fixed
# (40-character hex string; presumably a commit SHA of the dataset repo).
HF_DATASET_REVISION = "96c606ee4d7a8db8170fef213ed2113cdfeeebce"

# Absolute, symlink-resolved directory containing this file.
TESTS_DIR = Path(__file__).resolve().parent

# Local directory the dataset snapshot is downloaded into.
TEST_DATA_DIR = TESTS_DIR.joinpath("data")
def ensure_test_data_downloaded(force: bool = False) -> Path:
    """Ensure the pinned regression dataset is present in ``TEST_DATA_DIR``.

    The directory is created if missing. Unless ``force`` is set, the download
    is skipped when the directory already holds at least one non-hidden entry.

    Args:
        force: When true, skip the "already present" check and pass
            ``force_download=True`` to ``snapshot_download``.

    Returns:
        ``TEST_DATA_DIR``, the local directory holding the dataset.

    Note:
        Errors raised by ``snapshot_download`` (e.g. when the Hub cannot be
        reached) are not caught here and propagate to the caller.
    """
    TEST_DATA_DIR.mkdir(parents=True, exist_ok=True)

    if not force:
        # Hidden entries are not evidence of downloaded data: huggingface_hub
        # keeps its bookkeeping in a ``.cache`` folder inside ``local_dir``, so
        # an interrupted first download (or a stray ``.gitkeep``/``.DS_Store``)
        # would otherwise make every later call skip the download.
        has_data = any(
            not entry.name.startswith(".") for entry in TEST_DATA_DIR.iterdir()
        )
        if has_data:
            return TEST_DATA_DIR

    snapshot_download(
        repo_id=HF_DATASET_REPO_ID,
        repo_type="dataset",
        revision=HF_DATASET_REVISION,
        local_dir=str(TEST_DATA_DIR),
        force_download=force,
    )
    return TEST_DATA_DIR