Files
docling-core/test/test_visualization.py
Panos Vagenas a258d525e1 feat: add visualizers (#263)
* feat: add visualizers

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

* make visualizers composable

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

* use BoundingRectangle instead of BoundingBox

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

* enforce top-left coordinates

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

* narrow down test data to first 3 pages

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

* add file deletions

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

---------

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>
2025-04-17 11:43:45 +02:00

43 lines
1.2 KiB
Python

from pathlib import Path
import PIL.Image
from docling_core.types.doc.document import DoclingDocument
from .test_data_gen_flag import GEN_TEST_DATA
# Directory of the expected visualization images; the tests in this module
# build their per-page expected file paths under it.
VIZ_TEST_DATA_PATH = Path("./test/data/viz")
def verify(exp_file: Path, actual: PIL.Image.Image) -> None:
    """Check a rendered image against its stored expectation, or regenerate it.

    When ``GEN_TEST_DATA`` is truthy the image is written to ``exp_file`` and
    no comparison takes place; otherwise ``exp_file`` is opened and compared
    with ``actual`` using ``==``.

    Args:
        exp_file: Path of the expected image file.
        actual: Image produced by the code under test.

    Raises:
        AssertionError: If ``GEN_TEST_DATA`` is falsy and ``actual`` does not
            compare equal to the image loaded from ``exp_file``.
    """
    if GEN_TEST_DATA:
        # Save by path so PIL opens the (binary) image file itself; a separate
        # text-mode handle on the same path is unnecessary and would only
        # truncate the file and sit unused while PIL writes to it.
        actual.save(exp_file)
        return

    with PIL.Image.open(exp_file) as expected:
        assert actual == expected, f"visualization differs from {exp_file}"
def test_doc_visualization():
    """Verify the default visualization of the sample document.

    Only entries whose key is at most 3 are checked, each against its own
    expected PNG under ``VIZ_TEST_DATA_PATH``.
    """
    source_path = Path("./test/data/doc/2408.09869v3_enriched.json")
    document = DoclingDocument.load_from_json(source_path)
    rendered = document.get_visualization()

    for page_key in rendered:
        # Skip everything past the third page key.
        if not page_key <= 3:
            continue
        expected_path = VIZ_TEST_DATA_PATH / f"{source_path.stem}_viz_p{page_key}.png"
        verify(exp_file=expected_path, actual=rendered[page_key])
def test_doc_visualization_no_label():
    """Verify the visualization of the sample document with labels disabled.

    The visualization is requested with ``show_label=False``. Only entries
    whose key is at most 3 are checked, each against its own expected PNG
    under ``VIZ_TEST_DATA_PATH``.
    """
    source_path = Path("./test/data/doc/2408.09869v3_enriched.json")
    document = DoclingDocument.load_from_json(source_path)
    rendered = document.get_visualization(show_label=False)

    for page_key in rendered:
        # Skip everything past the third page key.
        if not page_key <= 3:
            continue
        expected_path = (
            VIZ_TEST_DATA_PATH / f"{source_path.stem}_viz_wout_lbl_p{page_key}.png"
        )
        verify(exp_file=expected_path, actual=rendered[page_key])