mirror of
https://github.com/docling-project/docling-core.git
synced 2026-05-17 13:10:44 +00:00
a258d525e1
* feat: add visualizers Signed-off-by: Panos Vagenas <pva@zurich.ibm.com> * make visualizers composable Signed-off-by: Panos Vagenas <pva@zurich.ibm.com> * use BoundingRectangle instead of BoundingBox Signed-off-by: Panos Vagenas <pva@zurich.ibm.com> * enforce top-left coordinates Signed-off-by: Panos Vagenas <pva@zurich.ibm.com> * narrow down test data to first 3 pages Signed-off-by: Panos Vagenas <pva@zurich.ibm.com> * add file deletions Signed-off-by: Panos Vagenas <pva@zurich.ibm.com> --------- Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>
43 lines
1.2 KiB
Python
43 lines
1.2 KiB
Python
from pathlib import Path
|
|
|
|
import PIL.Image
|
|
|
|
from docling_core.types.doc.document import DoclingDocument
|
|
|
|
from .test_data_gen_flag import GEN_TEST_DATA
|
|
|
|
# Directory holding the reference PNG images used by the visualization tests.
# The path is relative, so it is resolved against the current working directory.
VIZ_TEST_DATA_PATH = Path("./test/data/viz")
|
|
|
|
|
|
def verify(exp_file: Path, actual: PIL.Image.Image):
    """Check a rendered image against its stored reference image.

    When ``GEN_TEST_DATA`` is truthy, the reference file is (re)generated by
    saving ``actual`` to ``exp_file``. Otherwise the image stored at
    ``exp_file`` is opened and asserted equal to ``actual``.

    Args:
        exp_file: Path of the reference image file.
        actual: Image produced by the code under test.

    Raises:
        AssertionError: In comparison mode, if ``actual`` is not equal to the
            reference image.
    """
    if GEN_TEST_DATA:
        # Image.save() accepts a path and writes the file itself, so no
        # separate open() handle on the same path is needed.
        actual.save(exp_file)
    else:
        # The context manager closes the reference image file after comparing.
        with PIL.Image.open(exp_file) as expected:
            assert actual == expected
|
|
|
|
|
|
def test_doc_visualization():
    """Visualize the enriched sample document and verify pages keyed <= 3.

    Each selected page image is passed to ``verify`` together with the path of
    its reference PNG under ``VIZ_TEST_DATA_PATH``.
    """
    src = Path("./test/data/doc/2408.09869v3_enriched.json")
    pages = DoclingDocument.load_from_json(src).get_visualization()
    for k in pages:
        # Only pages keyed 3 or lower are checked; skip the rest.
        if k > 3:
            continue
        reference_png = VIZ_TEST_DATA_PATH / f"{src.stem}_viz_p{k}.png"
        verify(exp_file=reference_png, actual=pages[k])
|
|
|
|
|
|
def test_doc_visualization_no_label():
    """Visualize the sample document with ``show_label=False`` and verify pages keyed <= 3.

    Each selected page image is passed to ``verify`` together with the path of
    its ``_viz_wout_lbl_`` reference PNG under ``VIZ_TEST_DATA_PATH``.
    """
    src = Path("./test/data/doc/2408.09869v3_enriched.json")
    pages = DoclingDocument.load_from_json(src).get_visualization(show_label=False)
    for k in pages:
        # Only pages keyed 3 or lower are checked; skip the rest.
        if k > 3:
            continue
        reference_png = VIZ_TEST_DATA_PATH / f"{src.stem}_viz_wout_lbl_p{k}.png"
        verify(exp_file=reference_png, actual=pages[k])
|