mirror of
https://github.com/docling-project/docling.git
synced 2026-05-17 13:10:38 +00:00
ce4992363f
Signed-off-by: Hassan Raza <raihassanraza10@gmail.com>
116 lines
3.6 KiB
Python
116 lines
3.6 KiB
Python
import pytest
|
|
from docling_core.types.doc import DoclingDocument, ImageRef, ProvenanceItem
|
|
from docling_core.types.doc.base import BoundingBox, Size
|
|
from docling_core.types.doc.labels import DocItemLabel
|
|
from PIL import Image
|
|
|
|
from tests.verify_utils import verify_docitems
|
|
|
|
|
|
def _make_doc_with_bbox(
|
|
*, left: float, page_width: float = 612.0, page_height: float = 792.0
|
|
) -> DoclingDocument:
|
|
doc = DoclingDocument(name="test")
|
|
doc.add_page(page_no=1, size=Size(width=page_width, height=page_height))
|
|
doc.add_text(
|
|
label=DocItemLabel.PARAGRAPH,
|
|
text="bbox check",
|
|
orig="bbox check",
|
|
prov=ProvenanceItem(
|
|
page_no=1,
|
|
bbox=BoundingBox(l=left, t=20.0, r=30.0, b=40.0),
|
|
charspan=(0, 10),
|
|
),
|
|
)
|
|
return doc
|
|
|
|
|
|
def _make_doc_with_picture(*, image_size: tuple[int, int]) -> DoclingDocument:
|
|
doc = DoclingDocument(name="test")
|
|
doc.add_picture(
|
|
image=ImageRef.from_pil(Image.new("RGB", image_size, "red"), dpi=72)
|
|
)
|
|
return doc
|
|
|
|
|
|
def test_verify_docitems_allows_small_bbox_variance_for_non_fuzzy_docs():
|
|
verify_docitems(
|
|
doc_pred=_make_doc_with_bbox(left=11.53),
|
|
doc_true=_make_doc_with_bbox(left=10.0),
|
|
fuzzy=False,
|
|
pdf_filename="fixture.json",
|
|
)
|
|
|
|
|
|
def test_verify_docitems_rejects_large_bbox_variance_for_non_fuzzy_docs():
|
|
with pytest.raises(AssertionError, match="BBox left mismatch"):
|
|
verify_docitems(
|
|
doc_pred=_make_doc_with_bbox(left=12.01),
|
|
doc_true=_make_doc_with_bbox(left=10.0),
|
|
fuzzy=False,
|
|
pdf_filename="fixture.json",
|
|
)
|
|
|
|
|
|
def test_verify_docitems_allows_reasonable_bbox_variance_for_fuzzy_docs():
|
|
verify_docitems(
|
|
doc_pred=_make_doc_with_bbox(left=17.23, page_width=2000.0, page_height=2829.0),
|
|
doc_true=_make_doc_with_bbox(left=10.0, page_width=2000.0, page_height=2829.0),
|
|
fuzzy=True,
|
|
pdf_filename="fixture.json",
|
|
)
|
|
|
|
|
|
def test_verify_docitems_rejects_gross_bbox_variance_for_fuzzy_docs():
|
|
with pytest.raises(AssertionError, match="BBox left mismatch"):
|
|
verify_docitems(
|
|
doc_pred=_make_doc_with_bbox(
|
|
left=25.0, page_width=2000.0, page_height=2829.0
|
|
),
|
|
doc_true=_make_doc_with_bbox(
|
|
left=10.0, page_width=2000.0, page_height=2829.0
|
|
),
|
|
fuzzy=True,
|
|
pdf_filename="fixture.json",
|
|
)
|
|
|
|
|
|
def test_verify_docitems_rejects_bbox_presence_mismatch():
|
|
doc_true = _make_doc_with_bbox(left=10.0)
|
|
doc_pred = _make_doc_with_bbox(left=10.0)
|
|
doc_pred.texts[0].prov[0].bbox = None
|
|
|
|
with pytest.raises(AssertionError, match="BBox presence mismatch"):
|
|
verify_docitems(
|
|
doc_pred=doc_pred,
|
|
doc_true=doc_true,
|
|
fuzzy=False,
|
|
pdf_filename="fixture.json",
|
|
)
|
|
|
|
|
|
def test_verify_docitems_rejects_picture_count_mismatch():
|
|
doc_true = _make_doc_with_picture(image_size=(2, 2))
|
|
doc_pred = DoclingDocument(name="test")
|
|
|
|
with pytest.raises(AssertionError, match="Picture lengths do not match"):
|
|
verify_docitems(
|
|
doc_pred=doc_pred,
|
|
doc_true=doc_true,
|
|
fuzzy=False,
|
|
pdf_filename="fixture.json",
|
|
)
|
|
|
|
|
|
def test_verify_docitems_uses_predicted_picture_image():
|
|
doc_true = _make_doc_with_picture(image_size=(2, 2))
|
|
doc_pred = _make_doc_with_picture(image_size=(3, 2))
|
|
|
|
with pytest.raises(AssertionError):
|
|
verify_docitems(
|
|
doc_pred=doc_pred,
|
|
doc_true=doc_true,
|
|
fuzzy=False,
|
|
pdf_filename="fixture.json",
|
|
)
|