mirror of
https://github.com/docling-project/docling.git
synced 2026-05-17 13:10:38 +00:00
1e841ebcbd
Signed-off-by: LarytheLord <llawlietbagsum@gmail.com>
289 lines
9.5 KiB
Python
289 lines
9.5 KiB
Python
from io import BytesIO
|
|
from unittest.mock import MagicMock
|
|
|
|
import pytest
|
|
from docling_core.types.doc import BoundingBox, CoordOrigin
|
|
from PIL import Image
|
|
|
|
from docling.backend.image_backend import ImageDocumentBackend, _ImagePageBackend
|
|
from docling.datamodel.base_models import DocumentStream, InputFormat
|
|
from docling.datamodel.document import (
|
|
InputDocument,
|
|
_DocumentConversionInput,
|
|
_DummyBackend,
|
|
)
|
|
from docling.document_converter import DocumentConverter, ImageFormatOption
|
|
from docling.document_extractor import DocumentExtractor
|
|
|
|
|
|
def _make_png_stream(
    width: int = 64, height: int = 48, color=(123, 45, 67)
) -> DocumentStream:
    """Build an in-memory, solid-colour PNG wrapped in a ``DocumentStream``.

    Args:
        width: Pixel width of the generated image.
        height: Pixel height of the generated image.
        color: RGB fill colour passed to ``Image.new``.

    Returns:
        A ``DocumentStream`` named ``"test.png"`` whose buffer is rewound to
        the first byte.
    """
    payload = BytesIO()
    Image.new("RGB", (width, height), color).save(payload, format="PNG")
    # Rewind so that consumers start reading at the beginning of the PNG data.
    payload.seek(0)
    return DocumentStream(name="test.png", stream=payload)
|
|
|
|
|
|
def _make_multipage_tiff_stream(num_pages: int = 3, size=(32, 32)) -> DocumentStream:
    """Build an in-memory multi-frame TIFF wrapped in a ``DocumentStream``.

    Args:
        num_pages: Number of frames to generate.
        size: ``(width, height)`` used for every frame.

    Returns:
        A ``DocumentStream`` named ``"test.tiff"`` whose buffer is rewound to
        the first byte.
    """
    frames = []
    for index in range(num_pages):
        # Derive the fill colour from the frame index.
        shade = (index * 10 % 255, index * 20 % 255, index * 30 % 255)
        frames.append(Image.new("RGB", size, shade))

    payload = BytesIO()
    # The first frame drives the save; the rest are appended as extra frames.
    frames[0].save(payload, format="TIFF", save_all=True, append_images=frames[1:])
    payload.seek(0)
    return DocumentStream(name="test.tiff", stream=payload)
|
|
|
|
|
|
def test_docs_builder_uses_image_backend_for_image_stream():
    """A PNG stream yields one IMAGE document backed by ``ImageDocumentBackend``."""
    # Format options mapping IMAGE -> ImageFormatOption (which carries
    # ImageDocumentBackend).
    options_by_format = {InputFormat.IMAGE: ImageFormatOption()}
    conversion_input = _DocumentConversionInput(
        path_or_stream_iterator=[_make_png_stream()]
    )

    produced = list(conversion_input.docs(options_by_format))
    assert len(produced) == 1

    document = produced[0]
    assert document.format == InputFormat.IMAGE
    assert isinstance(document._backend, ImageDocumentBackend)
    assert document.page_count == 1
|
|
|
|
|
|
def test_docs_builder_multipage_tiff_counts_frames():
    """A four-frame TIFF stream is reported as a four-page document."""
    tiff_stream = _make_multipage_tiff_stream(num_pages=4)
    conversion_input = _DocumentConversionInput(
        path_or_stream_iterator=[tiff_stream]
    )
    options_by_format = {InputFormat.IMAGE: ImageFormatOption()}

    # Only the first produced document is inspected.
    document = next(conversion_input.docs(options_by_format))

    assert isinstance(document._backend, ImageDocumentBackend)
    assert document.page_count == 4
|
|
|
|
|
|
def test_converter_default_maps_image_to_image_backend():
    """``DocumentConverter`` maps IMAGE to ``ImageDocumentBackend`` by default."""
    image_only_converter = DocumentConverter(allowed_formats=[InputFormat.IMAGE])
    image_option = image_only_converter.format_to_options[InputFormat.IMAGE]
    assert image_option.backend is ImageDocumentBackend
|
|
|
|
|
|
def test_extractor_default_maps_image_to_image_backend():
    """``DocumentExtractor`` maps IMAGE to ``ImageDocumentBackend`` by default."""
    image_only_extractor = DocumentExtractor(allowed_formats=[InputFormat.IMAGE])
    image_option = image_only_extractor.extraction_format_to_options[
        InputFormat.IMAGE
    ]
    assert image_option.backend is ImageDocumentBackend
|
|
|
|
|
|
def _get_backend_from_stream(stream: DocumentStream):
    """Wrap ``stream`` in an IMAGE ``InputDocument`` and return its backend.

    Args:
        stream: Source whose ``stream`` and ``name`` attributes are forwarded
            to ``InputDocument``.

    Returns:
        The ``_backend`` attribute of the constructed ``InputDocument``.
    """
    return InputDocument(
        path_or_stream=stream.stream,
        format=InputFormat.IMAGE,
        backend=ImageDocumentBackend,
        filename=stream.name,
    )._backend
|
|
|
|
|
|
def test_num_pages_single():
    """A single-frame PNG must report exactly one page."""
    backend = _get_backend_from_stream(_make_png_stream(width=100, height=80))
    assert backend.page_count() == 1
|
|
|
|
|
|
def test_num_pages_multipage():
    """A five-frame TIFF must report five pages."""
    backend = _get_backend_from_stream(
        _make_multipage_tiff_stream(num_pages=5, size=(64, 64))
    )
    assert backend.page_count() == 5
|
|
|
|
|
|
def test_get_size():
    """The page size must equal the pixel dimensions of the source image."""
    expected_width, expected_height = 120, 90
    backend = _get_backend_from_stream(
        _make_png_stream(width=expected_width, height=expected_height)
    )
    page: _ImagePageBackend = backend.load_page(0)

    page_size = page.get_size()

    assert page_size.width == expected_width
    assert page_size.height == expected_height
|
|
|
|
|
|
def test_get_page_image_full():
    """Without arguments, ``get_page_image`` returns the image at source size."""
    expected_width, expected_height = 100, 80
    backend = _get_backend_from_stream(
        _make_png_stream(width=expected_width, height=expected_height)
    )
    page: _ImagePageBackend = backend.load_page(0)

    rendered = page.get_page_image()

    assert rendered.width == expected_width
    assert rendered.height == expected_height
|
|
|
|
|
|
def test_get_page_image_scaled():
    """``get_page_image(scale=...)`` multiplies both dimensions by the scale."""
    base_width, base_height = 100, 80
    factor = 2.0
    backend = _get_backend_from_stream(
        _make_png_stream(width=base_width, height=base_height)
    )
    page: _ImagePageBackend = backend.load_page(0)

    rendered = page.get_page_image(scale=factor)

    assert rendered.width == round(base_width * factor)
    assert rendered.height == round(base_height * factor)
|
|
|
|
|
|
def test_crop_page_image():
    """A crop box produces an image with the box's width and height."""
    backend = _get_backend_from_stream(_make_png_stream(width=200, height=150))
    page: _ImagePageBackend = backend.load_page(0)

    # Region taken from the middle of the 200x150 page.
    region = BoundingBox(l=50, t=30, r=150, b=120, coord_origin=CoordOrigin.TOPLEFT)
    cropped = page.get_page_image(cropbox=region)

    assert cropped.width == 100  # r - l = 150 - 50
    assert cropped.height == 90  # b - t = 120 - 30
|
|
|
|
|
|
def test_crop_page_image_scaled():
    """Cropping combined with scaling yields the crop size times the scale."""
    factor = 0.5
    backend = _get_backend_from_stream(_make_png_stream(width=200, height=150))
    page: _ImagePageBackend = backend.load_page(0)

    region = BoundingBox(l=50, t=30, r=150, b=120, coord_origin=CoordOrigin.TOPLEFT)
    cropped = page.get_page_image(scale=factor, cropbox=region)

    # The crop is 100 wide and 90 high before the scale is applied.
    assert cropped.width == round(100 * factor)
    assert cropped.height == round(90 * factor)
|
|
|
|
|
|
def test_get_bitmap_rects():
    """``get_bitmap_rects`` yields a single rectangle covering the whole page."""
    page_width, page_height = 100, 80
    backend = _get_backend_from_stream(
        _make_png_stream(width=page_width, height=page_height)
    )
    page: _ImagePageBackend = backend.load_page(0)

    found = list(page.get_bitmap_rects())
    assert len(found) == 1

    full_page = found[0]
    assert full_page.l == 0.0
    assert full_page.t == 0.0
    assert full_page.r == float(page_width)
    assert full_page.b == float(page_height)
    assert full_page.coord_origin == CoordOrigin.TOPLEFT
|
|
|
|
|
|
def test_get_bitmap_rects_scaled():
    """With a scale, the single bitmap rectangle grows by that factor."""
    page_width, page_height = 100, 80
    factor = 2.0
    backend = _get_backend_from_stream(
        _make_png_stream(width=page_width, height=page_height)
    )
    page: _ImagePageBackend = backend.load_page(0)

    found = list(page.get_bitmap_rects(scale=factor))
    assert len(found) == 1

    scaled_page = found[0]
    assert scaled_page.l == 0.0
    assert scaled_page.t == 0.0
    assert scaled_page.r == float(page_width * factor)
    assert scaled_page.b == float(page_height * factor)
    assert scaled_page.coord_origin == CoordOrigin.TOPLEFT
|
|
|
|
|
|
def test_get_text_in_rect():
    """``get_text_in_rect`` returns an empty string for an image page (no OCR)."""
    backend = _get_backend_from_stream(_make_png_stream())
    page: _ImagePageBackend = backend.load_page(0)

    region = BoundingBox(l=10, t=10, r=50, b=50, coord_origin=CoordOrigin.TOPLEFT)

    assert page.get_text_in_rect(region) == ""
|
|
|
|
|
|
def test_multipage_access():
    """Every frame of a multi-page TIFF loads as a valid 64x64 page."""
    frame_total = 4
    backend = _get_backend_from_stream(
        _make_multipage_tiff_stream(num_pages=frame_total, size=(64, 64))
    )
    assert backend.page_count() == frame_total

    # Load each page in turn and check validity and dimensions.
    for page_index in range(frame_total):
        page = backend.load_page(page_index)
        assert page.is_valid()

        dims = page.get_size()
        assert dims.width == 64
        assert dims.height == 64
|
|
|
|
|
|
def test_source_image_is_closed_after_backend_init(tmp_path, monkeypatch):
    """Check that the image opened while constructing the backend gets closed.

    ``Image.open`` is replaced, via ``monkeypatch``, with a wrapper that hands
    out tracking proxies. After ``ImageDocumentBackend`` is constructed from a
    file path, exactly one image must have been opened and its ``close`` must
    already have been called.

    Args:
        tmp_path: pytest fixture providing a temporary directory for the PNG.
        monkeypatch: pytest fixture used to patch ``Image.open``; the patch is
            reverted automatically when the test ends.
    """
    image_path = tmp_path / "test.png"
    Image.new("RGB", (32, 32), (10, 20, 30)).save(image_path)

    opened_images = []
    original_open = Image.open

    class TrackingImage:
        """Proxy around an opened image that records whether it was closed."""

        def __init__(self, image):
            self._image = image
            self.closed = False

        def __getattr__(self, attr):
            # Only invoked for attributes missing on the proxy itself, so
            # everything not overridden here is forwarded to the real image.
            return getattr(self._image, attr)

        def close(self):
            self.closed = True
            return self._image.close()

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc, tb):
            self.close()
            return False  # never suppress exceptions raised inside the block

    def tracking_open(*args, **kwargs):
        tracked_image = TrackingImage(original_open(*args, **kwargs))
        opened_images.append(tracked_image)
        return tracked_image

    # Built before Image.open is patched, so only opens performed by the
    # ImageDocumentBackend constructor below are recorded.
    input_doc = InputDocument(
        path_or_stream=image_path,
        format=InputFormat.IMAGE,
        backend=_DummyBackend,
        filename=image_path.name,
    )

    monkeypatch.setattr("docling.backend.image_backend.Image.open", tracking_open)
    backend = ImageDocumentBackend(
        in_doc=input_doc,
        path_or_stream=image_path,
    )

    try:
        assert len(opened_images) == 1
        assert opened_images[0].closed is True
    finally:
        # Always release the backend, even when an assertion above fails;
        # previously a failing assertion skipped this cleanup.
        backend.unload()
|
|
|
|
|
|
def test_unload_closes_cached_frames():
    """``unload`` must close every cached frame once and empty ``_frames``."""
    backend = _get_backend_from_stream(
        _make_multipage_tiff_stream(num_pages=3, size=(32, 32))
    )

    close_spies = []
    for cached_frame in backend._frames:
        # Wrap the real close so the call is recorded but still executed.
        spy = MagicMock(wraps=cached_frame.close)
        cached_frame.close = spy
        close_spies.append(spy)

    backend.unload()

    assert backend._frames == []
    for spy in close_spies:
        spy.assert_called_once()
|