# docling/tests/test_backend_image_native.py

from io import BytesIO
from unittest.mock import MagicMock

import pytest
from docling_core.types.doc import BoundingBox, CoordOrigin
from PIL import Image

from docling.backend.image_backend import ImageDocumentBackend, _ImagePageBackend
from docling.datamodel.base_models import DocumentStream, InputFormat
from docling.datamodel.document import (
    InputDocument,
    _DocumentConversionInput,
    _DummyBackend,
)
from docling.document_converter import DocumentConverter, ImageFormatOption
from docling.document_extractor import DocumentExtractor


def _make_png_stream(
    width: int = 64, height: int = 48, color=(123, 45, 67)
) -> DocumentStream:
    """Build an in-memory, solid-color RGB PNG wrapped in a ``DocumentStream``.

    Args:
        width: Image width in pixels.
        height: Image height in pixels.
        color: RGB fill color applied to the whole image.

    Returns:
        A ``DocumentStream`` named ``"test.png"`` whose buffer is rewound to
        the start of the encoded PNG data.
    """
    png_buffer = BytesIO()
    Image.new("RGB", (width, height), color).save(png_buffer, format="PNG")
    # Rewind so consumers read the PNG from its first byte.
    png_buffer.seek(0)
    return DocumentStream(name="test.png", stream=png_buffer)


def _make_multipage_tiff_stream(num_pages: int = 3, size=(32, 32)) -> DocumentStream:
    """Build an in-memory multi-frame RGB TIFF wrapped in a ``DocumentStream``.

    Each frame is a solid color derived from its index, so the frames differ
    from one another.

    Args:
        num_pages: Number of frames to write; must be at least 1.
        size: ``(width, height)`` in pixels shared by every frame.

    Returns:
        A ``DocumentStream`` named ``"test.tiff"`` whose buffer is rewound to
        the start of the encoded TIFF data.

    Raises:
        ValueError: If ``num_pages`` is less than 1, since a TIFF needs at
            least one frame to be saved.
    """
    if num_pages < 1:
        # Fail early with a clear message instead of an IndexError on frames[0].
        raise ValueError(f"num_pages must be at least 1, got {num_pages}")

    frames = [
        Image.new("RGB", size, (i * 10 % 255, i * 20 % 255, i * 30 % 255))
        for i in range(num_pages)
    ]
    first_frame, *remaining_frames = frames

    buf = BytesIO()
    # save_all + append_images writes every frame into a single TIFF file.
    first_frame.save(
        buf, format="TIFF", save_all=True, append_images=remaining_frames
    )
    buf.seek(0)
    return DocumentStream(name="test.tiff", stream=buf)


def test_docs_builder_uses_image_backend_for_image_stream():
    """A PNG stream run through the docs builder yields one IMAGE document.

    The resulting document is expected to be backed by
    ``ImageDocumentBackend`` and to report exactly one page.
    """
    # Map the IMAGE format to the default ImageFormatOption; the assertions
    # below expect this to select ImageDocumentBackend.
    options_by_format = {InputFormat.IMAGE: ImageFormatOption()}
    builder = _DocumentConversionInput(path_or_stream_iterator=[_make_png_stream()])

    produced = list(builder.docs(options_by_format))
    assert len(produced) == 1

    document = produced[0]
    assert document.format == InputFormat.IMAGE
    assert isinstance(document._backend, ImageDocumentBackend)
    assert document.page_count == 1


def test_docs_builder_multipage_tiff_counts_frames():
    """A four-frame TIFF stream yields a document reporting four pages."""
    tiff_stream = _make_multipage_tiff_stream(num_pages=4)
    builder = _DocumentConversionInput(path_or_stream_iterator=[tiff_stream])

    # Only the first produced document is inspected.
    first_doc = next(builder.docs({InputFormat.IMAGE: ImageFormatOption()}))

    assert isinstance(first_doc._backend, ImageDocumentBackend)
    assert first_doc.page_count == 4


def test_converter_default_maps_image_to_image_backend():
    """``DocumentConverter`` defaults the IMAGE format to ``ImageDocumentBackend``."""
    image_only_converter = DocumentConverter(allowed_formats=[InputFormat.IMAGE])
    image_option = image_only_converter.format_to_options[InputFormat.IMAGE]
    assert image_option.backend is ImageDocumentBackend


def test_extractor_default_maps_image_to_image_backend():
    """``DocumentExtractor`` defaults the IMAGE format to ``ImageDocumentBackend``."""
    image_only_extractor = DocumentExtractor(allowed_formats=[InputFormat.IMAGE])
    image_option = image_only_extractor.extraction_format_to_options[
        InputFormat.IMAGE
    ]
    assert image_option.backend is ImageDocumentBackend


def _get_backend_from_stream(stream: DocumentStream):
    """Return the backend of an IMAGE ``InputDocument`` built from ``stream``.

    The document is constructed from the stream's underlying buffer and name,
    with ``ImageDocumentBackend`` passed as its backend class.
    """
    return InputDocument(
        path_or_stream=stream.stream,
        format=InputFormat.IMAGE,
        backend=ImageDocumentBackend,
        filename=stream.name,
    )._backend


def test_num_pages_single():
    """A single PNG image is reported as a one-page document."""
    backend = _get_backend_from_stream(_make_png_stream(width=100, height=80))
    assert backend.page_count() == 1


def test_num_pages_multipage():
    """A five-frame TIFF is reported as a five-page document."""
    backend = _get_backend_from_stream(
        _make_multipage_tiff_stream(num_pages=5, size=(64, 64))
    )
    assert backend.page_count() == 5


def test_get_size():
    """The first page reports the pixel dimensions of the source PNG."""
    expected_w, expected_h = 120, 90
    backend = _get_backend_from_stream(
        _make_png_stream(width=expected_w, height=expected_h)
    )
    first_page: _ImagePageBackend = backend.load_page(0)

    page_size = first_page.get_size()
    assert (page_size.width, page_size.height) == (expected_w, expected_h)


def test_get_page_image_full():
    """Rendering a page with default arguments keeps the source dimensions."""
    expected_w, expected_h = 100, 80
    backend = _get_backend_from_stream(
        _make_png_stream(width=expected_w, height=expected_h)
    )
    first_page: _ImagePageBackend = backend.load_page(0)

    rendered = first_page.get_page_image()
    assert (rendered.width, rendered.height) == (expected_w, expected_h)


def test_get_page_image_scaled():
    """Rendering a page at scale 2.0 doubles both image dimensions."""
    base_w, base_h = 100, 80
    factor = 2.0
    backend = _get_backend_from_stream(_make_png_stream(width=base_w, height=base_h))
    first_page: _ImagePageBackend = backend.load_page(0)

    rendered = first_page.get_page_image(scale=factor)
    expected_dims = (round(base_w * factor), round(base_h * factor))
    assert (rendered.width, rendered.height) == expected_dims


def test_crop_page_image():
    """Cropping to a bounding box returns an image of the box's dimensions."""
    backend = _get_backend_from_stream(_make_png_stream(width=200, height=150))
    first_page: _ImagePageBackend = backend.load_page(0)

    # Region taken from the middle of the 200x150 page, top-left origin.
    region = BoundingBox(l=50, t=30, r=150, b=120, coord_origin=CoordOrigin.TOPLEFT)
    cropped = first_page.get_page_image(cropbox=region)

    # Expected width is r - l = 100, expected height is b - t = 90.
    assert cropped.width == 100
    assert cropped.height == 90


def test_crop_page_image_scaled():
    """Cropping with a scale yields the box dimensions times that scale."""
    factor = 0.5
    backend = _get_backend_from_stream(_make_png_stream(width=200, height=150))
    first_page: _ImagePageBackend = backend.load_page(0)

    region = BoundingBox(l=50, t=30, r=150, b=120, coord_origin=CoordOrigin.TOPLEFT)
    cropped = first_page.get_page_image(scale=factor, cropbox=region)

    # The crop is 100 wide and 90 high before the scale factor is applied.
    expected_dims = (round(100 * factor), round(90 * factor))
    assert (cropped.width, cropped.height) == expected_dims


def test_get_bitmap_rects():
    """``get_bitmap_rects`` yields a single rectangle covering the whole page."""
    page_w, page_h = 100, 80
    backend = _get_backend_from_stream(_make_png_stream(width=page_w, height=page_h))
    first_page: _ImagePageBackend = backend.load_page(0)

    bitmap_rects = list(first_page.get_bitmap_rects())
    assert len(bitmap_rects) == 1

    # The lone rectangle must span from the top-left origin to the page extent.
    full_page = bitmap_rects[0]
    assert full_page.l == 0.0
    assert full_page.t == 0.0
    assert full_page.r == float(page_w)
    assert full_page.b == float(page_h)
    assert full_page.coord_origin == CoordOrigin.TOPLEFT


def test_get_bitmap_rects_scaled():
    """With a scale, the single bitmap rectangle spans the scaled page extent."""
    page_w, page_h = 100, 80
    factor = 2.0
    backend = _get_backend_from_stream(_make_png_stream(width=page_w, height=page_h))
    first_page: _ImagePageBackend = backend.load_page(0)

    bitmap_rects = list(first_page.get_bitmap_rects(scale=factor))
    assert len(bitmap_rects) == 1

    # Right and bottom edges are expected at the page size times the scale.
    full_page = bitmap_rects[0]
    assert full_page.l == 0.0
    assert full_page.t == 0.0
    assert full_page.r == float(page_w * factor)
    assert full_page.b == float(page_h * factor)
    assert full_page.coord_origin == CoordOrigin.TOPLEFT


def test_get_text_in_rect():
    """``get_text_in_rect`` is expected to return an empty string for an image page."""
    backend = _get_backend_from_stream(_make_png_stream())
    first_page: _ImagePageBackend = backend.load_page(0)

    region = BoundingBox(l=10, t=10, r=50, b=50, coord_origin=CoordOrigin.TOPLEFT)
    assert first_page.get_text_in_rect(region) == ""


def test_multipage_access():
    """Every page of a multi-frame TIFF loads, is valid, and has the frame size."""
    page_total = 4
    edge = 64
    backend = _get_backend_from_stream(
        _make_multipage_tiff_stream(num_pages=page_total, size=(edge, edge))
    )
    assert backend.page_count() == page_total

    # Load each page by index and check validity and dimensions.
    for page_no in range(page_total):
        page = backend.load_page(page_no)
        assert page.is_valid()
        dims = page.get_size()
        assert (dims.width, dims.height) == (edge, edge)


def test_source_image_is_closed_after_backend_init(tmp_path, monkeypatch):
    """Check that the backend closes the image it opens during construction.

    ``Image.open`` is replaced with a wrapper that records every opened image
    in a proxy tracking whether ``close()`` was called. After constructing
    ``ImageDocumentBackend`` from a file path, exactly one image must have
    been opened and it must already be closed.

    Args:
        tmp_path: pytest fixture providing a temporary directory for the PNG.
        monkeypatch: pytest fixture used to swap out ``Image.open``.
    """
    image_path = tmp_path / "test.png"
    Image.new("RGB", (32, 32), (10, 20, 30)).save(image_path)

    opened_images = []
    original_open = Image.open

    class TrackingImage:
        """Proxy around an opened image that records calls to ``close()``."""

        def __init__(self, image):
            self._image = image
            self.closed = False

        def __getattr__(self, attr):
            # Only reached for attributes not defined on the proxy itself;
            # forward them to the wrapped image.
            return getattr(self._image, attr)

        def close(self):
            self.closed = True
            return self._image.close()

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc, tb):
            # Route context-manager exit through close() so it is tracked too.
            self.close()
            return False

    def tracking_open(*args, **kwargs):
        tracked_image = TrackingImage(original_open(*args, **kwargs))
        opened_images.append(tracked_image)
        return tracked_image

    # Created before the patch is installed, so any Image.open calls made
    # while building the InputDocument are not recorded.
    input_doc = InputDocument(
        path_or_stream=image_path,
        format=InputFormat.IMAGE,
        backend=_DummyBackend,
        filename=image_path.name,
    )

    monkeypatch.setattr("docling.backend.image_backend.Image.open", tracking_open)
    backend = ImageDocumentBackend(
        in_doc=input_doc,
        path_or_stream=image_path,
    )

    try:
        assert len(opened_images) == 1
        assert opened_images[0].closed is True
    finally:
        # Always release the backend, even when an assertion above fails.
        backend.unload()


def test_unload_closes_cached_frames():
    """``unload()`` closes each cached frame once and empties ``_frames``."""
    backend = _get_backend_from_stream(
        _make_multipage_tiff_stream(num_pages=3, size=(32, 32))
    )

    def _spy_on_close(frame):
        # Swap the frame's close for a mock that still calls the real one.
        spy = MagicMock(wraps=frame.close)
        frame.close = spy
        return spy

    close_spies = [_spy_on_close(frame) for frame in backend._frames]

    backend.unload()

    assert backend._frames == []
    for spy in close_spies:
        spy.assert_called_once()