class Settings(BaseSettings):
    """Settings for the conversion tools.

    Values are loaded from environment variables carrying the
    ``DOCLING_MCP_`` prefix and from a ``.env`` file, e.g.
    ``DOCLING_MCP_KEEP_IMAGES=true``.
    """

    model_config = SettingsConfigDict(
        env_prefix="DOCLING_MCP_",
        env_file=".env",
        # pydantic-settings forbids unknown keys by default, and that check
        # also applies to dotenv entries. Ignore them so that unrelated
        # DOCLING_MCP_* variables in a shared .env file do not make this
        # module fail at import time.
        extra="ignore",
    )

    # When true, page images are generated during conversion and kept on the
    # document; disabled by default.
    keep_images: bool = False


# Module-level instance shared by the conversion tools.
settings = Settings()
@mcp.tool(title="Generate the thumbnail of a page in the Docling document")
def page_thumbnail(
    document_key: Annotated[
        str,
        Field(description="The unique identifier of the document in the local cache."),
    ],
    page_no: Annotated[
        int, Field(description="The number of the page starting at 1")
    ] = 1,
    size: Annotated[
        int, Field(description="The width of the thumbnail in pixels")
    ] = 300,
) -> MCPImage:
    """Generate a thumbnail image for the requested page.

    This tool takes a document that exists in the local cache and generates a
    thumbnail image for the requested page. The thumbnail keeps the aspect
    ratio of the page and is never larger than the stored page image.

    Args:
        document_key: Key of the document in the local document cache.
        page_no: Page number, starting at 1.
        size: Requested thumbnail width in pixels; must be at least 1.

    Returns:
        The thumbnail as a PNG image, also written to the cache directory as
        ``<document_key>-<page_no>.png``.

    Raises:
        ValueError: If ``size`` is not positive, the document key or page is
            unknown, or the document was converted without page images.
        RuntimeError: If the stored page image cannot be loaded or is empty.
    """
    if size < 1:
        raise ValueError(
            f"size={size}: the thumbnail width must be a positive number of pixels."
        )

    if document_key not in local_document_cache:
        doc_keys = ", ".join(local_document_cache.keys())
        raise ValueError(
            f"document-key: {document_key} is not found. Existing document-keys are: {doc_keys}"
        )

    doc = local_document_cache[document_key]
    if page_no not in doc.pages:
        raise ValueError(
            f"page_no={page_no}: not found in the document. Available pages are: {', '.join(str(k) for k in doc.pages)}"
        )

    im_ref = doc.pages[page_no].image
    if im_ref is None:
        raise ValueError(
            "The DoclingDocument does not have page images. Please configure your server for generating page images using DOCLING_MCP_KEEP_IMAGES=true."
        )

    page_image = im_ref.pil_image
    if page_image is None:
        raise RuntimeError("Server error. The image cannot be loaded in PIL.")

    src_width, src_height = page_image.size
    if src_width < 1 or src_height < 1:
        raise RuntimeError("Server error. The page image is empty.")

    # Image.thumbnail() resizes in place. The object returned by pil_image may
    # be the very instance held by the cached document, so work on a copy to
    # avoid permanently shrinking the stored page image.
    thumb = page_image.copy()

    # Bound the width only: thumbnail() preserves the aspect ratio and never
    # enlarges, so keeping the original height as the second bound lets the
    # width be the limiting side and avoids a zero-height bounding box on very
    # wide pages.
    thumb.thumbnail((size, src_height))

    cache_dir = get_cache_dir()
    im_file = cache_dir / f"{document_key}-{page_no}.png"
    thumb.save(im_file, format="PNG")

    return MCPImage(path=im_file, format="png")