Merge branch 'main' of github.com:DS4SD/docling into cau/pdfium-bitmap-fix

2026-05-17 13:10:38 +00:00 · 2026-03-24 13:15:32 +01:00
parent 70bf513ea7 90d6dd4e87
commit 79b18945a8
30 changed files with 1690 additions and 237 deletions
@@ -1,3 +1,19 @@
+## [v2.81.0](https://github.com/docling-project/docling/releases/tag/v2.81.0) - 2026-03-20
+
+### Feature
+
+* Route plain-text and Quarto/R Markdown files to the Markdown backend ([#3161](https://github.com/docling-project/docling/issues/3161)) ([`96d7c7e`](https://github.com/docling-project/docling/commit/96d7c7ec79992d8dddedfafaaedb7f9bf6e14f40))
+
+### Fix
+
+* **docx:** Missing list items after numbered header (#2665) ([#2678](https://github.com/docling-project/docling/issues/2678)) ([`2f7c09e`](https://github.com/docling-project/docling/commit/2f7c09e0d8f07a5fa0aaf4f33bdfb1f71d3f3063))
+* Avoid thread-unsafe close of pypdfium backend ([#3160](https://github.com/docling-project/docling/issues/3160)) ([`afb4bb6`](https://github.com/docling-project/docling/commit/afb4bb68023c5d8fb8dc5e39413a27678e642293))
+* Handle external image relationships in MsWordDocumentBackend ([#3114](https://github.com/docling-project/docling/issues/3114)) ([`8ae0974`](https://github.com/docling-project/docling/commit/8ae0974a9d86a447f78e4950bc0a45d5eba31e98))
+* Handle PermissionError for directory input on Windows CLI ([#3149](https://github.com/docling-project/docling/issues/3149)) ([`a39317a`](https://github.com/docling-project/docling/commit/a39317a147859c68bf8aef635276a23585725529))
+* Avoid in-place mutation of pipeline options breaking cache key ([#3115](https://github.com/docling-project/docling/issues/3115)) ([`412af62`](https://github.com/docling-project/docling/commit/412af62135869978b7d22e1dd4ee2725623fad44))
+* Preserve torch_dtype in get_engine_config and add it to CodeFormulaV2 ([#3117](https://github.com/docling-project/docling/issues/3117)) ([`53a5f80`](https://github.com/docling-project/docling/commit/53a5f80a43849d853d4e0598d3875e6aac2f88e0))
+* Release image backend resources after frame extraction ([#3134](https://github.com/docling-project/docling/issues/3134)) ([`1e841eb`](https://github.com/docling-project/docling/commit/1e841ebcbd048fbfc11d63b4086539b7cd88bb77))
+
 ## [v2.80.0](https://github.com/docling-project/docling/releases/tag/v2.80.0) - 2026-03-14

 ### Feature
@@ -30,7 +30,7 @@ Docling simplifies document processing, parsing diverse formats — including ad

 ## Features

-* 🗂️ Parsing of [multiple document formats][supported_formats] incl. PDF, DOCX, PPTX, XLSX, HTML, WAV, MP3, WebVTT, images (PNG, TIFF, JPEG, ...), LaTeX, and more
+* 🗂️ Parsing of [multiple document formats][supported_formats] incl. PDF, DOCX, PPTX, XLSX, HTML, WAV, MP3, WebVTT, images (PNG, TIFF, JPEG, ...), LaTeX, plain text, and more
 * 📑 Advanced PDF understanding incl. page layout, reading order, table structure, code, formulas, image classification, and more
 * 🧬 Unified, expressive [DoclingDocument][docling_document] representation format
 * ↪️ Various [export formats][supported_formats] and options, including Markdown, HTML, WebVTT, [DocTags](https://arxiv.org/abs/2503.11576) and lossless JSON
@@ -50,6 +50,7 @@ Docling simplifies document processing, parsing diverse formats — including ad
 * 💼 Parsing of XBRL (eXtensible Business Reporting Language) documents for financial reports
 * 💬 Parsing of WebVTT (Web Video Text Tracks) files and export to WebVTT format
 * 💬 Parsing of LaTeX files
+* 📝 Parsing of plain-text files (`.txt`, `.text`) and Markdown supersets (`.qmd`, `.Rmd`)

 ### Coming soon

@@ -405,12 +405,78 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):

        return None, None  # If the paragraph is not part of a list

+    def _get_level_element(self, numid: int, ilvl: int) -> Optional[BaseOxmlElement]:
+        """Find the level element from the numbering XML for a given numId and ilvl."""
+        try:
+            if not hasattr(self.docx_obj, "part") or not hasattr(
+                self.docx_obj.part, "package"
+            ):
+                return None
+
+            numbering_part = None
+            for part in self.docx_obj.part.package.parts:
+                if "numbering" in part.partname:
+                    numbering_part = part
+                    break
+
+            if numbering_part is None:
+                return None
+
+            numbering_root = numbering_part.element
+            namespaces = {"w": self._W_NS}
+
+            num_element = numbering_root.find(
+                f".//w:num[@w:numId='{numid}']", namespaces=namespaces
+            )
+            if num_element is None:
+                return None
+
+            abstract_num_id_elem = num_element.find(
+                ".//w:abstractNumId", namespaces=namespaces
+            )
+            if abstract_num_id_elem is None:
+                return None
+
+            abstract_num_id = abstract_num_id_elem.get(self.XML_KEY)
+            if abstract_num_id is None:
+                return None
+
+            abstract_num_element = numbering_root.find(
+                f".//w:abstractNum[@w:abstractNumId='{abstract_num_id}']",
+                namespaces=namespaces,
+            )
+            if abstract_num_element is None:
+                return None
+
+            return abstract_num_element.find(
+                f".//w:lvl[@w:ilvl='{ilvl}']", namespaces=namespaces
+            )
+        except Exception as e:
+            _log.debug(f"Error finding level element: {e}")
+            return None
+
+    def _get_start_value(self, numid: int, ilvl: int) -> int:
+        """Read the start value from the abstractNum definition."""
+        lvl_element = self._get_level_element(numid, ilvl)
+        if lvl_element is not None:
+            namespaces = {"w": self._W_NS}
+            start_element = lvl_element.find(".//w:start", namespaces=namespaces)
+            if start_element is not None:
+                val = start_element.get(self.XML_KEY)
+                if val is not None:
+                    return int(val)
+        return 1
+
    def _get_list_counter(self, numid: int, ilvl: int) -> int:
        """Get and increment the counter for a specific numId and ilvl combination."""
        key = (numid, ilvl)
        if key not in self.list_counters:
-            self.list_counters[key] = 0
+            start = self._get_start_value(numid, ilvl)
+            self.list_counters[key] = start - 1
        self.list_counters[key] += 1
+        # Reset sub-level counters since parent level advanced
+        for k in [k for k in self.list_counters if k[0] == numid and k[1] > ilvl]:
+            self.list_counters[k] = 0
        return self.list_counters[key]

    def _reset_list_counters_for_new_sequence(self, numid: int):
@@ -420,74 +486,30 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
        for key in keys_to_reset:
            self.list_counters[key] = 0

+    def _build_enum_marker(self, numid: int, ilvl: int) -> str:
+        """Build full hierarchical marker like '1.2.3.'"""
+        parts = []
+        for lvl in range(ilvl + 1):
+            counter = self.list_counters.get((numid, lvl))
+            if counter is None:
+                counter = self._get_start_value(numid, lvl)
+            parts.append(str(counter))
+        return ".".join(parts) + "."
+
    def _is_numbered_list(self, numId: int, ilvl: int) -> bool:
        """Check if a list is numbered based on its numFmt value."""
        try:
-            # Access the numbering part of the document
-            if not hasattr(self.docx_obj, "part") or not hasattr(
-                self.docx_obj.part, "package"
-            ):
-                return False
-
-            numbering_part = None
-            # Find the numbering part
-            for part in self.docx_obj.part.package.parts:
-                if "numbering" in part.partname:
-                    numbering_part = part
-                    break
-
-            if numbering_part is None:
-                return False
-
-            # Parse the numbering XML
-            numbering_root = numbering_part.element
-            namespaces = {"w": self._W_NS}
-
-            # Find the numbering definition with the given numId
-            num_xpath = f".//w:num[@w:numId='{numId}']"
-            num_element = numbering_root.find(num_xpath, namespaces=namespaces)
-
-            if num_element is None:
-                return False
-
-            # Get the abstractNumId from the num element
-            abstract_num_id_elem = num_element.find(
-                ".//w:abstractNumId", namespaces=namespaces
-            )
-            if abstract_num_id_elem is None:
-                return False
-
-            abstract_num_id = abstract_num_id_elem.get(f"{self._W_NS_CLARK}val")
-            if abstract_num_id is None:
-                return False
-
-            # Find the abstract numbering definition
-            abstract_num_xpath = (
-                f".//w:abstractNum[@w:abstractNumId='{abstract_num_id}']"
-            )
-            abstract_num_element = numbering_root.find(
-                abstract_num_xpath, namespaces=namespaces
-            )
-
-            if abstract_num_element is None:
-                return False
-
-            # Find the level definition for the given ilvl
-            lvl_xpath = f".//w:lvl[@w:ilvl='{ilvl}']"
-            lvl_element = abstract_num_element.find(lvl_xpath, namespaces=namespaces)
-
+            lvl_element = self._get_level_element(numId, ilvl)
            if lvl_element is None:
                return False

-            # Get the numFmt element
+            namespaces = {"w": self._W_NS}
            num_fmt_element = lvl_element.find(".//w:numFmt", namespaces=namespaces)
            if num_fmt_element is None:
                return False

-            num_fmt = num_fmt_element.get(f"{self._W_NS_CLARK}val")
+            num_fmt = num_fmt_element.get(self.XML_KEY)

-            # Numbered formats include: decimal, lowerRoman, upperRoman, lowerLetter, upperLetter
-            # Bullet formats include: bullet
            numbered_formats = {
                "decimal",
                "lowerRoman",
@@ -884,21 +906,55 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
        only_texts = []
        only_equations = []
        texts_and_equations = []
-        for subt in element.iter():
-            tag_name = etree.QName(subt).localname
-            if tag_name == "t" and "math" not in subt.tag:
-                if isinstance(subt.text, str):
-                    only_texts.append(subt.text)
-                    texts_and_equations.append(subt.text)
-            elif "oMath" in subt.tag and "oMathPara" not in subt.tag:
-                latex_equation = str(oMath2Latex(subt)).strip()
-                if len(latex_equation) > 0:
-                    only_equations.append(
-                        self.equation_bookends.format(EQ=latex_equation)
-                    )
-                    texts_and_equations.append(
-                        self.equation_bookends.format(EQ=latex_equation)
-                    )
+
+        # Collect oMath elements and text runs from the paragraph.
+        # Use direct children iteration first; fall back to deep iteration
+        # only if no oMath elements are found at the direct level.
+        direct_omaths = [
+            child
+            for child in element
+            if "oMath" in child.tag and "oMathPara" not in child.tag
+        ]
+
+        if direct_omaths:
+            # Iterate direct children to preserve sibling order and avoid
+            # processing nested oMath descendants of an already-converted node.
+            for child in element:
+                if "oMath" in child.tag and "oMathPara" not in child.tag:
+                    latex_equation = str(oMath2Latex(child)).strip()
+                    if len(latex_equation) > 0:
+                        only_equations.append(
+                            self.equation_bookends.format(EQ=latex_equation)
+                        )
+                        texts_and_equations.append(
+                            self.equation_bookends.format(EQ=latex_equation)
+                        )
+                else:
+                    # Collect text from non-math children (e.g. <w:r> runs)
+                    for t_elem in child.iter():
+                        t_tag = etree.QName(t_elem).localname
+                        if t_tag == "t" and "math" not in t_elem.tag:
+                            if isinstance(t_elem.text, str):
+                                only_texts.append(t_elem.text)
+                                texts_and_equations.append(t_elem.text)
+        else:
+            # Original deep-iteration fallback for nested oMath (e.g.
+            # inside oMathPara or other wrapper elements).
+            for subt in element.iter():
+                tag_name = etree.QName(subt).localname
+                if tag_name == "t" and "math" not in subt.tag:
+                    if isinstance(subt.text, str):
+                        only_texts.append(subt.text)
+                        texts_and_equations.append(subt.text)
+                elif "oMath" in subt.tag and "oMathPara" not in subt.tag:
+                    latex_equation = str(oMath2Latex(subt)).strip()
+                    if len(latex_equation) > 0:
+                        only_equations.append(
+                            self.equation_bookends.format(EQ=latex_equation)
+                        )
+                        texts_and_equations.append(
+                            self.equation_bookends.format(EQ=latex_equation)
+                        )

        if len(only_equations) < 1:
            return text, []
@@ -1033,15 +1089,28 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
            if (paragraph.text is None or len(paragraph.text.strip()) == 0) and len(
                text
            ) > 0:
-                # Standalone equation
+                # Standalone equation(s) — emit each as a separate formula
                level = self._get_level()
-                t1 = doc.add_text(
-                    label=DocItemLabel.FORMULA,
-                    parent=self.parents[level - 1],
-                    text=text.replace("<eq>", "").replace("</eq>", ""),
-                    content_layer=self.content_layer,
-                )
-                elem_ref.append(t1.get_ref())
+                parent = self.parents[level - 1]
+                if len(equations) > 1:
+                    for eq in equations:
+                        eq_text = eq.replace("<eq>", "").replace("</eq>", "").strip()
+                        if len(eq_text) > 0:
+                            t1 = doc.add_text(
+                                label=DocItemLabel.FORMULA,
+                                parent=parent,
+                                text=eq_text,
+                                content_layer=self.content_layer,
+                            )
+                            elem_ref.append(t1.get_ref())
+                else:
+                    t1 = doc.add_text(
+                        label=DocItemLabel.FORMULA,
+                        parent=parent,
+                        text=text.replace("<eq>", "").replace("</eq>", ""),
+                        content_layer=self.content_layer,
+                    )
+                    elem_ref.append(t1.get_ref())
            else:
                # Inline equation
                level = self._get_level()
@@ -1272,8 +1341,8 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
    ) -> None:
        """Resolve enumeration marker and add a formatted list item."""
        if is_numbered:
-            counter = self._get_list_counter(numid, ilevel)
-            enum_marker = str(counter) + "."
+            self._get_list_counter(numid, ilevel)
+            enum_marker = self._build_enum_marker(numid, ilevel)
        else:
            enum_marker = ""
        self._add_formatted_list_item(doc, elements, enum_marker, is_numbered, level)
@@ -1294,10 +1363,8 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):

        level = self._get_level()
        prev_indent = self._prev_indent()
-        if (
-            self._prev_numid() is None
-            or self._prev_numid() != numid
-            or (self._prev_numid() == numid and self.level_at_new_list is None)
+        if self._prev_numid() is None or (
+            self._prev_numid() == numid and self.level_at_new_list is None
        ):  # Open new list
            self.level_at_new_list = level

@@ -1360,12 +1427,44 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
                self.level_at_new_list + ilevel,
            )

-        elif self._prev_numid() == numid or prev_indent == ilevel:
+        elif self._prev_numid() == numid and isinstance(
+            self.parents.get(level - 1), ListGroup
+        ):
+            # Continue existing list - only if parent is actually a ListGroup
            self._add_list_item_with_marker(
                doc, elements, numid, ilevel, is_numbered, level - 1
            )
-        else:
-            _log.warning("List item not matching any insert condition.")
+        elif self._prev_numid() != numid or not isinstance(
+            self.parents.get(level - 1), ListGroup
+        ):
+            # New list sequence: Different numid OR parent is not a ListGroup
+            # Use anchor-based level to place new list at the correct document position
+            if self.level_at_new_list is not None:
+                use_level = self.level_at_new_list + ilevel
+                for k in list(self.parents.keys()):
+                    if k > use_level:
+                        self.parents[k] = None
+            else:
+                use_level = level
+                self.level_at_new_list = use_level
+
+            list_gr = doc.add_list_group(
+                name="list",
+                parent=self.parents[use_level - 1],
+                content_layer=self.content_layer,
+            )
+            self.parents[use_level] = list_gr
+            elem_ref.append(list_gr.get_ref())
+
+            # Set marker and enumerated arguments if this is an enumeration element.
+            if is_numbered:
+                self._get_list_counter(numid, ilevel)
+                enum_marker = self._build_enum_marker(numid, ilevel)
+            else:
+                enum_marker = ""
+            self._add_formatted_list_item(
+                doc, elements, enum_marker, is_numbered, use_level
+            )
        return elem_ref

    @staticmethod
@@ -375,8 +375,9 @@ class PyPdfiumPageBackend(PdfPageBackend):
            return Size(width=self._ppage.get_width(), height=self._ppage.get_height())

    def unload(self):
-        self._ppage = None
-        self.text_page = None
+        with pypdfium2_lock:
+            self._ppage = None
+            self.text_page = None


 class PyPdfiumDocumentBackend(PdfDocumentBackend):
@@ -370,6 +370,25 @@ def _split_list(raw: str | None) -> list[str] | None:
    return re.split(r"[;,]", raw)


+_OUTPUT_FORMATS_NOT_SUPPORTING_IMAGE_EMBEDDING = frozenset(
+    {
+        OutputFormat.TEXT,
+        OutputFormat.DOCTAGS,
+        OutputFormat.VTT,
+    }
+)
+
+
+def _should_generate_export_images(
+    image_export_mode: ImageRefMode,
+    to_formats: list[OutputFormat],
+) -> bool:
+    return image_export_mode != ImageRefMode.PLACEHOLDER and any(
+        to_format not in _OUTPUT_FORMATS_NOT_SUPPORTING_IMAGE_EMBEDDING
+        for to_format in to_formats
+    )
+
+
@app.command(no_args_is_help=True)
 def convert(  # noqa: C901
    input_sources: Annotated[
@@ -404,7 +423,7 @@ def convert(  # noqa: C901
        ImageRefMode,
        typer.Option(
            ...,
-            help="Image export mode for the document (only in case of JSON, Markdown or HTML). With `placeholder`, only the position of the image is marked in the output. In `embedded` mode, the image is embedded as base64 encoded string. In `referenced` mode, the image is exported in PNG format and referenced from the main exported document.",
+            help="Image export mode for image-capable document outputs (JSON, YAML, HTML, HTML split-page, and Markdown). Text, DocTags, and WebVTT outputs do not export images. With `placeholder`, only the position of the image is marked in the output. In `embedded` mode, the image is embedded as base64 encoded string. In `referenced` mode, the image is exported in PNG format and referenced from the main exported document.",
        ),
    ] = ImageRefMode.EMBEDDED,
    pipeline: Annotated[
@@ -750,7 +769,10 @@ def convert(  # noqa: C901
                )
                pipeline_options.table_structure_options.mode = table_mode

-            if image_export_mode != ImageRefMode.PLACEHOLDER:
+            if _should_generate_export_images(
+                image_export_mode,
+                to_formats,
+            ):
                pipeline_options.generate_page_images = True
                pipeline_options.generate_picture_images = (
                    True  # FIXME: to be deprecated in version 3
@@ -37,7 +37,7 @@ from docling.datamodel.pipeline_options import PipelineOptions
 class BaseFormatOption(BaseModel):
    """Base class for format options used by _DocumentConversionInput."""

-    pipeline_options: Optional[PipelineOptions] = None
+    pipeline_options: PipelineOptions | None = None
    backend: Type[AbstractDocumentBackend]

    model_config = ConfigDict(arbitrary_types_allowed=True)
@@ -89,7 +89,7 @@ FormatToExtensions: dict[InputFormat, list[str]] = {
    InputFormat.DOCX: ["docx", "dotx", "docm", "dotm"],
    InputFormat.PPTX: ["pptx", "potx", "ppsx", "pptm", "potm", "ppsm"],
    InputFormat.PDF: ["pdf"],
-    InputFormat.MD: ["md"],
+    InputFormat.MD: ["md", "txt", "text", "qmd", "rmd", "Rmd"],
    InputFormat.HTML: ["html", "htm", "xhtml"],
    InputFormat.XML_JATS: ["xml", "nxml"],
    InputFormat.XML_XBRL: ["xml", "xbrl"],
@@ -128,7 +128,7 @@ FormatToMimeType: dict[InputFormat, list[str]] = {
    ],
    InputFormat.PDF: ["application/pdf"],
    InputFormat.ASCIIDOC: ["text/asciidoc"],
-    InputFormat.MD: ["text/markdown", "text/x-markdown"],
+    InputFormat.MD: ["text/markdown", "text/x-markdown", "text/plain"],
    InputFormat.CSV: ["text/csv"],
    InputFormat.XLSX: [
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
@@ -180,6 +180,7 @@ class VlmStopReason(str, Enum):
    LENGTH = "length"  # max tokens reached
    STOP_SEQUENCE = "stop_sequence"  # Custom stopping criteria met
    END_OF_SEQUENCE = "end_of_sequence"  # Model generated end-of-text token
+    CONTENT_FILTERED = "content_filter"  # Content filtered by API provider
    UNSPECIFIED = "unspecified"  # Default none value


@@ -207,7 +208,7 @@ class BasePageElement(BaseModel):
    id: int
    page_no: int
    cluster: Cluster
-    text: Optional[str] = None
+    text: str | None = None


 class LayoutPrediction(BaseModel):
@@ -224,9 +225,9 @@ class VlmPrediction(BaseModel):
    text: str = ""
    generated_tokens: list[VlmPredictionToken] = []
    generation_time: float = -1
-    num_tokens: Optional[int] = None
+    num_tokens: int | None = None
    stop_reason: VlmStopReason = VlmStopReason.UNSPECIFIED
-    input_prompt: Optional[str] = None
+    input_prompt: str | None = None


 class ContainerElement(
@@ -252,14 +253,14 @@ class TextElement(BasePageElement):

 class FigureElement(BasePageElement):
    annotations: list[PictureDataType] = []
-    provenance: Optional[str] = None
-    predicted_class: Optional[str] = None
-    confidence: Optional[float] = None
+    provenance: str | None = None
+    predicted_class: str | None = None
+    confidence: float | None = None

    @field_serializer("confidence")
    def _serialize(
-        self, value: Optional[float], info: FieldSerializationInfo
-    ) -> Optional[float]:
+        self, value: float | None, info: FieldSerializationInfo
+    ) -> float | None:
        return (
            round_pydantic_float(value, info.context, PydanticSerCtxKey.CONFID_PREC)
            if value is not None
@@ -278,11 +279,11 @@ class EquationPrediction(BaseModel):


 class PagePredictions(BaseModel):
-    layout: Optional[LayoutPrediction] = None
-    tablestructure: Optional[TableStructurePrediction] = None
-    figures_classification: Optional[FigureClassificationPrediction] = None
-    equations_prediction: Optional[EquationPrediction] = None
-    vlm_response: Optional[VlmPrediction] = None
+    layout: LayoutPrediction | None = None
+    tablestructure: TableStructurePrediction | None = None
+    figures_classification: FigureClassificationPrediction | None = None
+    equations_prediction: EquationPrediction | None = None
+    vlm_response: VlmPrediction | None = None


 PageElement = Union[TextElement, Table, FigureElement, ContainerElement]
@@ -306,10 +307,10 @@ class Page(BaseModel):

    page_no: int
    # page_hash: Optional[str] = None
-    size: Optional[Size] = None
-    parsed_page: Optional[SegmentedPdfPage] = None
+    size: Size | None = None
+    parsed_page: SegmentedPdfPage | None = None
    predictions: PagePredictions = PagePredictions()
-    assembled: Optional[AssembledUnit] = None
+    assembled: AssembledUnit | None = None

    _backend: Optional["PdfPageBackend"] = (
        None  # Internal PDF backend. By default it is cleared during assembling.
@@ -330,9 +331,9 @@ class Page(BaseModel):
    def get_image(
        self,
        scale: float = 1.0,
-        max_size: Optional[int] = None,
-        cropbox: Optional[BoundingBox] = None,
-    ) -> Optional[Image]:
+        max_size: int | None = None,
+        cropbox: BoundingBox | None = None,
+    ) -> Image | None:
        if self._backend is None:
            return self._image_cache.get(scale, None)

@@ -358,7 +359,7 @@ class Page(BaseModel):
            )

    @property
-    def image(self) -> Optional[Image]:
+    def image(self) -> Image | None:
        return self.get_image(scale=self._default_image_scale)


@@ -373,7 +374,7 @@ class OpenAiChatMessage(BaseModel):
 class OpenAiResponseChoice(BaseModel):
    index: int
    message: OpenAiChatMessage
-    finish_reason: Optional[str]
+    finish_reason: str | None


 class OpenAiResponseUsage(BaseModel):
@@ -388,7 +389,7 @@ class OpenAiApiResponse(BaseModel):
    )

    id: str
-    model: Optional[str] = None  # returned by openai
+    model: str | None = None  # returned by openai
    choices: list[OpenAiResponseChoice]
    created: int
    usage: OpenAiResponseUsage
@@ -494,7 +494,7 @@ class _DocumentConversionInput(BaseModel):
            if mime is None:
                ext = obj.suffix[1:]
                mime = _DocumentConversionInput._mime_from_extension(ext)
-            if mime is None:  # must guess from
+            if mime is None:  # must guess from content
                with obj.open("rb") as f:
                    content = f.read(1024)  # Read first 1KB
            if mime is not None and mime.lower() == "application/zip":
@@ -624,9 +624,11 @@ class _DocumentConversionInput(BaseModel):
                    input_format = InputFormat.XML_JATS

        elif mime == "text/plain":
-            content_str = content.decode("utf-8")
+            content_str = content.decode("utf-8", errors="replace")
            if InputFormat.XML_USPTO in formats and content_str.startswith("PATN\r\n"):
                input_format = InputFormat.XML_USPTO
+            # No MD fallback: unrecognised text/plain content returns None.
+            # MD is detected via text/markdown mime (from .md/.text/.qmd/… extensions).

        return input_format

@@ -637,6 +639,14 @@ class _DocumentConversionInput(BaseModel):
            mime = FormatToMimeType[InputFormat.ASCIIDOC][0]
        elif ext in FormatToExtensions[InputFormat.HTML]:
            mime = FormatToMimeType[InputFormat.HTML][0]
+        elif (
+            ext in FormatToExtensions[InputFormat.XML_USPTO]
+            and ext in FormatToExtensions[InputFormat.MD]
+        ):
+            # "txt" appears in both XML_USPTO and MD extension lists.  Leave mime=None
+            # so the content-probing chain (_detect_html_xhtml, _detect_csv, then the
+            # "text/plain" fallback + _guess_from_content) can pick the right format.
+            pass
        elif ext in FormatToExtensions[InputFormat.MD]:
            mime = FormatToMimeType[InputFormat.MD][0]
        elif ext in FormatToExtensions[InputFormat.CSV]:
@@ -536,19 +536,21 @@ class PictureDescriptionBaseOptions(BaseOptions):
    batch_size: Annotated[
        int,
        Field(
+            ge=1,
            description=(
                "Number of images to process in a single batch during picture description. Higher values improve "
                "throughput but increase memory usage. Adjust based on available GPU/CPU memory."
-            )
+            ),
        ),
    ] = 8
    scale: Annotated[
        float,
        Field(
+            gt=0,
            description=(
                "Scaling factor for image resolution before processing. Higher values (e.g., 2.0) provide more detail "
                "for the vision model but increase processing time and memory. Range: 0.5-4.0 typical."
-            )
+            ),
        ),
    ] = 2.0
    picture_area_threshold: Annotated[
@@ -715,6 +717,15 @@ class PictureDescriptionVlmOptions(PictureDescriptionBaseOptions):
            )
        ),
    ] = {"max_new_tokens": 200, "do_sample": False}
+    padding_side: Annotated[
+        Literal["left", "right"],
+        Field(
+            description=(
+                "Tokenizer padding side used for batched generation. Defaults to left to preserve the legacy "
+                "behavior, but can be overridden for models that require right padding."
+            )
+        ),
+    ] = "left"

    @property
    def repo_cache_folder(self) -> str:
@@ -122,9 +122,10 @@ class ApiVlmEngine(BaseVlmEngine):
            images = preprocess_image_batch([input_data.image])
            image = images[0]

-            # Prepare API parameters (use merged params which include model spec params)
-            api_params = {
-                **self.merged_params,
+            # Prepare API parameters: engine defaults first, then user/model
+            # params override. This allows users to set Azure-specific params
+            # like max_completion_tokens or override temperature (#3112).
+            api_params: dict[str, object] = {
                "temperature": input_data.temperature,
            }

@@ -132,6 +133,14 @@ class ApiVlmEngine(BaseVlmEngine):
            if input_data.max_new_tokens:
                api_params["max_tokens"] = input_data.max_new_tokens

+            # User/model spec params take precedence over engine defaults
+            api_params.update(self.merged_params)
+
+            # If user specified max_completion_tokens, remove conflicting
+            # max_tokens (required for Azure OpenAI compatibility)
+            if "max_completion_tokens" in api_params:
+                api_params.pop("max_tokens", None)
+
            # Add stop strings if specified
            if input_data.stop_strings:
                api_params["stop"] = input_data.stop_strings
@@ -39,9 +39,16 @@ class PictureDescriptionBaseModel(
        options: PictureDescriptionBaseOptions,
        accelerator_options: AcceleratorOptions,
    ):
+        if options.batch_size < 1:
+            raise ValueError("Picture description batch_size must be >= 1")
+        if options.scale <= 0:
+            raise ValueError("Picture description scale must be > 0")
+
        self.enabled = enabled
        self.options = options
        self.provenance = "not-implemented"
+        self.elements_batch_size = options.batch_size
+        self.images_scale = options.scale

    def is_processable(self, doc: DoclingDocument, element: NodeItem) -> bool:
        return self.enabled and isinstance(element, PictureItem)
@@ -57,7 +57,6 @@ class PictureDescriptionVlmModel(
                import torch
                from transformers import (
                    AutoModelForImageTextToText,
-                    AutoModelForVision2Seq,
                    AutoProcessor,
                )
            except ImportError:
@@ -68,6 +67,9 @@ class PictureDescriptionVlmModel(
            # Initialize processor and model
            with _model_init_lock:
                self.processor = AutoProcessor.from_pretrained(artifacts_path)
+                tokenizer = getattr(self.processor, "tokenizer", None)
+                if tokenizer is not None:
+                    tokenizer.padding_side = self.options.padding_side
                self.model = AutoModelForImageTextToText.from_pretrained(
                    artifacts_path,
                    device_map=self.device,
@@ -89,6 +91,10 @@ class PictureDescriptionVlmModel(
    def _annotate_images(self, images: Iterable[Image.Image]) -> Iterable[str]:
        from transformers import GenerationConfig

+        image_batch = list(images)
+        if not image_batch:
+            return
+
        # Create input messages
        messages = [
            {
@@ -100,24 +106,25 @@ class PictureDescriptionVlmModel(
            },
        ]

-        # TODO: do batch generation
+        prompt = self.processor.apply_chat_template(
+            messages, add_generation_prompt=True
+        )
+        inputs = self.processor(
+            text=[prompt] * len(image_batch),
+            images=image_batch,
+            return_tensors="pt",
+            padding=True,
+        )
+        inputs = inputs.to(self.device)

-        for image in images:
-            # Prepare inputs
-            prompt = self.processor.apply_chat_template(
-                messages, add_generation_prompt=True
-            )
-            inputs = self.processor(text=prompt, images=[image], return_tensors="pt")
-            inputs = inputs.to(self.device)
+        generated_ids = self.model.generate(
+            **inputs,
+            generation_config=GenerationConfig(**self.options.generation_config),
+        )
+        generated_texts = self.processor.batch_decode(
+            generated_ids[:, inputs["input_ids"].shape[1] :],
+            skip_special_tokens=True,
+        )

-            # Generate outputs
-            generated_ids = self.model.generate(
-                **inputs,
-                generation_config=GenerationConfig(**self.options.generation_config),
-            )
-            generated_texts = self.processor.batch_decode(
-                generated_ids[:, inputs["input_ids"].shape[1] :],
-                skip_special_tokens=True,
-            )
-
-            yield generated_texts[0].strip()
+        for text in generated_texts:
+            yield text.strip()
@@ -4,7 +4,7 @@ import threading
 import time
 from collections.abc import Iterable
 from pathlib import Path
-from typing import Optional, Union
+from typing import Union

 import numpy as np
 from PIL.Image import Image
@@ -39,7 +39,7 @@ class HuggingFaceMlxModel(BaseVlmPageModel, HuggingFaceModelDownloadMixin):
    def __init__(
        self,
        enabled: bool,
-        artifacts_path: Optional[Path],
+        artifacts_path: Path | None,
        accelerator_options: AcceleratorOptions,
        vlm_options: InlineVlmOptions,
    ):
@@ -19,9 +19,9 @@ def api_image_request(
    prompt: str,
    url: AnyUrl,
    timeout: float = 20,
-    headers: Optional[dict[str, str]] = None,
+    headers: dict[str, str] | None = None,
    **params,
-) -> Tuple[str, Optional[int], VlmStopReason]:
+) -> Tuple[str, int | None, VlmStopReason]:
    img_io = BytesIO()
    image = (
        image.copy()
@@ -77,11 +77,14 @@ def api_image_request(
            api_resp = OpenAiApiResponse.model_validate_json(r.text)
            generated_text = api_resp.choices[0].message.content.strip()
            num_tokens = api_resp.usage.total_tokens
-            stop_reason = (
-                VlmStopReason.LENGTH
-                if api_resp.choices[0].finish_reason == "length"
-                else VlmStopReason.END_OF_SEQUENCE
-            )
+            finish_reason = api_resp.choices[0].finish_reason
+            if finish_reason == "content_filter":
+                _log.warning("API response was filtered due to content safety policy.")
+                stop_reason = VlmStopReason.CONTENT_FILTERED
+            elif finish_reason == "length":
+                stop_reason = VlmStopReason.LENGTH
+            else:
+                stop_reason = VlmStopReason.END_OF_SEQUENCE

            return generated_text, num_tokens, stop_reason
        except Exception as e:
@@ -97,10 +100,10 @@ def api_image_request_streaming(
    url: AnyUrl,
    *,
    timeout: float = 20,
-    headers: Optional[dict[str, str]] = None,
+    headers: dict[str, str] | None = None,
    generation_stoppers: list[GenerationStopper] = [],
    **params,
-) -> Tuple[str, Optional[int]]:
+) -> Tuple[str, int | None]:
    """
    Stream a chat completion from an OpenAI-compatible server (e.g., vLLM).
    Parses SSE lines: 'data: {json}\\n\\n', terminated by 'data: [DONE]'.
@@ -318,8 +318,9 @@ def run_vllm_example(input_doc_path: Path) -> bool:
            url="http://localhost:8000/v1/chat/completions",
            params={
                "model": "ibm-granite/granite-docling-258M",
-                "max_tokens": 4096,
-                "skip_special_tokens": True,
+                "temperature": 0.0,
+                "max_tokens": 8192,
+                "skip_special_tokens": False,
            },
            timeout=90,
        ),
@@ -35,7 +35,7 @@ Docling simplifies document processing, parsing diverse formats — including ad

 ## Features

-* 🗂️ Parsing of [multiple document formats][supported_formats] incl. PDF, DOCX, PPTX, XLSX, HTML, WAV, MP3, WebVTT, images (PNG, TIFF, JPEG, ...), LaTeX, and more
+* 🗂️ Parsing of [multiple document formats][supported_formats] incl. PDF, DOCX, PPTX, XLSX, HTML, WAV, MP3, WebVTT, images (PNG, TIFF, JPEG, ...), LaTeX, plain text, and more
 * 📑 Advanced PDF understanding incl. page layout, reading order, table structure, code, formulas, image classification, and more
 * 🧬 Unified, expressive [DoclingDocument][docling_document] representation format
 * ↪️ Various [export formats][supported_formats] and options, including Markdown, HTML, WebVTT, [DocTags](https://arxiv.org/abs/2503.11576) and lossless JSON
@@ -55,6 +55,7 @@ Docling simplifies document processing, parsing diverse formats — including ad
 * 💼 Parsing of XBRL (eXtensible Business Reporting Language) documents for financial reports
 * 💬 Parsing of WebVTT (Web Video Text Tracks) files
 * 💬 Parsing of LaTeX files
+* 📝 Parsing of plain-text files (`.txt`, `.text`) and Markdown supersets (`.qmd`, `.Rmd`)

 ### Coming soon

@@ -1,6 +1,6 @@
 [project]
 name = "docling"
-version = "2.80.0"  # DO NOT EDIT, updated automatically
+version = "2.81.0"  # DO NOT EDIT, updated automatically
 description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
 license = "MIT"
 keywords = [
@@ -0,0 +1,7 @@
+item-0 at level 0: unspecified: group _root_
+  item-1 at level 1: section: group header-0
+    item-2 at level 2: section_header: Issue 3: Concatenated equation blocks
+      item-3 at level 3: text: The paragraph below contains thr ... ts are siblings inside a single <w:p>.
+      item-4 at level 3: formula: a=b
+      item-5 at level 3: formula: c=d
+      item-6 at level 3: formula: e=f
@@ -0,0 +1,132 @@
+{
+  "schema_name": "DoclingDocument",
+  "version": "1.9.0",
+  "name": "omml_multi_equation_paragraph",
+  "origin": {
+    "mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+    "binary_hash": 17520448227351822398,
+    "filename": "omml_multi_equation_paragraph.docx"
+  },
+  "furniture": {
+    "self_ref": "#/furniture",
+    "children": [],
+    "content_layer": "furniture",
+    "name": "_root_",
+    "label": "unspecified"
+  },
+  "body": {
+    "self_ref": "#/body",
+    "children": [
+      {
+        "$ref": "#/groups/0"
+      }
+    ],
+    "content_layer": "body",
+    "name": "_root_",
+    "label": "unspecified"
+  },
+  "groups": [
+    {
+      "self_ref": "#/groups/0",
+      "parent": {
+        "$ref": "#/body"
+      },
+      "children": [
+        {
+          "$ref": "#/texts/0"
+        }
+      ],
+      "content_layer": "body",
+      "name": "header-0",
+      "label": "section"
+    }
+  ],
+  "texts": [
+    {
+      "self_ref": "#/texts/0",
+      "parent": {
+        "$ref": "#/groups/0"
+      },
+      "children": [
+        {
+          "$ref": "#/texts/1"
+        },
+        {
+          "$ref": "#/texts/2"
+        },
+        {
+          "$ref": "#/texts/3"
+        },
+        {
+          "$ref": "#/texts/4"
+        }
+      ],
+      "content_layer": "body",
+      "label": "section_header",
+      "prov": [],
+      "orig": "Issue 3: Concatenated equation blocks",
+      "text": "Issue 3: Concatenated equation blocks",
+      "level": 1
+    },
+    {
+      "self_ref": "#/texts/1",
+      "parent": {
+        "$ref": "#/texts/0"
+      },
+      "children": [],
+      "content_layer": "body",
+      "label": "text",
+      "prov": [],
+      "orig": "The paragraph below contains three separate <m:oMath> elements.\nExpected: three separate $$ blocks ($$a = b$$, $$c = d$$, $$e = f$$)\nDocling produces: one $$ block with all equations concatenated.\n\nAll three <m:oMath> elements are siblings inside a single <w:p>.",
+      "text": "The paragraph below contains three separate <m:oMath> elements.\nExpected: three separate $$ blocks ($$a = b$$, $$c = d$$, $$e = f$$)\nDocling produces: one $$ block with all equations concatenated.\n\nAll three <m:oMath> elements are siblings inside a single <w:p>.",
+      "formatting": {
+        "bold": false,
+        "italic": false,
+        "underline": false,
+        "strikethrough": false,
+        "script": "baseline"
+      }
+    },
+    {
+      "self_ref": "#/texts/2",
+      "parent": {
+        "$ref": "#/texts/0"
+      },
+      "children": [],
+      "content_layer": "body",
+      "label": "formula",
+      "prov": [],
+      "orig": "a=b",
+      "text": "a=b"
+    },
+    {
+      "self_ref": "#/texts/3",
+      "parent": {
+        "$ref": "#/texts/0"
+      },
+      "children": [],
+      "content_layer": "body",
+      "label": "formula",
+      "prov": [],
+      "orig": "c=d",
+      "text": "c=d"
+    },
+    {
+      "self_ref": "#/texts/4",
+      "parent": {
+        "$ref": "#/texts/0"
+      },
+      "children": [],
+      "content_layer": "body",
+      "label": "formula",
+      "prov": [],
+      "orig": "e=f",
+      "text": "e=f"
+    }
+  ],
+  "pictures": [],
+  "tables": [],
+  "key_value_items": [],
+  "form_items": [],
+  "pages": {}
+}
@@ -0,0 +1,13 @@
+## Issue 3: Concatenated equation blocks
+
+The paragraph below contains three separate &lt;m:oMath&gt; elements.
+Expected: three separate $$ blocks ($$a = b$$, $$c = d$$, $$e = f$$)
+Docling produces: one $$ block with all equations concatenated.
+
+All three &lt;m:oMath&gt; elements are siblings inside a single &lt;w:p&gt;.
+
+$$a=b$$
+
+$$c=d$$
+
+$$e=f$$
@@ -49,4 +49,25 @@ item-0 at level 0: unspecified: group _root_
        item-48 at level 4: text: 
        item-49 at level 4: text: Paragraph 2.1.2
        item-50 at level 4: text: 
-        item-51 at level 4: text: 
+        item-51 at level 4: text: 
+        item-52 at level 4: text: 
+        item-53 at level 4: list: group list
+          item-54 at level 5: list_item: Appendix A: Glossary
+          item-55 at level 5: list: group list
+            item-56 at level 6: list_item: Section A.1
+            item-57 at level 6: list: group list
+              item-58 at level 7: list_item: Detail A.1.1
+            item-59 at level 6: list: group list
+              item-60 at level 7: list_item: Hardware Constraints – Egde Case
+              item-61 at level 7: list_item: Software Constraints
+              item-62 at level 7: list_item: Network Constraints
+              item-63 at level 7: list_item: Environmental Constraints
+              item-64 at level 7: list_item: Regulatory Constraints
+              item-65 at level 7: list_item: Budget Constraints
+              item-66 at level 7: list_item: Timeline Constraints
+              item-67 at level 7: list_item: Resource Constraints
+              item-68 at level 7: list: group list
+                item-69 at level 8: list_item: First sub-item at this level
+                item-70 at level 8: list_item: Second sub-item at this level
+                item-71 at level 8: list_item: Third sub-item at this level
+        item-72 at level 4: text: 
@@ -4,7 +4,7 @@
  "name": "unit_test_headers_numbered",
  "origin": {
    "mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
-    "binary_hash": 7684538628968220703,
+    "binary_hash": 5429064773624687111,
    "filename": "unit_test_headers_numbered.docx"
  },
  "furniture": {
@@ -140,6 +140,115 @@
      "content_layer": "body",
      "name": "header-2",
      "label": "section"
+    },
+    {
+      "self_ref": "#/groups/5",
+      "parent": {
+        "$ref": "#/texts/39"
+      },
+      "children": [
+        {
+          "$ref": "#/texts/47"
+        },
+        {
+          "$ref": "#/groups/6"
+        }
+      ],
+      "content_layer": "body",
+      "name": "list",
+      "label": "list"
+    },
+    {
+      "self_ref": "#/groups/6",
+      "parent": {
+        "$ref": "#/groups/5"
+      },
+      "children": [
+        {
+          "$ref": "#/texts/48"
+        },
+        {
+          "$ref": "#/groups/7"
+        },
+        {
+          "$ref": "#/groups/8"
+        }
+      ],
+      "content_layer": "body",
+      "name": "list",
+      "label": "list"
+    },
+    {
+      "self_ref": "#/groups/7",
+      "parent": {
+        "$ref": "#/groups/6"
+      },
+      "children": [
+        {
+          "$ref": "#/texts/49"
+        }
+      ],
+      "content_layer": "body",
+      "name": "list",
+      "label": "list"
+    },
+    {
+      "self_ref": "#/groups/8",
+      "parent": {
+        "$ref": "#/groups/6"
+      },
+      "children": [
+        {
+          "$ref": "#/texts/50"
+        },
+        {
+          "$ref": "#/texts/51"
+        },
+        {
+          "$ref": "#/texts/52"
+        },
+        {
+          "$ref": "#/texts/53"
+        },
+        {
+          "$ref": "#/texts/54"
+        },
+        {
+          "$ref": "#/texts/55"
+        },
+        {
+          "$ref": "#/texts/56"
+        },
+        {
+          "$ref": "#/texts/57"
+        },
+        {
+          "$ref": "#/groups/9"
+        }
+      ],
+      "content_layer": "body",
+      "name": "list",
+      "label": "list"
+    },
+    {
+      "self_ref": "#/groups/9",
+      "parent": {
+        "$ref": "#/groups/8"
+      },
+      "children": [
+        {
+          "$ref": "#/texts/58"
+        },
+        {
+          "$ref": "#/texts/59"
+        },
+        {
+          "$ref": "#/texts/60"
+        }
+      ],
+      "content_layer": "body",
+      "name": "list",
+      "label": "list"
    }
  ],
  "texts": [
@@ -801,6 +910,15 @@
        },
        {
          "$ref": "#/texts/45"
+        },
+        {
+          "$ref": "#/texts/46"
+        },
+        {
+          "$ref": "#/groups/5"
+        },
+        {
+          "$ref": "#/texts/61"
        }
      ],
      "content_layer": "body",
@@ -895,6 +1013,324 @@
      "prov": [],
      "orig": "",
      "text": ""
+    },
+    {
+      "self_ref": "#/texts/46",
+      "parent": {
+        "$ref": "#/texts/39"
+      },
+      "children": [],
+      "content_layer": "body",
+      "label": "text",
+      "prov": [],
+      "orig": "",
+      "text": ""
+    },
+    {
+      "self_ref": "#/texts/47",
+      "parent": {
+        "$ref": "#/groups/5"
+      },
+      "children": [],
+      "content_layer": "body",
+      "label": "list_item",
+      "prov": [],
+      "orig": "Appendix A: Glossary",
+      "text": "Appendix A: Glossary",
+      "formatting": {
+        "bold": false,
+        "italic": false,
+        "underline": false,
+        "strikethrough": false,
+        "script": "baseline"
+      },
+      "enumerated": true,
+      "marker": "1."
+    },
+    {
+      "self_ref": "#/texts/48",
+      "parent": {
+        "$ref": "#/groups/6"
+      },
+      "children": [],
+      "content_layer": "body",
+      "label": "list_item",
+      "prov": [],
+      "orig": "Section A.1",
+      "text": "Section A.1",
+      "formatting": {
+        "bold": false,
+        "italic": false,
+        "underline": false,
+        "strikethrough": false,
+        "script": "baseline"
+      },
+      "enumerated": true,
+      "marker": "1.1."
+    },
+    {
+      "self_ref": "#/texts/49",
+      "parent": {
+        "$ref": "#/groups/7"
+      },
+      "children": [],
+      "content_layer": "body",
+      "label": "list_item",
+      "prov": [],
+      "orig": "Detail A.1.1",
+      "text": "Detail A.1.1",
+      "formatting": {
+        "bold": false,
+        "italic": false,
+        "underline": false,
+        "strikethrough": false,
+        "script": "baseline"
+      },
+      "enumerated": true,
+      "marker": "1.1.1."
+    },
+    {
+      "self_ref": "#/texts/50",
+      "parent": {
+        "$ref": "#/groups/8"
+      },
+      "children": [],
+      "content_layer": "body",
+      "label": "list_item",
+      "prov": [],
+      "orig": "Hardware Constraints – Egde Case",
+      "text": "Hardware Constraints – Egde Case",
+      "formatting": {
+        "bold": false,
+        "italic": false,
+        "underline": false,
+        "strikethrough": false,
+        "script": "baseline"
+      },
+      "enumerated": true,
+      "marker": "2.3.1."
+    },
+    {
+      "self_ref": "#/texts/51",
+      "parent": {
+        "$ref": "#/groups/8"
+      },
+      "children": [],
+      "content_layer": "body",
+      "label": "list_item",
+      "prov": [],
+      "orig": "Software Constraints",
+      "text": "Software Constraints",
+      "formatting": {
+        "bold": false,
+        "italic": false,
+        "underline": false,
+        "strikethrough": false,
+        "script": "baseline"
+      },
+      "enumerated": true,
+      "marker": "2.3.2."
+    },
+    {
+      "self_ref": "#/texts/52",
+      "parent": {
+        "$ref": "#/groups/8"
+      },
+      "children": [],
+      "content_layer": "body",
+      "label": "list_item",
+      "prov": [],
+      "orig": "Network Constraints",
+      "text": "Network Constraints",
+      "formatting": {
+        "bold": false,
+        "italic": false,
+        "underline": false,
+        "strikethrough": false,
+        "script": "baseline"
+      },
+      "enumerated": true,
+      "marker": "2.3.3."
+    },
+    {
+      "self_ref": "#/texts/53",
+      "parent": {
+        "$ref": "#/groups/8"
+      },
+      "children": [],
+      "content_layer": "body",
+      "label": "list_item",
+      "prov": [],
+      "orig": "Environmental Constraints",
+      "text": "Environmental Constraints",
+      "formatting": {
+        "bold": false,
+        "italic": false,
+        "underline": false,
+        "strikethrough": false,
+        "script": "baseline"
+      },
+      "enumerated": true,
+      "marker": "2.3.4."
+    },
+    {
+      "self_ref": "#/texts/54",
+      "parent": {
+        "$ref": "#/groups/8"
+      },
+      "children": [],
+      "content_layer": "body",
+      "label": "list_item",
+      "prov": [],
+      "orig": "Regulatory Constraints",
+      "text": "Regulatory Constraints",
+      "formatting": {
+        "bold": false,
+        "italic": false,
+        "underline": false,
+        "strikethrough": false,
+        "script": "baseline"
+      },
+      "enumerated": true,
+      "marker": "2.3.5."
+    },
+    {
+      "self_ref": "#/texts/55",
+      "parent": {
+        "$ref": "#/groups/8"
+      },
+      "children": [],
+      "content_layer": "body",
+      "label": "list_item",
+      "prov": [],
+      "orig": "Budget Constraints",
+      "text": "Budget Constraints",
+      "formatting": {
+        "bold": false,
+        "italic": false,
+        "underline": false,
+        "strikethrough": false,
+        "script": "baseline"
+      },
+      "enumerated": true,
+      "marker": "2.3.6."
+    },
+    {
+      "self_ref": "#/texts/56",
+      "parent": {
+        "$ref": "#/groups/8"
+      },
+      "children": [],
+      "content_layer": "body",
+      "label": "list_item",
+      "prov": [],
+      "orig": "Timeline Constraints",
+      "text": "Timeline Constraints",
+      "formatting": {
+        "bold": false,
+        "italic": false,
+        "underline": false,
+        "strikethrough": false,
+        "script": "baseline"
+      },
+      "enumerated": true,
+      "marker": "2.3.7."
+    },
+    {
+      "self_ref": "#/texts/57",
+      "parent": {
+        "$ref": "#/groups/8"
+      },
+      "children": [],
+      "content_layer": "body",
+      "label": "list_item",
+      "prov": [],
+      "orig": "Resource Constraints",
+      "text": "Resource Constraints",
+      "formatting": {
+        "bold": false,
+        "italic": false,
+        "underline": false,
+        "strikethrough": false,
+        "script": "baseline"
+      },
+      "enumerated": true,
+      "marker": "2.3.8."
+    },
+    {
+      "self_ref": "#/texts/58",
+      "parent": {
+        "$ref": "#/groups/9"
+      },
+      "children": [],
+      "content_layer": "body",
+      "label": "list_item",
+      "prov": [],
+      "orig": "First sub-item at this level",
+      "text": "First sub-item at this level",
+      "formatting": {
+        "bold": false,
+        "italic": false,
+        "underline": false,
+        "strikethrough": false,
+        "script": "baseline"
+      },
+      "enumerated": true,
+      "marker": "2.3.8.1."
+    },
+    {
+      "self_ref": "#/texts/59",
+      "parent": {
+        "$ref": "#/groups/9"
+      },
+      "children": [],
+      "content_layer": "body",
+      "label": "list_item",
+      "prov": [],
+      "orig": "Second sub-item at this level",
+      "text": "Second sub-item at this level",
+      "formatting": {
+        "bold": false,
+        "italic": false,
+        "underline": false,
+        "strikethrough": false,
+        "script": "baseline"
+      },
+      "enumerated": true,
+      "marker": "2.3.8.2."
+    },
+    {
+      "self_ref": "#/texts/60",
+      "parent": {
+        "$ref": "#/groups/9"
+      },
+      "children": [],
+      "content_layer": "body",
+      "label": "list_item",
+      "prov": [],
+      "orig": "Third sub-item at this level",
+      "text": "Third sub-item at this level",
+      "formatting": {
+        "bold": false,
+        "italic": false,
+        "underline": false,
+        "strikethrough": false,
+        "script": "baseline"
+      },
+      "enumerated": true,
+      "marker": "2.3.8.3."
+    },
+    {
+      "self_ref": "#/texts/61",
+      "parent": {
+        "$ref": "#/texts/39"
+      },
+      "children": [],
+      "content_layer": "body",
+      "label": "text",
+      "prov": [],
+      "orig": "",
+      "text": ""
    }
  ],
  "pictures": [],
@@ -40,4 +40,19 @@ Paragraph 2.1.1.1

 Paragraph 2.1.1

-Paragraph 2.1.2
+Paragraph 2.1.2
+
+1. Appendix A: Glossary
+    - 1.1. Section A.1
+        - 1.1.1. Detail A.1.1
+        - 2.3.1. Hardware Constraints – Egde Case
+        - 2.3.2. Software Constraints
+        - 2.3.3. Network Constraints
+        - 2.3.4. Environmental Constraints
+        - 2.3.5. Regulatory Constraints
+        - 2.3.6. Budget Constraints
+        - 2.3.7. Timeline Constraints
+        - 2.3.8. Resource Constraints
+            - 2.3.8.1. First sub-item at this level
+            - 2.3.8.2. Second sub-item at this level
+            - 2.3.8.3. Third sub-item at this level
@@ -0,0 +1,94 @@
+"""Tests for api_image_request module."""
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+from PIL import Image
+
+from docling.datamodel.base_models import VlmStopReason
+from docling.utils.api_image_request import api_image_request
+
+
+class TestApiImageRequest:
+    """Test cases for api_image_request function."""
+
+    @pytest.fixture
+    def sample_image(self):
+        """Create a simple test image."""
+        return Image.new("RGB", (100, 100), color="red")
+
+    @pytest.fixture
+    def mock_response_factory(self):
+        """Factory to create mock API responses."""
+
+        def _create_mock_response(
+            content="Test response",
+            finish_reason="stop",
+            total_tokens=100,
+            status_ok=True,
+        ):
+            mock_resp = MagicMock()
+            mock_resp.ok = status_ok
+            mock_resp.text = f"""{{
+                "id": "test-id",
+                "created": 1234567890,
+                "choices": [{{
+                    "index": 0,
+                    "message": {{"role": "assistant", "content": "{content}"}},
+                    "finish_reason": "{finish_reason}"
+                }}],
+                "usage": {{"prompt_tokens": 50, "completion_tokens": 50, "total_tokens": {total_tokens}}}
+            }}"""
+            return mock_resp
+
+        return _create_mock_response
+
+    @patch("docling.utils.api_image_request.requests.post")
+    def test_content_filter_finish_reason(
+        self, mock_post, sample_image, mock_response_factory
+    ):
+        """Test that content_filter finish reason returns CONTENT_FILTERED."""
+        mock_post.return_value = mock_response_factory(
+            content="Filtered content", finish_reason="content_filter"
+        )
+
+        result_text, _tokens, stop_reason = api_image_request(
+            image=sample_image,
+            prompt="Test prompt",
+            url="http://test.api/v1/chat/completions",
+        )
+
+        assert result_text == "Filtered content"
+        assert stop_reason == VlmStopReason.CONTENT_FILTERED
+
+    @patch("docling.utils.api_image_request.requests.post")
+    def test_length_finish_reason(self, mock_post, sample_image, mock_response_factory):
+        """Test that length finish reason returns LENGTH."""
+        mock_post.return_value = mock_response_factory(
+            content="Truncated content", finish_reason="length"
+        )
+
+        result_text, _tokens, stop_reason = api_image_request(
+            image=sample_image,
+            prompt="Test prompt",
+            url="http://test.api/v1/chat/completions",
+        )
+
+        assert result_text == "Truncated content"
+        assert stop_reason == VlmStopReason.LENGTH
+
+    @patch("docling.utils.api_image_request.requests.post")
+    def test_stop_finish_reason(self, mock_post, sample_image, mock_response_factory):
+        """Test that stop finish reason returns END_OF_SEQUENCE."""
+        mock_post.return_value = mock_response_factory(
+            content="Normal completion", finish_reason="stop"
+        )
+
+        result_text, _tokens, stop_reason = api_image_request(
+            image=sample_image,
+            prompt="Test prompt",
+            url="http://test.api/v1/chat/completions",
+        )
+
+        assert result_text == "Normal completion"
+        assert stop_reason == VlmStopReason.END_OF_SEQUENCE
@@ -2,10 +2,13 @@ import logging
 import os
 import warnings
 from pathlib import Path
+from types import SimpleNamespace

 import pytest
 from docling_core.types.doc import GroupItem
+from lxml import etree

+import docling.backend.msword_backend as msword_backend_module
 from docling.backend.docx.drawingml.utils import get_libreoffice_cmd
 from docling.backend.msword_backend import MsWordDocumentBackend
 from docling.datamodel.base_models import InputFormat
@@ -44,6 +47,17 @@ def get_converter():
    return converter


+@pytest.fixture(scope="module")
+def backend(docx_paths) -> MsWordDocumentBackend:
+    docx_path = docx_paths[0]
+    in_doc = InputDocument(
+        path_or_stream=docx_path,
+        format=InputFormat.DOCX,
+        backend=MsWordDocumentBackend,
+    )
+    return in_doc._backend
+
+
@pytest.fixture(scope="module")
 def documents(docx_paths) -> list[tuple[Path, DoclingDocument]]:
    documents: list[dict[Path, DoclingDocument]] = []
@@ -423,3 +437,160 @@ def test_external_image_references():
    assert "Test Document with External Image" in md
    assert "text before the image" in md
    assert "after the external image" in md
+
+
+def test_list_counter_and_enum_marker(docx_paths):
+    """Test list counter increment, sub-level reset, marker building, and sequence reset."""
+    # Build a dedicated backend here: the assertions below depend on the exact
+    # sequence of counter calls and on direct writes to ``list_counters``.
+    docx_path = docx_paths[0]
+    in_doc = InputDocument(
+        path_or_stream=docx_path,
+        format=InputFormat.DOCX,
+        backend=MsWordDocumentBackend,
+    )
+    backend = in_doc._backend
+
+    # Basic increment: each (numid, ilvl) pair counts independently from 1
+    assert backend._get_list_counter(1, 0) == 1
+    assert backend._get_list_counter(1, 0) == 2
+    assert backend._get_list_counter(1, 1) == 1
+    assert backend._get_list_counter(1, 1) == 2
+    assert backend._get_list_counter(1, 1) == 3
+
+    # Advancing parent level resets sub-levels
+    backend._get_list_counter(1, 2)  # (1,2) = 1
+    backend._get_list_counter(1, 0)  # (1,0) = 3, resets lvl 1 and 2
+    assert backend.list_counters[(1, 1)] == 0
+    assert backend.list_counters[(1, 2)] == 0
+    assert backend._get_list_counter(1, 1) == 1  # restarts from 1
+
+    # Hierarchical enum markers: counters are set by hand so the expected
+    # marker strings do not depend on the calls made above
+    backend.list_counters[(1, 0)] = 2
+    backend.list_counters[(1, 1)] = 3
+    backend.list_counters[(1, 2)] = 1
+    assert backend._build_enum_marker(1, 0) == "2."
+    assert backend._build_enum_marker(1, 1) == "2.3."
+    assert backend._build_enum_marker(1, 2) == "2.3.1."
+    assert backend._build_enum_marker(99, 0) == "1."  # missing counter defaults to 1
+
+    # Reset sequence for a specific numid; counters of other numids must survive
+    backend._get_list_counter(2, 0)  # (2,0) = 1
+    backend._reset_list_counters_for_new_sequence(1)
+    assert backend.list_counters[(1, 0)] == 0
+    assert backend.list_counters[(1, 1)] == 0
+    assert backend.list_counters[(2, 0)] == 1  # unaffected
+
+
+def test_handle_equations_in_text_returns_original_text_on_mismatch(
+    backend, monkeypatch
+):
+    element = etree.Element("p")
+    run = etree.SubElement(element, "r")
+    text_elem = etree.SubElement(run, "t")
+    text_elem.text = "alpha"
+    etree.SubElement(element, "oMath")
+
+    monkeypatch.setattr(msword_backend_module, "oMath2Latex", lambda _: "x")
+
+    text, equations = backend._handle_equations_in_text(element=element, text="beta")
+
+    assert text == "beta"
+    assert equations == []
+
+
+def test_handle_equations_in_text_skips_empty_substrings(backend, monkeypatch):
+    equation = backend.equation_bookends.format(EQ="x")
+
+    element = etree.Element("p")
+    empty_run = etree.SubElement(element, "r")
+    empty_text = etree.SubElement(empty_run, "t")
+    empty_text.text = ""
+    etree.SubElement(element, "oMath")
+    tail_run = etree.SubElement(element, "r")
+    tail_text = etree.SubElement(tail_run, "t")
+    tail_text.text = "tail"
+
+    monkeypatch.setattr(msword_backend_module, "oMath2Latex", lambda _: "x")
+
+    text, equations = backend._handle_equations_in_text(element=element, text="tail")
+
+    assert equations == [equation]
+    assert text == f"{equation}tail"
+
+
+def test_handle_text_elements_returns_empty_refs_when_text_is_none(
+    backend, monkeypatch
+):
+    element = backend.docx_obj.paragraphs[0]._element
+
+    monkeypatch.setattr(
+        backend, "_handle_equations_in_text", lambda element, text: (None, [])
+    )
+
+    refs = backend._handle_text_elements(element, DoclingDocument(name="test"))
+
+    assert refs == []
+
+
+def test_handle_text_elements_heading_defaults_to_non_numbered_when_style_missing(
+    backend, monkeypatch
+):
+    """Check the arguments forwarded to ``_add_heading`` for a bare style object.
+
+    The paragraph's ``style`` is an empty ``SimpleNamespace`` (no attributes),
+    and the test expects ``is_numbered_style`` to be ``False``.
+    """
+    # Records the arguments of the patched ``_add_heading`` call.
+    captured: dict[str, tuple[int, str, bool]] = {}
+
+    class FakeParagraph:
+        # Replaces ``Paragraph`` in the backend module; ignores its arguments.
+        def __init__(self, element, docx_obj):
+            self.text = "Heading text"
+            self.style = SimpleNamespace()
+
+    # Stub out every collaborator so only the heading branch is exercised.
+    monkeypatch.setattr(msword_backend_module, "Paragraph", FakeParagraph)
+    monkeypatch.setattr(backend, "_get_paragraph_elements", lambda paragraph: [])
+    monkeypatch.setattr(
+        backend, "_handle_equations_in_text", lambda element, text: (text, [])
+    )
+    monkeypatch.setattr(backend, "_get_comment_ids_for_element", lambda element: [])
+    monkeypatch.setattr(
+        backend, "_get_label_and_level", lambda paragraph: ("Heading", 1)
+    )
+    monkeypatch.setattr(backend, "_get_numId_and_ilvl", lambda paragraph: (None, None))
+
+    def fake_add_heading(doc, level, text, is_numbered_style):
+        captured["heading"] = (level, text, is_numbered_style)
+        return []
+
+    monkeypatch.setattr(backend, "_add_heading", fake_add_heading)
+
+    # The element is never inspected because ``Paragraph`` is replaced above.
+    refs = backend._handle_text_elements(object(), DoclingDocument(name="test"))
+
+    assert refs == []
+    assert captured["heading"] == (1, "Heading text", False)
+
+
+def test_handle_text_elements_inline_equations_stop_when_text_is_consumed(
+    backend, monkeypatch
+):
+    equation_one = backend.equation_bookends.format(EQ="a")
+    equation_two = backend.equation_bookends.format(EQ="b")
+
+    class FakeParagraph:
+        def __init__(self, element, docx_obj):
+            self.text = "inline eq"
+            self.style = SimpleNamespace()
+
+    monkeypatch.setattr(msword_backend_module, "Paragraph", FakeParagraph)
+    monkeypatch.setattr(backend, "_get_paragraph_elements", lambda paragraph: [])
+    monkeypatch.setattr(
+        backend,
+        "_handle_equations_in_text",
+        lambda element, text: (equation_one, [equation_one, equation_two]),
+    )
+    monkeypatch.setattr(backend, "_get_comment_ids_for_element", lambda element: [])
+    monkeypatch.setattr(
+        backend, "_get_label_and_level", lambda paragraph: ("Normal", None)
+    )
+    monkeypatch.setattr(backend, "_get_numId_and_ilvl", lambda paragraph: (None, None))
+    monkeypatch.setattr(backend, "_prev_numid", lambda: None)
+    monkeypatch.setattr(backend, "_get_level", lambda: 1)
+    backend.parents[0] = None
+
+    refs = backend._handle_text_elements(object(), DoclingDocument(name="test"))
+
+    assert len(refs) == 2
@@ -1,8 +1,11 @@
 from pathlib import Path

+import pytest
+from docling_core.types.doc import ImageRefMode
 from typer.testing import CliRunner

-from docling.cli.main import app
+from docling.cli.main import _should_generate_export_images, app
+from docling.datamodel.base_models import OutputFormat

 runner = CliRunner()

@@ -27,6 +30,35 @@ def test_cli_convert(tmp_path):
    assert converted.exists()


+@pytest.mark.parametrize(  # each case is (image_export_mode, to_formats, expected)
+    ("image_export_mode", "to_formats", "expected"),
+    [
+        (ImageRefMode.PLACEHOLDER, [OutputFormat.JSON], False),  # PLACEHOLDER mode with JSON -> False
+        (ImageRefMode.EMBEDDED, [OutputFormat.TEXT, OutputFormat.DOCTAGS], False),  # EMBEDDED with only TEXT/DOCTAGS -> False
+        (ImageRefMode.EMBEDDED, [OutputFormat.MARKDOWN], True),  # EMBEDDED with MARKDOWN -> True
+        (
+            ImageRefMode.EMBEDDED,
+            [OutputFormat.TEXT, OutputFormat.MARKDOWN],  # TEXT mixed with MARKDOWN
+            True,  # the mixed list still expects True
+        ),
+    ],
+)
+def test_should_generate_export_images(image_export_mode, to_formats, expected):  # compares the helper's result by identity with the expected bool
+    assert _should_generate_export_images(image_export_mode, to_formats) is expected
+
+
+def test_image_export_policy_covers_all_output_formats():  # partitions OutputFormat into two sets and checks the partition
+    non_image_export_formats = {  # formats listed here as not needing exported images
+        OutputFormat.TEXT,
+        OutputFormat.DOCTAGS,
+        OutputFormat.VTT,
+    }
+    image_export_formats = set(OutputFormat) - non_image_export_formats  # defined as the complement of the set above
+
+    assert image_export_formats.isdisjoint(non_image_export_formats)  # NOTE(review): always true, since the left side is a set difference
+    assert image_export_formats | non_image_export_formats == set(OutputFormat)  # NOTE(review): also true by construction; _should_generate_export_images is never called here - confirm intent
+
+
 def test_cli_audio_auto_detection(tmp_path):
    """Test that CLI automatically detects audio files and sets ASR pipeline."""
    from docling.datamodel.base_models import FormatToExtensions, InputFormat
@@ -0,0 +1,179 @@
+from collections.abc import Iterable
+from types import SimpleNamespace
+from typing import ClassVar, List, Type
+
+import pytest
+from docling_core.types.doc import (
+    DoclingDocument,
+    ImageRef,
+    PictureItem,
+    ProvenanceItem,
+)
+from docling_core.types.doc.base import BoundingBox, Size
+from PIL import Image
+
+from docling.datamodel.accelerator_options import AcceleratorOptions
+from docling.datamodel.base_models import ItemAndImageEnrichmentElement
+from docling.datamodel.pipeline_options import (
+    PictureDescriptionBaseOptions,
+    PictureDescriptionVlmEngineOptions,
+    PipelineOptions,
+)
+from docling.models.picture_description_base_model import PictureDescriptionBaseModel
+from docling.pipeline.base_pipeline import BasePipeline
+
+
+class _TestOptions(PictureDescriptionBaseOptions):  # options subclass used by the tests in this file
+    kind: ClassVar[str] = "test"  # class-level kind tag set to "test"
+
+
+class _ConfiguredPictureDescriptionModel(PictureDescriptionBaseModel):  # concrete model whose constructor only takes the options object
+    def __init__(self, options: PictureDescriptionBaseOptions) -> None:  # all other base-class arguments are fixed below
+        super().__init__(
+            enabled=True,
+            enable_remote_services=False,
+            artifacts_path=None,
+            options=options,
+            accelerator_options=AcceleratorOptions(),  # default-constructed accelerator options
+        )
+
+    @classmethod
+    def get_options_type(cls) -> Type[PictureDescriptionBaseOptions]:  # reports _TestOptions as this model's options type
+        return _TestOptions
+
+    def _annotate_images(self, images: Iterable[Image.Image]) -> Iterable[str]:  # generator: one canned string per input image
+        for _image in images:
+            yield "test description"
+
+
+class _BatchRecordingPictureDescriptionModel(_ConfiguredPictureDescriptionModel):  # overrides __call__ to record the size of every batch it receives
+    def __init__(self, options: PictureDescriptionBaseOptions) -> None:
+        super().__init__(options)
+        self.batch_sizes: List[int] = []  # one entry appended per __call__ invocation
+
+    def __call__(
+        self,
+        doc: DoclingDocument,
+        element_batch: Iterable[ItemAndImageEnrichmentElement],
+    ) -> Iterable[PictureItem]:
+        element_list = list(element_batch)  # materialized so the batch can be measured and then iterated
+        self.batch_sizes.append(len(element_list))
+        for element in element_list:
+            assert isinstance(element.item, PictureItem)
+            yield element.item  # items are passed through; _annotate_images is not called here
+
+
+class _PictureDescriptionPipeline(BasePipeline):  # pipeline stub with pass-through implementations of the methods below
+    def _build_document(self, conv_res):  # returns conv_res untouched
+        return conv_res
+
+    def _determine_status(self, conv_res):  # echoes the status already stored on conv_res
+        return conv_res.status
+
+    @classmethod
+    def get_default_options(cls) -> PipelineOptions:  # default-constructed PipelineOptions
+        return PipelineOptions()
+
+    @classmethod
+    def is_backend_supported(cls, backend) -> bool:  # accepts any backend
+        return True
+
+
+def _make_picture_doc(*, count: int, embed_images: bool = True) -> DoclingDocument:  # builds a document named "test" holding `count` pictures
+    doc = DoclingDocument(name="test")
+    for _ in range(count):
+        image = (
+            ImageRef.from_pil(Image.new("RGB", (20, 20), "red"), dpi=72)  # 20x20 red image when embed_images is true
+            if embed_images
+            else None  # otherwise the picture is added with image=None
+        )
+        doc.add_picture(image=image)
+    return doc
+
+
+def test_picture_description_options_control_batch_size_and_scale() -> None:  # option values must be reflected on the model attributes
+    model = _ConfiguredPictureDescriptionModel(_TestOptions(batch_size=3, scale=1.5))
+
+    assert model.elements_batch_size == 3  # batch_size option is expected as elements_batch_size
+    assert model.images_scale == 1.5  # scale option is expected as images_scale
+
+
+def test_picture_description_batch_size_controls_pipeline_chunking() -> None:  # runs _enrich_document with a batch-recording model
+    pipeline = _PictureDescriptionPipeline(PipelineOptions())
+    model = _BatchRecordingPictureDescriptionModel(_TestOptions(batch_size=2))
+    pipeline.enrichment_pipe = [model]  # the recording model is the only enrichment stage
+    conv_res = SimpleNamespace(  # lightweight stand-in exposing only document, timings and status
+        document=_make_picture_doc(count=5),
+        timings={},
+        status="success",
+    )
+
+    pipeline._enrich_document(conv_res)
+
+    assert model.batch_sizes == [2, 2, 1]  # 5 pictures with batch_size=2 -> batches of 2, 2 and 1
+
+
+def test_picture_description_scale_is_used_for_cropping() -> None:  # prepare_element on a picture without an embedded image
+    model = _ConfiguredPictureDescriptionModel(_TestOptions(scale=1.5))
+    doc = DoclingDocument(name="test")
+    doc.add_page(page_no=1, size=Size(width=100, height=100))
+    picture = doc.add_picture(  # picture has provenance on page 1 but no image argument
+        prov=ProvenanceItem(
+            page_no=1,
+            bbox=BoundingBox(l=10, t=10, r=30, b=30),
+            charspan=(0, 0),
+        )
+    )
+
+    class _PageSpy:  # page stand-in that records every get_image call
+        def __init__(self):
+            self.page_no = 1
+            self.calls = []  # one dict per get_image call
+
+        def get_image(self, *, scale, cropbox):  # keyword-only; returns a fixed 5x5 blue image
+            self.calls.append({"scale": scale, "cropbox": cropbox})
+            return Image.new("RGB", (5, 5), "blue")
+
+    page = _PageSpy()
+    conv_res = SimpleNamespace(document=doc, pages=[page])
+
+    prepared = model.prepare_element(conv_res=conv_res, element=picture)
+
+    assert prepared is not None
+    assert page.calls[0]["scale"] == 1.5  # the first get_image call must carry the configured scale
+
+
+def test_picture_description_embedded_images_keep_original_size() -> None:  # prepare_element on a picture that carries an embedded image
+    model = _ConfiguredPictureDescriptionModel(_TestOptions(scale=1.5))
+    doc = _make_picture_doc(count=1, embed_images=True)  # one picture with the 20x20 image from _make_picture_doc
+
+    prepared = model.prepare_element(
+        conv_res=SimpleNamespace(document=doc, pages=[]), element=doc.pictures[0]  # conv_res has no pages at all
+    )
+
+    assert prepared is not None
+    assert prepared.image.size == (20, 20)  # size stays 20x20 although scale=1.5 is configured
+
+
+def test_picture_description_batch_size_must_be_positive() -> None:  # options validation for batch_size
+    with pytest.raises(ValueError):
+        _TestOptions(batch_size=0)  # constructing the options with batch_size=0 must raise
+
+
+def test_picture_description_scale_must_be_positive() -> None:  # options validation for scale
+    with pytest.raises(ValueError):
+        _TestOptions(scale=0)  # constructing the options with scale=0 must raise
+
+
+def test_picture_description_preset_batch_size_must_be_positive() -> None:  # batch_size check for preset-built options
+    options = PictureDescriptionVlmEngineOptions.from_preset("smolvlm", batch_size=0)  # outside pytest.raises: from_preset itself must not raise
+
+    with pytest.raises(ValueError, match="batch_size"):  # error message must match "batch_size"
+        _ConfiguredPictureDescriptionModel(options)  # the error is expected at model construction
+
+
+def test_picture_description_preset_scale_must_be_positive() -> None:  # scale check for preset-built options
+    options = PictureDescriptionVlmEngineOptions.from_preset("smolvlm", scale=0)  # outside pytest.raises: from_preset itself must not raise
+
+    with pytest.raises(ValueError, match="scale"):  # error message must match "scale"
+        _ConfiguredPictureDescriptionModel(options)  # the error is expected at model construction
@@ -0,0 +1,164 @@
+from pathlib import Path
+
+import pytest
+import torch
+from PIL import Image
+
+from docling.datamodel.accelerator_options import AcceleratorOptions
+from docling.datamodel.pipeline_options import PictureDescriptionVlmOptions
+from docling.models.stages.picture_description.picture_description_vlm_model import (
+    PictureDescriptionVlmModel,
+)
+
+
+class _DummyBatch(dict):  # dict with a .to() method, as returned by _DummyProcessor.__call__
+    def to(self, device):  # stores the device under the "device" key
+        self["device"] = device
+        return self  # returns the same object so the call can be chained
+
+
+class _DummyProcessor:  # processor stub: counts/records calls and returns canned values
+    def __init__(self) -> None:
+        self.template_calls = 0  # number of apply_chat_template calls
+        self.process_calls = []  # keyword arguments of every __call__
+        self.decode_calls = 0  # number of batch_decode calls
+
+    def apply_chat_template(self, messages, add_generation_prompt=True):  # remembers its arguments, returns a fixed prompt
+        self.template_calls += 1
+        self.messages = messages
+        self.add_generation_prompt = add_generation_prompt
+        return "formatted prompt"
+
+    def __call__(self, *, text, images, return_tensors, padding):  # keyword-only; records the call
+        self.process_calls.append(
+            {
+                "text": text,
+                "images": images,
+                "return_tensors": return_tensors,
+                "padding": padding,
+            }
+        )
+        return _DummyBatch(  # fixed two-row batch, independent of the inputs
+            {
+                "input_ids": torch.tensor([[1, 2, 3], [1, 2, 3]]),
+                "attention_mask": torch.tensor([[1, 1, 1], [1, 1, 1]]),
+            }
+        )
+
+    def batch_decode(self, token_ids, *, skip_special_tokens):  # remembers its arguments for later assertions
+        self.decode_calls += 1
+        self.token_ids = token_ids
+        self.skip_special_tokens = skip_special_tokens
+        return ["first description", "second description"]  # canned output, independent of token_ids
+
+
+class _DummyModel:  # model stub whose generate() records its kwargs
+    def __init__(self) -> None:
+        self.generate_calls = []  # kwargs dict of every generate() call
+
+    def generate(self, **kwargs):
+        self.generate_calls.append(kwargs)
+        return torch.tensor(  # fixed 2x5 tensor; presumably 3 prompt tokens plus 2 new tokens per row - confirm
+            [
+                [1, 2, 3, 10, 11],
+                [1, 2, 3, 20, 21],
+            ]
+        )
+
+
+class _DummyTokenizer:  # tokenizer stand-in exposing only padding_side
+    def __init__(self) -> None:
+        self.padding_side = "left"  # starts as "left" so a later change to "right" is observable
+
+
+class _InitDummyProcessor:  # processor stand-in for the constructor test
+    def __init__(self) -> None:
+        self.tokenizer = _DummyTokenizer()  # only the tokenizer attribute is provided
+
+
+class _InitDummyModel:  # model stand-in for the constructor test
+    def eval(self):  # returns itself so the result of eval() can be used as the model
+        return self
+
+
+def test_legacy_picture_description_vlm_batches_generation() -> None:  # _annotate_images must handle two images in a single batch
+    model = PictureDescriptionVlmModel.__new__(PictureDescriptionVlmModel)  # bypasses __init__; attributes are assigned by hand below
+    model.processor = _DummyProcessor()
+    model.model = _DummyModel()
+    model.device = "cpu"
+    model.options = PictureDescriptionVlmOptions(
+        repo_id="org/model",
+        prompt="Describe this image in a few sentences.",
+        generation_config={"max_new_tokens": 17, "do_sample": False},
+    )
+
+    images = [  # two images of different sizes
+        Image.new("RGB", (8, 8), "white"),
+        Image.new("RGB", (10, 10), "black"),
+    ]
+
+    outputs = list(model._annotate_images(images))  # list() drains the returned iterable
+
+    assert outputs == ["first description", "second description"]  # the stub's canned decode output
+    assert model.processor.template_calls == 1  # chat template applied once, not once per image
+    assert len(model.processor.process_calls) == 1  # a single processor call for both images
+    assert model.processor.process_calls[0]["text"] == [  # the same prompt repeated once per image
+        "formatted prompt",
+        "formatted prompt",
+    ]
+    assert model.processor.process_calls[0]["images"] == images
+    assert model.processor.process_calls[0]["return_tensors"] == "pt"
+    assert model.processor.process_calls[0]["padding"] is True
+    assert model.processor.decode_calls == 1  # a single batch_decode call
+    assert model.processor.skip_special_tokens is True
+    assert len(model.model.generate_calls) == 1  # a single generate call
+    assert model.model.generate_calls[0]["generation_config"].max_new_tokens == 17  # passed as an object with attributes, not the raw dict
+
+
+def test_legacy_picture_description_vlm_skips_empty_batch() -> None:  # an empty image list must not touch processor or model
+    model = PictureDescriptionVlmModel.__new__(PictureDescriptionVlmModel)  # bypasses __init__; attributes are assigned by hand below
+    model.processor = _DummyProcessor()
+    model.model = _DummyModel()
+    model.device = "cpu"
+    model.options = PictureDescriptionVlmOptions(repo_id="org/model")
+
+    assert list(model._annotate_images([])) == []  # nothing is yielded
+    assert model.processor.template_calls == 0  # no chat template call
+    assert model.processor.process_calls == []  # no processor call
+    assert model.processor.decode_calls == 0  # no decode call
+    assert model.model.generate_calls == []  # no generate call
+
+
+def test_legacy_picture_description_vlm_init_uses_configured_padding_side(  # runs the real constructor against patched loaders
+    monkeypatch,
+) -> None:
+    transformers = pytest.importorskip("transformers")  # test is skipped when transformers cannot be imported
+    processor = _InitDummyProcessor()
+    model = _InitDummyModel()
+
+    monkeypatch.setattr(
+        transformers.AutoProcessor,
+        "from_pretrained",
+        lambda *args, **kwargs: processor,  # every from_pretrained call returns the stub processor
+    )
+    monkeypatch.setattr(
+        transformers.AutoModelForImageTextToText,
+        "from_pretrained",
+        lambda *args, **kwargs: model,  # every from_pretrained call returns the stub model
+    )
+    monkeypatch.setattr(torch, "compile", lambda compiled_model: compiled_model)  # torch.compile becomes an identity function
+
+    picture_description_model = PictureDescriptionVlmModel(
+        enabled=True,
+        enable_remote_services=False,
+        artifacts_path=Path("/tmp"),  # NOTE(review): loaders are patched, so presumably nothing is read from this path - confirm
+        options=PictureDescriptionVlmOptions(
+            repo_id="org/model",
+            padding_side="right",  # differs from the stub tokenizer's initial "left"
+        ),
+        accelerator_options=AcceleratorOptions(device="cpu"),
+    )
+
+    assert processor.tokenizer.padding_side == "right"  # constructor must have applied the configured padding side
+    assert picture_description_model.processor is processor
+    assert picture_description_model.model is model
@@ -669,7 +669,7 @@ name = "coloredlogs"
 version = "15.0.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "humanfriendly" },
+    { name = "humanfriendly", marker = "python_full_version < '3.14'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/cc/c7/eed8f27100517e8c0e6b923d5f0845d0cb99763da6fdee00478f91db7325/coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0", size = 278520, upload-time = "2021-06-11T10:22:45.202Z" }
 wheels = [
@@ -1113,7 +1113,7 @@ wheels = [

 [[package]]
 name = "docling"
-version = "2.80.0"
+version = "2.81.0"
 source = { editable = "." }
 dependencies = [
    { name = "accelerate" },
@@ -1526,11 +1526,11 @@ name = "fastapi"
 version = "0.135.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "annotated-doc" },
-    { name = "pydantic" },
-    { name = "starlette" },
-    { name = "typing-extensions" },
-    { name = "typing-inspection" },
+    { name = "annotated-doc", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
+    { name = "pydantic", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
+    { name = "starlette", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
+    { name = "typing-extensions", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
+    { name = "typing-inspection", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/c4/73/5903c4b13beae98618d64eb9870c3fac4f605523dd0312ca5c80dadbd5b9/fastapi-0.135.2.tar.gz", hash = "sha256:88a832095359755527b7f63bb4c6bc9edb8329a026189eed83d6c1afcf419d56", size = 395833, upload-time = "2026-03-23T14:12:41.697Z" }
 wheels = [
@@ -1748,12 +1748,12 @@ name = "gliner"
 version = "0.2.26"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "huggingface-hub" },
-    { name = "onnxruntime" },
-    { name = "sentencepiece" },
-    { name = "torch" },
-    { name = "tqdm" },
-    { name = "transformers" },
+    { name = "huggingface-hub", marker = "python_full_version < '3.14'" },
+    { name = "onnxruntime", marker = "python_full_version < '3.14'" },
+    { name = "sentencepiece", marker = "python_full_version < '3.14'" },
+    { name = "torch", marker = "python_full_version < '3.14'" },
+    { name = "tqdm", marker = "python_full_version < '3.14'" },
+    { name = "transformers", marker = "python_full_version < '3.14'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/49/18/e199cb97147c4a9260c75e4caf51e17be6ff969b0604a029c9c62810cbe0/gliner-0.2.26.tar.gz", hash = "sha256:6783be92b4b81caa878dcc4269ba37800207c37118d8ff9be028b93bddd6813d", size = 181224, upload-time = "2026-03-19T15:07:22.707Z" }
 wheels = [
@@ -1918,7 +1918,7 @@ name = "humanfriendly"
 version = "10.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "pyreadline3", marker = "sys_platform == 'win32'" },
+    { name = "pyreadline3", marker = "python_full_version < '3.14' and sys_platform == 'win32'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/cc/3f/2c29224acb2e2df4d2046e4c73ee2662023c58ff5b113c4c1adac0886c43/humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc", size = 360702, upload-time = "2021-09-17T21:40:43.31Z" }
 wheels = [
@@ -3059,14 +3059,14 @@ name = "mlx-lm"
 version = "0.29.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "jinja2" },
+    { name = "jinja2", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
    { name = "mlx", marker = "sys_platform == 'darwin'" },
    { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
-    { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
-    { name = "protobuf" },
-    { name = "pyyaml" },
-    { name = "sentencepiece" },
-    { name = "transformers" },
+    { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' and sys_platform != 'emscripten' and sys_platform != 'win32'" },
+    { name = "protobuf", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
+    { name = "pyyaml", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
+    { name = "sentencepiece", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
+    { name = "transformers", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/e3/62/f46e1355256a114808517947f8e83ad6be310c7288c551db0fa678f47923/mlx_lm-0.29.1.tar.gz", hash = "sha256:b99180d8f33d33a077b814e550bfb2d8a59ae003d668fd1f4b3fff62a381d34b", size = 232302, upload-time = "2025-12-16T16:58:27.959Z" }
 wheels = [
@@ -3088,19 +3088,19 @@ name = "mlx-vlm"
 version = "0.3.9"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "datasets" },
-    { name = "fastapi" },
-    { name = "mlx" },
-    { name = "mlx-lm" },
+    { name = "datasets", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
+    { name = "fastapi", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
+    { name = "mlx", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
+    { name = "mlx-lm", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
    { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
-    { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
-    { name = "opencv-python" },
-    { name = "pillow" },
-    { name = "requests" },
-    { name = "soundfile" },
-    { name = "tqdm" },
-    { name = "transformers" },
-    { name = "uvicorn" },
+    { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' and sys_platform != 'emscripten' and sys_platform != 'win32'" },
+    { name = "opencv-python", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
+    { name = "pillow", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
+    { name = "requests", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
+    { name = "soundfile", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
+    { name = "tqdm", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
+    { name = "transformers", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
+    { name = "uvicorn", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/1d/98/6b3c2d1317a317d0df544fe9ab0ef4f233ea85c1e4ac2fe6af7289ea1ee5/mlx_vlm-0.3.9.tar.gz", hash = "sha256:ae5050d0b1a051a29099c3a65efdbf6874bb497e8465734ac1992b6b179135b4", size = 303350, upload-time = "2025-12-03T21:48:24.199Z" }
 wheels = [
@@ -3112,17 +3112,17 @@ name = "mlx-whisper"
 version = "0.4.3"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "huggingface-hub" },
-    { name = "mlx" },
-    { name = "more-itertools" },
-    { name = "numba" },
+    { name = "huggingface-hub", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
+    { name = "mlx", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
+    { name = "more-itertools", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
+    { name = "numba", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
    { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
-    { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+    { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' and sys_platform != 'emscripten' and sys_platform != 'win32'" },
    { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
-    { name = "scipy", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
-    { name = "tiktoken" },
-    { name = "torch" },
-    { name = "tqdm" },
+    { name = "scipy", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' and sys_platform != 'emscripten' and sys_platform != 'win32'" },
+    { name = "tiktoken", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
+    { name = "torch", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
+    { name = "tqdm", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
 ]
 wheels = [
    { url = "https://files.pythonhosted.org/packages/22/b7/a35232812a2ccfffcb7614ba96a91338551a660a0e9815cee668bf5743f0/mlx_whisper-0.4.3-py3-none-any.whl", hash = "sha256:6b82b6597a994643a3e5496c7bc229a672e5ca308458455bfe276e76ae024489", size = 890544, upload-time = "2025-08-29T14:56:13.815Z" },
@@ -3917,9 +3917,9 @@ name = "ocrmac"
 version = "1.0.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "click" },
-    { name = "pillow" },
-    { name = "pyobjc-framework-vision" },
+    { name = "click", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
+    { name = "pillow", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
+    { name = "pyobjc-framework-vision", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/5e/07/3e15ab404f75875c5e48c47163300eb90b7409044d8711fc3aaf52503f2e/ocrmac-1.0.1.tar.gz", hash = "sha256:507fe5e4cbd67b2d03f6729a52bbc11f9d0b58241134eb958a5daafd4b9d93d9", size = 1454317, upload-time = "2026-01-08T16:44:26.412Z" }
 wheels = [
@@ -3944,13 +3944,13 @@ name = "onnxruntime"
 version = "1.23.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "coloredlogs" },
-    { name = "flatbuffers" },
+    { name = "coloredlogs", marker = "python_full_version < '3.14'" },
+    { name = "flatbuffers", marker = "python_full_version < '3.14'" },
    { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
-    { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
-    { name = "packaging" },
-    { name = "protobuf" },
-    { name = "sympy" },
+    { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' and python_full_version < '3.14'" },
+    { name = "packaging", marker = "python_full_version < '3.14'" },
+    { name = "protobuf", marker = "python_full_version < '3.14'" },
+    { name = "sympy", marker = "python_full_version < '3.14'" },
 ]
 wheels = [
    { url = "https://files.pythonhosted.org/packages/35/d6/311b1afea060015b56c742f3531168c1644650767f27ef40062569960587/onnxruntime-1.23.2-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:a7730122afe186a784660f6ec5807138bf9d792fa1df76556b27307ea9ebcbe3", size = 17195934, upload-time = "2025-10-27T23:06:14.143Z" },
@@ -3982,13 +3982,13 @@ name = "onnxruntime-gpu"
 version = "1.23.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "coloredlogs" },
-    { name = "flatbuffers" },
+    { name = "coloredlogs", marker = "(python_full_version < '3.14' and sys_platform != 'emscripten') or (python_full_version < '3.11' and sys_platform == 'emscripten')" },
+    { name = "flatbuffers", marker = "(python_full_version < '3.14' and sys_platform != 'emscripten') or (python_full_version < '3.11' and sys_platform == 'emscripten')" },
    { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
-    { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
-    { name = "packaging" },
-    { name = "protobuf" },
-    { name = "sympy" },
+    { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform != 'emscripten'" },
+    { name = "packaging", marker = "(python_full_version < '3.14' and sys_platform != 'emscripten') or (python_full_version < '3.11' and sys_platform == 'emscripten')" },
+    { name = "protobuf", marker = "(python_full_version < '3.14' and sys_platform != 'emscripten') or (python_full_version < '3.11' and sys_platform == 'emscripten')" },
+    { name = "sympy", marker = "(python_full_version < '3.14' and sys_platform != 'emscripten') or (python_full_version < '3.11' and sys_platform == 'emscripten')" },
 ]
 wheels = [
    { url = "https://files.pythonhosted.org/packages/cb/ae/39283748c68a96be4f5f8a9561e0e3ca92af1eae6c2b1c07fb1da5f65cd1/onnxruntime_gpu-1.23.2-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:18de50c6c8eea50acc405ea13d299aec593e46478d7a22cd32cdbbdf7c42899d", size = 300525411, upload-time = "2025-10-22T16:56:08.415Z" },
@@ -5028,7 +5028,7 @@ name = "pyobjc-framework-cocoa"
 version = "12.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "pyobjc-core" },
+    { name = "pyobjc-core", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/02/a3/16ca9a15e77c061a9250afbae2eae26f2e1579eb8ca9462ae2d2c71e1169/pyobjc_framework_cocoa-12.1.tar.gz", hash = "sha256:5556c87db95711b985d5efdaaf01c917ddd41d148b1e52a0c66b1a2e2c5c1640", size = 2772191, upload-time = "2025-11-14T10:13:02.069Z" }
 wheels = [
@@ -5046,8 +5046,8 @@ name = "pyobjc-framework-coreml"
 version = "12.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "pyobjc-core" },
-    { name = "pyobjc-framework-cocoa" },
+    { name = "pyobjc-core", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
+    { name = "pyobjc-framework-cocoa", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/30/2d/baa9ea02cbb1c200683cb7273b69b4bee5070e86f2060b77e6a27c2a9d7e/pyobjc_framework_coreml-12.1.tar.gz", hash = "sha256:0d1a4216891a18775c9e0170d908714c18e4f53f9dc79fb0f5263b2aa81609ba", size = 40465, upload-time = "2025-11-14T10:14:02.265Z" }
 wheels = [
@@ -5065,8 +5065,8 @@ name = "pyobjc-framework-quartz"
 version = "12.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "pyobjc-core" },
-    { name = "pyobjc-framework-cocoa" },
+    { name = "pyobjc-core", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
+    { name = "pyobjc-framework-cocoa", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/94/18/cc59f3d4355c9456fc945eae7fe8797003c4da99212dd531ad1b0de8a0c6/pyobjc_framework_quartz-12.1.tar.gz", hash = "sha256:27f782f3513ac88ec9b6c82d9767eef95a5cf4175ce88a1e5a65875fee799608", size = 3159099, upload-time = "2025-11-14T10:21:24.31Z" }
 wheels = [
@@ -5084,10 +5084,10 @@ name = "pyobjc-framework-vision"
 version = "12.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "pyobjc-core" },
-    { name = "pyobjc-framework-cocoa" },
-    { name = "pyobjc-framework-coreml" },
-    { name = "pyobjc-framework-quartz" },
+    { name = "pyobjc-core", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
+    { name = "pyobjc-framework-cocoa", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
+    { name = "pyobjc-framework-coreml", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
+    { name = "pyobjc-framework-quartz", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/c2/5a/08bb3e278f870443d226c141af14205ff41c0274da1e053b72b11dfc9fb2/pyobjc_framework_vision-12.1.tar.gz", hash = "sha256:a30959100e85dcede3a786c544e621ad6eb65ff6abf85721f805822b8c5fe9b0", size = 59538, upload-time = "2025-11-14T10:23:21.979Z" }
 wheels = [
@@ -6528,9 +6528,9 @@ name = "soundfile"
 version = "0.13.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "cffi" },
+    { name = "cffi", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
    { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
-    { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+    { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' and sys_platform != 'emscripten' and sys_platform != 'win32'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/e1/41/9b873a8c055582859b239be17902a85339bec6a30ad162f98c9b0288a2cc/soundfile-0.13.1.tar.gz", hash = "sha256:b2c68dab1e30297317080a5b43df57e302584c49e2942defdde0acccc53f0e5b", size = 46156, upload-time = "2025-01-25T09:17:04.831Z" }
 wheels = [
@@ -6566,8 +6566,8 @@ name = "starlette"
 version = "1.0.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "anyio" },
-    { name = "typing-extensions", marker = "python_full_version < '3.13'" },
+    { name = "anyio", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
+    { name = "typing-extensions", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (python_full_version < '3.13' and sys_platform != 'emscripten' and sys_platform != 'win32')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/81/69/17425771797c36cded50b7fe44e850315d039f28b15901ab44839e70b593/starlette-1.0.0.tar.gz", hash = "sha256:6a4beaf1f81bb472fd19ea9b918b50dc3a77a6f2e190a12954b25e6ed5eea149", size = 2655289, upload-time = "2026-03-22T18:29:46.779Z" }
 wheels = [
@@ -7369,8 +7369,8 @@ name = "uvicorn"
 version = "0.42.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "click" },
-    { name = "h11" },
+    { name = "click", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
+    { name = "h11", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
    { name = "typing-extensions", marker = "python_full_version < '3.11'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/e3/ad/4a96c425be6fb67e0621e62d86c402b4a17ab2be7f7c055d9bd2f638b9e2/uvicorn-0.42.0.tar.gz", hash = "sha256:9b1f190ce15a2dd22e7758651d9b6d12df09a13d51ba5bf4fc33c383a48e1775", size = 85393, upload-time = "2026-03-16T06:19:50.077Z" }