feat: add documentation on why we set the layout model to run on CPU

Signed-off-by: ElHachem02 <peterelhachem02@gmail.com>
This commit is contained in:
ElHachem02
2026-03-02 15:16:46 +01:00
parent dcfa8bcd32
commit c90c068600
@@ -72,7 +72,12 @@ class ThreadedLayoutVlmPipeline(BasePipeline):
"""Initialize layout and VLM models."""
art_path = self._resolve_artifacts_path()
# Layout model
# The layout model is deliberately pinned to the CPU.
# In this threaded pipeline, the VLM exclusively owns the GPU.
# Letting multiple models use the GPU concurrently can cause
# device contention, memory spikes, and unstable inference behavior.
# Because the layout model is lightweight, running it on the CPU avoids
# cross-thread GPU contention without significantly impacting latency.
self.layout_model = LayoutModel(
artifacts_path=art_path,
accelerator_options=AcceleratorOptions(device="cpu"),