fix(pptx): handle picture shapes with external image references

When processing PowerPoint files containing picture shapes that reference
external images (rather than embedded images), the python-pptx library
raises a ValueError("no embedded image") when accessing the `image`
property.

Previously, this caused the entire document conversion to fail because:

1. The `hasattr(shape, "image")` check at line 690 invoked the `image`
   property getter, which raised ValueError; because hasattr only
   suppresses AttributeError, the ValueError propagated to the caller

2. The exception handler in `_handle_pictures()` only caught
   UnidentifiedImageError and OSError, not ValueError

This fix:
- Removes the unnecessary hasattr check since we already verify the
  shape type is MSO_SHAPE_TYPE.PICTURE
- Adds ValueError to the exception handler in `_handle_pictures()` so
  that picture shapes with external references are gracefully skipped
  with a warning instead of crashing the pipeline

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Sam Quigley
2026-01-24 14:02:44 +00:00
committed by Cesar Berrospi Ramis
parent 0602a7cdab
commit e69779e07b
+5 -6
View File
@@ -590,8 +590,8 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB
caption=None,
prov=prov,
)
except (UnidentifiedImageError, OSError) as e:
_log.warning(f"Warning: image cannot be loaded by Pillow: {e}")
except (UnidentifiedImageError, OSError, ValueError) as e:
_log.warning(f"Warning: image cannot be loaded: {e}")
return
def _handle_tables(self, shape, parent_slide, slide_ind, doc, slide_size):
@@ -687,10 +687,9 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB
self._handle_tables(shape, parent_slide, slide_ind, doc, slide_size)
if shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
# Handle Pictures
if hasattr(shape, "image"):
self._handle_pictures(
shape, parent_slide, slide_ind, doc, slide_size
)
self._handle_pictures(
shape, parent_slide, slide_ind, doc, slide_size
)
# If shape doesn't have any text, move on to the next shape
if not hasattr(shape, "text"):
return