diff --git a/droidrun/agent/droid/droid_agent.py b/droidrun/agent/droid/droid_agent.py index 0f6c03b..ff0171f 100644 --- a/droidrun/agent/droid/droid_agent.py +++ b/droidrun/agent/droid/droid_agent.py @@ -1085,7 +1085,7 @@ class DroidAgent(Workflow): if self.config.logging.debug: logger.error(traceback.format_exc()) - # Capture final screenshot (independent of trajectory persistence) + # Capture final screenshot and UI state (independent of trajectory persistence) vision_any = ( self.config.agent.manager.vision or self.config.agent.executor.vision @@ -1111,6 +1111,15 @@ class DroidAgent(Workflow): except Exception as e: logger.warning(f"Failed to capture final screenshot: {e}") + try: + ui_state = await self.state_provider.get_state() + ctx.write_event_to_stream( + RecordUIStateEvent(ui_state=ui_state.elements) + ) + logger.debug("📋 Final UI state captured") + except Exception as e: + logger.warning(f"Failed to capture final UI state: {e}") + # Save trajectory to disk if self.config.logging.save_trajectory != "none": # Populate macro data from RecordingDriver log diff --git a/droidrun/agent/manager/manager_agent.py b/droidrun/agent/manager/manager_agent.py index e9d0a54..6e047dc 100644 --- a/droidrun/agent/manager/manager_agent.py +++ b/droidrun/agent/manager/manager_agent.py @@ -387,6 +387,30 @@ class ManagerAgent(Workflow): """Gather context and prepare manager prompt.""" logger.debug("💬 Preparing manager context...") + # Capture screenshot if needed + screenshot = None + if self.vision or self._stream_screenshots or self.save_trajectory != "none": + try: + screenshot = await self.action_ctx.driver.screenshot() + + if screenshot: + ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot)) + parent_span = trace.get_current_span() + record_langfuse_screenshot( + screenshot, + parent_span=parent_span, + screenshots_enabled=bool( + self.tracing_config + and self.tracing_config.langfuse_screenshots + ), + vision_enabled=self.vision, + ) + logger.debug("📸 Screenshot captured for Manager") + except DeviceDisconnectedError: + raise + except Exception as e: + logger.warning(f"Failed to capture screenshot: {e}") + # Get and format device state ui_state = await self.state_provider.get_state() self.action_ctx.ui = ui_state @@ -422,30 +446,6 @@ class ManagerAgent(Workflow): else: self.shared_state.app_card = "" - # Capture screenshot if needed - screenshot = None - if self.vision or self._stream_screenshots or self.save_trajectory != "none": - try: - screenshot = await self.action_ctx.driver.screenshot() - - if screenshot: - ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot)) - parent_span = trace.get_current_span() - record_langfuse_screenshot( - screenshot, - parent_span=parent_span, - screenshots_enabled=bool( - self.tracing_config - and self.tracing_config.langfuse_screenshots - ), - vision_enabled=self.vision, - ) - logger.debug("📸 Screenshot captured for Manager") - except DeviceDisconnectedError: - raise - except Exception as e: - logger.warning(f"Failed to capture screenshot: {e}") - # Detect text manipulation mode focused_text_clean = self.shared_state.focused_text.replace("'", "").strip() has_text_to_modify = focused_text_clean != "" diff --git a/droidrun/agent/manager/stateless_manager_agent.py b/droidrun/agent/manager/stateless_manager_agent.py index 163d869..3cd5ee0 100644 --- a/droidrun/agent/manager/stateless_manager_agent.py +++ b/droidrun/agent/manager/stateless_manager_agent.py @@ -175,24 +175,6 @@ class StatelessManagerAgent(Workflow): async def prepare_context( self, ctx: Context, ev: StartEvent ) -> ManagerContextEvent: - ui_state = await self.state_provider.get_state() - self.action_ctx.ui = ui_state - - self.shared_state.previous_formatted_device_state = ( - self.shared_state.formatted_device_state - ) - self.shared_state.formatted_device_state = ui_state.formatted_text - self.shared_state.focused_text = ui_state.focused_text - self.shared_state.a11y_tree = ui_state.elements - self.shared_state.phone_state = ui_state.phone_state - - self.shared_state.update_current_app( - package_name=ui_state.phone_state.get("packageName", "Unknown"), - activity_name=ui_state.phone_state.get("currentApp", "Unknown"), - ) - - ctx.write_event_to_stream(RecordUIStateEvent(ui_state=ui_state.elements)) - screenshot = None if self.vision or self.save_trajectory != "none": try: @@ -215,6 +197,24 @@ class StatelessManagerAgent(Workflow): except Exception as e: logger.warning(f"Failed to capture screenshot: {e}") + ui_state = await self.state_provider.get_state() + self.action_ctx.ui = ui_state + + self.shared_state.previous_formatted_device_state = ( + self.shared_state.formatted_device_state + ) + self.shared_state.formatted_device_state = ui_state.formatted_text + self.shared_state.focused_text = ui_state.focused_text + self.shared_state.a11y_tree = ui_state.elements + self.shared_state.phone_state = ui_state.phone_state + + self.shared_state.update_current_app( + package_name=ui_state.phone_state.get("packageName", "Unknown"), + activity_name=ui_state.phone_state.get("currentApp", "Unknown"), + ) + + ctx.write_event_to_stream(RecordUIStateEvent(ui_state=ui_state.elements)) + focused_text_clean = self.shared_state.focused_text.replace("'", "").strip() has_text_to_modify = focused_text_clean != ""