mirror of
https://github.com/droidrun/droidrun.git
synced 2026-05-23 07:40:37 +00:00
fix: consistent Screenshot → UI state emission order across all agents
- Manager and StatelessManager now emit ScreenshotEvent before RecordUIStateEvent, matching the FastAgent/CodeAct ordering.
- The DroidAgent finalize step now emits RecordUIStateEvent after the final screenshot, so every screenshot has a paired UI state.
This commit is contained in:
@@ -1085,7 +1085,7 @@ class DroidAgent(Workflow):
|
||||
if self.config.logging.debug:
|
||||
logger.error(traceback.format_exc())
|
||||
|
||||
# Capture final screenshot (independent of trajectory persistence)
|
||||
# Capture final screenshot and UI state (independent of trajectory persistence)
|
||||
vision_any = (
|
||||
self.config.agent.manager.vision
|
||||
or self.config.agent.executor.vision
|
||||
@@ -1111,6 +1111,15 @@ class DroidAgent(Workflow):
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to capture final screenshot: {e}")
|
||||
|
||||
try:
|
||||
ui_state = await self.state_provider.get_state()
|
||||
ctx.write_event_to_stream(
|
||||
RecordUIStateEvent(ui_state=ui_state.elements)
|
||||
)
|
||||
logger.debug("📋 Final UI state captured")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to capture final UI state: {e}")
|
||||
|
||||
# Save trajectory to disk
|
||||
if self.config.logging.save_trajectory != "none":
|
||||
# Populate macro data from RecordingDriver log
|
||||
|
||||
@@ -387,6 +387,30 @@ class ManagerAgent(Workflow):
|
||||
"""Gather context and prepare manager prompt."""
|
||||
logger.debug("💬 Preparing manager context...")
|
||||
|
||||
# Capture screenshot if needed
|
||||
screenshot = None
|
||||
if self.vision or self._stream_screenshots or self.save_trajectory != "none":
|
||||
try:
|
||||
screenshot = await self.action_ctx.driver.screenshot()
|
||||
|
||||
if screenshot:
|
||||
ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
|
||||
parent_span = trace.get_current_span()
|
||||
record_langfuse_screenshot(
|
||||
screenshot,
|
||||
parent_span=parent_span,
|
||||
screenshots_enabled=bool(
|
||||
self.tracing_config
|
||||
and self.tracing_config.langfuse_screenshots
|
||||
),
|
||||
vision_enabled=self.vision,
|
||||
)
|
||||
logger.debug("📸 Screenshot captured for Manager")
|
||||
except DeviceDisconnectedError:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to capture screenshot: {e}")
|
||||
|
||||
# Get and format device state
|
||||
ui_state = await self.state_provider.get_state()
|
||||
self.action_ctx.ui = ui_state
|
||||
@@ -422,30 +446,6 @@ class ManagerAgent(Workflow):
|
||||
else:
|
||||
self.shared_state.app_card = ""
|
||||
|
||||
# Capture screenshot if needed
|
||||
screenshot = None
|
||||
if self.vision or self._stream_screenshots or self.save_trajectory != "none":
|
||||
try:
|
||||
screenshot = await self.action_ctx.driver.screenshot()
|
||||
|
||||
if screenshot:
|
||||
ctx.write_event_to_stream(ScreenshotEvent(screenshot=screenshot))
|
||||
parent_span = trace.get_current_span()
|
||||
record_langfuse_screenshot(
|
||||
screenshot,
|
||||
parent_span=parent_span,
|
||||
screenshots_enabled=bool(
|
||||
self.tracing_config
|
||||
and self.tracing_config.langfuse_screenshots
|
||||
),
|
||||
vision_enabled=self.vision,
|
||||
)
|
||||
logger.debug("📸 Screenshot captured for Manager")
|
||||
except DeviceDisconnectedError:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to capture screenshot: {e}")
|
||||
|
||||
# Detect text manipulation mode
|
||||
focused_text_clean = self.shared_state.focused_text.replace("'", "").strip()
|
||||
has_text_to_modify = focused_text_clean != ""
|
||||
|
||||
@@ -175,24 +175,6 @@ class StatelessManagerAgent(Workflow):
|
||||
async def prepare_context(
|
||||
self, ctx: Context, ev: StartEvent
|
||||
) -> ManagerContextEvent:
|
||||
ui_state = await self.state_provider.get_state()
|
||||
self.action_ctx.ui = ui_state
|
||||
|
||||
self.shared_state.previous_formatted_device_state = (
|
||||
self.shared_state.formatted_device_state
|
||||
)
|
||||
self.shared_state.formatted_device_state = ui_state.formatted_text
|
||||
self.shared_state.focused_text = ui_state.focused_text
|
||||
self.shared_state.a11y_tree = ui_state.elements
|
||||
self.shared_state.phone_state = ui_state.phone_state
|
||||
|
||||
self.shared_state.update_current_app(
|
||||
package_name=ui_state.phone_state.get("packageName", "Unknown"),
|
||||
activity_name=ui_state.phone_state.get("currentApp", "Unknown"),
|
||||
)
|
||||
|
||||
ctx.write_event_to_stream(RecordUIStateEvent(ui_state=ui_state.elements))
|
||||
|
||||
screenshot = None
|
||||
if self.vision or self.save_trajectory != "none":
|
||||
try:
|
||||
@@ -215,6 +197,24 @@ class StatelessManagerAgent(Workflow):
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to capture screenshot: {e}")
|
||||
|
||||
ui_state = await self.state_provider.get_state()
|
||||
self.action_ctx.ui = ui_state
|
||||
|
||||
self.shared_state.previous_formatted_device_state = (
|
||||
self.shared_state.formatted_device_state
|
||||
)
|
||||
self.shared_state.formatted_device_state = ui_state.formatted_text
|
||||
self.shared_state.focused_text = ui_state.focused_text
|
||||
self.shared_state.a11y_tree = ui_state.elements
|
||||
self.shared_state.phone_state = ui_state.phone_state
|
||||
|
||||
self.shared_state.update_current_app(
|
||||
package_name=ui_state.phone_state.get("packageName", "Unknown"),
|
||||
activity_name=ui_state.phone_state.get("currentApp", "Unknown"),
|
||||
)
|
||||
|
||||
ctx.write_event_to_stream(RecordUIStateEvent(ui_state=ui_state.elements))
|
||||
|
||||
focused_text_clean = self.shared_state.focused_text.replace("'", "").strip()
|
||||
has_text_to_modify = focused_text_clean != ""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user