From 1c1b546c4f92af2eadbe2389f615197e78c54edb Mon Sep 17 00:00:00 2001 From: philipph-askui Date: Mon, 9 Mar 2026 11:25:22 +0100 Subject: [PATCH 1/6] feat: add new `MultiDeviceAgent` to support Android-Use and Computer-Use simultaneously --- src/askui/__init__.py | 5 +- src/askui/multi_device_agent.py | 143 ++++++++++++++++++++++++++++++++ 2 files changed, 147 insertions(+), 1 deletion(-) create mode 100644 src/askui/multi_device_agent.py diff --git a/src/askui/__init__.py b/src/askui/__init__.py index 9d71f276..69925c9b 100644 --- a/src/askui/__init__.py +++ b/src/askui/__init__.py @@ -54,6 +54,9 @@ except ImportError: _ANDROID_AGENT_AVAILABLE = False +if _ANDROID_AGENT_AVAILABLE: + from .multi_device_agent import MultiDeviceAgent + try: from .web_agent import WebVisionAgent from .web_testing_agent import WebTestingAgent @@ -107,7 +110,7 @@ ] if _ANDROID_AGENT_AVAILABLE: - __all__ += ["AndroidAgent", "AndroidVisionAgent"] + __all__ += ["AndroidAgent", "AndroidVisionAgent", "MultiDeviceAgent"] if _WEB_AGENTS_AVAILABLE: __all__ += ["WebVisionAgent", "WebTestingAgent"] diff --git a/src/askui/multi_device_agent.py b/src/askui/multi_device_agent.py new file mode 100644 index 00000000..32a27bea --- /dev/null +++ b/src/askui/multi_device_agent.py @@ -0,0 +1,143 @@ +import logging +from typing import Annotated + +from pydantic import Field + +from askui.agent_base import Agent +from askui.models.shared.tools import Tool +from askui.prompts.act_prompts import create_multidevice_agent_prompt +from askui.reporting import CompositeReporter, Reporter +from askui.retry import Retry +from askui.tools import AgentToolbox, ComputerAgentOsFacade +from askui.tools.android.agent_os_facade import AndroidAgentOsFacade +from askui.tools.android.ppadb_agent_os import PpadbAgentOs +from askui.tools.android.tools import ( + AndroidDragAndDropTool, + AndroidGetConnectedDevicesSerialNumbersTool, + AndroidGetConnectedDisplaysInfosTool, + AndroidGetCurrentConnectedDeviceInfosTool, +
AndroidKeyCombinationTool, + AndroidKeyTapEventTool, + AndroidScreenshotTool, + AndroidSelectDeviceBySerialNumberTool, + AndroidSelectDisplayByUniqueIDTool, + AndroidShellTool, + AndroidSwipeTool, + AndroidTapTool, + AndroidTypeTool, +) +from askui.tools.askui import AskUiControllerClient +from askui.tools.computer import ( + ComputerGetMousePositionTool, + ComputerGetSystemInfoTool, + ComputerKeyboardPressedTool, + ComputerKeyboardReleaseTool, + ComputerKeyboardTapTool, + ComputerListDisplaysTool, + ComputerMouseClickTool, + ComputerMouseHoldDownTool, + ComputerMouseReleaseTool, + ComputerMouseScrollTool, + ComputerMoveMouseTool, + ComputerRetrieveActiveDisplayTool, + ComputerScreenshotTool, + ComputerSetActiveDisplayTool, + ComputerTypeTool, +) +from askui.tools.exception_tool import ExceptionTool + +logger = logging.getLogger(__name__) + + +class MultiDeviceAgent(Agent): + def __init__( + self, + display: Annotated[int, Field(ge=1)] = 1, + reporters: list[Reporter] | None = None, + tools: AgentToolbox | None = None, + retry: Retry | None = None, + act_tools: list[Tool] | None = None, + android_device_sn: str | None = None, + ): + self.android_device_sn = android_device_sn + self.android_os = PpadbAgentOs() + reporter = CompositeReporter(reporters=reporters) + self.android_agent_os_facade = AndroidAgentOsFacade(self.android_os) + self.computer_agent_os_tool = AgentToolbox( + AskUiControllerClient( + display=display, + reporter=reporter, + ) + ) + + self.android_tools: list[Tool] = [ + AndroidScreenshotTool(self.android_agent_os_facade), + AndroidTapTool(self.android_agent_os_facade), + AndroidTypeTool(self.android_agent_os_facade), + AndroidDragAndDropTool(self.android_agent_os_facade), + AndroidKeyTapEventTool(self.android_agent_os_facade), + AndroidSwipeTool(self.android_agent_os_facade), + AndroidKeyCombinationTool(self.android_agent_os_facade), + AndroidShellTool(self.android_agent_os_facade), + 
AndroidSelectDeviceBySerialNumberTool(self.android_agent_os_facade), + AndroidSelectDisplayByUniqueIDTool(self.android_agent_os_facade), + AndroidGetConnectedDevicesSerialNumbersTool(self.android_agent_os_facade), + AndroidGetConnectedDisplaysInfosTool(self.android_agent_os_facade), + AndroidGetCurrentConnectedDeviceInfosTool(self.android_agent_os_facade), + ] + self.computer_tools: list[Tool] = [ + ComputerGetSystemInfoTool(), + ComputerGetMousePositionTool(), + ComputerKeyboardPressedTool(), + ComputerKeyboardReleaseTool(), + ComputerKeyboardTapTool(), + ComputerMouseClickTool(), + ComputerMouseHoldDownTool(), + ComputerMouseReleaseTool(), + ComputerMouseScrollTool(), + ComputerMoveMouseTool(), + ComputerScreenshotTool(), + ComputerTypeTool(), + ComputerListDisplaysTool(), + ComputerRetrieveActiveDisplayTool(), + ComputerSetActiveDisplayTool(), + ] + + act_tools = act_tools or [] + + multi_device_tools: list[Tool] = ( + act_tools + self.android_tools + self.computer_tools + [ExceptionTool()] + ) + + if tools: + msg = ( + "'tools' parameter is not supported for MultiDeviceAgent and will" + " be ignored. 
Please set tools via the 'act_tools' parameter" + ) + logger.warning(msg) + + super().__init__( + reporter=reporter, + tools=multi_device_tools, + retry=retry, + agent_os=self.computer_agent_os_tool.os, + ) + + self.computer_agent_os_facade: ComputerAgentOsFacade = ComputerAgentOsFacade( + self.computer_agent_os_tool.os + ) + self.act_tool_collection.add_agent_os(self.computer_agent_os_facade) + + self.act_settings.messages.system = create_multidevice_agent_prompt() + + def close(self) -> None: + self.android_os.disconnect() + super().close() + + def open(self) -> None: + self.android_os.connect() + if self.android_device_sn is not None: + self.android_os.set_device_by_serial_number(self.android_device_sn) + if self._agent_os is not None: + self._agent_os.connect() + super().open() From 873a7a4fbc50f807e5ded117205cb5fd5249726e Mon Sep 17 00:00:00 2001 From: philipph-askui Date: Mon, 9 Mar 2026 11:29:39 +0100 Subject: [PATCH 2/6] chore: add inline comment to warning in init of MultiDeviceAgent --- src/askui/multi_device_agent.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/askui/multi_device_agent.py b/src/askui/multi_device_agent.py index 32a27bea..2931d3b6 100644 --- a/src/askui/multi_device_agent.py +++ b/src/askui/multi_device_agent.py @@ -110,6 +110,8 @@ def __init__( ) if tools: + # After all, I don't even know why we actually have both parameters in + # the constructor. msg = ( "'tools' parameter is not supported for MultiDeviceAgent and will" " be ignored. 
Please set tools via the 'act_tools' parameter" From 070710ddc0580526fb7669ac797a76ef53356a4a Mon Sep 17 00:00:00 2001 From: philipph-askui Date: Mon, 9 Mar 2026 11:34:41 +0100 Subject: [PATCH 3/6] fix: bug with missing android screenshots in Html report of MultiDeviceAgent --- src/askui/multi_device_agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/askui/multi_device_agent.py b/src/askui/multi_device_agent.py index 2931d3b6..79880208 100644 --- a/src/askui/multi_device_agent.py +++ b/src/askui/multi_device_agent.py @@ -60,8 +60,8 @@ def __init__( android_device_sn: str | None = None, ): self.android_device_sn = android_device_sn - self.android_os = PpadbAgentOs() reporter = CompositeReporter(reporters=reporters) + self.android_os = PpadbAgentOs(reporter=reporter) self.android_agent_os_facade = AndroidAgentOsFacade(self.android_os) self.computer_agent_os_tool = AgentToolbox( AskUiControllerClient( From a085a2143ef0bdea533e045190889545462b9e5c Mon Sep 17 00:00:00 2001 From: philipph-askui Date: Mon, 9 Mar 2026 11:47:48 +0100 Subject: [PATCH 4/6] chore: update docs and add info on new `MultiDeviceAgent` --- docs/02_using_agents.md | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/docs/02_using_agents.md b/docs/02_using_agents.md index 8d736e52..a75e531b 100644 --- a/docs/02_using_agents.md +++ b/docs/02_using_agents.md @@ -1,6 +1,6 @@ # Using Agents -AskUI Vision Agent provides three predefined agent types for different automation targets. All agents share the same core API (`act()`, `get()`, `locate()`) but are optimized for their respective platforms. Each agent comes with its own system prompt tailored to its platform-specific tools and capabilities. +AskUI Vision Agent provides four predefined agent types for different automation targets. All agents share the same core API (`act()`, `get()`, `locate()`) but are optimized for their respective platforms. 
Each agent comes with its own system prompt tailored to its platform-specific tools and capabilities. ## ComputerAgent @@ -47,10 +47,30 @@ with WebVisionAgent() as agent: **Default tools:** All `ComputerAgent` tools plus `goto`, `back`, `forward`, `get_page_title`, `get_page_url` +## MultiDeviceAgent + +Use this agent when you need to control a desktop computer and an Android device within the same task. The agent has access to both the full set of computer tools (via AskUI Agent OS) and Android tools (via ADB), and can switch between devices seamlessly during execution. + +This is useful for cross-device workflows, such as triggering an action on the desktop and verifying the result on a mobile device, or transferring data between devices. + +```python +from askui import MultiDeviceAgent + +with MultiDeviceAgent(android_device_sn="emulator-5554") as agent: + agent.act("Open the web app on the computer and send a push notification, then verify it appears on the Android device") +``` + +If you have multiple Android devices connected, pass the serial number of the target device via `android_device_sn`. You can find serial numbers by running `adb devices`. If omitted, no device is preselected and the agent will select one at runtime. + +Requires the `android` dependency installed (`pip install askui[android]`) and a connected device (physical or emulator). + +**Default tools:** All `ComputerAgent` tools plus all `AndroidAgent` tools. Additional tools can be provided via the `act_tools` parameter. 
+ ## Choosing an Agent | Target | Agent | Backend | |--------|-------|---------| | Desktop (Windows/macOS/Linux) | `ComputerAgent` | AskUI Agent OS (gRPC) | | Android devices | `AndroidAgent` | ADB | +| Desktop + Android | `MultiDeviceAgent` | AskUI Agent OS (gRPC) + ADB | | Web browsers | `WebVisionAgent` | Playwright | From dfc5e6faefd34850bcdee3b3dcc82a671705d7c2 Mon Sep 17 00:00:00 2001 From: philipph-askui Date: Mon, 9 Mar 2026 15:04:20 +0100 Subject: [PATCH 5/6] refactor: reimplement to avoid code duplication --- src/askui/multi_device_agent.py | 183 +++++++++++++------------------- 1 file changed, 71 insertions(+), 112 deletions(-) diff --git a/src/askui/multi_device_agent.py b/src/askui/multi_device_agent.py index 79880208..384bfb05 100644 --- a/src/askui/multi_device_agent.py +++ b/src/askui/multi_device_agent.py @@ -1,145 +1,104 @@ -import logging from typing import Annotated from pydantic import Field +from askui.agent import ComputerAgent from askui.agent_base import Agent +from askui.agent_settings import AgentSettings +from askui.android_agent import AndroidAgent from askui.models.shared.tools import Tool from askui.prompts.act_prompts import create_multidevice_agent_prompt from askui.reporting import CompositeReporter, Reporter from askui.retry import Retry -from askui.tools import AgentToolbox, ComputerAgentOsFacade -from askui.tools.android.agent_os_facade import AndroidAgentOsFacade -from askui.tools.android.ppadb_agent_os import PpadbAgentOs -from askui.tools.android.tools import ( - AndroidDragAndDropTool, - AndroidGetConnectedDevicesSerialNumbersTool, - AndroidGetConnectedDisplaysInfosTool, - AndroidGetCurrentConnectedDeviceInfosTool, - AndroidKeyCombinationTool, - AndroidKeyTapEventTool, - AndroidScreenshotTool, - AndroidSelectDeviceBySerialNumberTool, - AndroidSelectDisplayByUniqueIDTool, - AndroidShellTool, - AndroidSwipeTool, - AndroidTapTool, - AndroidTypeTool, -) -from askui.tools.askui import AskUiControllerClient -from 
askui.tools.computer import ( - ComputerGetMousePositionTool, - ComputerGetSystemInfoTool, - ComputerKeyboardPressedTool, - ComputerKeyboardReleaseTool, - ComputerKeyboardTapTool, - ComputerListDisplaysTool, - ComputerMouseClickTool, - ComputerMouseHoldDownTool, - ComputerMouseReleaseTool, - ComputerMouseScrollTool, - ComputerMoveMouseTool, - ComputerRetrieveActiveDisplayTool, - ComputerScreenshotTool, - ComputerSetActiveDisplayTool, - ComputerTypeTool, -) -from askui.tools.exception_tool import ExceptionTool - -logger = logging.getLogger(__name__) class MultiDeviceAgent(Agent): + """ + Multi device agent that combines a computer and an Android agent. + It can be used to perform actions on both devices simultaneously. + + Args: + desktop_display (int, optional): The display number for computer screen + interactions. Defaults to `1`. + reporters (list[Reporter] | None, optional): List of reporter instances. + settings (AgentSettings | None, optional): Settings shared by all agents. + retry (Retry | None, optional): Retry instance for failed actions. + act_tools (list[Tool] | None, optional): Additional tools for `act()`. + android_device_sn (str | int, optional): Android device to use, forwarded + as `device` to `AndroidAgent`. Defaults to `0`.
+ + Example: + ```python + from askui import MultiDeviceAgent + + with MultiDeviceAgent(android_device_sn="emulator-5554") as agent: + agent.computer.click("Start") + agent.android.tap("OK") + agent.act("Fill the form on the phone and submit from the desktop") + ``` + """ + def __init__( self, - display: Annotated[int, Field(ge=1)] = 1, + desktop_display: Annotated[int, Field(ge=1)] = 1, + android_device_sn: str | int = 0, reporters: list[Reporter] | None = None, - tools: AgentToolbox | None = None, retry: Retry | None = None, act_tools: list[Tool] | None = None, - android_device_sn: str | None = None, - ): - self.android_device_sn = android_device_sn + settings: AgentSettings | None = None, + ) -> None: reporter = CompositeReporter(reporters=reporters) - self.android_os = PpadbAgentOs(reporter=reporter) - self.android_agent_os_facade = AndroidAgentOsFacade(self.android_os) - self.computer_agent_os_tool = AgentToolbox( - AskUiControllerClient( - display=display, - reporter=reporter, - ) - ) - - self.android_tools: list[Tool] = [ - AndroidScreenshotTool(self.android_agent_os_facade), - AndroidTapTool(self.android_agent_os_facade), - AndroidTypeTool(self.android_agent_os_facade), - AndroidDragAndDropTool(self.android_agent_os_facade), - AndroidKeyTapEventTool(self.android_agent_os_facade), - AndroidSwipeTool(self.android_agent_os_facade), - AndroidKeyCombinationTool(self.android_agent_os_facade), - AndroidShellTool(self.android_agent_os_facade), - AndroidSelectDeviceBySerialNumberTool(self.android_agent_os_facade), - AndroidSelectDisplayByUniqueIDTool(self.android_agent_os_facade), - AndroidGetConnectedDevicesSerialNumbersTool(self.android_agent_os_facade), - AndroidGetConnectedDisplaysInfosTool(self.android_agent_os_facade), - AndroidGetCurrentConnectedDeviceInfosTool(self.android_agent_os_facade), - ] - self.computer_tools: list[Tool] = [ - ComputerGetSystemInfoTool(), - ComputerGetMousePositionTool(), - ComputerKeyboardPressedTool(), - 
ComputerKeyboardReleaseTool(), - ComputerKeyboardTapTool(), - ComputerMouseClickTool(), - ComputerMouseHoldDownTool(), - ComputerMouseReleaseTool(), - ComputerMouseScrollTool(), - ComputerMoveMouseTool(), - ComputerScreenshotTool(), - ComputerTypeTool(), - ComputerListDisplaysTool(), - ComputerRetrieveActiveDisplayTool(), - ComputerSetActiveDisplayTool(), - ] - - act_tools = act_tools or [] - - multi_device_tools: list[Tool] = ( - act_tools + self.android_tools + self.computer_tools + [ExceptionTool()] - ) - - if tools: - # After all, I don't even know why we actually have both parameters in - # the constructor. - msg = ( - "'tools' parameter is not supported for MultiDeviceAgent and will" - " be ignored. Please set tools via the 'act_tools' parameter" - ) - logger.warning(msg) + # Initialize the base agent super().__init__( reporter=reporter, - tools=multi_device_tools, retry=retry, - agent_os=self.computer_agent_os_tool.os, + settings=settings, ) - self.computer_agent_os_facade: ComputerAgentOsFacade = ComputerAgentOsFacade( - self.computer_agent_os_tool.os + # Initialize the computer agent + self._computer_agent = ComputerAgent( + display=desktop_display, + reporters=[reporter], + settings=settings, ) - self.act_tool_collection.add_agent_os(self.computer_agent_os_facade) + + # Initialize the Android agent + self._android_agent = AndroidAgent( + device=android_device_sn, + reporters=[reporter], + settings=settings, + ) + + # Combine the tool collections of the computer and Android agents + self.act_tool_collection = ( + self._computer_agent.act_tool_collection + + self._android_agent.act_tool_collection + ) + + self.act_tool_collection.append_tool(*(act_tools or [])) self.act_settings.messages.system = create_multidevice_agent_prompt() + @property + def computer(self) -> ComputerAgent: + """The composed computer agent.""" + return self._computer_agent + + @property + def android(self) -> AndroidAgent: + """The composed Android agent.""" + return 
self._android_agent + def close(self) -> None: - self.android_os.disconnect() + self._computer_agent.act_agent_os_facade.disconnect() + self._android_agent.act_agent_os_facade.disconnect() super().close() def open(self) -> None: - self.android_os.connect() - if self.android_device_sn is not None: - self.android_os.set_device_by_serial_number(self.android_device_sn) - if self._agent_os is not None: - self._agent_os.connect() + self._computer_agent.open() + self._android_agent.open() super().open() + + # Get and locate functions must be overridden and throw please use + # .computer_agent and .android_agent instead. From 7e838ff91da197120741a683c59ded664d0afd98 Mon Sep 17 00:00:00 2001 From: philipph-askui Date: Mon, 9 Mar 2026 16:08:30 +0100 Subject: [PATCH 6/6] feat: add locate and get method to MultiDeviceAgent --- src/askui/multi_device_agent.py | 69 +++++++++++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 4 deletions(-) diff --git a/src/askui/multi_device_agent.py b/src/askui/multi_device_agent.py index 384bfb05..6dba3ee2 100644 --- a/src/askui/multi_device_agent.py +++ b/src/askui/multi_device_agent.py @@ -1,4 +1,4 @@ -from typing import Annotated +from typing import Annotated, Optional, Type, overload from pydantic import Field @@ -6,10 +6,15 @@ from askui.agent_base import Agent from askui.agent_settings import AgentSettings from askui.android_agent import AndroidAgent +from askui.locators.locators import Locator +from askui.models.shared.settings import GetSettings, LocateSettings from askui.models.shared.tools import Tool +from askui.models.types.geometry import Point +from askui.models.types.response_schemas import ResponseSchema from askui.prompts.act_prompts import create_multidevice_agent_prompt from askui.reporting import CompositeReporter, Reporter from askui.retry import Retry +from askui.utils.source_utils import InputSource class MultiDeviceAgent(Agent): @@ -90,6 +95,65 @@ def android(self) -> AndroidAgent: """The composed Android 
agent.""" return self._android_agent + @overload + def get( + self, + query: Annotated[str, Field(min_length=1)], + response_schema: None = None, + source: Optional[InputSource] = None, + get_settings: GetSettings | None = None, + ) -> str: ... + @overload + def get( + self, + query: Annotated[str, Field(min_length=1)], + response_schema: Type[ResponseSchema], + source: Optional[InputSource] = None, + get_settings: GetSettings | None = None, + ) -> ResponseSchema: ... + + def get( + self, + query: Annotated[str, Field(min_length=1)], + response_schema: Type[ResponseSchema] | None = None, + source: Optional[InputSource] = None, + get_settings: GetSettings | None = None, + ) -> ResponseSchema | str: + """Not supported on `MultiDeviceAgent`. + + Use `agent.computer.get()` or `agent.android.get()` instead. + + Raises: + NotImplementedError: Always. + """ + error_msg = ( + "MultiDeviceAgent does not support get() directly." + " Use agent.computer.get() or agent.android.get()" + " instead." + ) + raise NotImplementedError(error_msg) + + def locate( + self, + locator: str | Locator, + screenshot: Optional[InputSource] = None, + locate_settings: LocateSettings | None = None, + ) -> Point: + """Not supported on `MultiDeviceAgent`. + + Use `agent.computer.locate()` or `agent.android.locate()` + instead. + + Raises: + NotImplementedError: Always. + """ + error_msg = ( + "MultiDeviceAgent does not support locate() directly." + " Use agent.computer.locate() or" + " agent.android.locate() instead." + ) + raise NotImplementedError(error_msg) + def close(self) -> None: self._computer_agent.act_agent_os_facade.disconnect() self._android_agent.act_agent_os_facade.disconnect() @@ -99,6 +163,3 @@ def open(self) -> None: self._computer_agent.open() self._android_agent.open() super().open() - - # Get and locate functions must be overridden and throw please use - # .computer_agent and .android_agent instead.