From 0619e8533c668abac681de83a5297ca38b0f3075 Mon Sep 17 00:00:00 2001 From: Vijit Dhingra Date: Sat, 11 Apr 2026 17:34:32 -0700 Subject: [PATCH 1/2] Add execute_commands() for command-only execution without agent invocation Introduces a new `execute_commands()` method on the RuntimeUse client that runs commands in the runtime environment and returns per-command exit codes without invoking the agent handler. Logs stream in real time via `on_assistant_message`, errors raise `AgentRuntimeError`, and the artifact upload handshake is supported. Wire protocol: new `command_execution_message` (incoming) and `command_execution_result_message` (outgoing) message types. Bumps both packages from 0.7.0 to 0.8.0. Made-with: Cursor --- .../runtimeuse-client-python/pyproject.toml | 2 +- .../src/runtimeuse_client/__init__.py | 10 + .../src/runtimeuse_client/client.py | 146 ++++++++++++++ .../src/runtimeuse_client/types.py | 58 ++++++ .../runtimeuse-client-python/test/conftest.py | 12 +- .../test/e2e/conftest.py | 11 +- .../test/e2e/test_e2e.py | 143 ++++++++++++++ .../test/sandbox/test_e2b.py | 39 +++- .../test/test_client.py | 185 ++++++++++++++++++ packages/runtimeuse/package.json | 2 +- packages/runtimeuse/src/index.ts | 3 + .../runtimeuse/src/invocation-runner.test.ts | 136 ++++++++++++- packages/runtimeuse/src/invocation-runner.ts | 67 ++++++- packages/runtimeuse/src/session.test.ts | 91 +++++++++ packages/runtimeuse/src/session.ts | 53 ++++- packages/runtimeuse/src/types.ts | 26 ++- 16 files changed, 969 insertions(+), 15 deletions(-) diff --git a/packages/runtimeuse-client-python/pyproject.toml b/packages/runtimeuse-client-python/pyproject.toml index 33d2261..f281860 100644 --- a/packages/runtimeuse-client-python/pyproject.toml +++ b/packages/runtimeuse-client-python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "runtimeuse-client" -version = "0.7.0" +version = "0.8.0" description = "Client library for AI agent runtime communication over WebSocket" readme = "README.md" license = {"text" = "FSL"} diff --git a/packages/runtimeuse-client-python/src/runtimeuse_client/__init__.py b/packages/runtimeuse-client-python/src/runtimeuse_client/__init__.py index 7a49f4b..3ce8254 100644 --- a/packages/runtimeuse-client-python/src/runtimeuse_client/__init__.py +++ b/packages/runtimeuse-client-python/src/runtimeuse_client/__init__.py @@ -6,9 +6,14 @@ RuntimeEnvironmentDownloadableInterface, CommandInterface, InvocationMessage, + CommandExecutionMessage, QueryOptions, + ExecuteCommandsOptions, QueryResult, + CommandResultItem, + CommandExecutionResult, ResultMessageInterface, + CommandExecutionResultMessageInterface, TextResult, StructuredOutputResult, AssistantMessageInterface, @@ -31,9 +36,14 @@ "RuntimeEnvironmentDownloadableInterface", "CommandInterface", "InvocationMessage", + "CommandExecutionMessage", "QueryOptions", + "ExecuteCommandsOptions", "QueryResult", + "CommandResultItem", + "CommandExecutionResult", "ResultMessageInterface", + "CommandExecutionResultMessageInterface", "TextResult", "StructuredOutputResult", "AssistantMessageInterface", diff --git a/packages/runtimeuse-client-python/src/runtimeuse_client/client.py b/packages/runtimeuse-client-python/src/runtimeuse_client/client.py index 9173f5c..992f4ea 100644 --- a/packages/runtimeuse-client-python/src/runtimeuse_client/client.py +++ b/packages/runtimeuse-client-python/src/runtimeuse_client/client.py @@ -6,15 +6,20 @@ from .transports import Transport, WebSocketTransport from .types import ( InvocationMessage, + CommandExecutionMessage, AgentRuntimeMessageInterface, CancelMessage, ErrorMessageInterface, ResultMessageInterface, + CommandExecutionResultMessageInterface, QueryResult, + CommandExecutionResult, + CommandInterface, AssistantMessageInterface, ArtifactUploadRequestMessageInterface, ArtifactUploadResponseMessageInterface, QueryOptions, + ExecuteCommandsOptions, ) from .exceptions import AgentRuntimeError, CancelledException @@ -198,3 +203,144 @@ async def query( raise AgentRuntimeError("No result message received") return QueryResult(data=wire_result.data, metadata=wire_result.metadata) + + async def execute_commands( + self, + commands: list[CommandInterface], + options: ExecuteCommandsOptions, + ) -> CommandExecutionResult: + """Execute commands in the runtime without invoking the agent. + + Sends a :class:`CommandExecutionMessage`, processes the response + stream, and returns a :class:`CommandExecutionResult` with + per-command exit codes. + + Args: + commands: Commands to execute in the runtime environment. + options: Execution configuration including secrets, callbacks, + artifacts, and timeout. + + Raises: + AgentRuntimeError: If a command fails or the runtime sends an error. + CancelledException: If cancelled via :meth:`abort`. + TimeoutError: If the timeout is exceeded. + """ + logger = options.logger or _default_logger + + self._abort_event = asyncio.Event() + + message = CommandExecutionMessage( + message_type="command_execution_message", + source_id=options.source_id, + secrets_to_redact=options.secrets_to_redact, + commands=commands, + artifacts_dir=options.artifacts_dir, + pre_execution_downloadables=options.pre_execution_downloadables, + ) + + send_queue: asyncio.Queue[dict] = asyncio.Queue() + await send_queue.put(message.model_dump(mode="json")) + + wire_result: CommandExecutionResultMessageInterface | None = None + + async with asyncio.timeout(options.timeout): + async for msg in self._transport(send_queue=send_queue): + if self._abort_event.is_set(): + logger.info("Command execution cancelled by caller") + await send_queue.put( + CancelMessage(message_type="cancel_message").model_dump( + mode="json" + ) + ) + await send_queue.join() + raise CancelledException("Command execution was cancelled") + + try: + message_interface = AgentRuntimeMessageInterface.model_validate( + msg + ) + except pydantic.ValidationError: + logger.error( + f"Received unknown message type from agent runtime: {msg}" + ) + continue + + if ( + message_interface.message_type + == "command_execution_result_message" + ): + wire_result = ( + CommandExecutionResultMessageInterface.model_validate(msg) + ) + logger.info( + f"Received command execution result from agent runtime: {msg}" + ) + continue + + elif message_interface.message_type == "assistant_message": + if options.on_assistant_message is not None: + assistant_message_interface = ( + AssistantMessageInterface.model_validate(msg) + ) + await options.on_assistant_message( + assistant_message_interface + ) + continue + + elif message_interface.message_type == "error_message": + try: + error_message_interface = ( + ErrorMessageInterface.model_validate(msg) + ) + except pydantic.ValidationError: + logger.error( + f"Received malformed error message from agent runtime: {msg}", + ) + raise AgentRuntimeError(str(msg)) + logger.error( + f"Error from agent runtime: {error_message_interface}", + ) + raise AgentRuntimeError( + error_message_interface.error, + metadata=error_message_interface.metadata, + ) + + elif ( + message_interface.message_type + == "artifact_upload_request_message" + ): + logger.info( + f"Received artifact upload request message from agent runtime: {msg}" + ) + if options.on_artifact_upload_request is not None: + artifact_upload_request_message_interface = ( + ArtifactUploadRequestMessageInterface.model_validate( + msg + ) + ) + upload_result = await options.on_artifact_upload_request( + artifact_upload_request_message_interface + ) + artifact_upload_response_message_interface = ArtifactUploadResponseMessageInterface( + message_type="artifact_upload_response_message", + filename=artifact_upload_request_message_interface.filename, + filepath=artifact_upload_request_message_interface.filepath, + presigned_url=upload_result.presigned_url, + content_type=upload_result.content_type, + ) + await send_queue.put( + artifact_upload_response_message_interface.model_dump( + mode="json" + ) + ) + continue + + else: + logger.info( + f"Received non-result message from agent runtime: {msg}" + ) + + if wire_result is None: + raise AgentRuntimeError("No result message received") + + return CommandExecutionResult(results=wire_result.results) diff --git a/packages/runtimeuse-client-python/src/runtimeuse_client/types.py b/packages/runtimeuse-client-python/src/runtimeuse_client/types.py index f856b2e..0eec5f5 100644 --- a/packages/runtimeuse-client-python/src/runtimeuse_client/types.py +++ b/packages/runtimeuse-client-python/src/runtimeuse_client/types.py @@ -12,6 +12,7 @@ class AgentRuntimeMessageInterface(BaseModel): "assistant_message", "artifact_upload_request_message", "error_message", + "command_execution_result_message", ] @@ -99,6 +100,33 @@ class CancelMessage(BaseModel): message_type: Literal["cancel_message"] +class CommandExecutionMessage(BaseModel): + message_type: Literal["command_execution_message"] + source_id: str | None = None + secrets_to_redact: list[str] = Field(default_factory=list) + commands: list[CommandInterface] + artifacts_dir: str | None = None + pre_execution_downloadables: list[RuntimeEnvironmentDownloadableInterface] | None = None + + +class CommandResultItem(BaseModel): + command: str + exit_code: int + + +class CommandExecutionResult(BaseModel): + """Result returned by :meth:`RuntimeUseClient.execute_commands`.""" + + results: list[CommandResultItem] + + +class CommandExecutionResultMessageInterface(AgentRuntimeMessageInterface): + """Wire-format result message from command-only execution.""" + + message_type: Literal["command_execution_result_message"] + results: list[CommandResultItem] + + class ArtifactUploadResult(BaseModel): presigned_url: str content_type: str @@ -156,3 +184,33 @@ def __post_init__(self) -> None: raise ValueError( "artifacts_dir and on_artifact_upload_request must be specified together" ) + + +@dataclass +class ExecuteCommandsOptions: + """Options for :meth:`RuntimeUseClient.execute_commands`.""" + + #: Secret values to redact from command output. + secrets_to_redact: list[str] = field(default_factory=list) + #: Caller-defined identifier for tracing/logging purposes. + source_id: str | None = None + #: Directory inside the runtime environment where artifacts are written. + artifacts_dir: str | None = None + #: Files to download into the runtime environment before commands run. + pre_execution_downloadables: list[RuntimeEnvironmentDownloadableInterface] | None = None + #: Called for each assistant (intermediate) message streamed back. + on_assistant_message: OnAssistantMessageCallback | None = None + #: Called when the runtime requests an artifact upload URL. + on_artifact_upload_request: OnArtifactUploadRequestCallback | None = None + #: Overall timeout in seconds. ``None`` means no limit. + timeout: float | None = None + #: Logger instance; falls back to the module-level logger when ``None``. + logger: logging.Logger | None = None + + def __post_init__(self) -> None: + has_dir = self.artifacts_dir is not None + has_cb = self.on_artifact_upload_request is not None + if has_dir != has_cb: + raise ValueError( + "artifacts_dir and on_artifact_upload_request must be specified together" + ) diff --git a/packages/runtimeuse-client-python/test/conftest.py b/packages/runtimeuse-client-python/test/conftest.py index 18ca89b..5f14606 100644 --- a/packages/runtimeuse-client-python/test/conftest.py +++ b/packages/runtimeuse-client-python/test/conftest.py @@ -4,7 +4,7 @@ import dotenv import pytest -from src.runtimeuse_client import RuntimeUseClient, QueryOptions +from src.runtimeuse_client import RuntimeUseClient, QueryOptions, ExecuteCommandsOptions dotenv.load_dotenv() @@ -75,3 +75,13 @@ def query_options(): def make_query_options(): """Return the _make_query_options factory for tests that need custom fields.""" return _make_query_options + + +def _make_execute_commands_options(**overrides: Any) -> ExecuteCommandsOptions: + return ExecuteCommandsOptions(**overrides) + + +@pytest.fixture +def make_execute_commands_options(): + """Return the _make_execute_commands_options factory for tests.""" + return _make_execute_commands_options diff --git a/packages/runtimeuse-client-python/test/e2e/conftest.py b/packages/runtimeuse-client-python/test/e2e/conftest.py index c671818..1d7bb19 100644 --- a/packages/runtimeuse-client-python/test/e2e/conftest.py +++ b/packages/runtimeuse-client-python/test/e2e/conftest.py @@ -11,7 +11,7 @@ import pytest -from src.runtimeuse_client import RuntimeUseClient, QueryOptions +from src.runtimeuse_client import RuntimeUseClient, QueryOptions, ExecuteCommandsOptions E2E_PORT = 8089 REPO_ROOT = Path(__file__).resolve().parents[4] @@ -91,6 +91,15 @@ def make_query_options(): return _make_query_options +def _make_execute_commands_options(**overrides: Any) -> ExecuteCommandsOptions: + return ExecuteCommandsOptions(**overrides) + + +@pytest.fixture +def make_execute_commands_options(): + return _make_execute_commands_options + + @pytest.fixture def http_server(): """Local HTTP server for serving downloadable files and receiving artifact uploads. diff --git a/packages/runtimeuse-client-python/test/e2e/test_e2e.py b/packages/runtimeuse-client-python/test/e2e/test_e2e.py index 08989b3..688e8f7 100644 --- a/packages/runtimeuse-client-python/test/e2e/test_e2e.py +++ b/packages/runtimeuse-client-python/test/e2e/test_e2e.py @@ -10,7 +10,10 @@ from src.runtimeuse_client import ( RuntimeUseClient, QueryOptions, + ExecuteCommandsOptions, QueryResult, + CommandExecutionResult, + CommandResultItem, TextResult, StructuredOutputResult, AssistantMessageInterface, @@ -596,3 +599,143 @@ async def test_secret_redacted_from_error_message( assert "super-secret-value" not in str(exc_info.value) assert "[REDACTED]" in str(exc_info.value) + + +class TestExecuteCommands: + """Tests for RuntimeUseClient.execute_commands.""" + + async def test_single_command_success( + self, client: RuntimeUseClient, make_execute_commands_options + ): + result = await client.execute_commands( + commands=[CommandInterface(command="echo hello")], + options=make_execute_commands_options(timeout=10), + ) + + assert isinstance(result, CommandExecutionResult) + assert len(result.results) == 1 + assert result.results[0].command == "echo hello" + assert result.results[0].exit_code == 0 + + async def test_multiple_commands_success( + self, client: RuntimeUseClient, make_execute_commands_options + ): + result = await client.execute_commands( + commands=[ + CommandInterface(command="echo first"), + CommandInterface(command="echo second"), + ], + options=make_execute_commands_options(timeout=10), + ) + + assert len(result.results) == 2 + assert result.results[0].command == "echo first" + assert result.results[0].exit_code == 0 + assert result.results[1].command == "echo second" + assert result.results[1].exit_code == 0 + + async def test_command_output_streamed_via_callback( + self, client: RuntimeUseClient, make_execute_commands_options + ): + received: list[AssistantMessageInterface] = [] + + async def on_msg(msg: AssistantMessageInterface): + received.append(msg) + + await client.execute_commands( + commands=[CommandInterface(command="echo streamed-sentinel")], + options=make_execute_commands_options( + on_assistant_message=on_msg, timeout=10 + ), + ) + + all_text = [block for msg in received for block in msg.text_blocks] + assert any("streamed-sentinel" in t for t in all_text) + + async def test_failed_command_raises_error( + self, client: RuntimeUseClient, make_execute_commands_options + ): + with pytest.raises(AgentRuntimeError, match="command failed with exit code"): + await client.execute_commands( + commands=[CommandInterface(command="exit 1")], + options=make_execute_commands_options(timeout=10), + ) + + async def test_agent_handler_not_invoked( + self, client: RuntimeUseClient, make_execute_commands_options + ): + result = await client.execute_commands( + commands=[CommandInterface(command="echo no-agent")], + options=make_execute_commands_options(timeout=10), + ) + + assert isinstance(result, CommandExecutionResult) + assert len(result.results) == 1 + + async def test_command_with_cwd( + self, client: RuntimeUseClient, make_execute_commands_options + ): + received: list[AssistantMessageInterface] = [] + + async def on_msg(msg: AssistantMessageInterface): + received.append(msg) + + await client.execute_commands( + commands=[CommandInterface(command="pwd", cwd="/tmp")], + options=make_execute_commands_options( + on_assistant_message=on_msg, timeout=10 + ), + ) + + all_text = [block for msg in received for block in msg.text_blocks] + assert any("/tmp" in t for t in all_text) + + async def test_secrets_redacted_from_output( + self, client: RuntimeUseClient, make_execute_commands_options + ): + received: list[AssistantMessageInterface] = [] + + async def on_msg(msg: AssistantMessageInterface): + received.append(msg) + + await client.execute_commands( + commands=[CommandInterface(command="echo my-secret-token")], + options=make_execute_commands_options( + secrets_to_redact=["my-secret-token"], + on_assistant_message=on_msg, + timeout=10, + ), + ) + + all_text = [block for msg in received for block in msg.text_blocks] + assert not any("my-secret-token" in t for t in all_text) + assert any("[REDACTED]" in t for t in all_text) + + async def test_cancellation( + self, ws_url: str, make_execute_commands_options + ): + client = RuntimeUseClient(ws_url=ws_url) + + async def abort_on_first(msg: AssistantMessageInterface): + client.abort() + + with pytest.raises((CancelledException, TimeoutError)): + await client.execute_commands( + commands=[ + CommandInterface( + command="for i in $(seq 1 100); do echo line$i; sleep 0.1; done" + ), + ], + options=make_execute_commands_options( + on_assistant_message=abort_on_first, timeout=5 + ), + ) + + async def test_timeout( + self, client: RuntimeUseClient, make_execute_commands_options + ): + with pytest.raises(TimeoutError): + await client.execute_commands( + commands=[CommandInterface(command="sleep 30")], + options=make_execute_commands_options(timeout=0.5), + ) diff --git a/packages/runtimeuse-client-python/test/sandbox/test_e2b.py b/packages/runtimeuse-client-python/test/sandbox/test_e2b.py index f447428..c885710 100644 --- a/packages/runtimeuse-client-python/test/sandbox/test_e2b.py +++ b/packages/runtimeuse-client-python/test/sandbox/test_e2b.py @@ -1,12 +1,17 @@ -"""Smoke test: verify that an E2B sandbox can run runtimeuse and answer a query.""" +"""Smoke tests: verify that an E2B sandbox can run runtimeuse, +answer a query, and execute commands.""" import pytest from src.runtimeuse_client import ( RuntimeUseClient, QueryOptions, + ExecuteCommandsOptions, QueryResult, + CommandExecutionResult, + CommandInterface, TextResult, + AssistantMessageInterface, ) from test.sandbox_factories.e2b import create_e2b_runtimeuse @@ -22,7 +27,7 @@ async def test_hello_world(self): prompt="Say hello world", options=QueryOptions( system_prompt="Reply concisely.", - model="gpt-4.1-mini", + model="gpt-5.4", ), ) @@ -31,3 +36,33 @@ async def test_hello_world(self): assert len(result.data.text) > 0 finally: sandbox.kill() + + async def test_execute_commands(self): + sandbox, ws_url = create_e2b_runtimeuse(agent="openai") + try: + received: list[AssistantMessageInterface] = [] + + async def on_msg(msg: AssistantMessageInterface): + received.append(msg) + + client = RuntimeUseClient(ws_url=ws_url) + result = await client.execute_commands( + commands=[ + CommandInterface(command="echo hello-from-e2b"), + CommandInterface(command="node --version"), + ], + options=ExecuteCommandsOptions( + on_assistant_message=on_msg, + timeout=30, + ), + ) + + assert isinstance(result, CommandExecutionResult) + assert len(result.results) == 2 + assert result.results[0].exit_code == 0 + assert result.results[1].exit_code == 0 + + all_text = [block for msg in received for block in msg.text_blocks] + assert any("hello-from-e2b" in t for t in all_text) + finally: + sandbox.kill() diff --git a/packages/runtimeuse-client-python/test/test_client.py b/packages/runtimeuse-client-python/test/test_client.py index 3a19113..445d7d9 100644 --- a/packages/runtimeuse-client-python/test/test_client.py +++ b/packages/runtimeuse-client-python/test/test_client.py @@ -7,7 +7,10 @@ from src.runtimeuse_client import ( RuntimeUseClient, QueryOptions, + ExecuteCommandsOptions, QueryResult, + CommandExecutionResult, + CommandResultItem, TextResult, StructuredOutputResult, AssistantMessageInterface, @@ -15,6 +18,7 @@ ArtifactUploadResult, AgentRuntimeError, CancelledException, + CommandInterface, ) @@ -467,3 +471,184 @@ async def _dummy_cb(req): ) assert opts.artifacts_dir == "/tmp/artifacts" assert opts.on_artifact_upload_request is _dummy_cb + + +# --------------------------------------------------------------------------- +# execute_commands +# --------------------------------------------------------------------------- + +COMMAND_RESULT_MSG = { + "message_type": "command_execution_result_message", + "results": [{"command": "echo hello", "exit_code": 0}], +} + + +class TestExecuteCommands: + @pytest.mark.asyncio + async def test_returns_command_execution_result( + self, fake_transport, make_execute_commands_options + ): + transport, client = fake_transport([COMMAND_RESULT_MSG]) + + result = await client.execute_commands( + commands=[CommandInterface(command="echo hello")], + options=make_execute_commands_options(), + ) + + assert isinstance(result, CommandExecutionResult) + assert len(result.results) == 1 + assert result.results[0].command == "echo hello" + assert result.results[0].exit_code == 0 + + @pytest.mark.asyncio + async def test_sends_command_execution_message( + self, fake_transport, make_execute_commands_options + ): + transport, client = fake_transport([COMMAND_RESULT_MSG]) + + await client.execute_commands( + commands=[CommandInterface(command="echo hello")], + options=make_execute_commands_options(source_id="cmd-test"), + ) + + cmd_msgs = [ + m + for m in transport.sent + if m.get("message_type") == "command_execution_message" + ] + assert len(cmd_msgs) == 1 + assert cmd_msgs[0]["source_id"] == "cmd-test" + assert cmd_msgs[0]["commands"] == [{"command": "echo hello", "cwd": None}] + + @pytest.mark.asyncio + async def test_assistant_message_dispatched( + self, fake_transport, make_execute_commands_options + ): + assistant_msg = { + "message_type": "assistant_message", + "text_blocks": ["output line"], + } + transport, client = fake_transport([assistant_msg, COMMAND_RESULT_MSG]) + on_assistant = AsyncMock() + + await client.execute_commands( + commands=[CommandInterface(command="echo hello")], + options=make_execute_commands_options(on_assistant_message=on_assistant), + ) + + on_assistant.assert_awaited_once() + received = on_assistant.call_args[0][0] + assert isinstance(received, AssistantMessageInterface) + assert received.text_blocks == ["output line"] + + @pytest.mark.asyncio + async def test_error_message_raises( + self, fake_transport, make_execute_commands_options + ): + error_msg = { + "message_type": "error_message", + "error": "command failed with exit code: 1", + "metadata": {}, + } + transport, client = fake_transport([error_msg]) + + with pytest.raises(AgentRuntimeError, match="command failed"): + await client.execute_commands( + commands=[CommandInterface(command="exit 1")], + options=make_execute_commands_options(), + ) + + @pytest.mark.asyncio + async def test_no_result_raises( + self, fake_transport, make_execute_commands_options + ): + transport, client = fake_transport([]) + + with pytest.raises(AgentRuntimeError, match="No result message received"): + await client.execute_commands( + commands=[CommandInterface(command="echo hello")], + options=make_execute_commands_options(), + ) + + @pytest.mark.asyncio + async def test_abort_raises_cancelled( + self, fake_transport, make_execute_commands_options + ): + filler_msg = { + "message_type": "assistant_message", + "text_blocks": ["working..."], + } + transport, client = fake_transport([filler_msg, filler_msg]) + + async def abort_on_first_message(_msg): + client.abort() + + with pytest.raises(CancelledException): + await client.execute_commands( + commands=[CommandInterface(command="echo hello")], + options=make_execute_commands_options( + on_assistant_message=abort_on_first_message + ), + ) + + @pytest.mark.asyncio + async def test_timeout_raises(self, make_execute_commands_options): + async def stalling_transport(send_queue: asyncio.Queue[dict]): + await asyncio.sleep(10) + yield {} + + client = RuntimeUseClient(transport=stalling_transport) + + with pytest.raises(TimeoutError): + await client.execute_commands( + commands=[CommandInterface(command="echo hello")], + options=make_execute_commands_options(timeout=0.05), + ) + + @pytest.mark.asyncio + async def test_artifact_upload_handshake( + self, fake_transport, make_execute_commands_options + ): + upload_request = { + "message_type": "artifact_upload_request_message", + "filename": "output.txt", + "filepath": "/tmp/output.txt", + } + transport, client = fake_transport([upload_request, COMMAND_RESULT_MSG]) + + async def on_artifact( + req: ArtifactUploadRequestMessageInterface, + ) -> ArtifactUploadResult: + return ArtifactUploadResult( + presigned_url="https://s3.example.com/presigned", + content_type="text/plain", + ) + + await client.execute_commands( + commands=[CommandInterface(command="echo hello")], + options=make_execute_commands_options( + artifacts_dir="/tmp/artifacts", + on_artifact_upload_request=on_artifact, + ), + ) + + response_msgs = [ + m + for m in transport.sent + if m.get("message_type") == "artifact_upload_response_message" + ] + assert len(response_msgs) == 1 + assert response_msgs[0]["filename"] == "output.txt" + assert response_msgs[0]["presigned_url"] == "https://s3.example.com/presigned" + + def test_execute_commands_options_artifacts_validation(self): + with pytest.raises(ValueError, match="must be specified together"): + ExecuteCommandsOptions(artifacts_dir="/tmp/artifacts") + + async def _dummy_cb(req): + return ArtifactUploadResult( + presigned_url="https://example.com", content_type="text/plain" + ) + + with pytest.raises(ValueError, match="must be specified together"): + ExecuteCommandsOptions(on_artifact_upload_request=_dummy_cb) diff --git a/packages/runtimeuse/package.json b/packages/runtimeuse/package.json index fd0c14e..203adba 100644 --- a/packages/runtimeuse/package.json +++ b/packages/runtimeuse/package.json @@ -1,6 +1,6 @@ { "name": "runtimeuse", - "version": "0.7.0", + "version": "0.8.0", "description": "AI agent runtime with WebSocket protocol, artifact handling, and secret management", "license": "FSL", "type": "module", diff --git a/packages/runtimeuse/src/index.ts b/packages/runtimeuse/src/index.ts index 24187cd..5b0921f 100644 --- a/packages/runtimeuse/src/index.ts +++ b/packages/runtimeuse/src/index.ts @@ -40,6 +40,9 @@ export type { IncomingMessage, OutgoingMessage, InvocationMessage, + CommandExecutionMessage, + CommandExecutionResultMessage, + CommandExecutionResultItem, CancelMessage, ResultMessage, AssistantMessage, diff --git a/packages/runtimeuse/src/invocation-runner.test.ts b/packages/runtimeuse/src/invocation-runner.test.ts index a94de64..7e1889b 100644 --- a/packages/runtimeuse/src/invocation-runner.test.ts +++ b/packages/runtimeuse/src/invocation-runner.test.ts @@ -6,7 +6,7 @@ import type { MessageSender, } from "./agent-handler.js"; import type { Logger } from "./logger.js"; -import type { InvocationMessage, OutgoingMessage } from "./types.js"; +import type { InvocationMessage, CommandExecutionMessage, OutgoingMessage } from "./types.js"; import CommandHandler from "./command-handler.js"; import { InvocationRunner } from "./invocation-runner.js"; @@ -303,3 +303,137 @@ describe("InvocationRunner", () => { expect(CommandHandler).toHaveBeenCalledTimes(3); }); }); + +const BASE_COMMAND_EXECUTION_MESSAGE: CommandExecutionMessage = { + message_type: "command_execution_message", + source_id: "source-id", + secrets_to_redact: ["api-key"], + commands: [{ command: "echo hello", cwd: "/app" }], +}; + +function createCommandRunner(overrides?: Partial) { + const logger: Logger = { + log: vi.fn(), + error: vi.fn(), + debug: vi.fn(), + }; + const abortController = new AbortController(); + const send = vi.fn<(msg: OutgoingMessage) => void>(); + const handler: AgentHandler = { run: mockHandlerRun }; + const runner = new InvocationRunner({ + handler, + logger, + abortController, + send, + }); + + return { + runner, + logger, + send, + abortController, + message: { ...BASE_COMMAND_EXECUTION_MESSAGE, ...overrides }, + }; +} + +describe("InvocationRunner.runCommandsOnly", () => { + beforeEach(() => { + vi.clearAllMocks(); + mockDownload.mockResolvedValue(undefined); + mockExecute.mockResolvedValue({ exitCode: 0 }); + }); + + it("sends command_execution_result_message on success", async () => { + const { runner, message, send } = createCommandRunner(); + + await runner.runCommandsOnly(message); + + expect(send).toHaveBeenCalledWith({ + message_type: "command_execution_result_message", + results: [{ command: "echo hello", exit_code: 0 }], + }); + expect(mockHandlerRun).not.toHaveBeenCalled(); + }); + + it("collects results for multiple commands", async () => { + const { runner, message, send } = createCommandRunner({ + commands: [ + { command: "echo 1", cwd: "/app" }, + { command: "echo 2", cwd: "/app" }, + ], + }); + + await runner.runCommandsOnly(message); + + expect(send).toHaveBeenCalledWith({ + message_type: "command_execution_result_message", + results: [ + { command: "echo 1", exit_code: 0 }, + { command: "echo 2", exit_code: 0 }, + ], + }); + }); + + it("forwards stdout and stderr through assistant messages", async () => { + mockExecute.mockImplementation(async (options) => { + options.onStdout?.("stdout data"); + options.onStderr?.("stderr data"); + return { exitCode: 0 }; + }); + + const { runner, message, send } = createCommandRunner(); + + await runner.runCommandsOnly(message); + + expect(send).toHaveBeenCalledWith({ + message_type: "assistant_message", + text_blocks: ["stdout data"], + }); + expect(send).toHaveBeenCalledWith({ + message_type: "assistant_message", + text_blocks: ["stderr data"], + }); + }); + + it("sends error message and throws when command exits non-zero", async () => { + mockExecute.mockResolvedValueOnce({ exitCode: 2 }); + const { runner, message, send, logger } = createCommandRunner(); + + await expect(runner.runCommandsOnly(message)).rejects.toThrow( + "command failed with exit code: 2", + ); + + expect(logger.error).toHaveBeenCalledWith( + "command failed with exit code: 2", + ); + expect(send).toHaveBeenCalledWith({ + message_type: "error_message", + error: "command failed with exit code: 2", + metadata: {}, + }); + }); + + it("downloads runtime environment before running commands", async () => { + const events: string[] = []; + mockDownload.mockImplementation(async () => { + events.push("download"); + }); + mockExecute.mockImplementation(async (options) => { + events.push(`command:${options.command.command}`); + return { exitCode: 0 }; + }); + + const { runner, message } = createCommandRunner({ + pre_execution_downloadables: [ + { + download_url: "https://example.com/runtime.tar.gz", + working_dir: "/tmp", + }, + ], + }); + + await runner.runCommandsOnly(message); + + expect(events).toEqual(["download", "command:echo hello"]); + }); +}); diff --git a/packages/runtimeuse/src/invocation-runner.ts b/packages/runtimeuse/src/invocation-runner.ts index 5683c49..5b86c74 100644 --- a/packages/runtimeuse/src/invocation-runner.ts +++ b/packages/runtimeuse/src/invocation-runner.ts @@ -1,5 +1,11 @@ import type { AgentHandler, MessageSender } from "./agent-handler.js"; -import type { InvocationMessage, OutgoingMessage } from "./types.js"; +import type { + InvocationMessage, + CommandExecutionMessage, + CommandExecutionResultItem, + OutgoingMessage, + RuntimeEnvironmentDownloadable, +} from "./types.js"; import type { Logger } from "./logger.js"; import CommandHandler from "./command-handler.js"; import DownloadHandler from "./download-handler.js"; @@ -23,7 +29,7 @@ export class InvocationRunner { async run(message: InvocationMessage): Promise { const { handler, logger, abortController, send } = this.config; - await this.downloadRuntimeEnvironment(message); + await this.downloadRuntimeEnvironment(message.pre_agent_downloadables); await this.runCommands(message.pre_agent_invocation_commands, "pre-agent", message.secrets_to_redact); const sender = this.createSender(); @@ -69,13 +75,64 @@ export class InvocationRunner { ); } + async runCommandsOnly(message: CommandExecutionMessage): Promise { + const { logger, send } = this.config; + + await this.downloadRuntimeEnvironment(message.pre_execution_downloadables); + + const results: CommandExecutionResultItem[] = []; + for (const command of message.commands) { + await this.runCommandAndCollect(command, message.secrets_to_redact, results); + } + + const resultMessage: OutgoingMessage = { + message_type: "command_execution_result_message", + results, + }; + logger.log("Sending command execution result:", JSON.stringify(resultMessage)); + send(resultMessage); + } + + private async runCommandAndCollect( + command: { command: string; cwd?: string }, + secrets: string[], + results: CommandExecutionResultItem[], + ): Promise { + const { logger, abortController, send } = this.config; + + logger.log( + `Executing command: ${command.command} in directory: ${command.cwd}`, + ); + + const handler = new CommandHandler({ + command, + secrets, + logger, + abortController, + onStdout: (stdout) => + send({ message_type: "assistant_message", text_blocks: [stdout] }), + onStderr: (stderr) => + send({ message_type: "assistant_message", text_blocks: [stderr] }), + }); + + const result = await handler.execute(); + if (result.exitCode !== 0) { + const errorMsg = `command failed with exit code: ${result.exitCode}`; + logger.error(errorMsg); + send({ message_type: "error_message", error: errorMsg, metadata: {} }); + throw new Error(errorMsg); + } + + results.push({ command: command.command, exit_code: result.exitCode }); + } + private async downloadRuntimeEnvironment( - message: InvocationMessage, + downloadables?: RuntimeEnvironmentDownloadable[], ): Promise { - if (!message.pre_agent_downloadables) return; + if (!downloadables) return; this.config.logger.log("Downloading runtime environment downloadables..."); - for (const downloadable of message.pre_agent_downloadables) { + for (const downloadable of downloadables) { await this.downloadHandler.download( downloadable.download_url, downloadable.working_dir, diff --git a/packages/runtimeuse/src/session.test.ts b/packages/runtimeuse/src/session.test.ts index d53ce56..bba973d 100644 --- a/packages/runtimeuse/src/session.test.ts +++ b/packages/runtimeuse/src/session.test.ts @@ -440,6 +440,97 @@ describe("WebSocketSession", () => { }); }); + describe("command execution message", () => { + const COMMAND_EXEC_MSG = { + message_type: "command_execution_message" as const, + source_id: "test-source-id", + secrets_to_redact: ["secret123"], + commands: [{ command: "echo hello", cwd: "/app" }], + }; + + it("resolves when command execution finishes", async () => { + mockCommandExecute.mockResolvedValueOnce({ exitCode: 0 }); + const { session, ws } = createSession(); + const done = session.run(); + sendMessage(ws, COMMAND_EXEC_MSG); + await done; + }); + + it("sends command_execution_result_message on success", async () => { + mockCommandExecute.mockResolvedValueOnce({ exitCode: 0 }); + const { session, ws } = createSession(); + const done = session.run(); + sendMessage(ws, COMMAND_EXEC_MSG); + await done; + + const sent = parseSentMessages(ws); + const result = sent.find( + (m) => m.message_type === "command_execution_result_message", + ); + expect(result).toBeDefined(); + expect(result!.results).toEqual([ + { command: "echo hello", exit_code: 0 }, + ]); + }); + + it("does not invoke the agent handler", async () => { + mockCommandExecute.mockResolvedValueOnce({ exitCode: 0 }); + const { session, ws } = createSession(); + const done = session.run(); + sendMessage(ws, COMMAND_EXEC_MSG); + await done; + + expect(mockHandlerRun).not.toHaveBeenCalled(); + }); + + it("sends error when command fails", async () => { + mockCommandExecute.mockResolvedValueOnce({ exitCode: 1 }); + const { session, ws } = createSession(); + const done = session.run(); + sendMessage(ws, COMMAND_EXEC_MSG); + await done; + + expectSentError(ws, "command failed with exit code: 1"); + }); + + it("rejects duplicate command execution messages", async () => { + let resolveCmd!: () => void; + mockCommandExecute.mockImplementation( + () => + new Promise((r) => { + resolveCmd = () => r({ exitCode: 0 }); + }), + ); + + const { session, ws } = createSession(); + session.run(); + + sendMessage(ws, COMMAND_EXEC_MSG); + await tick(); + sendMessage(ws, COMMAND_EXEC_MSG); + await tick(); + + expectSentError(ws, "multiple invocation messages"); + + resolveCmd(); + }); + + it("redacts secrets from command output", async () => { + mockCommandExecute.mockImplementation(async function (this: any) { + return { exitCode: 0 }; + }); + const { session, ws } = createSession(); + const done = session.run(); + sendMessage(ws, COMMAND_EXEC_MSG); + await done; + + const sent = parseSentMessages(ws); + for (const msg of sent) { + expect(JSON.stringify(msg)).not.toContain("secret123"); + } + }); + }); + describe("pre-agent invocation commands", () => { it("continues to agent when pre-agent command exits with 0", async () => { mockCommandExecute.mockResolvedValueOnce({ exitCode: 0 }); diff --git a/packages/runtimeuse/src/session.ts b/packages/runtimeuse/src/session.ts index e62e6c2..d23fdc7 100644 --- a/packages/runtimeuse/src/session.ts +++ b/packages/runtimeuse/src/session.ts @@ -3,7 +3,7 @@ import { WebSocket } from "ws"; import type { AgentHandler } from "./agent-handler.js"; import { ArtifactManager } from "./artifact-manager.js"; import type { UploadTracker } from "./upload-tracker.js"; -import type { InvocationMessage, IncomingMessage, OutgoingMessage } from "./types.js"; +import type { InvocationMessage, CommandExecutionMessage, IncomingMessage, OutgoingMessage } from "./types.js"; import { getErrorMessage, serializeErrorMetadata } from "./error-utils.js"; import { redactSecrets, sleep } from "./utils.js"; import { createLogger, createRedactingLogger, defaultLogger, type Logger } from "./logger.js"; @@ -72,7 +72,8 @@ export class WebSocketSession { ): Promise { if ( !this.invocationReceived && - message.message_type !== "invocation_message" + message.message_type !== "invocation_message" && + message.message_type !== "command_execution_message" ) { throw new Error( "Received non-invocation message before invocation message! Received: " + @@ -114,6 +115,21 @@ export class WebSocketSession { await this.finalize(); resolve(); break; + + case "command_execution_message": + if (this.invocationReceived) { + throw new Error("Received multiple invocation messages!"); + } + this.invocationReceived = true; + await this.executeCommandsOnly(message); + const hasCommandArtifacts = this.artifactManager !== null; + if (process.env.NODE_ENV !== "test" || hasCommandArtifacts) { + this.logger.log("Waiting for post-invocation delay..."); + await sleep(this.config.postInvocationDelayMs ?? 3_000); + } + await this.finalize(); + resolve(); + break; } } @@ -150,6 +166,39 @@ export class WebSocketSession { } } + private async executeCommandsOnly(message: CommandExecutionMessage): Promise { + const sourceId = message.source_id ?? crypto.randomUUID(); + this.secrets = message.secrets_to_redact ?? []; + this.logger = createRedactingLogger(createLogger(sourceId), this.secrets); + this.config.uploadTracker.setLogger(this.logger); + this.logger.log("Received command execution request"); + + this.initArtifactManager(message.artifacts_dir); + + const runner = new InvocationRunner({ + handler: this.config.handler, + logger: this.logger, + abortController: this.abortController, + send: (msg) => this.send(msg), + }); + + try { + await runner.runCommandsOnly(message); + } catch (error) { + if (this.abortController.signal.aborted) { + this.ws.close(); + this.logger.log("Command execution aborted."); + return; + } + this.logger.error("Error in command execution:", error); + this.send({ + message_type: "error_message", + error: getErrorMessage(error), + metadata: serializeErrorMetadata(error), + }); + } + } + private initArtifactManager(artifactsDir?: string): void { if (!artifactsDir) return; this.artifactManager = new ArtifactManager({ diff --git a/packages/runtimeuse/src/types.ts b/packages/runtimeuse/src/types.ts index 7cc4e7a..433edac 100644 --- a/packages/runtimeuse/src/types.ts +++ b/packages/runtimeuse/src/types.ts @@ -22,6 +22,25 @@ interface InvocationMessage { pre_agent_downloadables?: RuntimeEnvironmentDownloadable[]; } +interface CommandExecutionMessage { + message_type: "command_execution_message"; + source_id?: string; + secrets_to_redact: string[]; + commands: Command[]; + artifacts_dir?: string; + pre_execution_downloadables?: RuntimeEnvironmentDownloadable[]; +} + +interface CommandExecutionResultItem { + command: string; + exit_code: number; +} + +interface CommandExecutionResultMessage { + message_type: "command_execution_result_message"; + results: CommandExecutionResultItem[]; +} + interface CancelMessage { message_type: "cancel_message"; } @@ -64,10 +83,12 @@ type OutgoingMessage = | ResultMessage | AssistantMessage | ArtifactUploadRequestMessage - | ErrorMessage; + | ErrorMessage + | CommandExecutionResultMessage; type IncomingMessage = | InvocationMessage + | CommandExecutionMessage | ArtifactUploadResponseMessage | CancelMessage; @@ -75,6 +96,9 @@ export type { IncomingMessage, OutgoingMessage, InvocationMessage, + CommandExecutionMessage, + CommandExecutionResultMessage, + CommandExecutionResultItem, CancelMessage, ResultMessage, AssistantMessage, From 33157ea6778a6d03f081d1cf7be1e02773ab49f2 Mon Sep 17 00:00:00 2001 From: Vijit Dhingra Date: Sat, 11 Apr 2026 17:40:07 -0700 Subject: [PATCH 2/2] Document execute_commands() in client README, docs site, and runtime README Made-with: Cursor --- docs/content/docs/python-client.mdx | 30 ++++++++++++++++++ packages/runtimeuse-client-python/README.md | 35 ++++++++++++++++++++- packages/runtimeuse/README.md | 20 +++++++----- 3 files changed, 77 insertions(+), 8 deletions(-) diff --git a/docs/content/docs/python-client.mdx b/docs/content/docs/python-client.mdx index 586c95d..d30c30b 100644 --- a/docs/content/docs/python-client.mdx +++ b/docs/content/docs/python-client.mdx @@ -147,6 +147,36 @@ result = await client.query( ) ``` +## Run Commands Without the Agent + +Use `execute_commands()` when you only need to run shell commands in the sandbox -- no agent invocation, no prompt. The method returns per-command exit codes and raises `AgentRuntimeError` if any command fails. + +```python +from runtimeuse_client import ( + CommandInterface, + ExecuteCommandsOptions, + RuntimeUseClient, +) + +client = RuntimeUseClient(ws_url="ws://localhost:8080") + +result = await client.execute_commands( + commands=[ + CommandInterface(command="mkdir -p /app/output"), + CommandInterface(command="echo 'sandbox is ready' > /app/output/status.txt"), + CommandInterface(command="cat /app/output/status.txt"), + ], + options=ExecuteCommandsOptions( + on_assistant_message=on_assistant_message, # streams stdout/stderr + ), +) + +for item in result.results: + print(f"{item.command} -> exit {item.exit_code}") +``` + +`execute_commands()` supports the same callbacks and options as `query()`: streaming via `on_assistant_message`, artifact uploads, cancellation, timeout, and `secrets_to_redact`. Use `pre_execution_downloadables` to fetch files into the sandbox before the commands run. + ## Cancel a Run Call `client.abort()` from another coroutine to cancel an in-flight query. The client sends a cancel message to the runtime and `query()` raises `CancelledException`. diff --git a/packages/runtimeuse-client-python/README.md b/packages/runtimeuse-client-python/README.md index bbf98c5..7eaeb2e 100644 --- a/packages/runtimeuse-client-python/README.md +++ b/packages/runtimeuse-client-python/README.md @@ -130,6 +130,36 @@ elif isinstance(result.data, StructuredOutputResult): print(result.metadata) # execution metadata ``` +### Command-Only Execution + +Use `execute_commands()` when you need to run shell commands in the sandbox without invoking the agent. This is useful for setup steps, health checks, or any workflow where you only need command exit codes. + +```python +from runtimeuse_client import ( + CommandInterface, + ExecuteCommandsOptions, + RuntimeUseClient, +) + +client = RuntimeUseClient(ws_url="ws://localhost:8080") + +result = await client.execute_commands( + commands=[ + CommandInterface(command="mkdir -p /app/output"), + CommandInterface(command="echo 'sandbox is ready' > /app/output/status.txt"), + CommandInterface(command="cat /app/output/status.txt"), + ], + options=ExecuteCommandsOptions( + on_assistant_message=on_assistant, # optional -- streams stdout/stderr + ), +) + +for item in result.results: + print(f"{item.command} -> exit code {item.exit_code}") +``` + +`execute_commands()` supports the same streaming, cancellation, timeout, secret redaction, artifact upload, and error semantics as `query()`. If any command exits non-zero, `AgentRuntimeError` is raised. + ### Artifact Upload Handshake When the agent runtime requests an artifact upload, provide a callback that returns a presigned URL and content type. The client sends the response back automatically. @@ -184,7 +214,10 @@ except CancelledException: | `ArtifactUploadRequestMessageInterface` | Runtime requesting a presigned URL for artifact upload | | `ArtifactUploadResponseMessageInterface` | Response with presigned URL sent back to runtime | | `ErrorMessageInterface` | Error from the agent runtime | -| `CommandInterface` | Pre/post invocation shell command | +| `ExecuteCommandsOptions` | Configuration for `client.execute_commands()` (callbacks, timeout) | +| `CommandExecutionResult` | Return type of `execute_commands()` (`.results`) | +| `CommandResultItem` | Per-command result (`.command`, `.exit_code`) | +| `CommandInterface` | Shell command to execute (`.command`, `.cwd`) | | `RuntimeEnvironmentDownloadableInterface` | File to download into the runtime before invocation | ### Exceptions diff --git a/packages/runtimeuse/README.md b/packages/runtimeuse/README.md index eb49156..79adc5e 100644 --- a/packages/runtimeuse/README.md +++ b/packages/runtimeuse/README.md @@ -175,6 +175,10 @@ When a client sends an `invocation_message`, the session: 4. **Sends `result_message`** -- the `AgentResult` from your handler is sent back to the client 5. **Finalizes** -- stops artifact watching, waits for pending uploads, closes the WebSocket +### Command-Only Execution + +The session also accepts a `command_execution_message` instead of an `invocation_message`. This runs `pre_execution_downloadables` and the provided commands, streams output as `assistant_message`s, and returns a `command_execution_result_message` with per-command exit codes -- without invoking the agent handler. See the [Python client docs](../runtimeuse-client-python/README.md#command-only-execution) for usage. + ## Artifact Management Files written to the artifacts directory are automatically detected via `chokidar` file watching and uploaded through a presigned URL handshake with the client. The artifacts directory is specified per-invocation via the `artifacts_dir` field in the `InvocationMessage`. @@ -222,13 +226,15 @@ Command output (stdout/stderr) from pre-commands is automatically redacted using | Type | Direction | Description | | ------------------------------- | ----------------- | --------------------------------------- | -| `InvocationMessage` | Client -> Runtime | Start an agent invocation | -| `CancelMessage` | Client -> Runtime | Cancel a running invocation | -| `ArtifactUploadResponseMessage` | Client -> Runtime | Presigned URL for artifact upload | -| `ResultMessage` | Runtime -> Client | Structured agent result | -| `AssistantMessage` | Runtime -> Client | Intermediate text from the agent | -| `ArtifactUploadRequestMessage` | Runtime -> Client | Request a presigned URL for an artifact | -| `ErrorMessage` | Runtime -> Client | Error during execution | +| `InvocationMessage` | Client -> Runtime | Start an agent invocation | +| `CommandExecutionMessage` | Client -> Runtime | Run commands without agent invocation | +| `CancelMessage` | Client -> Runtime | Cancel a running invocation or execution | +| `ArtifactUploadResponseMessage` | Client -> Runtime | Presigned URL for artifact upload | +| `ResultMessage` | Runtime -> Client | Structured agent result | +| `CommandExecutionResultMessage` | Runtime -> Client | Per-command exit codes | +| `AssistantMessage` | Runtime -> Client | Intermediate text from the agent | +| `ArtifactUploadRequestMessage` | Runtime -> Client | Request a presigned URL for an artifact | +| `ErrorMessage` | Runtime -> Client | Error during execution | ## Related Docs