From 7f28621034163bf55c30edd25f2ac44e1688e79a Mon Sep 17 00:00:00 2001
From: Vijit Dhingra
Date: Sun, 15 Mar 2026 16:19:57 -0700
Subject: [PATCH 1/3] cli test

---
 packages/runtimeuse/package.json              |   2 +
 .../runtimeuse/test/integration/cli.test.ts   | 201 ++++++++++++++++++
 .../test/integration/fixtures/echo-handler.js |  51 +++++
 packages/runtimeuse/vitest.config.ts          |   2 +-
 .../runtimeuse/vitest.integration.config.ts   |   8 +
 5 files changed, 263 insertions(+), 1 deletion(-)
 create mode 100644 packages/runtimeuse/test/integration/cli.test.ts
 create mode 100644 packages/runtimeuse/test/integration/fixtures/echo-handler.js
 create mode 100644 packages/runtimeuse/vitest.integration.config.ts

diff --git a/packages/runtimeuse/package.json b/packages/runtimeuse/package.json
index e860fd2..9a39cf6 100644
--- a/packages/runtimeuse/package.json
+++ b/packages/runtimeuse/package.json
@@ -35,6 +35,8 @@
     "build": "tsc",
     "prepublishOnly": "npm run build",
     "test": "vitest run",
+    "pretest:integration": "npm run build",
+    "test:integration": "vitest run --config vitest.integration.config.ts",
     "typecheck": "tsc --noEmit",
     "dev-publish": "bash scripts/dev-publish.sh"
   },
diff --git a/packages/runtimeuse/test/integration/cli.test.ts b/packages/runtimeuse/test/integration/cli.test.ts
new file mode 100644
index 0000000..733deeb
--- /dev/null
+++ b/packages/runtimeuse/test/integration/cli.test.ts
@@ -0,0 +1,201 @@
+import { describe, it, expect, afterEach } from "vitest";
+import { spawn, type ChildProcess } from "child_process";
+import net from "net";
+import path from "path";
+import { fileURLToPath } from "url";
+import { WebSocket } from "ws";
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+const CLI_JS = path.resolve(__dirname, "../../dist/cli.js");
+const ECHO_HANDLER = path.resolve(__dirname, "fixtures/echo-handler.js");
+
+const STARTUP_TIMEOUT_MS = 8_000;
+const POLL_INTERVAL_MS = 100;
+
+/** Resolves true if a TCP connect to 127.0.0.1:port succeeds before the 100ms socket timeout. */
+function portIsOpen(port: number): Promise<boolean> {
+  return new Promise((resolve) => {
+    const sock = net.createConnection({ port, host: "127.0.0.1" });
+    sock.setTimeout(100);
+    sock.on("connect", () => {
+      sock.destroy();
+      resolve(true);
+    });
+    sock.on("error", () => resolve(false));
+    sock.on("timeout", () => {
+      sock.destroy();
+      resolve(false);
+    });
+  });
+}
+
+/** Polls portIsOpen every POLL_INTERVAL_MS; throws if the port is not open after timeoutMs. */
+async function waitForPort(
+  port: number,
+  timeoutMs = STARTUP_TIMEOUT_MS,
+): Promise<void> {
+  const deadline = Date.now() + timeoutMs;
+  while (Date.now() < deadline) {
+    if (await portIsOpen(port)) return;
+    await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS));
+  }
+  throw new Error(`Port ${port} did not open within ${timeoutMs}ms`);
+}
+
+/** Spawns `node dist/cli.js ...args` with NODE_ENV=test and piped stdout/stderr. */
+function spawnCli(
+  args: string[],
+  env?: Record<string, string>,
+): ChildProcess {
+  return spawn("node", [CLI_JS, ...args], {
+    env: { ...process.env, NODE_ENV: "test", ...env },
+    stdio: ["ignore", "pipe", "pipe"],
+  });
+}
+
+/** Accumulates the child's stdout/stderr; the getters return what has arrived so far. */
+function collectOutput(proc: ChildProcess): {
+  stdout: () => string;
+  stderr: () => string;
+} {
+  let out = "";
+  let err = "";
+  proc.stdout?.on("data", (d: Buffer) => {
+    out += d.toString();
+  });
+  proc.stderr?.on("data", (d: Buffer) => {
+    err += d.toString();
+  });
+  return { stdout: () => out, stderr: () => err };
+}
+
+/**
+ * Resolves with the exit code once the child has ended AND its stdio streams
+ * have closed ("close", not "exit"), so output gathered by collectOutput is complete.
+ */
+function waitForExit(proc: ChildProcess): Promise<number | null> {
+  return new Promise((resolve) => {
+    proc.on("close", (code) => resolve(code));
+  });
+}
+
+/** Opens a WebSocket to the local server; rejects if the socket emits "error". */
+function connectWs(port: number): Promise<WebSocket> {
+  return new Promise((resolve, reject) => {
+    const ws = new WebSocket(`ws://127.0.0.1:${port}`);
+    ws.on("open", () => resolve(ws));
+    ws.on("error", reject);
+  });
+}
+
+/** Sends `data` to the server serialized with JSON.stringify. */
+function sendJson(ws: WebSocket, data: unknown): void {
+  ws.send(JSON.stringify(data));
+}
+
+/** Parses every incoming message as JSON and resolves with all of them once the socket closes. */
+function collectWsMessages(ws: WebSocket): Promise<Record<string, unknown>[]> {
+  const messages: Record<string, unknown>[] = [];
+  return new Promise((resolve) => {
+    ws.on("message", (raw: Buffer) => {
+      messages.push(JSON.parse(raw.toString()));
+    });
+    ws.on("close", () => resolve(messages));
+  });
+}
+
+describe("CLI", () => {
+  const procs: ChildProcess[] = [];
+
+  function tracked(proc: ChildProcess): ChildProcess {
+    procs.push(proc);
+    return proc;
+  }
+
+  afterEach(() => {
+    for (const proc of procs) {
+      if (proc.exitCode === null && proc.signalCode === null) {
+        proc.kill("SIGTERM");
+      }
+    }
+    procs.length = 0;
+  });
+
+  it("--help prints usage and exits 0", async () => {
+    const proc = tracked(spawnCli(["--help"]));
+    const { stdout } = collectOutput(proc);
+    const code = await waitForExit(proc);
+
+    expect(code).toBe(0);
+    expect(stdout()).toContain("Usage: runtimeuse");
+    expect(stdout()).toContain("--port");
+    expect(stdout()).toContain("--handler");
+    expect(stdout()).toContain("--agent");
+  });
+
+  it("--port binds to the specified port", async () => {
+    const port = 9871;
+    const proc = tracked(
+      spawnCli(["--handler", ECHO_HANDLER, "--port", String(port)]),
+    );
+    collectOutput(proc);
+
+    await waitForPort(port);
+
+    const ws = await connectWs(port);
+    ws.close();
+  });
+
+  it("--handler loads a custom handler and responds to invocations", async () => {
+    const port = 9872;
+    const proc = tracked(
+      spawnCli(["--handler", ECHO_HANDLER, "--port", String(port)]),
+    );
+    collectOutput(proc);
+
+    await waitForPort(port);
+
+    const ws = await connectWs(port);
+    const messagesPromise = collectWsMessages(ws);
+
+    sendJson(ws, {
+      message_type: "invocation_message",
+      system_prompt: "You are a test assistant.",
+      user_prompt: "ECHO:hello from cli test",
+      secrets_to_redact: [],
+      model: "echo",
+    });
+
+    const messages = await messagesPromise;
+    const result = messages.find(
+      (m) => m.message_type === "result_message",
+    );
+
+    expect(result).toBeDefined();
+    expect(result!.data).toEqual({ type: "text", text: "hello from cli test" });
+  });
+
+  it("unknown --agent exits with error", async () => {
+    const proc = tracked(spawnCli(["--agent", "bogus"]));
+    const { stderr } = collectOutput(proc);
+    const code = await waitForExit(proc);
+
+    expect(code).not.toBe(0);
+    expect(stderr()).toContain('unknown agent "bogus"');
+  });
+
+  // Without a key the openai handler cannot start, so report the test as
+  // skipped instead of letting an early return count as a pass.
+  const itWithOpenAiKey = it.skipIf(!process.env.OPENAI_API_KEY);
+
+  itWithOpenAiKey("defaults to openai agent when no --agent is specified", async () => {
+    const port = 9873;
+    const proc = tracked(spawnCli(["--port", String(port)]));
+    const { stdout, stderr } = collectOutput(proc);
+
+    await waitForPort(port);
+
+    expect(stderr()).not.toContain("Error");
+    expect(stdout()).toContain(`listening on port ${port}`);
+  });
+});
diff --git a/packages/runtimeuse/test/integration/fixtures/echo-handler.js b/packages/runtimeuse/test/integration/fixtures/echo-handler.js
new file mode 100644
index 0000000..6c34afe
--- /dev/null
+++ b/packages/runtimeuse/test/integration/fixtures/echo-handler.js
@@ -0,0 +1,51 @@
+/**
+ * Deterministic echo handler for integration tests.
+ *
+ * Interprets special prefixes in the user prompt to control behavior:
+ *   ECHO:<text>        — return text result
+ *   STRUCTURED:<json>  — return structured_output result
+ *   SLOW:<ms>          — sleep then return text (timeout / cancel tests)
+ *   STREAM:<n>         — send n assistant messages before returning
+ *   ERROR:<msg>        — send error via sender and throw
+ *   (anything else)    — echo the prompt back as text
+ */
+
+export const handler = {
+  async run(invocation, sender) {
+    const prompt = invocation.userPrompt;
+
+    if (prompt.startsWith("ECHO:")) {
+      return { type: "text", text: prompt.slice("ECHO:".length) };
+    }
+
+    if (prompt.startsWith("STRUCTURED:")) {
+      const json = prompt.slice("STRUCTURED:".length);
+      return {
+        type: "structured_output",
+        structuredOutput: JSON.parse(json),
+      };
+    }
+
+    if (prompt.startsWith("SLOW:")) {
+      const ms = parseInt(prompt.slice("SLOW:".length), 10);
+      await new Promise((r) => setTimeout(r, ms));
+      return { type: "text", text: "done" };
+    }
+
+    if (prompt.startsWith("STREAM:")) {
+      const count = parseInt(prompt.slice("STREAM:".length), 10);
+      for (let i = 0; i < count; i++) {
+        sender.sendAssistantMessage([`message ${i + 1} of ${count}`]);
+      }
+      return { type: "text", text: `streamed ${count} messages` };
+    }
+
+    if (prompt.startsWith("ERROR:")) {
+      const msg = prompt.slice("ERROR:".length);
+      sender.sendErrorMessage(msg, { source: "echo_handler" });
+      throw new Error(msg);
+    }
+
+    return { type: "text", text: prompt };
+  },
+};
diff --git a/packages/runtimeuse/vitest.config.ts b/packages/runtimeuse/vitest.config.ts
index f612c07..a666cbc 100644
--- a/packages/runtimeuse/vitest.config.ts
+++ b/packages/runtimeuse/vitest.config.ts
@@ -2,6 +2,6 @@ import { defineConfig } from "vitest/config";
 
 export default defineConfig({
   test: {
-    exclude: ["dist/**", "node_modules/**"],
+    exclude: ["dist/**", "node_modules/**", "test/**"],
   },
 });
diff --git a/packages/runtimeuse/vitest.integration.config.ts b/packages/runtimeuse/vitest.integration.config.ts
new file mode 100644
index 0000000..aaf97f8
--- /dev/null
+++ b/packages/runtimeuse/vitest.integration.config.ts
@@ -0,0 +1,8 @@
+import { defineConfig } from "vitest/config";
+
+export default defineConfig({
+  test: {
+    include: ["test/**/*.test.ts"],
+    testTimeout: 15_000,
+  },
+});

From da377beb0221ea78c86920c20119f08ea7691a59 Mon Sep 17 00:00:00 2001
From: Vijit Dhingra
Date: Sun, 15 Mar 2026 18:52:55 -0700
Subject: [PATCH 2/3] add sandbox and LLM integration tests for Python client

- E2B sandbox smoke test and shared factory (with reuse support)
- OpenAI and Claude LLM tests: text, structured output, error propagation
- Exclude sandbox/llm markers from CI; load .env in test conftest

Made-with: Cursor
---
 .../test-runtimeuse-client-python.yml         |  2 +-
 .../runtimeuse-client-python/pyproject.toml   |  4 +
 .../runtimeuse-client-python/test/conftest.py |  3 +
 .../test/llm/__init__.py                      |  0
 .../test/llm/conftest.py                      | 29 ++++++
 .../test/llm/test_claude.py                   | 80 ++++++++++++++++
 .../test/llm/test_openai.py                   | 80 ++++++++++++++++
 .../test/sandbox/__init__.py                  |  0
 .../test/sandbox/conftest.py                  |  0
 .../test/sandbox/test_e2b.py                  | 33 +++++++
 .../test/sandbox_factories/__init__.py        |  3 +
 .../test/sandbox_factories/e2b.py             | 92 +++++++++++++++++++
 12
files changed, 325 insertions(+), 1 deletion(-)
 create mode 100644 packages/runtimeuse-client-python/test/llm/__init__.py
 create mode 100644 packages/runtimeuse-client-python/test/llm/conftest.py
 create mode 100644 packages/runtimeuse-client-python/test/llm/test_claude.py
 create mode 100644 packages/runtimeuse-client-python/test/llm/test_openai.py
 create mode 100644 packages/runtimeuse-client-python/test/sandbox/__init__.py
 create mode 100644 packages/runtimeuse-client-python/test/sandbox/conftest.py
 create mode 100644 packages/runtimeuse-client-python/test/sandbox/test_e2b.py
 create mode 100644 packages/runtimeuse-client-python/test/sandbox_factories/__init__.py
 create mode 100644 packages/runtimeuse-client-python/test/sandbox_factories/e2b.py

diff --git a/.github/workflows/test-runtimeuse-client-python.yml b/.github/workflows/test-runtimeuse-client-python.yml
index 41629f5..f84d68d 100644
--- a/.github/workflows/test-runtimeuse-client-python.yml
+++ b/.github/workflows/test-runtimeuse-client-python.yml
@@ -32,5 +32,5 @@ jobs:
       - run: pip install -e ".[dev]" 2>/dev/null || pip install -e .
         working-directory: packages/runtimeuse-client-python
       - run: pip install pytest pytest-asyncio
-      - run: pytest test/
+      - run: pytest test/ -m "not sandbox and not llm"
         working-directory: packages/runtimeuse-client-python
diff --git a/packages/runtimeuse-client-python/pyproject.toml b/packages/runtimeuse-client-python/pyproject.toml
index 65dbd7b..02897a7 100644
--- a/packages/runtimeuse-client-python/pyproject.toml
+++ b/packages/runtimeuse-client-python/pyproject.toml
@@ -32,6 +32,10 @@ packages = ["src/runtimeuse_client"]
 
 [tool.pytest.ini_options]
 asyncio_mode = "auto"
+log_cli = true
+log_cli_level = "INFO"
 markers = [
     "e2e: end-to-end tests requiring a running runtimeuse server",
+    "sandbox: sandbox provider integration tests (requires E2B_API_KEY)",
+    "llm: real LLM integration tests (requires E2B_API_KEY + LLM API keys)",
 ]
diff --git a/packages/runtimeuse-client-python/test/conftest.py b/packages/runtimeuse-client-python/test/conftest.py
index b36a096..18ca89b 100644
--- a/packages/runtimeuse-client-python/test/conftest.py
+++ b/packages/runtimeuse-client-python/test/conftest.py
@@ -1,10 +1,13 @@
 import asyncio
 from typing import Any, AsyncGenerator
 
+import dotenv
 import pytest
 
 from src.runtimeuse_client import RuntimeUseClient, QueryOptions
 
+dotenv.load_dotenv()
+
 
 class FakeTransport:
     """In-memory transport for testing.
diff --git a/packages/runtimeuse-client-python/test/llm/__init__.py b/packages/runtimeuse-client-python/test/llm/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/packages/runtimeuse-client-python/test/llm/conftest.py b/packages/runtimeuse-client-python/test/llm/conftest.py
new file mode 100644
index 0000000..623601a
--- /dev/null
+++ b/packages/runtimeuse-client-python/test/llm/conftest.py
@@ -0,0 +1,29 @@
+import pytest
+
+from test.sandbox_factories.e2b import create_e2b_runtimeuse
+
+
+@pytest.fixture(scope="session")
+def openai_ws_url():
+    """Create an E2B sandbox running runtimeuse with the OpenAI agent."""
+    try:
+        sandbox, ws_url = create_e2b_runtimeuse(agent="openai")
+    except RuntimeError as exc:
+        pytest.fail(str(exc))
+
+    yield ws_url
+
+    sandbox.kill()
+
+
+@pytest.fixture(scope="session")
+def claude_ws_url():
+    """Create an E2B sandbox running runtimeuse with the Claude agent."""
+    try:
+        sandbox, ws_url = create_e2b_runtimeuse(agent="claude")
+    except RuntimeError as exc:
+        pytest.fail(str(exc))
+
+    yield ws_url
+
+    sandbox.kill()
diff --git a/packages/runtimeuse-client-python/test/llm/test_claude.py b/packages/runtimeuse-client-python/test/llm/test_claude.py
new file mode 100644
index 0000000..f338a66
--- /dev/null
+++ b/packages/runtimeuse-client-python/test/llm/test_claude.py
@@ -0,0 +1,80 @@
+"""LLM integration tests using the Claude agent."""
+
+import json
+
+import pytest
+
+from src.runtimeuse_client import (
+    AgentRuntimeError,
+    RuntimeUseClient,
+    QueryOptions,
+    QueryResult,
+    TextResult,
+    StructuredOutputResult,
+)
+
+pytestmark = [pytest.mark.llm, pytest.mark.asyncio]
+
+MODEL = "claude-sonnet-4-20250514"
+
+STRUCTURED_SCHEMA = json.dumps(
+    {
+        "type": "json_schema",
+        "schema": {
+            "type": "object",
+            "properties": {
+                "greeting": {"type": "string"},
+            },
+            "required": ["greeting"],
+            "additionalProperties": False,
+        },
+    }
+)
+
+
+class TestClaudeText:
+    async def test_text_response(self, claude_ws_url: str):
+        client = RuntimeUseClient(ws_url=claude_ws_url)
+        result = await client.query(
+            prompt="Say hello world",
+            options=QueryOptions(
+                system_prompt="Reply concisely in plain text.",
+                model=MODEL,
+            ),
+        )
+
+        assert isinstance(result, QueryResult)
+        assert isinstance(result.data, TextResult)
+        assert len(result.data.text) > 0
+
+
+class TestClaudeStructuredOutput:
+    async def test_structured_response(self, claude_ws_url: str):
+        client = RuntimeUseClient(ws_url=claude_ws_url)
+        result = await client.query(
+            prompt="Greet the user",
+            options=QueryOptions(
+                system_prompt="Reply with a greeting.",
+                model=MODEL,
+                output_format_json_schema_str=STRUCTURED_SCHEMA,
+            ),
+        )
+
+        assert isinstance(result, QueryResult)
+        assert isinstance(result.data, StructuredOutputResult)
+        assert "greeting" in result.data.structured_output
+        assert isinstance(result.data.structured_output["greeting"], str)
+        assert len(result.data.structured_output["greeting"]) > 0
+
+
+class TestClaudeError:
+    async def test_invalid_model_raises_error(self, claude_ws_url: str):
+        client = RuntimeUseClient(ws_url=claude_ws_url)
+        with pytest.raises(AgentRuntimeError):
+            await client.query(
+                prompt="Say hello",
+                options=QueryOptions(
+                    system_prompt="Reply concisely.",
+                    model="nonexistent-model-xyz",
+                ),
+            )
diff --git a/packages/runtimeuse-client-python/test/llm/test_openai.py b/packages/runtimeuse-client-python/test/llm/test_openai.py
new file mode 100644
index 0000000..b5870eb
--- /dev/null
+++ b/packages/runtimeuse-client-python/test/llm/test_openai.py
@@ -0,0 +1,80 @@
+"""LLM integration tests using the OpenAI agent."""
+
+import json
+
+import pytest
+
+from src.runtimeuse_client import (
+    AgentRuntimeError,
+    RuntimeUseClient,
+    QueryOptions,
+    QueryResult,
+    TextResult,
+    StructuredOutputResult,
+)
+
+pytestmark = [pytest.mark.llm, pytest.mark.asyncio]
+
+MODEL = "gpt-4.1-mini"
+
+STRUCTURED_SCHEMA = json.dumps(
+    {
+        "type": "json_schema",
+        "schema": {
+            "type": "object",
+            "properties": {
+                "greeting": {"type": "string"},
+            },
+            "required": ["greeting"],
+            "additionalProperties": False,
+        },
+    }
+)
+
+
+class TestOpenAIText:
+    async def test_text_response(self, openai_ws_url: str):
+        client = RuntimeUseClient(ws_url=openai_ws_url)
+        result = await client.query(
+            prompt="Say hello world",
+            options=QueryOptions(
+                system_prompt="Reply concisely in plain text.",
+                model=MODEL,
+            ),
+        )
+
+        assert isinstance(result, QueryResult)
+        assert isinstance(result.data, TextResult)
+        assert len(result.data.text) > 0
+
+
+class TestOpenAIStructuredOutput:
+    async def test_structured_response(self, openai_ws_url: str):
+        client = RuntimeUseClient(ws_url=openai_ws_url)
+        result = await client.query(
+            prompt="Greet the user",
+            options=QueryOptions(
+                system_prompt="Reply with a greeting.",
+                model=MODEL,
+                output_format_json_schema_str=STRUCTURED_SCHEMA,
+            ),
+        )
+
+        assert isinstance(result, QueryResult)
+        assert isinstance(result.data, StructuredOutputResult)
+        assert "greeting" in result.data.structured_output
+        assert isinstance(result.data.structured_output["greeting"], str)
+        assert len(result.data.structured_output["greeting"]) > 0
+
+
+class TestOpenAIError:
+    async def test_invalid_model_raises_error(self, openai_ws_url: str):
+        client = RuntimeUseClient(ws_url=openai_ws_url)
+        with pytest.raises(AgentRuntimeError):
+            await client.query(
+                prompt="Say hello",
+                options=QueryOptions(
+                    system_prompt="Reply concisely.",
+                    model="nonexistent-model-xyz",
+                ),
+            )
diff --git a/packages/runtimeuse-client-python/test/sandbox/__init__.py b/packages/runtimeuse-client-python/test/sandbox/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/packages/runtimeuse-client-python/test/sandbox/conftest.py b/packages/runtimeuse-client-python/test/sandbox/conftest.py
new file mode 100644
index 0000000..e69de29
diff --git a/packages/runtimeuse-client-python/test/sandbox/test_e2b.py b/packages/runtimeuse-client-python/test/sandbox/test_e2b.py
new file mode 100644
index 0000000..f447428
--- /dev/null
+++ b/packages/runtimeuse-client-python/test/sandbox/test_e2b.py
@@ -0,0 +1,33 @@
+"""Smoke test: verify that an E2B sandbox can run runtimeuse and answer a query."""
+
+import pytest
+
+from src.runtimeuse_client import (
+    RuntimeUseClient,
+    QueryOptions,
+    QueryResult,
+    TextResult,
+)
+from test.sandbox_factories.e2b import create_e2b_runtimeuse
+
+pytestmark = [pytest.mark.sandbox, pytest.mark.asyncio]
+
+
+class TestE2BSandbox:
+    async def test_hello_world(self):
+        sandbox, ws_url = create_e2b_runtimeuse(agent="openai")
+        try:
+            client = RuntimeUseClient(ws_url=ws_url)
+            result = await client.query(
+                prompt="Say hello world",
+                options=QueryOptions(
+                    system_prompt="Reply concisely.",
+                    model="gpt-4.1-mini",
+                ),
+            )
+
+            assert isinstance(result, QueryResult)
+            assert isinstance(result.data, TextResult)
+            assert len(result.data.text) > 0
+        finally:
+            sandbox.kill()
diff --git a/packages/runtimeuse-client-python/test/sandbox_factories/__init__.py b/packages/runtimeuse-client-python/test/sandbox_factories/__init__.py
new file mode 100644
index 0000000..39d9142
--- /dev/null
+++ b/packages/runtimeuse-client-python/test/sandbox_factories/__init__.py
@@ -0,0 +1,3 @@
+from .e2b import create_e2b_runtimeuse
+
+__all__ = ["create_e2b_runtimeuse"]
diff --git a/packages/runtimeuse-client-python/test/sandbox_factories/e2b.py b/packages/runtimeuse-client-python/test/sandbox_factories/e2b.py
new file mode 100644
index 0000000..576d39d
--- /dev/null
+++ b/packages/runtimeuse-client-python/test/sandbox_factories/e2b.py
@@ -0,0 +1,92 @@
+"""Factory for creating E2B sandboxes running a runtimeuse server."""
+
+from __future__ import annotations
+
+import logging
+import os
+
+from e2b import Template, wait_for_port, default_build_logger
+from e2b_code_interpreter import Sandbox
+
+_logger = logging.getLogger(__name__)
+
+_DEFAULT_RUN_COMMAND = "npx -y runtimeuse@latest"
+
+
+def _get_env_or_fail(name: str) -> str:
+    value = os.environ.get(name)
+    if not value:
+        raise RuntimeError(f"{name} environment variable is not set")
+    return value
+
+
+def _should_try_reuse() -> bool:
+    """Return True when E2B_REUSE_TEMPLATE is set to a truthy value."""
+    return os.environ.get("E2B_REUSE_TEMPLATE", "").lower() in ("1", "true", "yes")
+
+
+def create_e2b_runtimeuse(
+    agent: str = "openai",
+    run_command: str | None = None,
+) -> tuple[Sandbox, str]:
+    """Build an E2B template, create a sandbox, and return ``(sandbox, ws_url)``.
+
+    When ``E2B_REUSE_TEMPLATE=1`` is set, the factory first tries to create a
+    sandbox from the existing template. If the template does not exist yet it
+    falls back to building it. When the env var is unset or falsy the template
+    is always rebuilt so it reflects the current ``RUNTIMEUSE_RUN_COMMAND`` and
+    env vars.
+
+    The caller owns the returned sandbox and must call ``sandbox.kill()``
+    when done.
+    """
+    e2b_api_key = _get_env_or_fail("E2B_API_KEY")
+    cmd = run_command or os.environ.get("RUNTIMEUSE_RUN_COMMAND", _DEFAULT_RUN_COMMAND)
+
+    envs: dict[str, str] = {}
+    if agent == "openai":
+        envs["OPENAI_API_KEY"] = _get_env_or_fail("OPENAI_API_KEY")
+    elif agent == "claude":
+        envs["ANTHROPIC_API_KEY"] = _get_env_or_fail("ANTHROPIC_API_KEY")
+
+    alias = f"runtimeuse-test-{agent}"
+    start_cmd = f"{cmd} --agent {agent}"
+
+    need_build = True
+
+    if _should_try_reuse():
+        _logger.info("Trying to reuse existing E2B template %r", alias)
+        try:
+            sandbox = Sandbox.create(template=alias, api_key=e2b_api_key)
+            need_build = False
+        except Exception as exc:
+            _logger.info("Could not reuse template %r (%s), will build it", alias, exc)
+
+    if need_build:
+        _logger.info("Building E2B template %r with command: %s", alias, start_cmd)
+
+        template = (
+            Template()
+            .from_node_image("lts")
+            .apt_install(["unzip"])
+            .npm_install(["@anthropic-ai/claude-code"], g=True)
+            .set_envs(envs)
+            .set_start_cmd(start_cmd, wait_for_port(8080))
+        )
+
+        Template.build(
+            template,
+            alias,
+            cpu_count=2,
+            memory_mb=2048,
+            on_build_logs=default_build_logger(),
+        )
+
+        sandbox = Sandbox.create(template=alias, api_key=e2b_api_key)
+
+    host = sandbox.get_host(8080)
+    ws_url = f"wss://{host}"
+
+    _logger.info("Sandbox %s ready at %s", sandbox.sandbox_id, ws_url)
+
+    return sandbox, ws_url

From 4ffd9f6a3087db174e992fc66a37a9074af09432 Mon Sep 17 00:00:00 2001
From: Vijit Dhingra
Date: Sun, 15 Mar 2026 22:42:49 -0700
Subject: [PATCH 3/3] add e2e tests for pre/post agent invocation commands and
 fix cancel hang

Add 7 E2E tests verifying pre_agent_invocation_commands and
post_agent_invocation_commands are executed by the server, including cwd
support and failure handling. Change ws_url fixture to per-test scope so
each test gets a fresh server.

Fix send_queue.task_done() not being called when ws.send() raises
ConnectionClosedOK, which caused send_queue.join() to hang forever
during cancellation.

Made-with: Cursor
---
 .../transports/websocket_transport.py         |   6 +-
 .../test/e2e/conftest.py                      |   2 +-
 .../test/e2e/test_e2e.py                      | 144 ++++++++++++++++++
 3 files changed, 149 insertions(+), 3 deletions(-)

diff --git a/packages/runtimeuse-client-python/src/runtimeuse_client/transports/websocket_transport.py b/packages/runtimeuse-client-python/src/runtimeuse_client/transports/websocket_transport.py
index befe1be..242f250 100644
--- a/packages/runtimeuse-client-python/src/runtimeuse_client/transports/websocket_transport.py
+++ b/packages/runtimeuse-client-python/src/runtimeuse_client/transports/websocket_transport.py
@@ -43,5 +43,7 @@ async def _queue_sender(
     ) -> None:
         while True:
             message = await send_queue.get()
-            await ws.send(json.dumps(message))
-            send_queue.task_done()
+            try:
+                await ws.send(json.dumps(message))
+            finally:
+                send_queue.task_done()
diff --git a/packages/runtimeuse-client-python/test/e2e/conftest.py b/packages/runtimeuse-client-python/test/e2e/conftest.py
index 828d361..032ab52 100644
--- a/packages/runtimeuse-client-python/test/e2e/conftest.py
+++ b/packages/runtimeuse-client-python/test/e2e/conftest.py
@@ -26,7 +26,7 @@ def _port_is_open(port: int) -> bool:
         return s.connect_ex(("127.0.0.1", port)) == 0
 
 
-@pytest.fixture(scope="session")
+@pytest.fixture
 def ws_url():
     """Start a local runtimeuse server with the echo handler and yield its URL."""
     if not CLI_JS.exists():
diff --git a/packages/runtimeuse-client-python/test/e2e/test_e2e.py b/packages/runtimeuse-client-python/test/e2e/test_e2e.py
index 1d103a7..bd4f6a4 100644
--- a/packages/runtimeuse-client-python/test/e2e/test_e2e.py
+++ b/packages/runtimeuse-client-python/test/e2e/test_e2e.py
@@ -14,6 +14,7 @@
     AssistantMessageInterface,
     AgentRuntimeError,
     CancelledException,
+    CommandInterface,
 )
 
 pytestmark = [pytest.mark.e2e, pytest.mark.asyncio]
@@ -107,6 +108,149 @@ async def abort_on_first(msg: AssistantMessageInterface):
             )
 
 
+class TestPrePostCommands:
+    async def test_pre_command_output_streamed(
+        self, client: RuntimeUseClient, make_query_options
+    ):
+        received: list[AssistantMessageInterface] = []
+
+        async def on_msg(msg: AssistantMessageInterface):
+            received.append(msg)
+
+        result = await client.query(
+            prompt="ECHO:hello",
+            options=make_query_options(
+                pre_agent_invocation_commands=[
+                    CommandInterface(command="echo pre-sentinel")
+                ],
+                on_assistant_message=on_msg,
+            ),
+        )
+
+        assert isinstance(result.data, TextResult)
+        assert result.data.text == "hello"
+        all_text = [block for msg in received for block in msg.text_blocks]
+        assert any("pre-sentinel" in t for t in all_text)
+
+    async def test_post_command_output_streamed(
+        self, client: RuntimeUseClient, make_query_options
+    ):
+        received: list[AssistantMessageInterface] = []
+
+        async def on_msg(msg: AssistantMessageInterface):
+            received.append(msg)
+
+        result = await client.query(
+            prompt="ECHO:hello",
+            options=make_query_options(
+                post_agent_invocation_commands=[
+                    CommandInterface(command="echo post-sentinel")
+                ],
+                on_assistant_message=on_msg,
+            ),
+        )
+
+        assert isinstance(result.data, TextResult)
+        assert result.data.text == "hello"
+        all_text = [block for msg in received for block in msg.text_blocks]
+        assert any("post-sentinel" in t for t in all_text)
+
+    async def test_pre_and_post_commands_both_run(
+        self, client: RuntimeUseClient, make_query_options
+    ):
+        received: list[AssistantMessageInterface] = []
+
+        async def on_msg(msg: AssistantMessageInterface):
+            received.append(msg)
+
+        result = await client.query(
+            prompt="ECHO:hello",
+            options=make_query_options(
+                pre_agent_invocation_commands=[
+                    CommandInterface(command="echo pre-sentinel")
+                ],
+                post_agent_invocation_commands=[
+                    CommandInterface(command="echo post-sentinel")
+                ],
+                on_assistant_message=on_msg,
+            ),
+        )
+
+        assert isinstance(result.data, TextResult)
+        assert result.data.text == "hello"
+        all_text = [block for msg in received for block in msg.text_blocks]
+        assert any("pre-sentinel" in t for t in all_text)
+        assert any("post-sentinel" in t for t in all_text)
+
+    async def test_pre_command_with_cwd(
+        self, client: RuntimeUseClient, make_query_options
+    ):
+        received: list[AssistantMessageInterface] = []
+
+        async def on_msg(msg: AssistantMessageInterface):
+            received.append(msg)
+
+        await client.query(
+            prompt="ECHO:ok",
+            options=make_query_options(
+                pre_agent_invocation_commands=[
+                    CommandInterface(command="pwd", cwd="/tmp")
+                ],
+                on_assistant_message=on_msg,
+            ),
+        )
+
+        all_text = [block for msg in received for block in msg.text_blocks]
+        assert any("/tmp" in t for t in all_text)
+
+    async def test_post_command_with_cwd(
+        self, client: RuntimeUseClient, make_query_options
+    ):
+        received: list[AssistantMessageInterface] = []
+
+        async def on_msg(msg: AssistantMessageInterface):
+            received.append(msg)
+
+        await client.query(
+            prompt="ECHO:ok",
+            options=make_query_options(
+                post_agent_invocation_commands=[
+                    CommandInterface(command="pwd", cwd="/tmp")
+                ],
+                on_assistant_message=on_msg,
+            ),
+        )
+
+        all_text = [block for msg in received for block in msg.text_blocks]
+        assert any("/tmp" in t for t in all_text)
+
+    async def test_failed_pre_command_raises_error(
+        self, client: RuntimeUseClient, make_query_options
+    ):
+        with pytest.raises(AgentRuntimeError, match="failed with exit code"):
+            await client.query(
+                prompt="ECHO:should not reach",
+                options=make_query_options(
+                    pre_agent_invocation_commands=[
+                        CommandInterface(command="exit 1")
+                    ],
+                ),
+            )
+
+    async def test_failed_post_command_raises_error(
+        self, client: RuntimeUseClient, make_query_options
+    ):
+        with pytest.raises(AgentRuntimeError, match="failed with exit code"):
+            await client.query(
+                prompt="ECHO:hello",
+                options=make_query_options(
+                    post_agent_invocation_commands=[
+                        CommandInterface(command="exit 1")
+                    ],
+                ),
+            )
+
+
 class TestInvocationFieldsForwarded:
     async def test_fields_round_trip(
         self, client: RuntimeUseClient, make_query_options