From 7f28621034163bf55c30edd25f2ac44e1688e79a Mon Sep 17 00:00:00 2001
From: Vijit Dhingra <vijit@uselark.io>
Date: Sun, 15 Mar 2026 16:19:57 -0700
Subject: [PATCH 1/3] add CLI integration tests

---
 packages/runtimeuse/package.json              |   2 +
 .../runtimeuse/test/integration/cli.test.ts   | 190 ++++++++++++++++++
 .../test/integration/fixtures/echo-handler.js |  51 +++++
 packages/runtimeuse/vitest.config.ts          |   2 +-
 .../runtimeuse/vitest.integration.config.ts   |   8 +
 5 files changed, 252 insertions(+), 1 deletion(-)
 create mode 100644 packages/runtimeuse/test/integration/cli.test.ts
 create mode 100644 packages/runtimeuse/test/integration/fixtures/echo-handler.js
 create mode 100644 packages/runtimeuse/vitest.integration.config.ts
diff --git a/packages/runtimeuse/package.json b/packages/runtimeuse/package.json
index e860fd2..9a39cf6 100644
--- a/packages/runtimeuse/package.json
+++ b/packages/runtimeuse/package.json
@@ -35,6 +35,8 @@
     "build": "tsc",
     "prepublishOnly": "npm run build",
     "test": "vitest run",
+    "pretest:integration": "npm run build",
+    "test:integration": "vitest run --config vitest.integration.config.ts",
     "typecheck": "tsc --noEmit",
     "dev-publish": "bash scripts/dev-publish.sh"
   },
diff --git a/packages/runtimeuse/test/integration/cli.test.ts b/packages/runtimeuse/test/integration/cli.test.ts
new file mode 100644
index 0000000..733deeb
--- /dev/null
+++ b/packages/runtimeuse/test/integration/cli.test.ts
@@ -0,0 +1,190 @@
+import { describe, it, expect, afterEach } from "vitest";
+import { spawn, type ChildProcess } from "child_process";
+import net from "net";
+import path from "path";
+import { fileURLToPath } from "url";
+import { WebSocket } from "ws";
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+const CLI_JS = path.resolve(__dirname, "../../dist/cli.js");
+const ECHO_HANDLER = path.resolve(__dirname, "fixtures/echo-handler.js");
+
+const STARTUP_TIMEOUT_MS = 8_000;
+const POLL_INTERVAL_MS = 100;
+
+function portIsOpen(port: number): Promise<boolean> {
+  // Resolves true only when a TCP connection to 127.0.0.1:<port> is established; never rejects.
+  return new Promise((resolve) => {
+    const probe = net.createConnection({ host: "127.0.0.1", port });
+    const settle = (open: boolean, teardown: boolean): void => {
+      if (teardown) probe.destroy();
+      resolve(open);
+    };
+    probe.setTimeout(100);
+    probe.on("connect", () => settle(true, true));
+    probe.on("error", () => settle(false, false));
+    // The timeout handler destroys the socket itself, matching the connect path.
+    probe.on("timeout", () => settle(false, true));
+  });
+}
+
+/** Polls portIsOpen every POLL_INTERVAL_MS until it succeeds; throws once timeoutMs has elapsed. */
+async function waitForPort(
+  port: number,
+  timeoutMs = STARTUP_TIMEOUT_MS,
+): Promise<void> {
+  const pause = () => new Promise((wake) => setTimeout(wake, POLL_INTERVAL_MS));
+  for (const giveUpAt = Date.now() + timeoutMs; Date.now() < giveUpAt; await pause()) {
+    if (await portIsOpen(port)) return;
+  }
+  throw new Error(`Port ${port} did not open within ${timeoutMs}ms`);
+}
+
+function spawnCli(
+  args: string[],
+  env?: Record<string, string>,
+): ChildProcess {
+  // stdin is ignored; stdout/stderr are piped so tests can capture them.
+  const childEnv = { ...process.env, NODE_ENV: "test", ...env };
+  const argv = [CLI_JS, ...args];
+  return spawn("node", argv, { env: childEnv, stdio: ["ignore", "pipe", "pipe"] });
+}
+
+function collectOutput(proc: ChildProcess): {
+  stdout: () => string;
+  stderr: () => string;
+} {
+  // Each pipe gets its own chunk list; the getters join whatever has arrived so far.
+  const capture = (stream: ChildProcess["stdout"]): (() => string) => {
+    const chunks: string[] = [];
+    stream?.on("data", (chunk: Buffer) => {
+      chunks.push(chunk.toString());
+    });
+    return () => chunks.join("");
+  };
+  return { stdout: capture(proc.stdout), stderr: capture(proc.stderr) };
+}
+
+function waitForExit(proc: ChildProcess): Promise<number | null> {
+  return new Promise((resolve) => {
+    proc.on("close", (code) => resolve(code)); // "close" fires after stdio is drained; "exit" can precede buffered output
+  });
+}
+
+function connectWs(port: number): Promise<WebSocket> {
+  return new Promise((onOpen, onFail) => {
+    const socket = new WebSocket(`ws://127.0.0.1:${port}`);
+    socket.on("error", onFail); // rejects if an error is emitted before "open"
+    socket.on("open", () => onOpen(socket));
+  });
+}
+
+function sendJson(ws: WebSocket, data: unknown): void { // sends `data` as its JSON string
+  ws.send(JSON.stringify(data)); // no send callback is passed, so send failures are not reported here
+}
+
+function collectWsMessages(ws: WebSocket): Promise<Record<string, unknown>[]> {
+  // Buffers every parsed message and hands the list over only once the socket closes,
+  // so the returned promise stays pending for as long as the socket is open.
+  return new Promise((resolve) => {
+    const seen: Record<string, unknown>[] = [];
+    ws.on("close", () => resolve(seen));
+    ws.on("message", (raw: Buffer) => seen.push(JSON.parse(raw.toString())));
+  });
+}
+
+describe("CLI", () => {
+  const procs: ChildProcess[] = [];
+
+  function tracked(proc: ChildProcess): ChildProcess {
+    procs.push(proc);
+    return proc;
+  }
+
+  afterEach(() => {
+    for (const proc of procs) {
+      if (proc.exitCode === null && proc.signalCode === null) {
+        proc.kill("SIGTERM");
+      }
+    }
+    procs.length = 0;
+  });
+
+  it("--help prints usage and exits 0", async () => {
+    const proc = tracked(spawnCli(["--help"]));
+    const { stdout } = collectOutput(proc);
+    const code = await waitForExit(proc);
+
+    expect(code).toBe(0);
+    expect(stdout()).toContain("Usage: runtimeuse");
+    expect(stdout()).toContain("--port");
+    expect(stdout()).toContain("--handler");
+    expect(stdout()).toContain("--agent");
+  });
+
+  it("--port binds to the specified port", async () => {
+    const port = 9871;
+    const proc = tracked(
+      spawnCli(["--handler", ECHO_HANDLER, "--port", String(port)]),
+    );
+    collectOutput(proc);
+
+    await waitForPort(port);
+
+    const ws = await connectWs(port);
+    ws.close();
+  });
+
+  it("--handler loads a custom handler and responds to invocations", async () => {
+    const port = 9872;
+    const proc = tracked(
+      spawnCli(["--handler", ECHO_HANDLER, "--port", String(port)]),
+    );
+    collectOutput(proc);
+
+    await waitForPort(port);
+
+    const ws = await connectWs(port);
+    const messagesPromise = collectWsMessages(ws);
+
+    sendJson(ws, {
+      message_type: "invocation_message",
+      system_prompt: "You are a test assistant.",
+      user_prompt: "ECHO:hello from cli test",
+      secrets_to_redact: [],
+      model: "echo",
+    });
+
+    const messages = await messagesPromise;
+    const result = messages.find(
+      (m) => m.message_type === "result_message",
+    );
+
+    expect(result).toBeDefined();
+    expect(result!.data).toEqual({ type: "text", text: "hello from cli test" });
+  });
+
+  it("unknown --agent exits with error", async () => {
+    const proc = tracked(spawnCli(["--agent", "bogus"]));
+    const { stderr } = collectOutput(proc);
+    const code = await waitForExit(proc);
+
+    expect(code).not.toBe(0);
+    expect(stderr()).toContain('unknown agent "bogus"');
+  });
+
+  // The openai handler cannot start without a key, so report the test as skipped
+  // (rather than silently passing) when OPENAI_API_KEY is absent.
+  const itWithOpenAiKey = it.skipIf(!process.env.OPENAI_API_KEY);
+
+  itWithOpenAiKey("defaults to openai agent when no --agent is specified", async () => {
+    const port = 9873;
+    const proc = tracked(spawnCli(["--port", String(port)]));
+    const { stdout, stderr } = collectOutput(proc);
+
+    await waitForPort(port);
+
+    expect(stderr()).not.toContain("Error");
+    expect(stdout()).toContain(`listening on port ${port}`);
+  });
+});
diff --git a/packages/runtimeuse/test/integration/fixtures/echo-handler.js b/packages/runtimeuse/test/integration/fixtures/echo-handler.js
new file mode 100644
index 0000000..6c34afe
--- /dev/null
+++ b/packages/runtimeuse/test/integration/fixtures/echo-handler.js
@@ -0,0 +1,51 @@
+/**
+ * Deterministic echo handler for integration tests.
+ *
+ * Interprets special prefixes in the user prompt to control behavior:
+ *   ECHO:<text>           — return text result
+ *   STRUCTURED:<json>     — return structured_output result
+ *   SLOW:<ms>             — sleep then return text (timeout / cancel tests)
+ *   STREAM:<n>            — send n assistant messages before returning
+ *   ERROR:<msg>           — send error via sender and throw
+ *   (anything else)       — echo the prompt back as text
+ */
+
+export const handler = { // test-only handler: the prompt prefix selects the behavior
+  async run(invocation, sender) { // returns a result object, or throws for ERROR: prompts
+    const prompt = invocation.userPrompt; // assumes userPrompt is always a string — TODO confirm
+
+    if (prompt.startsWith("ECHO:")) {
+      return { type: "text", text: prompt.slice("ECHO:".length) };
+    }
+
+    if (prompt.startsWith("STRUCTURED:")) {
+      const json = prompt.slice("STRUCTURED:".length);
+      return {
+        type: "structured_output",
+        structuredOutput: JSON.parse(json), // throws SyntaxError if <json> is malformed
+      };
+    }
+
+    if (prompt.startsWith("SLOW:")) {
+      const ms = parseInt(prompt.slice("SLOW:".length), 10); // NaN when <ms> is not numeric
+      await new Promise((r) => setTimeout(r, ms));
+      return { type: "text", text: "done" };
+    }
+
+    if (prompt.startsWith("STREAM:")) {
+      const count = parseInt(prompt.slice("STREAM:".length), 10); // NaN skips the loop (i < NaN is false)
+      for (let i = 0; i < count; i++) {
+        sender.sendAssistantMessage([`message ${i + 1} of ${count}`]); // presumably a list of text blocks — confirm against the sender API
+      }
+      return { type: "text", text: `streamed ${count} messages` };
+    }
+
+    if (prompt.startsWith("ERROR:")) {
+      const msg = prompt.slice("ERROR:".length);
+      sender.sendErrorMessage(msg, { source: "echo_handler" }); // report through the sender first, then fail the invocation
+      throw new Error(msg);
+    }
+
+    return { type: "text", text: prompt }; // no recognized prefix: echo the whole prompt
+  },
+};
diff --git a/packages/runtimeuse/vitest.config.ts b/packages/runtimeuse/vitest.config.ts
index f612c07..a666cbc 100644
--- a/packages/runtimeuse/vitest.config.ts
+++ b/packages/runtimeuse/vitest.config.ts
@@ -2,6 +2,6 @@ import { defineConfig } from "vitest/config";
 
 export default defineConfig({
   test: {
-    exclude: ["dist/**", "node_modules/**"],
+    exclude: ["dist/**", "node_modules/**", "test/**"],
   },
 });
diff --git a/packages/runtimeuse/vitest.integration.config.ts b/packages/runtimeuse/vitest.integration.config.ts
new file mode 100644
index 0000000..aaf97f8
--- /dev/null
+++ b/packages/runtimeuse/vitest.integration.config.ts
@@ -0,0 +1,8 @@
+import { defineConfig } from "vitest/config";
+
+export default defineConfig({
+  test: {
+    include: ["test/**/*.test.ts"],
+    testTimeout: 15_000,
+  },
+});

From da377beb0221ea78c86920c20119f08ea7691a59 Mon Sep 17 00:00:00 2001
From: Vijit Dhingra <vijit@uselark.io>
Date: Sun, 15 Mar 2026 18:52:55 -0700
Subject: [PATCH 2/3] add sandbox and LLM integration tests for Python client

- E2B sandbox smoke test and shared factory (with reuse support)
- OpenAI and Claude LLM tests: text, structured output, error propagation
- Exclude sandbox/llm markers from CI; load .env in test conftest

Made-with: Cursor
---
 .../test-runtimeuse-client-python.yml         |  2 +-
 .../runtimeuse-client-python/pyproject.toml   |  4 +
 .../runtimeuse-client-python/test/conftest.py |  3 +
 .../test/llm/__init__.py                      |  0
 .../test/llm/conftest.py                      | 29 ++++++
 .../test/llm/test_claude.py                   | 80 ++++++++++++++++
 .../test/llm/test_openai.py                   | 80 ++++++++++++++++
 .../test/sandbox/__init__.py                  |  0
 .../test/sandbox/conftest.py                  |  0
 .../test/sandbox/test_e2b.py                  | 33 +++++++
 .../test/sandbox_factories/__init__.py        |  3 +
 .../test/sandbox_factories/e2b.py             | 92 +++++++++++++++++++
 12 files changed, 325 insertions(+), 1 deletion(-)
 create mode 100644 packages/runtimeuse-client-python/test/llm/__init__.py
 create mode 100644 packages/runtimeuse-client-python/test/llm/conftest.py
 create mode 100644 packages/runtimeuse-client-python/test/llm/test_claude.py
 create mode 100644 packages/runtimeuse-client-python/test/llm/test_openai.py
 create mode 100644 packages/runtimeuse-client-python/test/sandbox/__init__.py
 create mode 100644 packages/runtimeuse-client-python/test/sandbox/conftest.py
 create mode 100644 packages/runtimeuse-client-python/test/sandbox/test_e2b.py
 create mode 100644 packages/runtimeuse-client-python/test/sandbox_factories/__init__.py
 create mode 100644 packages/runtimeuse-client-python/test/sandbox_factories/e2b.py

diff --git a/.github/workflows/test-runtimeuse-client-python.yml b/.github/workflows/test-runtimeuse-client-python.yml
index 41629f5..f84d68d 100644
--- a/.github/workflows/test-runtimeuse-client-python.yml
+++ b/.github/workflows/test-runtimeuse-client-python.yml
@@ -32,5 +32,5 @@ jobs:
       - run: pip install -e ".[dev]" 2>/dev/null || pip install -e .
         working-directory: packages/runtimeuse-client-python
       - run: pip install pytest pytest-asyncio
-      - run: pytest test/
+      - run: pytest test/ -m "not sandbox and not llm" --ignore=test/llm --ignore=test/sandbox # -m deselects only after import; --ignore skips collecting the e2b-dependent modules
         working-directory: packages/runtimeuse-client-python
diff --git a/packages/runtimeuse-client-python/pyproject.toml b/packages/runtimeuse-client-python/pyproject.toml
index 65dbd7b..02897a7 100644
--- a/packages/runtimeuse-client-python/pyproject.toml
+++ b/packages/runtimeuse-client-python/pyproject.toml
@@ -32,6 +32,10 @@ packages = ["src/runtimeuse_client"]
 
 [tool.pytest.ini_options]
 asyncio_mode = "auto"
+log_cli = true
+log_cli_level = "INFO"
 markers = [
     "e2e: end-to-end tests requiring a running runtimeuse server",
+    "sandbox: sandbox provider integration tests (requires E2B_API_KEY)",
+    "llm: real LLM integration tests (requires E2B_API_KEY + LLM API keys)",
 ]
diff --git a/packages/runtimeuse-client-python/test/conftest.py b/packages/runtimeuse-client-python/test/conftest.py
index b36a096..18ca89b 100644
--- a/packages/runtimeuse-client-python/test/conftest.py
+++ b/packages/runtimeuse-client-python/test/conftest.py
@@ -1,10 +1,13 @@
 import asyncio
 from typing import Any, AsyncGenerator
 
+import dotenv
 import pytest
 
 from src.runtimeuse_client import RuntimeUseClient, QueryOptions
 
+dotenv.load_dotenv()
+
 
 class FakeTransport:
     """In-memory transport for testing.
diff --git a/packages/runtimeuse-client-python/test/llm/__init__.py b/packages/runtimeuse-client-python/test/llm/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/packages/runtimeuse-client-python/test/llm/conftest.py b/packages/runtimeuse-client-python/test/llm/conftest.py
new file mode 100644
index 0000000..623601a
--- /dev/null
+++ b/packages/runtimeuse-client-python/test/llm/conftest.py
@@ -0,0 +1,29 @@
+import pytest
+
+from test.sandbox_factories.e2b import create_e2b_runtimeuse
+
+
+@pytest.fixture(scope="session")
+def openai_ws_url():
+    """Session-scoped WebSocket URL of an E2B sandbox running runtimeuse with the OpenAI agent."""
+    try:
+        sandbox, ws_url = create_e2b_runtimeuse(agent="openai")
+    except RuntimeError as exc:  # e.g. a required API key env var is unset
+        pytest.fail(str(exc))  # surface the reason as a failure of the requesting test
+
+    yield ws_url  # one sandbox is shared by every test in the session that requests this fixture
+
+    sandbox.kill()  # teardown: this fixture owns the sandbox it created
+
+
+@pytest.fixture(scope="session")
+def claude_ws_url():
+    """Session-scoped WebSocket URL of an E2B sandbox running runtimeuse with the Claude agent."""
+    try:
+        sandbox, ws_url = create_e2b_runtimeuse(agent="claude")
+    except RuntimeError as exc:  # e.g. a required API key env var is unset
+        pytest.fail(str(exc))  # surface the reason as a failure of the requesting test
+
+    yield ws_url  # one sandbox is shared by every test in the session that requests this fixture
+
+    sandbox.kill()  # teardown: this fixture owns the sandbox it created
diff --git a/packages/runtimeuse-client-python/test/llm/test_claude.py b/packages/runtimeuse-client-python/test/llm/test_claude.py
new file mode 100644
index 0000000..f338a66
--- /dev/null
+++ b/packages/runtimeuse-client-python/test/llm/test_claude.py
@@ -0,0 +1,80 @@
+"""LLM integration tests using the Claude agent."""
+
+import json
+
+import pytest
+
+from src.runtimeuse_client import (
+    AgentRuntimeError,
+    RuntimeUseClient,
+    QueryOptions,
+    QueryResult,
+    TextResult,
+    StructuredOutputResult,
+)
+
+pytestmark = [pytest.mark.llm, pytest.mark.asyncio]
+
+MODEL = "claude-sonnet-4-20250514"
+
+STRUCTURED_SCHEMA = json.dumps(
+    {
+        "type": "json_schema",
+        "schema": {
+            "type": "object",
+            "properties": {
+                "greeting": {"type": "string"},
+            },
+            "required": ["greeting"],
+            "additionalProperties": False,
+        },
+    }
+)
+
+
+class TestClaudeText:
+    async def test_text_response(self, claude_ws_url: str):
+        client = RuntimeUseClient(ws_url=claude_ws_url)
+        result = await client.query(
+            prompt="Say hello world",
+            options=QueryOptions(
+                system_prompt="Reply concisely in plain text.",
+                model=MODEL,
+            ),
+        )
+
+        assert isinstance(result, QueryResult)
+        assert isinstance(result.data, TextResult)
+        assert len(result.data.text) > 0
+
+
+class TestClaudeStructuredOutput:
+    async def test_structured_response(self, claude_ws_url: str):
+        client = RuntimeUseClient(ws_url=claude_ws_url)
+        result = await client.query(
+            prompt="Greet the user",
+            options=QueryOptions(
+                system_prompt="Reply with a greeting.",
+                model=MODEL,
+                output_format_json_schema_str=STRUCTURED_SCHEMA,
+            ),
+        )
+
+        assert isinstance(result, QueryResult)
+        assert isinstance(result.data, StructuredOutputResult)
+        assert "greeting" in result.data.structured_output
+        assert isinstance(result.data.structured_output["greeting"], str)
+        assert len(result.data.structured_output["greeting"]) > 0
+
+
+class TestClaudeError:
+    async def test_invalid_model_raises_error(self, claude_ws_url: str):
+        client = RuntimeUseClient(ws_url=claude_ws_url)
+        with pytest.raises(AgentRuntimeError):
+            await client.query(
+                prompt="Say hello",
+                options=QueryOptions(
+                    system_prompt="Reply concisely.",
+                    model="nonexistent-model-xyz",
+                ),
+            )
diff --git a/packages/runtimeuse-client-python/test/llm/test_openai.py b/packages/runtimeuse-client-python/test/llm/test_openai.py
new file mode 100644
index 0000000..b5870eb
--- /dev/null
+++ b/packages/runtimeuse-client-python/test/llm/test_openai.py
@@ -0,0 +1,80 @@
+"""LLM integration tests using the OpenAI agent."""
+
+import json
+
+import pytest
+
+from src.runtimeuse_client import (
+    AgentRuntimeError,
+    RuntimeUseClient,
+    QueryOptions,
+    QueryResult,
+    TextResult,
+    StructuredOutputResult,
+)
+
+pytestmark = [pytest.mark.llm, pytest.mark.asyncio]
+
+MODEL = "gpt-4.1-mini"
+
+STRUCTURED_SCHEMA = json.dumps(
+    {
+        "type": "json_schema",
+        "schema": {
+            "type": "object",
+            "properties": {
+                "greeting": {"type": "string"},
+            },
+            "required": ["greeting"],
+            "additionalProperties": False,
+        },
+    }
+)
+
+
+class TestOpenAIText:
+    async def test_text_response(self, openai_ws_url: str):
+        client = RuntimeUseClient(ws_url=openai_ws_url)
+        result = await client.query(
+            prompt="Say hello world",
+            options=QueryOptions(
+                system_prompt="Reply concisely in plain text.",
+                model=MODEL,
+            ),
+        )
+
+        assert isinstance(result, QueryResult)
+        assert isinstance(result.data, TextResult)
+        assert len(result.data.text) > 0
+
+
+class TestOpenAIStructuredOutput:
+    async def test_structured_response(self, openai_ws_url: str):
+        client = RuntimeUseClient(ws_url=openai_ws_url)
+        result = await client.query(
+            prompt="Greet the user",
+            options=QueryOptions(
+                system_prompt="Reply with a greeting.",
+                model=MODEL,
+                output_format_json_schema_str=STRUCTURED_SCHEMA,
+            ),
+        )
+
+        assert isinstance(result, QueryResult)
+        assert isinstance(result.data, StructuredOutputResult)
+        assert "greeting" in result.data.structured_output
+        assert isinstance(result.data.structured_output["greeting"], str)
+        assert len(result.data.structured_output["greeting"]) > 0
+
+
+class TestOpenAIError:
+    async def test_invalid_model_raises_error(self, openai_ws_url: str):
+        client = RuntimeUseClient(ws_url=openai_ws_url)
+        with pytest.raises(AgentRuntimeError):
+            await client.query(
+                prompt="Say hello",
+                options=QueryOptions(
+                    system_prompt="Reply concisely.",
+                    model="nonexistent-model-xyz",
+                ),
+            )
diff --git a/packages/runtimeuse-client-python/test/sandbox/__init__.py b/packages/runtimeuse-client-python/test/sandbox/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/packages/runtimeuse-client-python/test/sandbox/conftest.py b/packages/runtimeuse-client-python/test/sandbox/conftest.py
new file mode 100644
index 0000000..e69de29
diff --git a/packages/runtimeuse-client-python/test/sandbox/test_e2b.py b/packages/runtimeuse-client-python/test/sandbox/test_e2b.py
new file mode 100644
index 0000000..f447428
--- /dev/null
+++ b/packages/runtimeuse-client-python/test/sandbox/test_e2b.py
@@ -0,0 +1,33 @@
+"""Smoke test: verify that an E2B sandbox can run runtimeuse and answer a query."""
+
+import pytest
+
+from src.runtimeuse_client import (
+    RuntimeUseClient,
+    QueryOptions,
+    QueryResult,
+    TextResult,
+)
+from test.sandbox_factories.e2b import create_e2b_runtimeuse
+
+pytestmark = [pytest.mark.sandbox, pytest.mark.asyncio]
+
+
+class TestE2BSandbox:
+    async def test_hello_world(self):
+        sandbox, ws_url = create_e2b_runtimeuse(agent="openai")
+        try:
+            client = RuntimeUseClient(ws_url=ws_url)
+            result = await client.query(
+                prompt="Say hello world",
+                options=QueryOptions(
+                    system_prompt="Reply concisely.",
+                    model="gpt-4.1-mini",
+                ),
+            )
+
+            assert isinstance(result, QueryResult)
+            assert isinstance(result.data, TextResult)
+            assert len(result.data.text) > 0
+        finally:
+            sandbox.kill()
diff --git a/packages/runtimeuse-client-python/test/sandbox_factories/__init__.py b/packages/runtimeuse-client-python/test/sandbox_factories/__init__.py
new file mode 100644
index 0000000..39d9142
--- /dev/null
+++ b/packages/runtimeuse-client-python/test/sandbox_factories/__init__.py
@@ -0,0 +1,3 @@
+from .e2b import create_e2b_runtimeuse
+
+__all__ = ["create_e2b_runtimeuse"]
diff --git a/packages/runtimeuse-client-python/test/sandbox_factories/e2b.py b/packages/runtimeuse-client-python/test/sandbox_factories/e2b.py
new file mode 100644
index 0000000..576d39d
--- /dev/null
+++ b/packages/runtimeuse-client-python/test/sandbox_factories/e2b.py
@@ -0,0 +1,92 @@
+"""Factory for creating E2B sandboxes running a runtimeuse server."""
+
+from __future__ import annotations
+
+import logging
+import os
+
+from e2b import Template, wait_for_port, default_build_logger
+from e2b_code_interpreter import Sandbox
+
+_logger = logging.getLogger(__name__)
+
+_DEFAULT_RUN_COMMAND = "npx -y runtimeuse@latest"
+
+
+def _get_env_or_fail(name: str) -> str:
+    """Return env var *name*; raise ``RuntimeError`` when it is unset or empty."""
+    if os.environ.get(name):
+        return os.environ[name]
+    raise RuntimeError(f"{name} environment variable is not set")
+
+
+def _should_try_reuse() -> bool:
+    """Tell whether E2B_REUSE_TEMPLATE is "1", "true" or "yes" (case-insensitive)."""
+    return os.getenv("E2B_REUSE_TEMPLATE", "").lower() in {"1", "true", "yes"}
+
+
+def create_e2b_runtimeuse(
+    agent: str = "openai",
+    run_command: str | None = None,
+) -> tuple[Sandbox, str]:
+    """Build an E2B template, create a sandbox, and return ``(sandbox, ws_url)``.
+
+    When ``E2B_REUSE_TEMPLATE`` is truthy, the factory first tries to create a
+    sandbox from the existing template; if that raises for any reason (e.g. the
+    template does not exist yet) it logs the error and rebuilds.  Otherwise the
+    template is always rebuilt so it reflects the current run command and env
+    vars.  Raises ``RuntimeError`` when a required API key env var is unset.
+
+    The caller owns the returned sandbox and must call ``sandbox.kill()``
+    when done.
+    """
+    e2b_api_key = _get_env_or_fail("E2B_API_KEY")
+    cmd = run_command or os.environ.get("RUNTIMEUSE_RUN_COMMAND", _DEFAULT_RUN_COMMAND)
+
+    envs: dict[str, str] = {}  # stays empty for agents other than openai/claude
+    if agent == "openai":
+        envs["OPENAI_API_KEY"] = _get_env_or_fail("OPENAI_API_KEY")
+    elif agent == "claude":
+        envs["ANTHROPIC_API_KEY"] = _get_env_or_fail("ANTHROPIC_API_KEY")
+
+    alias = f"runtimeuse-test-{agent}"  # one template alias per agent
+    start_cmd = f"{cmd} --agent {agent}"
+
+    need_build = True
+
+    if _should_try_reuse():
+        _logger.info("Trying to reuse existing E2B template %r", alias)
+        try:
+            sandbox = Sandbox.create(template=alias, api_key=e2b_api_key)
+            need_build = False
+        except Exception as exc:  # best-effort reuse: any failure falls back to a rebuild
+            _logger.info("Could not reuse template %r (%s), will build it", alias, exc)
+
+    if need_build:
+        _logger.info("Building E2B template %r with command: %s", alias, start_cmd)
+
+        template = (
+            Template()
+            .from_node_image("lts")
+            .apt_install(["unzip"])
+            .npm_install(["@anthropic-ai/claude-code"], g=True)
+            .set_envs(envs)  # NOTE(review): looks like this stores the API key in the template — confirm that is acceptable
+            .set_start_cmd(start_cmd, wait_for_port(8080))
+        )
+
+        Template.build(
+            template,
+            alias,
+            cpu_count=2,
+            memory_mb=2048,
+            on_build_logs=default_build_logger(),
+        )
+
+        sandbox = Sandbox.create(template=alias, api_key=e2b_api_key)
+
+    host = sandbox.get_host(8080)  # same port the start command is waited on
+    ws_url = f"wss://{host}"
+
+    _logger.info("Sandbox %s ready at %s", sandbox.sandbox_id, ws_url)
+
+    return sandbox, ws_url

From 4ffd9f6a3087db174e992fc66a37a9074af09432 Mon Sep 17 00:00:00 2001
From: Vijit Dhingra <vijit@uselark.io>
Date: Sun, 15 Mar 2026 22:42:49 -0700
Subject: [PATCH 3/3] add e2e tests for pre/post agent invocation commands and
 fix cancel hang

Add 7 E2E tests verifying pre_agent_invocation_commands and
post_agent_invocation_commands are executed by the server, including
cwd support and failure handling.

Change ws_url fixture to per-test scope so each test gets a fresh server.

Fix send_queue.task_done() not being called when ws.send() raises
ConnectionClosedOK, which caused send_queue.join() to hang forever
during cancellation.

Made-with: Cursor
---
 .../transports/websocket_transport.py         |   6 +-
 .../test/e2e/conftest.py                      |   2 +-
 .../test/e2e/test_e2e.py                      | 144 ++++++++++++++++++
 3 files changed, 149 insertions(+), 3 deletions(-)

diff --git a/packages/runtimeuse-client-python/src/runtimeuse_client/transports/websocket_transport.py b/packages/runtimeuse-client-python/src/runtimeuse_client/transports/websocket_transport.py
index befe1be..242f250 100644
--- a/packages/runtimeuse-client-python/src/runtimeuse_client/transports/websocket_transport.py
+++ b/packages/runtimeuse-client-python/src/runtimeuse_client/transports/websocket_transport.py
@@ -43,5 +43,7 @@ async def _queue_sender(
     ) -> None:
         while True:
             message = await send_queue.get()
-            await ws.send(json.dumps(message))
-            send_queue.task_done()
+            try:
+                await ws.send(json.dumps(message))
+            finally:
+                send_queue.task_done()
diff --git a/packages/runtimeuse-client-python/test/e2e/conftest.py b/packages/runtimeuse-client-python/test/e2e/conftest.py
index 828d361..032ab52 100644
--- a/packages/runtimeuse-client-python/test/e2e/conftest.py
+++ b/packages/runtimeuse-client-python/test/e2e/conftest.py
@@ -26,7 +26,7 @@ def _port_is_open(port: int) -> bool:
         return s.connect_ex(("127.0.0.1", port)) == 0
 
 
-@pytest.fixture(scope="session")
+@pytest.fixture
 def ws_url():
     """Start a local runtimeuse server with the echo handler and yield its URL."""
     if not CLI_JS.exists():
diff --git a/packages/runtimeuse-client-python/test/e2e/test_e2e.py b/packages/runtimeuse-client-python/test/e2e/test_e2e.py
index 1d103a7..bd4f6a4 100644
--- a/packages/runtimeuse-client-python/test/e2e/test_e2e.py
+++ b/packages/runtimeuse-client-python/test/e2e/test_e2e.py
@@ -14,6 +14,7 @@
     AssistantMessageInterface,
     AgentRuntimeError,
     CancelledException,
+    CommandInterface,
 )
 
 pytestmark = [pytest.mark.e2e, pytest.mark.asyncio]
@@ -107,6 +108,149 @@ async def abort_on_first(msg: AssistantMessageInterface):
             )
 
 
+class TestPrePostCommands:
+    async def test_pre_command_output_streamed(
+        self, client: RuntimeUseClient, make_query_options
+    ):
+        received: list[AssistantMessageInterface] = []
+
+        async def on_msg(msg: AssistantMessageInterface):
+            received.append(msg)
+
+        result = await client.query(
+            prompt="ECHO:hello",
+            options=make_query_options(
+                pre_agent_invocation_commands=[
+                    CommandInterface(command="echo pre-sentinel")
+                ],
+                on_assistant_message=on_msg,
+            ),
+        )
+
+        assert isinstance(result.data, TextResult)
+        assert result.data.text == "hello"
+        all_text = [block for msg in received for block in msg.text_blocks]
+        assert any("pre-sentinel" in t for t in all_text)
+
+    async def test_post_command_output_streamed(
+        self, client: RuntimeUseClient, make_query_options
+    ):
+        received: list[AssistantMessageInterface] = []
+
+        async def on_msg(msg: AssistantMessageInterface):
+            received.append(msg)
+
+        result = await client.query(
+            prompt="ECHO:hello",
+            options=make_query_options(
+                post_agent_invocation_commands=[
+                    CommandInterface(command="echo post-sentinel")
+                ],
+                on_assistant_message=on_msg,
+            ),
+        )
+
+        assert isinstance(result.data, TextResult)
+        assert result.data.text == "hello"
+        all_text = [block for msg in received for block in msg.text_blocks]
+        assert any("post-sentinel" in t for t in all_text)
+
+    async def test_pre_and_post_commands_both_run(
+        self, client: RuntimeUseClient, make_query_options
+    ):
+        received: list[AssistantMessageInterface] = []
+
+        async def on_msg(msg: AssistantMessageInterface):
+            received.append(msg)
+
+        result = await client.query(
+            prompt="ECHO:hello",
+            options=make_query_options(
+                pre_agent_invocation_commands=[
+                    CommandInterface(command="echo pre-sentinel")
+                ],
+                post_agent_invocation_commands=[
+                    CommandInterface(command="echo post-sentinel")
+                ],
+                on_assistant_message=on_msg,
+            ),
+        )
+
+        assert isinstance(result.data, TextResult)
+        assert result.data.text == "hello"
+        all_text = [block for msg in received for block in msg.text_blocks]
+        assert any("pre-sentinel" in t for t in all_text)
+        assert any("post-sentinel" in t for t in all_text)
+
+    async def test_pre_command_with_cwd(
+        self, client: RuntimeUseClient, make_query_options
+    ):
+        received: list[AssistantMessageInterface] = []
+
+        async def on_msg(msg: AssistantMessageInterface):
+            received.append(msg)
+
+        await client.query(
+            prompt="ECHO:ok",
+            options=make_query_options(
+                pre_agent_invocation_commands=[
+                    CommandInterface(command="pwd", cwd="/tmp")
+                ],
+                on_assistant_message=on_msg,
+            ),
+        )
+
+        all_text = [block for msg in received for block in msg.text_blocks]
+        assert any("/tmp" in t for t in all_text)
+
+    async def test_post_command_with_cwd(
+        self, client: RuntimeUseClient, make_query_options
+    ):
+        received: list[AssistantMessageInterface] = []
+
+        async def on_msg(msg: AssistantMessageInterface):
+            received.append(msg)
+
+        await client.query(
+            prompt="ECHO:ok",
+            options=make_query_options(
+                post_agent_invocation_commands=[
+                    CommandInterface(command="pwd", cwd="/tmp")
+                ],
+                on_assistant_message=on_msg,
+            ),
+        )
+
+        all_text = [block for msg in received for block in msg.text_blocks]
+        assert any("/tmp" in t for t in all_text)
+
+    async def test_failed_pre_command_raises_error(
+        self, client: RuntimeUseClient, make_query_options
+    ):
+        with pytest.raises(AgentRuntimeError, match="failed with exit code"):
+            await client.query(
+                prompt="ECHO:should not reach",
+                options=make_query_options(
+                    pre_agent_invocation_commands=[
+                        CommandInterface(command="exit 1")
+                    ],
+                ),
+            )
+
+    async def test_failed_post_command_raises_error(
+        self, client: RuntimeUseClient, make_query_options
+    ):
+        with pytest.raises(AgentRuntimeError, match="failed with exit code"):
+            await client.query(
+                prompt="ECHO:hello",
+                options=make_query_options(
+                    post_agent_invocation_commands=[
+                        CommandInterface(command="exit 1")
+                    ],
+                ),
+            )
+
+
 class TestInvocationFieldsForwarded:
     async def test_fields_round_trip(
         self, client: RuntimeUseClient, make_query_options