getlark · vijit-lark · Mar 16, 2026 · Mar 15, 2026 · Mar 16, 2026 · Mar 16, 2026
diff --git a/.github/workflows/test-runtimeuse-client-python.yml b/.github/workflows/test-runtimeuse-client-python.yml
@@ -32,5 +32,5 @@ jobs:
       - run: pip install -e ".[dev]" 2>/dev/null || pip install -e .
         working-directory: packages/runtimeuse-client-python
       - run: pip install pytest pytest-asyncio
-      - run: pytest test/
+      - run: pytest test/ -m "not sandbox and not llm"
         working-directory: packages/runtimeuse-client-python
diff --git a/packages/runtimeuse-client-python/pyproject.toml b/packages/runtimeuse-client-python/pyproject.toml
@@ -32,6 +32,10 @@ packages = ["src/runtimeuse_client"]
 
 [tool.pytest.ini_options]
 asyncio_mode = "auto"
+log_cli = true
+log_cli_level = "INFO"
 markers = [
     "e2e: end-to-end tests requiring a running runtimeuse server",
+    "sandbox: sandbox provider integration tests (requires E2B_API_KEY)",
+    "llm: real LLM integration tests (requires E2B_API_KEY + LLM API keys)",
 ]
diff --git a/packages/runtimeuse-client-python/src/runtimeuse_client/transports/websocket_transport.py b/packages/runtimeuse-client-python/src/runtimeuse_client/transports/websocket_transport.py
@@ -43,5 +43,7 @@ async def _queue_sender(
     ) -> None:
         while True:
             message = await send_queue.get()
-            await ws.send(json.dumps(message))
-            send_queue.task_done()
+            try:
+                await ws.send(json.dumps(message))
+            finally:
+                send_queue.task_done()
diff --git a/packages/runtimeuse-client-python/test/conftest.py b/packages/runtimeuse-client-python/test/conftest.py
@@ -1,10 +1,13 @@
 import asyncio
 from typing import Any, AsyncGenerator
 
+import dotenv
 import pytest
 
 from src.runtimeuse_client import RuntimeUseClient, QueryOptions
 
+dotenv.load_dotenv()
+
 
 class FakeTransport:
     """In-memory transport for testing.

diff --git a/packages/runtimeuse-client-python/test/e2e/conftest.py b/packages/runtimeuse-client-python/test/e2e/conftest.py
@@ -26,7 +26,7 @@ def _port_is_open(port: int) -> bool:
         return s.connect_ex(("127.0.0.1", port)) == 0
 
 
-@pytest.fixture(scope="session")
+@pytest.fixture
 def ws_url():
     """Start a local runtimeuse server with the echo handler and yield its URL."""
     if not CLI_JS.exists():

diff --git a/packages/runtimeuse-client-python/test/e2e/test_e2e.py b/packages/runtimeuse-client-python/test/e2e/test_e2e.py
@@ -14,6 +14,7 @@
     AssistantMessageInterface,
     AgentRuntimeError,
     CancelledException,
+    CommandInterface,
 )
 
 pytestmark = [pytest.mark.e2e, pytest.mark.asyncio]
@@ -107,6 +108,150 @@ async def abort_on_first(msg: AssistantMessageInterface):
             )
 
 
+class TestPrePostCommands:
+    """Tests for pre/post agent-invocation commands using ``ECHO:`` prompts."""
+
+    @staticmethod
+    async def _query_and_collect_text(
+        client: RuntimeUseClient, make_query_options, prompt: str, **options
+    ):
+        """Run one query and gather every streamed text block.
+
+        ``options`` are forwarded to ``make_query_options`` together with an
+        ``on_assistant_message`` callback that records each message.
+
+        Returns ``(result, texts)``: the value returned by ``client.query``
+        and the ``text_blocks`` of the recorded messages, flattened in order.
+        """
+        received: list[AssistantMessageInterface] = []
+
+        async def on_msg(msg: AssistantMessageInterface):
+            received.append(msg)
+
+        result = await client.query(
+            prompt=prompt,
+            options=make_query_options(on_assistant_message=on_msg, **options),
+        )
+        texts = [block for msg in received for block in msg.text_blocks]
+        return result, texts
+
+    @staticmethod
+    def _assert_echoed_hello(result) -> None:
+        """Assert that ``result.data`` is a ``TextResult`` with text ``hello``."""
+        assert isinstance(result.data, TextResult)
+        assert result.data.text == "hello"
+
+    async def test_pre_command_output_streamed(
+        self, client: RuntimeUseClient, make_query_options
+    ):
+        """Output of a pre-agent command shows up in the streamed text."""
+        result, texts = await self._query_and_collect_text(
+            client,
+            make_query_options,
+            "ECHO:hello",
+            pre_agent_invocation_commands=[
+                CommandInterface(command="echo pre-sentinel")
+            ],
+        )
+
+        self._assert_echoed_hello(result)
+        assert any("pre-sentinel" in t for t in texts)
+
+    async def test_post_command_output_streamed(
+        self, client: RuntimeUseClient, make_query_options
+    ):
+        """Output of a post-agent command shows up in the streamed text."""
+        result, texts = await self._query_and_collect_text(
+            client,
+            make_query_options,
+            "ECHO:hello",
+            post_agent_invocation_commands=[
+                CommandInterface(command="echo post-sentinel")
+            ],
+        )
+
+        self._assert_echoed_hello(result)
+        assert any("post-sentinel" in t for t in texts)
+
+    async def test_pre_and_post_commands_both_run(
+        self, client: RuntimeUseClient, make_query_options
+    ):
+        """Pre and post commands given together both produce output."""
+        result, texts = await self._query_and_collect_text(
+            client,
+            make_query_options,
+            "ECHO:hello",
+            pre_agent_invocation_commands=[
+                CommandInterface(command="echo pre-sentinel")
+            ],
+            post_agent_invocation_commands=[
+                CommandInterface(command="echo post-sentinel")
+            ],
+        )
+
+        self._assert_echoed_hello(result)
+        assert any("pre-sentinel" in t for t in texts)
+        assert any("post-sentinel" in t for t in texts)
+
+    async def test_pre_command_with_cwd(
+        self, client: RuntimeUseClient, make_query_options
+    ):
+        """A pre-agent ``pwd`` run with ``cwd="/tmp"`` reports that path."""
+        _, texts = await self._query_and_collect_text(
+            client,
+            make_query_options,
+            "ECHO:ok",
+            pre_agent_invocation_commands=[
+                CommandInterface(command="pwd", cwd="/tmp")
+            ],
+        )
+
+        assert any("/tmp" in t for t in texts)
+
+    async def test_post_command_with_cwd(
+        self, client: RuntimeUseClient, make_query_options
+    ):
+        """A post-agent ``pwd`` run with ``cwd="/tmp"`` reports that path."""
+        _, texts = await self._query_and_collect_text(
+            client,
+            make_query_options,
+            "ECHO:ok",
+            post_agent_invocation_commands=[
+                CommandInterface(command="pwd", cwd="/tmp")
+            ],
+        )
+
+        assert any("/tmp" in t for t in texts)
+
+    async def test_failed_pre_command_raises_error(
+        self, client: RuntimeUseClient, make_query_options
+    ):
+        """A pre-agent command exiting with status 1 raises ``AgentRuntimeError``."""
+        with pytest.raises(AgentRuntimeError, match="failed with exit code"):
+            await client.query(
+                prompt="ECHO:should not reach",
+                options=make_query_options(
+                    pre_agent_invocation_commands=[
+                        CommandInterface(command="exit 1")
+                    ],
+                ),
+            )
+
+    async def test_failed_post_command_raises_error(
+        self, client: RuntimeUseClient, make_query_options
+    ):
+        """A post-agent command exiting with status 1 raises ``AgentRuntimeError``."""
+        with pytest.raises(AgentRuntimeError, match="failed with exit code"):
+            await client.query(
+                prompt="ECHO:hello",
+                options=make_query_options(
+                    post_agent_invocation_commands=[
+                        CommandInterface(command="exit 1")
+                    ],
+                ),
+            )
+
+
 class TestInvocationFieldsForwarded:
     async def test_fields_round_trip(
         self, client: RuntimeUseClient, make_query_options

diff --git a/packages/runtimeuse-client-python/test/llm/__init__.py b/packages/runtimeuse-client-python/test/llm/__init__.py
diff --git a/packages/runtimeuse-client-python/test/llm/conftest.py b/packages/runtimeuse-client-python/test/llm/conftest.py
@@ -0,0 +1,38 @@
+"""Session-scoped fixtures that start E2B sandboxes for the LLM tests."""
+
+from typing import Iterator
+
+import pytest
+
+from test.sandbox_factories.e2b import create_e2b_runtimeuse
+
+
+def _e2b_ws_url(agent: str) -> Iterator[str]:
+    """Start a runtimeuse sandbox for ``agent`` and yield its URL.
+
+    A ``RuntimeError`` raised by ``create_e2b_runtimeuse`` is reported via
+    ``pytest.fail``. Once the sandbox exists it is killed in a ``finally``
+    clause, so cleanup also runs when the generator is closed or an
+    exception is thrown into it at the ``yield``.
+    """
+    try:
+        sandbox, ws_url = create_e2b_runtimeuse(agent=agent)
+    except RuntimeError as exc:
+        pytest.fail(str(exc))
+
+    try:
+        yield ws_url
+    finally:
+        sandbox.kill()
+
+
+@pytest.fixture(scope="session")
+def openai_ws_url() -> Iterator[str]:
+    """Create an E2B sandbox running runtimeuse with the OpenAI agent."""
+    yield from _e2b_ws_url("openai")
+
+
+@pytest.fixture(scope="session")
+def claude_ws_url() -> Iterator[str]:
+    """Create an E2B sandbox running runtimeuse with the Claude agent."""
+    yield from _e2b_ws_url("claude")
diff --git a/packages/runtimeuse-client-python/test/llm/test_claude.py b/packages/runtimeuse-client-python/test/llm/test_claude.py
@@ -0,0 +1,89 @@
+"""Integration tests that exercise the Claude agent through runtimeuse."""
+
+import json
+
+import pytest
+
+from src.runtimeuse_client import (
+    AgentRuntimeError,
+    RuntimeUseClient,
+    QueryOptions,
+    QueryResult,
+    TextResult,
+    StructuredOutputResult,
+)
+
+pytestmark = [pytest.mark.llm, pytest.mark.asyncio]
+
+MODEL = "claude-sonnet-4-20250514"
+
+# Object schema with a single required string property, "greeting".
+_GREETING_SCHEMA = {
+    "type": "object",
+    "properties": {
+        "greeting": {"type": "string"},
+    },
+    "required": ["greeting"],
+    "additionalProperties": False,
+}
+
+# Serialized once; passed as ``output_format_json_schema_str`` below.
+STRUCTURED_SCHEMA = json.dumps(
+    {"type": "json_schema", "schema": _GREETING_SCHEMA}
+)
+
+
+class TestClaudeText:
+    """Plain-text responses."""
+
+    async def test_text_response(self, claude_ws_url: str):
+        """A basic prompt returns a ``TextResult`` with non-empty text."""
+        runtime = RuntimeUseClient(ws_url=claude_ws_url)
+        options = QueryOptions(
+            system_prompt="Reply concisely in plain text.",
+            model=MODEL,
+        )
+        result = await runtime.query(prompt="Say hello world", options=options)
+
+        assert isinstance(result, QueryResult)
+        payload = result.data
+        assert isinstance(payload, TextResult)
+        assert len(payload.text) > 0
+
+
+class TestClaudeStructuredOutput:
+    """Responses constrained by ``STRUCTURED_SCHEMA``."""
+
+    async def test_structured_response(self, claude_ws_url: str):
+        """The structured output carries a non-empty string ``greeting``."""
+        runtime = RuntimeUseClient(ws_url=claude_ws_url)
+        options = QueryOptions(
+            system_prompt="Reply with a greeting.",
+            model=MODEL,
+            output_format_json_schema_str=STRUCTURED_SCHEMA,
+        )
+        result = await runtime.query(prompt="Greet the user", options=options)
+
+        assert isinstance(result, QueryResult)
+        payload = result.data
+        assert isinstance(payload, StructuredOutputResult)
+        assert "greeting" in payload.structured_output
+        greeting = payload.structured_output["greeting"]
+        assert isinstance(greeting, str)
+        assert len(greeting) > 0
+
+
+class TestClaudeError:
+    """Error propagation from the agent."""
+
+    async def test_invalid_model_raises_error(self, claude_ws_url: str):
+        """Querying with model ``nonexistent-model-xyz`` raises ``AgentRuntimeError``."""
+        runtime = RuntimeUseClient(ws_url=claude_ws_url)
+        with pytest.raises(AgentRuntimeError):
+            await runtime.query(
+                prompt="Say hello",
+                options=QueryOptions(
+                    system_prompt="Reply concisely.",
+                    model="nonexistent-model-xyz",
+                ),
+            )