Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test-runtimeuse-client-python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,5 @@ jobs:
- run: pip install -e ".[dev]" 2>/dev/null || pip install -e .
working-directory: packages/runtimeuse-client-python
- run: pip install pytest pytest-asyncio
- run: pytest test/
- run: pytest test/ -m "not sandbox and not llm"
working-directory: packages/runtimeuse-client-python
4 changes: 4 additions & 0 deletions packages/runtimeuse-client-python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ packages = ["src/runtimeuse_client"]

[tool.pytest.ini_options]
asyncio_mode = "auto"
log_cli = true
log_cli_level = "INFO"
markers = [
"e2e: end-to-end tests requiring a running runtimeuse server",
"sandbox: sandbox provider integration tests (requires E2B_API_KEY)",
"llm: real LLM integration tests (requires E2B_API_KEY + LLM API keys)",
]
Original file line number Diff line number Diff line change
Expand Up @@ -43,5 +43,7 @@ async def _queue_sender(
) -> None:
while True:
message = await send_queue.get()
await ws.send(json.dumps(message))
send_queue.task_done()
try:
await ws.send(json.dumps(message))
finally:
send_queue.task_done()
3 changes: 3 additions & 0 deletions packages/runtimeuse-client-python/test/conftest.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import asyncio
from typing import Any, AsyncGenerator

import dotenv
import pytest

from src.runtimeuse_client import RuntimeUseClient, QueryOptions

dotenv.load_dotenv()


class FakeTransport:
"""In-memory transport for testing.
Expand Down
2 changes: 1 addition & 1 deletion packages/runtimeuse-client-python/test/e2e/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def _port_is_open(port: int) -> bool:
return s.connect_ex(("127.0.0.1", port)) == 0


@pytest.fixture(scope="session")
@pytest.fixture
def ws_url():
"""Start a local runtimeuse server with the echo handler and yield its URL."""
if not CLI_JS.exists():
Expand Down
144 changes: 144 additions & 0 deletions packages/runtimeuse-client-python/test/e2e/test_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
AssistantMessageInterface,
AgentRuntimeError,
CancelledException,
CommandInterface,
)

pytestmark = [pytest.mark.e2e, pytest.mark.asyncio]
Expand Down Expand Up @@ -107,6 +108,149 @@ async def abort_on_first(msg: AssistantMessageInterface):
)


class TestPrePostCommands:
    """End-to-end checks for pre/post agent-invocation commands."""

    @staticmethod
    def _recorder():
        """Return ``(messages, callback)``.

        ``callback`` is an async callable that appends every message it is
        given to ``messages``.
        """
        messages: list[AssistantMessageInterface] = []

        async def record(message: AssistantMessageInterface):
            messages.append(message)

        return messages, record

    @staticmethod
    def _text_blocks(messages):
        """Flatten the ``text_blocks`` of each message into one list."""
        flattened = []
        for message in messages:
            flattened.extend(message.text_blocks)
        return flattened

    async def test_pre_command_output_streamed(
        self, client: RuntimeUseClient, make_query_options
    ):
        """A pre-command's output appears in the received text blocks."""
        messages, record = self._recorder()
        pre_commands = [CommandInterface(command="echo pre-sentinel")]

        outcome = await client.query(
            prompt="ECHO:hello",
            options=make_query_options(
                pre_agent_invocation_commands=pre_commands,
                on_assistant_message=record,
            ),
        )

        assert isinstance(outcome.data, TextResult)
        assert outcome.data.text == "hello"
        texts = self._text_blocks(messages)
        assert any("pre-sentinel" in text for text in texts)

    async def test_post_command_output_streamed(
        self, client: RuntimeUseClient, make_query_options
    ):
        """A post-command's output appears in the received text blocks."""
        messages, record = self._recorder()
        post_commands = [CommandInterface(command="echo post-sentinel")]

        outcome = await client.query(
            prompt="ECHO:hello",
            options=make_query_options(
                post_agent_invocation_commands=post_commands,
                on_assistant_message=record,
            ),
        )

        assert isinstance(outcome.data, TextResult)
        assert outcome.data.text == "hello"
        texts = self._text_blocks(messages)
        assert any("post-sentinel" in text for text in texts)

    async def test_pre_and_post_commands_both_run(
        self, client: RuntimeUseClient, make_query_options
    ):
        """Both sentinels are seen when pre- and post-commands are given."""
        messages, record = self._recorder()
        pre_commands = [CommandInterface(command="echo pre-sentinel")]
        post_commands = [CommandInterface(command="echo post-sentinel")]

        outcome = await client.query(
            prompt="ECHO:hello",
            options=make_query_options(
                pre_agent_invocation_commands=pre_commands,
                post_agent_invocation_commands=post_commands,
                on_assistant_message=record,
            ),
        )

        assert isinstance(outcome.data, TextResult)
        assert outcome.data.text == "hello"
        texts = self._text_blocks(messages)
        assert any("pre-sentinel" in text for text in texts)
        assert any("post-sentinel" in text for text in texts)

    async def test_pre_command_with_cwd(
        self, client: RuntimeUseClient, make_query_options
    ):
        """``pwd`` run as a pre-command with ``cwd="/tmp"`` reports ``/tmp``."""
        messages, record = self._recorder()
        pre_commands = [CommandInterface(command="pwd", cwd="/tmp")]

        await client.query(
            prompt="ECHO:ok",
            options=make_query_options(
                pre_agent_invocation_commands=pre_commands,
                on_assistant_message=record,
            ),
        )

        texts = self._text_blocks(messages)
        assert any("/tmp" in text for text in texts)

    async def test_post_command_with_cwd(
        self, client: RuntimeUseClient, make_query_options
    ):
        """``pwd`` run as a post-command with ``cwd="/tmp"`` reports ``/tmp``."""
        messages, record = self._recorder()
        post_commands = [CommandInterface(command="pwd", cwd="/tmp")]

        await client.query(
            prompt="ECHO:ok",
            options=make_query_options(
                post_agent_invocation_commands=post_commands,
                on_assistant_message=record,
            ),
        )

        texts = self._text_blocks(messages)
        assert any("/tmp" in text for text in texts)

    async def test_failed_pre_command_raises_error(
        self, client: RuntimeUseClient, make_query_options
    ):
        """A pre-command exiting with status 1 raises ``AgentRuntimeError``."""
        failing_options = make_query_options(
            pre_agent_invocation_commands=[CommandInterface(command="exit 1")],
        )

        with pytest.raises(AgentRuntimeError, match="failed with exit code"):
            await client.query(
                prompt="ECHO:should not reach",
                options=failing_options,
            )

    async def test_failed_post_command_raises_error(
        self, client: RuntimeUseClient, make_query_options
    ):
        """A post-command exiting with status 1 raises ``AgentRuntimeError``."""
        failing_options = make_query_options(
            post_agent_invocation_commands=[CommandInterface(command="exit 1")],
        )

        with pytest.raises(AgentRuntimeError, match="failed with exit code"):
            await client.query(
                prompt="ECHO:hello",
                options=failing_options,
            )


class TestInvocationFieldsForwarded:
async def test_fields_round_trip(
self, client: RuntimeUseClient, make_query_options
Expand Down
Empty file.
29 changes: 29 additions & 0 deletions packages/runtimeuse-client-python/test/llm/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import pytest

from test.sandbox_factories.e2b import create_e2b_runtimeuse


@pytest.fixture(scope="session")
def openai_ws_url():
    """Yield the URL of a runtimeuse server in an E2B sandbox (OpenAI agent).

    Fails the requesting test if sandbox creation raises ``RuntimeError``.
    The sandbox is killed once the fixture is torn down.
    """
    try:
        created = create_e2b_runtimeuse(agent="openai")
    except RuntimeError as err:
        pytest.fail(str(err))

    box, url = created
    yield url

    box.kill()


@pytest.fixture(scope="session")
def claude_ws_url():
    """Yield the URL of a runtimeuse server in an E2B sandbox (Claude agent).

    Fails the requesting test if sandbox creation raises ``RuntimeError``.
    The sandbox is killed once the fixture is torn down.
    """
    try:
        created = create_e2b_runtimeuse(agent="claude")
    except RuntimeError as err:
        pytest.fail(str(err))

    box, url = created
    yield url

    box.kill()
80 changes: 80 additions & 0 deletions packages/runtimeuse-client-python/test/llm/test_claude.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
"""LLM integration tests using the Claude agent."""

import json

import pytest

from src.runtimeuse_client import (
AgentRuntimeError,
RuntimeUseClient,
QueryOptions,
QueryResult,
TextResult,
StructuredOutputResult,
)

# Module-wide pytest marks: every test here carries the ``llm`` and
# ``asyncio`` marks.
pytestmark = [pytest.mark.llm, pytest.mark.asyncio]

# Model identifier passed as ``QueryOptions.model`` by the tests below.
MODEL = "claude-sonnet-4-20250514"

# JSON Schema for the structured reply: an object with exactly one required
# string property, "greeting".
_GREETING_OBJECT_SCHEMA = {
    "type": "object",
    "properties": {"greeting": {"type": "string"}},
    "required": ["greeting"],
    "additionalProperties": False,
}

# Serialized output-format spec handed to
# ``QueryOptions.output_format_json_schema_str``.
STRUCTURED_SCHEMA = json.dumps(
    {"type": "json_schema", "schema": _GREETING_OBJECT_SCHEMA}
)


class TestClaudeText:
    """Plain-text query against the Claude-agent server."""

    async def test_text_response(self, claude_ws_url: str):
        """A simple prompt returns a ``QueryResult`` with non-empty text."""
        runtime_client = RuntimeUseClient(ws_url=claude_ws_url)
        text_options = QueryOptions(
            system_prompt="Reply concisely in plain text.",
            model=MODEL,
        )

        response = await runtime_client.query(
            prompt="Say hello world",
            options=text_options,
        )

        assert isinstance(response, QueryResult)
        payload = response.data
        assert isinstance(payload, TextResult)
        assert len(payload.text) > 0


class TestClaudeStructuredOutput:
    """Structured-output query against the Claude-agent server."""

    async def test_structured_response(self, claude_ws_url: str):
        """With a JSON schema supplied, the result has a non-empty string ``greeting``."""
        runtime_client = RuntimeUseClient(ws_url=claude_ws_url)
        structured_options = QueryOptions(
            system_prompt="Reply with a greeting.",
            model=MODEL,
            output_format_json_schema_str=STRUCTURED_SCHEMA,
        )

        response = await runtime_client.query(
            prompt="Greet the user",
            options=structured_options,
        )

        assert isinstance(response, QueryResult)
        assert isinstance(response.data, StructuredOutputResult)
        assert "greeting" in response.data.structured_output
        assert isinstance(response.data.structured_output["greeting"], str)
        assert len(response.data.structured_output["greeting"]) > 0


class TestClaudeError:
    """Error path for the Claude-agent server."""

    async def test_invalid_model_raises_error(self, claude_ws_url: str):
        """Querying with an unknown model name raises ``AgentRuntimeError``."""
        runtime_client = RuntimeUseClient(ws_url=claude_ws_url)

        with pytest.raises(AgentRuntimeError):
            bad_model_options = QueryOptions(
                system_prompt="Reply concisely.",
                model="nonexistent-model-xyz",
            )
            await runtime_client.query(
                prompt="Say hello",
                options=bad_model_options,
            )
Loading