diff --git a/docs.json b/docs.json
index dc0e6e64..19e0d0dd 100644
--- a/docs.json
+++ b/docs.json
@@ -269,6 +269,7 @@
"sdk/guides/agent-server/api-sandbox",
"sdk/guides/agent-server/cloud-workspace",
"sdk/guides/agent-server/custom-tools",
+ "sdk/guides/agent-server/settings-secrets-api",
{
"group": "API Reference",
"openapi": {
diff --git a/sdk/guides/agent-acp.mdx b/sdk/guides/agent-acp.mdx
index b4ac3ca0..292fd5fe 100644
--- a/sdk/guides/agent-acp.mdx
+++ b/sdk/guides/agent-acp.mdx
@@ -185,7 +185,8 @@ This example is available on GitHub: [examples/01_standalone_sdk/40_acp_agent_ex
This example shows how to use an ACP-compatible server (claude-agent-acp)
as the agent backend instead of direct LLM calls. It also demonstrates
``ask_agent()``, a stateless side-question that forks the ACP session
-and leaves the main conversation untouched.
+and leaves the main conversation untouched, and sending an image alongside
+text to verify multimodal (vision) input support.
Prerequisites:
- Node.js / npx available
@@ -197,23 +198,41 @@ Usage:
import os
+from openhands.sdk import ImageContent, Message, TextContent
from openhands.sdk.agent import ACPAgent
from openhands.sdk.conversation import Conversation
+IMAGE_URL = "https://github.com/OpenHands/docs/raw/main/openhands/static/img/logo.png"
+
agent = ACPAgent(acp_command=["npx", "-y", "@agentclientprotocol/claude-agent-acp"])
try:
cwd = os.getcwd()
conversation = Conversation(agent=agent, workspace=cwd)
- # --- Main conversation turn ---
+ # --- Main conversation turn (text only) ---
conversation.send_message(
"List the Python source files under openhands-sdk/openhands/sdk/agent/, "
"then read the __init__.py and summarize what agent classes are exported."
)
conversation.run()
+ # --- Image input turn (text + image) ---
+ print("\n--- image input ---")
+ conversation.send_message(
+ Message(
+ role="user",
+ content=[
+ TextContent(
+ text="Describe what you see in this image in one sentence."
+ ),
+ ImageContent(image_urls=[IMAGE_URL]),
+ ],
+ )
+ )
+ conversation.run()
+
# --- ask_agent: stateless side-question via fork_session ---
print("\n--- ask_agent ---")
response = conversation.ask_agent(
@@ -294,9 +313,9 @@ os.environ["ANTHROPIC_API_KEY"] = llm_api_key
runtime_api_key = os.getenv("RUNTIME_API_KEY")
assert runtime_api_key, "RUNTIME_API_KEY required"
-# If GITHUB_SHA is set (e.g. running in CI of a PR), use that to ensure consistency
-# Otherwise, use the latest image from main
-server_image_sha = os.getenv("GITHUB_SHA") or "main"
+# SDK_SHA is the canonical commit SHA set by CI workflows (avoids the
+# built-in GITHUB_SHA which resolves to the merge-commit on PRs).
+server_image_sha = os.getenv("SDK_SHA") or os.getenv("GITHUB_SHA") or "main"
server_image = f"ghcr.io/openhands/agent-server:{server_image_sha[:7]}-python-amd64"
logger.info(f"Using server image: {server_image}")
diff --git a/sdk/guides/agent-delegation.mdx b/sdk/guides/agent-delegation.mdx
index dac6a8f4..d043bd93 100644
--- a/sdk/guides/agent-delegation.mdx
+++ b/sdk/guides/agent-delegation.mdx
@@ -179,15 +179,13 @@ from openhands.sdk import (
from openhands.sdk.context import Skill
from openhands.sdk.subagent import register_agent
from openhands.sdk.tool import register_tool
+from openhands.tools import register_builtins_agents
from openhands.tools.delegate import (
DelegateTool,
DelegationVisualizer,
)
-from openhands.tools.preset.default import get_default_tools, register_builtins_agents
-ONLY_RUN_SIMPLE_DELEGATION = False
-
logger = get_logger(__name__)
# Configure LLM and agent
@@ -198,91 +196,6 @@ llm = LLM(
usage_id="agent",
)
-cwd = os.getcwd()
-
-tools = get_default_tools(enable_browser=True)
-tools.append(Tool(name=DelegateTool.name))
-register_builtins_agents()
-
-main_agent = Agent(
- llm=llm,
- tools=tools,
-)
-conversation = Conversation(
- agent=main_agent,
- workspace=cwd,
- visualizer=DelegationVisualizer(name="Delegator"),
-)
-
-conversation.send_message(
- "Forget about coding. Let's switch to travel planning. "
- "Let's plan a trip to London. I have two issues I need to solve: "
- "Lodging: what are the best areas to stay at while keeping budget in mind? "
- "Activities: what are the top 5 must-see attractions and hidden gems? "
- "Please use the delegation tools to handle these two tasks in parallel. "
- "Make sure the sub-agents use their own knowledge "
- "and dont rely on internet access. "
- "They should keep it short. After getting the results, merge both analyses "
- "into a single consolidated report.\n\n"
-)
-conversation.run()
-
-conversation.send_message(
- "Ask the lodging sub-agent what it thinks about Covent Garden."
-)
-conversation.run()
-
-# Report cost for simple delegation example
-cost_simple = conversation.conversation_stats.get_combined_metrics().accumulated_cost
-print(f"EXAMPLE_COST (simple delegation): {cost_simple}")
-
-print("Simple delegation example done!", "\n" * 20)
-
-if ONLY_RUN_SIMPLE_DELEGATION:
- # For CI: always emit the EXAMPLE_COST marker before exiting.
- print(f"EXAMPLE_COST: {cost_simple}")
- exit(0)
-
-
-# -------- Agent Delegation Second Part: Built-in Agent Types (Explore + Bash) --------
-
-main_agent = Agent(
- llm=llm,
- tools=[Tool(name=DelegateTool.name)],
-)
-conversation = Conversation(
- agent=main_agent,
- workspace=cwd,
- visualizer=DelegationVisualizer(name="Delegator (builtins)"),
-)
-
-builtin_task_message = (
- "Demonstrate SDK built-in sub-agent types. "
- "1) Spawn an 'explore' sub-agent and ask it to list the markdown files in "
- "openhands-sdk/openhands/sdk/subagent/builtins/ and summarize what each "
- "built-in agent type is for (based on the file contents). "
- "2) Spawn a 'bash' sub-agent and ask it to run `python --version` in the "
- "terminal and return the exact output. "
- "3) Merge both results into a short report. "
- "Do not use internet access."
-)
-
-print("=" * 100)
-print("Demonstrating built-in agent delegation (explore + bash)...")
-print("=" * 100)
-
-conversation.send_message(builtin_task_message)
-conversation.run()
-
-# Report cost for builtin agent types example
-cost_builtin = conversation.conversation_stats.get_combined_metrics().accumulated_cost
-print(f"EXAMPLE_COST (builtin agents): {cost_builtin}")
-
-print("Built-in agent delegation example done!", "\n" * 20)
-
-
-# -------- Agent Delegation Third Part: User-Defined Agent Types --------
-
def create_lodging_planner(llm: LLM) -> Agent:
"""Create a lodging planner focused on London stays."""
@@ -343,6 +256,7 @@ register_agent(
factory_func=create_activities_planner,
description="Creates time-efficient London activity itineraries.",
)
+register_builtins_agents()
# Make the delegation tool available to the main agent
register_tool("DelegateTool", DelegateTool)
@@ -353,26 +267,26 @@ main_agent = Agent(
)
conversation = Conversation(
agent=main_agent,
- workspace=cwd,
+ workspace=os.getcwd(),
visualizer=DelegationVisualizer(name="Delegator"),
)
-task_message = (
- "Plan a 3-day London trip. "
- "1) Spawn two sub-agents: lodging_planner (hotel options) and "
- "activities_planner (itinerary). "
- "2) Ask lodging_planner for 3-4 central London hotel recommendations with "
- "neighborhoods, quick pros/cons, and transit notes by budget. "
- "3) Ask activities_planner for a concise 3-day itinerary with nearby stops, "
- " food/coffee suggestions, and any ticket/reservation notes. "
- "4) Share both sub-agent results and propose a combined plan."
-)
-
print("=" * 100)
print("Demonstrating London trip delegation (lodging + activities)...")
print("=" * 100)
-conversation.send_message(task_message)
+conversation.send_message("""
+Let's plan a trip to London. I have two specific areas to address:
+
+Lodging: What are the best areas to stay in while keeping a budget in mind?
+Activities: What are the top five must-see attractions and hidden gems?
+
+Please use delegation tools to handle these two tasks in parallel.
+Ensure the sub-agents use their own internal knowledge and do not
+rely on internet access. Keep the responses concise.
+Once you have the results, use the bash sub-agent to write a file
+named london_trip_report.txt containing the findings in the working directory.
+""")
conversation.run()
conversation.send_message(
@@ -384,12 +298,9 @@ conversation.run()
cost_user_defined = (
conversation.conversation_stats.get_combined_metrics().accumulated_cost
)
-print(f"EXAMPLE_COST (user-defined agents): {cost_user_defined}")
+print(f"EXAMPLE_COST: {cost_user_defined}")
print("All done!")
-
-# Full example cost report for CI workflow
-print(f"EXAMPLE_COST: {cost_simple + cost_builtin + cost_user_defined}")
```
diff --git a/sdk/guides/agent-file-based.mdx b/sdk/guides/agent-file-based.mdx
index f8f5f0f7..ed576c4b 100644
--- a/sdk/guides/agent-file-based.mdx
+++ b/sdk/guides/agent-file-based.mdx
@@ -518,7 +518,7 @@ grammar_checker = AgentDefinition(
register_agent(
name=grammar_checker.name,
factory_func=agent_definition_to_factory(grammar_checker),
- description=grammar_checker,
+ description=grammar_checker.description,
)
# 3. Set up the orchestrator agent with the DelegateTool
diff --git a/sdk/guides/agent-server/api-sandbox.mdx b/sdk/guides/agent-server/api-sandbox.mdx
index 2fea0916..79d47705 100644
--- a/sdk/guides/agent-server/api-sandbox.mdx
+++ b/sdk/guides/agent-server/api-sandbox.mdx
@@ -151,9 +151,9 @@ if not runtime_api_key:
exit(1)
-# If GITHUB_SHA is set (e.g. running in CI of a PR), use that to ensure consistency
-# Otherwise, use the latest image from main
-server_image_sha = os.getenv("GITHUB_SHA") or "main"
+# SDK_SHA is the canonical commit SHA set by CI workflows (avoids the
+# built-in GITHUB_SHA which resolves to the merge-commit on PRs).
+server_image_sha = os.getenv("SDK_SHA") or os.getenv("GITHUB_SHA") or "main"
server_image = f"ghcr.io/openhands/agent-server:{server_image_sha[:7]}-python-amd64"
logger.info(f"Using server image: {server_image}")
diff --git a/sdk/guides/agent-server/apptainer-sandbox.mdx b/sdk/guides/agent-server/apptainer-sandbox.mdx
index 19adceb0..411920a0 100644
--- a/sdk/guides/agent-server/apptainer-sandbox.mdx
+++ b/sdk/guides/agent-server/apptainer-sandbox.mdx
@@ -77,11 +77,11 @@ def get_server_image():
"""Get the server image tag, using PR-specific image in CI."""
platform_str = detect_platform()
arch = "arm64" if "arm64" in platform_str else "amd64"
- # If GITHUB_SHA is set (e.g. running in CI of a PR), use that to ensure consistency
- # Otherwise, use the latest image from main
- github_sha = os.getenv("GITHUB_SHA")
- if github_sha:
- return f"ghcr.io/openhands/agent-server:{github_sha[:7]}-python-{arch}"
+ # SDK_SHA is the canonical commit SHA set by CI workflows (avoids the
+ # built-in GITHUB_SHA which resolves to the merge-commit on PRs).
+ sha = os.getenv("SDK_SHA") or os.getenv("GITHUB_SHA")
+ if sha:
+ return f"ghcr.io/openhands/agent-server:{sha[:7]}-python-{arch}"
return "ghcr.io/openhands/agent-server:latest-python"
@@ -95,7 +95,7 @@ logger.info(f"Using server image: {server_image}")
with ApptainerWorkspace(
# use pre-built image for faster startup
server_image=server_image,
- host_port=8010,
+ # host_port auto-selects an available port when not specified
platform=detect_platform(),
) as workspace:
# 3) Create agent
diff --git a/sdk/guides/agent-server/cloud-workspace.mdx b/sdk/guides/agent-server/cloud-workspace.mdx
index 43cc07e9..3900c3a9 100644
--- a/sdk/guides/agent-server/cloud-workspace.mdx
+++ b/sdk/guides/agent-server/cloud-workspace.mdx
@@ -310,10 +310,9 @@ with OpenHandsCloudWorkspace(
cloud_api_key=cloud_api_key,
) as workspace:
# --- LLM from SaaS account settings ---
- # get_llm() calls GET /users/me?expose_secrets=true,
- # sending your Cloud API key plus the sandbox session
- # key that OpenHands Cloud issued for this workspace.
- # It returns a fully configured LLM instance.
+ # get_llm() calls GET /users/me?expose_secrets=true
+ # (dual auth: Bearer + session key) and returns a
+ # fully configured LLM instance.
# Override any parameter: workspace.get_llm(model="gpt-4o")
llm = workspace.get_llm()
logger.info(f"LLM configured: model={llm.model}")
diff --git a/sdk/guides/agent-server/docker-sandbox.mdx b/sdk/guides/agent-server/docker-sandbox.mdx
index 301d5df3..daee6e43 100644
--- a/sdk/guides/agent-server/docker-sandbox.mdx
+++ b/sdk/guides/agent-server/docker-sandbox.mdx
@@ -160,11 +160,11 @@ def get_server_image():
"""Get the server image tag, using PR-specific image in CI."""
platform_str = detect_platform()
arch = "arm64" if "arm64" in platform_str else "amd64"
- # If GITHUB_SHA is set (e.g. running in CI of a PR), use that to ensure consistency
- # Otherwise, use the latest image from main
- github_sha = os.getenv("GITHUB_SHA")
- if github_sha:
- return f"ghcr.io/openhands/agent-server:{github_sha[:7]}-python-{arch}"
+ # SDK_SHA is the canonical commit SHA set by CI workflows (avoids the
+ # built-in GITHUB_SHA which resolves to the merge-commit on PRs).
+ sha = os.getenv("SDK_SHA") or os.getenv("GITHUB_SHA")
+ if sha:
+ return f"ghcr.io/openhands/agent-server:{sha[:7]}-python-{arch}"
return "ghcr.io/openhands/agent-server:latest-python"
@@ -173,7 +173,7 @@ def get_server_image():
# image or `DockerDevWorkspace` to automatically build the image on-demand.
# with DockerDevWorkspace(
# # dynamically build agent-server image
-# base_image="nikolaik/python-nodejs:python3.13-nodejs22",
+# base_image="nikolaik/python-nodejs:python3.13-nodejs22-slim",
# host_port=8010,
# platform=detect_platform(),
# ) as workspace:
@@ -182,7 +182,7 @@ logger.info(f"Using server image: {server_image}")
with DockerWorkspace(
# use pre-built image for faster startup
server_image=server_image,
- host_port=8010,
+ # host_port auto-selects an available port when not specified
platform=detect_platform(),
) as workspace:
# 3) Create agent
@@ -364,11 +364,11 @@ def get_server_image():
"""Get the server image tag, using PR-specific image in CI."""
platform_str = detect_platform()
arch = "arm64" if "arm64" in platform_str else "amd64"
- # If GITHUB_SHA is set (e.g. running in CI of a PR), use that to ensure consistency
- # Otherwise, use the latest image from main
- github_sha = os.getenv("GITHUB_SHA")
- if github_sha:
- return f"ghcr.io/openhands/agent-server:{github_sha[:7]}-python-{arch}"
+ # SDK_SHA is the canonical commit SHA set by CI workflows (avoids the
+ # built-in GITHUB_SHA which resolves to the merge-commit on PRs).
+ sha = os.getenv("SDK_SHA") or os.getenv("GITHUB_SHA")
+ if sha:
+ return f"ghcr.io/openhands/agent-server:{sha[:7]}-python-{arch}"
return "ghcr.io/openhands/agent-server:latest-python"
@@ -561,11 +561,11 @@ def get_server_image():
"""Get the server image tag, using PR-specific image in CI."""
platform_str = detect_platform()
arch = "arm64" if "arm64" in platform_str else "amd64"
- # If GITHUB_SHA is set (e.g. running in CI of a PR), use that to ensure consistency
- # Otherwise, use the latest image from main
- github_sha = os.getenv("GITHUB_SHA")
- if github_sha:
- return f"ghcr.io/openhands/agent-server:{github_sha[:7]}-python-{arch}"
+ # SDK_SHA is the canonical commit SHA set by CI workflows (avoids the
+ # built-in GITHUB_SHA which resolves to the merge-commit on PRs).
+ sha = os.getenv("SDK_SHA") or os.getenv("GITHUB_SHA")
+ if sha:
+ return f"ghcr.io/openhands/agent-server:{sha[:7]}-python-{arch}"
return "ghcr.io/openhands/agent-server:latest-python"
@@ -574,7 +574,7 @@ def get_server_image():
# automatically build the image on-demand.
# with DockerDevWorkspace(
# # dynamically build agent-server image
-# base_image="nikolaik/python-nodejs:python3.13-nodejs22",
+# base_image="nikolaik/python-nodejs:python3.13-nodejs22-slim",
# host_port=8010,
# platform=detect_platform(),
# ) as workspace:
@@ -582,7 +582,7 @@ server_image = get_server_image()
logger.info(f"Using server image: {server_image}")
with DockerWorkspace(
server_image=server_image,
- host_port=8011,
+ # host_port auto-selects an available port when not specified
platform=detect_platform(),
extra_ports=True, # Expose extra ports for VSCode and VNC
) as workspace:
diff --git a/sdk/guides/agent-server/local-server.mdx b/sdk/guides/agent-server/local-server.mdx
index 541c5038..6e0cb7e9 100644
--- a/sdk/guides/agent-server/local-server.mdx
+++ b/sdk/guides/agent-server/local-server.mdx
@@ -114,16 +114,21 @@ import sys
import tempfile
import threading
import time
+from pathlib import Path
from pydantic import SecretStr
from openhands.sdk import LLM, Conversation, RemoteConversation, Workspace, get_logger
-from openhands.sdk.event import ConversationStateUpdateEvent
+from openhands.sdk.event import ConversationStateUpdateEvent, HookExecutionEvent
+from openhands.sdk.hooks import HookConfig, HookDefinition, HookMatcher
from openhands.tools.preset.default import get_default_agent
logger = get_logger(__name__)
+# Hook script directory for this example
+HOOK_SCRIPTS_DIR = Path(__file__).parent / "hook_scripts"
+
def _stream_output(stream, prefix, target_stream):
"""Stream output from subprocess to target stream with prefix."""
@@ -278,20 +283,62 @@ with ManagedAPIServer(port=8001) as server:
)
logger.info(f"Output: {result.stdout}")
+ # Configure hooks - demonstrating the hooks system with RemoteConversation
+ # Server-side hooks (PreToolUse, PostToolUse, UserPromptSubmit, Stop) are
+ # executed by the agent server. Client-side hooks (SessionStart, SessionEnd)
+ # are executed locally.
+
+ hook_config = HookConfig(
+ # Stop hook - run Python syntax check before allowing agent to finish.
+ # If any Python file has syntax errors, the hook returns "deny" with the
+ # error output, which gets sent back to the agent as feedback, and the
+ # agent continues working to fix the issue.
+ stop=[
+ HookMatcher(
+ matcher="*", # Match all stop reasons
+ hooks=[
+ HookDefinition(
+ command=str(HOOK_SCRIPTS_DIR / "pycompile_check.sh"),
+ timeout=60,
+ )
+ ],
+ )
+ ],
+ )
+
conversation = Conversation(
agent=agent,
workspace=workspace,
callbacks=[event_callback],
+ hook_config=hook_config,
)
assert isinstance(conversation, RemoteConversation)
+ # Track hook execution events
+ hook_events: list[HookExecutionEvent] = []
+
+ def hook_event_tracker(event):
+ """Additional callback to track hook execution events."""
+ if isinstance(event, HookExecutionEvent):
+ hook_events.append(event)
+ logger.info(f"HookExecutionEvent captured: {event.hook_event_type}")
+
+ # Append our hook tracker to the existing callbacks
+ conversation._callbacks.append(hook_event_tracker)
+
try:
logger.info(f"\nConversation ID: {conversation.state.id}")
- # Send first message and run
- logger.info("Sending first message...")
+ # Test scenario: Ask the agent to create a Python file with syntax errors
+ # The stop hook should detect the syntax error and send feedback back
+ # to the agent to fix it
+ logger.info("Sending message to test on_stop hook with syntax check...")
conversation.send_message(
- "Read the current repo and write 3 facts about the project into FACTS.txt."
+ "Create a Python file called 'test_broken.py' in the current directory "
+ "with an obvious syntax error (like 'def broken(:\n pass' - missing "
+ "closing parenthesis). After creating the file, immediately use the "
+ "finish action. If you receive any feedback about errors, fix them and "
+ "try to finish again."
)
# Generate title using a specific LLM
@@ -299,10 +346,41 @@ with ManagedAPIServer(port=8001) as server:
logger.info(f"Generated conversation title: {title}")
logger.info("Running conversation...")
- conversation.run()
+ logger.info(
+ "Expected behavior: Agent creates broken .py file -> tries to finish "
+ "-> stop hook runs syntax check -> check fails -> hook sends feedback "
+ "-> agent fixes the syntax error -> tries to finish again -> passes"
+ )
- logger.info("✅ First task completed!")
- logger.info(f"Agent status: {conversation.state.execution_status}")
+ # Keep running until the agent actually finishes
+ # When a stop hook denies, the state goes: running -> finished -> running
+ # The client's run() may return when it sees 'finished', so we need to
+ # check if the agent is still running and continue
+ max_runs = 10 # Allow enough retries for agent to fix issues
+ run_count = 0
+ while run_count < max_runs:
+ run_count += 1
+ logger.info(f"Run attempt #{run_count}")
+ conversation.run()
+ current_status = conversation.state.execution_status
+ logger.info(f" After run(), status = {current_status}")
+
+ # Small delay to let any pending state updates arrive
+ time.sleep(0.5)
+ current_status = conversation.state.execution_status
+ logger.info(f" After delay, status = {current_status}")
+
+ if current_status.value == "finished":
+ logger.info(" ✅ Agent finished!")
+ break
+ elif current_status.value == "running":
+ logger.info(" Agent still running (hook denied stop), continuing...")
+ else:
+ logger.info(f" Unexpected status: {current_status}, stopping")
+ break
+
+ logger.info("✅ Task completed!")
+ logger.info(f"Final agent status: {conversation.state.execution_status}")
# Wait for events to stop coming (no events for 2 seconds)
logger.info("⏳ Waiting for events to stop...")
@@ -310,10 +388,50 @@ with ManagedAPIServer(port=8001) as server:
time.sleep(0.1)
logger.info("✅ Events have stopped")
- logger.info("Running conversation again...")
- conversation.send_message("Great! Now delete that file.")
- conversation.run()
- logger.info("✅ Second task completed!")
+ # Analyze hook execution events
+ logger.info("\n" + "=" * 50)
+ logger.info("Hook Execution Events Analysis")
+ logger.info("=" * 50)
+
+ logger.info(f"Total HookExecutionEvents received: {len(hook_events)}")
+ for i, he in enumerate(hook_events, 1):
+ logger.info(f"\n Hook Event #{i}:")
+ logger.info(f" Type: {he.hook_event_type}")
+ logger.info(f" Command: {he.hook_command}")
+ logger.info(f" Success: {he.success}")
+ logger.info(f" Blocked: {he.blocked}")
+ logger.info(f" Exit Code: {he.exit_code}")
+ if he.additional_context:
+ # Truncate for readability
+ ctx = (
+ he.additional_context[:500] + "..."
+ if len(he.additional_context) > 500
+ else he.additional_context
+ )
+ logger.info(f" Additional Context: {ctx}")
+ if he.error:
+ logger.info(f" Error: {he.error}")
+
+ # Count stop hooks that were denied (syntax check failed)
+ stop_events = [e for e in hook_events if e.hook_event_type == "Stop"]
+ denied_stops = [e for e in stop_events if e.blocked]
+
+ logger.info(f"\nStop hook events: {len(stop_events)}")
+ logger.info(f"Denied stops (syntax-check failures): {len(denied_stops)}")
+
+ if denied_stops:
+ logger.info(
+ "\n✅ SUCCESS: Stop hook denied at least once due to "
+ "a failed syntax check!"
+ )
+ logger.info(
+ " The agent should have received feedback and fixed the issue."
+ )
+ else:
+ logger.info(
+ "\n⚠️ No denied stops detected. Either the syntax check passed on "
+ "first try or the hook didn't work as expected."
+ )
# Demonstrate state.events functionality
logger.info("\n" + "=" * 50)
@@ -324,10 +442,10 @@ with ManagedAPIServer(port=8001) as server:
total_events = len(conversation.state.events)
logger.info(f"Total events in conversation: {total_events}")
- # Get recent events (last 5) using state.events
- logger.info("\nGetting last 5 events using state.events...")
+ # Get recent events (last 10) using state.events
+ logger.info("\nGetting last 10 events using state.events...")
all_events = conversation.state.events
- recent_events = all_events[-5:] if len(all_events) >= 5 else all_events
+ recent_events = all_events[-10:] if len(all_events) >= 10 else all_events
for i, event in enumerate(recent_events, 1):
event_type = type(event).__name__
@@ -335,7 +453,7 @@ with ManagedAPIServer(port=8001) as server:
logger.info(f" {i}. {event_type} at {timestamp}")
# Let's see what the actual event types are
- logger.info("\nEvent types found:")
+ logger.info("\nEvent types found in recent events:")
event_types = set()
for event in recent_events:
event_type = type(event).__name__
diff --git a/sdk/guides/agent-server/settings-secrets-api.mdx b/sdk/guides/agent-server/settings-secrets-api.mdx
new file mode 100644
index 00000000..c9ba61f7
--- /dev/null
+++ b/sdk/guides/agent-server/settings-secrets-api.mdx
@@ -0,0 +1,153 @@
+---
+title: Settings and Secrets API
+description: Manage agent settings and custom secrets through the agent server REST API.
+---
+
+import RunExampleCode from "/sdk/shared-snippets/how-to-run-example.mdx";
+
+> A ready-to-run example is available [here](#ready-to-run-example)!
+
+The Settings and Secrets API provides REST endpoints for managing agent configuration and custom secrets through a local agent server. This is the recommended pattern for frontend clients that need to:
+
+- Store secrets securely via the Settings API (encrypted at rest)
+- Pass encrypted secrets when starting conversations via `secrets_encrypted=True`
+- Never have access to plaintext secrets after initial storage
+
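+The snippets on this page talk to the server with a plain HTTP client. A minimal sketch, assuming a locally running agent server (the port and any auth headers depend on how you start the server; see the Local Agent Server guide):
+
+```python icon="python"
+import httpx
+
+# Assumed setup: an agent server already running locally, e.g. on port 8001
+# as in the Local Agent Server guide. Adjust the base URL and add any auth
+# headers your deployment requires.
+client = httpx.Client(base_url="http://localhost:8001")
+```
+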
+## Key Concepts
+
+### Settings Endpoints
+
+The agent server exposes settings management via REST:
+
+- **GET /api/settings** - Retrieve current settings
+- **PATCH /api/settings** - Update settings with a partial diff
+
+```python icon="python"
+# Store LLM configuration - API key is encrypted at rest
+response = client.patch(
+ "/api/settings",
+ json={
+ "agent_settings_diff": {
+ "llm": {
+ "model": "anthropic/claude-sonnet-4-5-20250929",
+ "api_key": api_key,
+ }
+ }
+ },
+)
+settings = response.json()
+# API key is redacted by default
+assert settings["agent_settings"]["llm"]["api_key"] == "**********"
+```
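+
+Reading the stored settings back is a plain GET; a short sketch (secrets stay redacted unless you opt in, as shown in the next section):
+
+```python icon="python"
+# Retrieve current settings - the API key stays redacted by default
+response = client.get("/api/settings")
+settings = response.json()
+print(settings["agent_settings"]["llm"]["model"])
+print(settings["agent_settings"]["llm"]["api_key"])  # "**********"
+```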
+
+### Encrypted Secrets for Starting Conversations
+
+Frontend clients use the `X-Expose-Secrets: encrypted` header to get cipher-encrypted secrets:
+
+```python icon="python"
+# Get settings with cipher-encrypted secrets
+response = client.get(
+ "/api/settings",
+ headers={"X-Expose-Secrets": "encrypted"},
+)
+encrypted_settings = response.json()
+
+# Encrypted keys start with "gAAAAA" (Fernet token format)
+encrypted_api_key = encrypted_settings["agent_settings"]["llm"]["api_key"]
+```
+
+Then use the encrypted LLM config when starting a conversation:
+
+```python icon="python"
+# Extract LLM config from settings (includes encrypted api_key)
+encrypted_llm = encrypted_settings["agent_settings"]["llm"]
+
+# Start conversation with encrypted secrets
+start_request = {
+ "agent": {
+ "kind": "Agent",
+ "llm": encrypted_llm, # Use entire LLM config from settings
+ "tools": [{"name": "TerminalTool"}, {"name": "FileEditorTool"}],
+ },
+ "workspace": {"working_dir": "/tmp/demo"},
+ "secrets_encrypted": True, # Server will decrypt the API key
+ "initial_message": {
+ "role": "user",
+ "content": [{"type": "text", "text": "Create a hello.txt file"}],
+ "run": True,
+ },
+}
+response = client.post("/api/conversations", json=start_request)
+```
+
+The server decrypts the secrets before using them, ensuring the frontend never has access to plaintext secrets after initial storage.
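+
+The ready-to-run example below then polls the conversation until the agent finishes. A rough sketch of that loop; the `id` field and the `GET /api/conversations/{id}` response shape are assumptions here, so check the API Reference for the exact schema:
+
+```python icon="python"
+import time
+
+# Hypothetical polling loop - endpoint path and field names are illustrative.
+conversation_id = response.json()["id"]
+while True:
+    state = client.get(f"/api/conversations/{conversation_id}").json()
+    if state.get("execution_status") in ("finished", "error"):
+        break
+    time.sleep(2)
+```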
+
+### Custom Secrets CRUD Operations
+
+Custom secrets can be created, listed, retrieved, and deleted:
+
+```python icon="python"
+# Create a secret
+client.put(
+ "/api/settings/secrets",
+ json={
+ "name": "MY_PROJECT_TOKEN",
+ "value": "secret-token-abc123",
+ "description": "Example project token",
+ },
+)
+
+# List secrets (values not exposed)
+secrets = client.get("/api/settings/secrets").json()["secrets"]
+
+# Get secret value
+value = client.get("/api/settings/secrets/MY_PROJECT_TOKEN").text
+
+# Delete secret
+client.delete("/api/settings/secrets/MY_PROJECT_TOKEN")
+```
+
+### Secret Name Validation
+
+Secret names must follow environment variable naming conventions:
+
+- Start with a letter (a-z, A-Z)
+- Contain only letters, numbers, and underscores
+- Be 1-64 characters long
+
+Invalid names are rejected with a 422 response:
+
+```python icon="python"
+# Invalid: starts with number - returns 422
+response = client.put(
+ "/api/settings/secrets",
+ json={"name": "123_invalid", "value": "test"},
+)
+
+# Invalid: contains hyphen - returns 422
+response = client.put(
+ "/api/settings/secrets",
+ json={"name": "invalid-name", "value": "test"},
+)
+```
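+
+To fail fast on the client side, the same rule can be mirrored with a regular expression before calling the API (the server-side validation remains authoritative):
+
+```python icon="python"
+import re
+
+# Mirrors the documented rule: a leading letter followed by letters, digits,
+# or underscores, 1-64 characters in total.
+SECRET_NAME_RE = re.compile(r"^[A-Za-z][A-Za-z0-9_]{0,63}$")
+
+assert SECRET_NAME_RE.match("MY_PROJECT_TOKEN")
+assert not SECRET_NAME_RE.match("123_invalid")
+assert not SECRET_NAME_RE.match("invalid-name")
+```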
+
+## Ready-to-Run Example
+
+This example demonstrates the complete encrypted secrets workflow:
+1. Store LLM API key via `PATCH /api/settings` (encrypted at rest)
+2. Fetch settings with `X-Expose-Secrets: encrypted` header
+3. Start conversation via `POST /api/conversations` with `secrets_encrypted=True`
+4. Poll conversation state and verify agent task completion
+5. Test custom secrets CRUD operations
+
+```python icon="python" expandable examples/02_remote_agent_server/12_settings_and_secrets_api.py
+
+```
+
+
+
+## Next Steps
+
+- **[Local Agent Server](/sdk/guides/agent-server/local-server)** - Run agents through a local HTTP server
+- **[Docker Sandboxed Server](/sdk/guides/agent-server/docker-sandbox)** - Run server in Docker for isolation
+- **[Agent Server Overview](/sdk/guides/agent-server/overview)** - Architecture and implementation details
diff --git a/sdk/guides/agent-settings.mdx b/sdk/guides/agent-settings.mdx
index 12f9e26e..05e8ed6c 100644
--- a/sdk/guides/agent-settings.mdx
+++ b/sdk/guides/agent-settings.mdx
@@ -74,10 +74,10 @@ This example is available on GitHub: [examples/01_standalone_sdk/46_agent_settin
```python icon="python" expandable examples/01_standalone_sdk/46_agent_settings.py
-"""Create, serialize, and deserialize AgentSettings, then build a working agent.
+"""Create, serialize, and deserialize OpenHandsAgentSettings, then build an agent.
Demonstrates:
-1. Configuring an agent entirely through AgentSettings (LLM, tools, condenser).
+1. Configuring an agent entirely through OpenHandsAgentSettings (LLM, tools, condenser).
2. Serializing settings to JSON and restoring them.
3. Building an Agent from settings via ``create_agent()``.
4. Running a short conversation to prove the settings take effect.
@@ -89,7 +89,7 @@ import os
from pydantic import SecretStr
-from openhands.sdk import LLM, AgentSettings, Conversation, Tool
+from openhands.sdk import LLM, Conversation, OpenHandsAgentSettings, Tool
from openhands.sdk.settings import CondenserSettings
from openhands.tools.file_editor import FileEditorTool
from openhands.tools.terminal import TerminalTool
@@ -99,7 +99,7 @@ from openhands.tools.terminal import TerminalTool
api_key = os.getenv("LLM_API_KEY")
assert api_key is not None, "LLM_API_KEY environment variable is not set."
-settings = AgentSettings(
+settings = OpenHandsAgentSettings(
llm=LLM(
model=os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929"),
api_key=SecretStr(api_key),
@@ -118,7 +118,7 @@ print("Serialized settings (JSON):")
print(json.dumps(payload, indent=2, default=str)[:800], "…")
print()
-restored = AgentSettings.model_validate(payload)
+restored = OpenHandsAgentSettings.model_validate(payload)
assert restored.condenser.enabled is True
assert restored.condenser.max_size == 50
assert len(restored.tools) == 2
@@ -149,7 +149,7 @@ print()
# ── 4. Different settings → different behavior ──────────────────────────
# Now create settings with ONLY the terminal tool and condenser disabled.
-terminal_only_settings = AgentSettings(
+terminal_only_settings = OpenHandsAgentSettings(
llm=settings.llm,
tools=[Tool(name=TerminalTool.name)],
condenser=CondenserSettings(enabled=False),
diff --git a/sdk/guides/browser-session-recording.mdx b/sdk/guides/browser-session-recording.mdx
index 39a50f09..176a6859 100644
--- a/sdk/guides/browser-session-recording.mdx
+++ b/sdk/guides/browser-session-recording.mdx
@@ -64,7 +64,10 @@ from openhands.sdk import (
)
from openhands.sdk.tool import Tool
from openhands.tools.browser_use import BrowserToolSet
-from openhands.tools.browser_use.definition import BROWSER_RECORDING_OUTPUT_DIR
+from openhands.tools.browser_use.definition import (
+ BROWSER_RECORDING_OUTPUT_DIR,
+ BrowserNavigateAction,
+)
logger = get_logger(__name__)
@@ -108,31 +111,39 @@ conversation = Conversation(
# The prompt instructs the agent to:
# 1. Start recording the browser session
-# 2. Browse to a website and perform some actions
+# 2. Navigate to a page and get its content
# 3. Stop recording (auto-saves to file)
PROMPT = """
Please complete the following task to demonstrate browser session recording:
-1. First, use `browser_start_recording` to begin recording the browser session.
-
-2. Then navigate to https://docs.openhands.dev/ and:
- - Get the page content
- - Scroll down the page
- - Get the browser state to see interactive elements
-
-3. Next, navigate to https://docs.openhands.dev/openhands/usage/cli/installation and:
- - Get the page content
- - Scroll down to see more content
-
-4. Finally, use `browser_stop_recording` to stop the recording.
- Events are automatically saved.
+1. Use `browser_start_recording` to begin recording.
+2. Navigate to https://docs.openhands.dev/ and:
+ - Get the page content
+ - Scroll down the page
+ - Get the browser state to see interactive elements
+3. Use `browser_stop_recording` to stop and save the recording.
"""
print("=" * 80)
print("Browser Session Recording Example")
print("=" * 80)
print("\nTask: Record an agent's browser session and save it for replay")
-print("\nStarting conversation with agent...\n")
+
+# Pre-initialize the browser so CDP is ready before the agent starts.
+# This avoids wasting LLM calls if the browser fails to connect.
+print("\nInitializing browser...")
+
+init_obs = conversation.execute_tool(
+ "browser_navigate",
+ BrowserNavigateAction(url="about:blank"),
+)
+if init_obs.is_error:
+ print(f"Browser initialization failed: {init_obs.text}")
+ print("Ensure Chrome/Chromium is installed and accessible.")
+ exit(1)
+print("Browser initialized successfully.\n")
+
+print("Starting conversation with agent...\n")
conversation.send_message(PROMPT)
conversation.run()
@@ -213,6 +224,9 @@ print("=" * 100)
cost = conversation.conversation_stats.get_combined_metrics().accumulated_cost
print(f"Conversation ID: {conversation.id}")
print(f"EXAMPLE_COST: {cost}")
+
+# Close conversation to shut down browser and other tool executors
+conversation.close()
```
diff --git a/sdk/guides/custom-tools.mdx b/sdk/guides/custom-tools.mdx
index 57cb96ce..f2e802a2 100644
--- a/sdk/guides/custom-tools.mdx
+++ b/sdk/guides/custom-tools.mdx
@@ -411,24 +411,29 @@ llm = LLM(
cwd = os.getcwd()
-def _make_bash_and_grep_tools(conv_state) -> list[ToolDefinition]:
- """Create terminal and custom grep tools sharing one executor."""
-
- terminal_executor = TerminalExecutor(working_dir=conv_state.workspace.working_dir)
- # terminal_tool = terminal_tool.set_executor(executor=terminal_executor)
- terminal_tool = TerminalTool.create(conv_state, executor=terminal_executor)[0]
+class BashAndGrepToolSet(ToolDefinition[Action, Observation]):
+ """Create terminal and grep tools sharing one terminal executor."""
- # Use the GrepTool.create() method with shared terminal_executor
- grep_tool = GrepTool.create(conv_state, terminal_executor=terminal_executor)[0]
-
- return [terminal_tool, grep_tool]
+ @classmethod
+ def create(cls, conv_state, **params) -> Sequence[ToolDefinition]:
+ terminal_executor = TerminalExecutor(
+ working_dir=conv_state.workspace.working_dir
+ )
+ terminal_tool = TerminalTool.create(
+ conv_state, executor=terminal_executor, **params
+ )[0]
+ grep_tool = GrepTool.create(
+ conv_state,
+ terminal_executor=terminal_executor,
+ )[0]
+ return [terminal_tool, grep_tool]
-register_tool("BashAndGrepToolSet", _make_bash_and_grep_tools)
+register_tool(BashAndGrepToolSet.name, BashAndGrepToolSet)
tools = [
Tool(name=FileEditorTool.name),
- Tool(name="BashAndGrepToolSet"),
+ Tool(name=BashAndGrepToolSet.name),
]
# Agent
diff --git a/sdk/guides/github-workflows/pr-review.mdx b/sdk/guides/github-workflows/pr-review.mdx
index 51c5d5ff..ebe7329d 100644
--- a/sdk/guides/github-workflows/pr-review.mdx
+++ b/sdk/guides/github-workflows/pr-review.mdx
@@ -151,28 +151,16 @@ jobs:
github.event.requested_reviewer.login == 'openhands-agent'
runs-on: ubuntu-latest
steps:
- - name: Checkout for composite action
- uses: actions/checkout@v4
- with:
- repository: OpenHands/software-agent-sdk
- # Use a specific version tag or branch (e.g., 'v1.0.0' or 'main')
- ref: main
- sparse-checkout: .github/actions/pr-review
-
- name: Run PR Review
- uses: ./.github/actions/pr-review
+ uses: OpenHands/extensions/plugins/pr-review@main
with:
- # LLM model(s) to use. Can be comma-separated for A/B testing
- # - one model will be randomly selected per review
llm-model: anthropic/claude-sonnet-4-5-20250929
llm-base-url: ''
- # [DEPRECATED] review-style is no longer used; standard and roasted are merged
- # review-style: roasted
- # Extensions version to use (version tag or branch name)
- extensions-version: main
- # Secrets
+ review-style: roasted
llm-api-key: ${{ secrets.LLM_API_KEY }}
github-token: ${{ secrets.GITHUB_TOKEN }}
+ # Optional: Laminar API key for observability
+ lmnr-api-key: ${{ secrets.LMNR_PROJECT_API_KEY }}
```
### Action Inputs
diff --git a/sdk/guides/llm-subscriptions.mdx b/sdk/guides/llm-subscriptions.mdx
index 524d6e71..c2966ba8 100644
--- a/sdk/guides/llm-subscriptions.mdx
+++ b/sdk/guides/llm-subscriptions.mdx
@@ -100,6 +100,7 @@ to access OpenAI's Codex models without consuming API credits.
The subscription_login() method handles:
- OAuth PKCE authentication flow
+- Device-code authentication for remote/headless environments
- Credential caching (~/.openhands/auth/)
- Automatic token refresh
@@ -111,21 +112,44 @@ Supported models:
Requirements:
- Active ChatGPT Plus or Pro subscription
-- Browser access for initial OAuth login
+- Browser access for initial OAuth login, or another browser/device for
+ device-code login
+
+Environment variables:
+- OPENHANDS_SUBSCRIPTION_MODEL: Model to use (default: gpt-5.2-codex)
+- OPENHANDS_SUBSCRIPTION_AUTH_METHOD: "browser" or "device_code"
+ (default: browser)
+- OPENHANDS_SUBSCRIPTION_FORCE_LOGIN: Set to "1" to force fresh login
+- SUBSCRIPTION_LOGIN_ONLY: Set to "1" to verify login without running an agent
"""
import os
+from typing import Literal
from openhands.sdk import LLM, Agent, Conversation, Tool
from openhands.tools.file_editor import FileEditorTool
from openhands.tools.terminal import TerminalTool
+AuthMethod = Literal["browser", "device_code"]
+
+
# First time: Opens browser for OAuth login
# Subsequent calls: Reuses cached credentials (auto-refreshes if expired)
+model = os.getenv("OPENHANDS_SUBSCRIPTION_MODEL", "gpt-5.2-codex")
+auth_method_env = os.getenv("OPENHANDS_SUBSCRIPTION_AUTH_METHOD", "browser")
+if auth_method_env not in ("browser", "device_code"):
+ raise ValueError(
+ "OPENHANDS_SUBSCRIPTION_AUTH_METHOD must be 'browser' or 'device_code'"
+ )
+auth_method: AuthMethod = auth_method_env
+force_login = os.getenv("OPENHANDS_SUBSCRIPTION_FORCE_LOGIN") == "1"
+
llm = LLM.subscription_login(
vendor="openai",
- model="gpt-5.2-codex", # or "gpt-5.2", "gpt-5.1-codex-max", "gpt-5.1-codex-mini"
+ model=model, # or "gpt-5.2", "gpt-5.1-codex-max", "gpt-5.1-codex-mini"
+ auth_method=auth_method,
+ force_login=force_login,
)
# Alternative: Force a fresh login (useful if credentials are stale)
@@ -135,9 +159,23 @@ llm = LLM.subscription_login(
# llm = LLM.subscription_login(
# vendor="openai", model="gpt-5.2-codex", open_browser=False
# )
+#
+# Alternative: Use device-code login for remote/headless environments
+# llm = LLM.subscription_login(
+# vendor="openai",
+# model="gpt-5.2-codex",
+# auth_method="device_code",
+# force_login=True,
+# )
# Verify subscription mode is active
print(f"Using subscription mode: {llm.is_subscription}")
+print(f"Model: {llm.model}")
+print(f"Auth method: {auth_method}")
+
+if os.getenv("SUBSCRIPTION_LOGIN_ONLY") == "1":
+ print("Login verified; skipping agent run because SUBSCRIPTION_LOGIN_ONLY=1.")
+ raise SystemExit(0)
# Use the LLM with an agent as usual
agent = Agent(
diff --git a/sdk/guides/plugins.mdx b/sdk/guides/plugins.mdx
index 74099f54..e05a2e0a 100644
--- a/sdk/guides/plugins.mdx
+++ b/sdk/guides/plugins.mdx
@@ -404,13 +404,13 @@ def demo_enable_disable_plugin(installed_dir: Path, plugin_name: str) -> None:
]
metadata = json.loads((installed_dir / ".installed.json").read_text())
- assert metadata["plugins"][plugin_name]["enabled"] is False
+ assert metadata["extensions"][plugin_name]["enabled"] is False
assert enable_plugin(plugin_name, installed_dir=installed_dir) is True
print_state("After re-enable", installed_dir)
metadata = json.loads((installed_dir / ".installed.json").read_text())
- assert metadata["plugins"][plugin_name]["enabled"] is True
+ assert metadata["extensions"][plugin_name]["enabled"] is True
assert plugin_name in [
plugin.name for plugin in load_installed_plugins(installed_dir=installed_dir)
]
diff --git a/sdk/guides/skill.mdx b/sdk/guides/skill.mdx
index 5bf4d441..c8ae727e 100644
--- a/sdk/guides/skill.mdx
+++ b/sdk/guides/skill.mdx
@@ -391,7 +391,7 @@ Usage:
import sys
from pathlib import Path
-from openhands.sdk.plugin import Marketplace
+from openhands.sdk.marketplace import Marketplace
from openhands.sdk.skills import (
install_skills_from_marketplace,
list_installed_skills,
@@ -470,6 +470,7 @@ def main():
if __name__ == "__main__":
main()
+ print("EXAMPLE_COST: 0")
```
@@ -769,7 +770,7 @@ from pathlib import Path
from pydantic import SecretStr
from openhands.sdk import LLM, Agent, AgentContext, Conversation
-from openhands.sdk.context.skills import (
+from openhands.sdk.skills import (
discover_skill_resources,
load_skills_from_dir,
)