Closed

Commits (22)
d149370  Add streaming support for Responses API (enyst, Oct 19, 2025)
d331abf  Document LLM streaming refactor plan (enyst, Oct 20, 2025)
e31b728  Refactor streaming chunk model and visualizer (enyst, Oct 20, 2025)
3983ce4  Merge remote-tracking branch 'upstream/main' into streaming-responses (enyst, Oct 21, 2025)
a341d0e  Merge remote-tracking branch 'upstream/main' into streaming-responses (enyst, Oct 21, 2025)
031fcf1  Merge remote-tracking branch 'upstream/main' into streaming-responses (enyst, Oct 23, 2025)
287c9c2  Merge branch 'main' into streaming-responses (enyst, Oct 23, 2025)
21bcaa5  Merge main branch into streaming-responses (openhands-agent, Nov 14, 2025)
f920696  Merge branch 'main' into streaming-responses (enyst, Nov 20, 2025)
a65dbda  Simplify streaming visualizer and always-persist streaming panels (enyst, Nov 20, 2025)
27f9653  Merge main into streaming-responses and resolve conflicts (openhands-agent, Nov 25, 2025)
dbbd0cf  Fix merge conflicts and type errors after merging main (openhands-agent, Nov 25, 2025)
7ac405d  Fix circular import and update tests for streaming API (openhands-agent, Nov 25, 2025)
847eaaa  Trigger CI re-run (openhands-agent, Nov 25, 2025)
80c06f7  remove md (xingyaoww, Nov 26, 2025)
9859171  rename example (xingyaoww, Nov 26, 2025)
71fce09  make LLMStreamChunk a basemodel (xingyaoww, Nov 26, 2025)
6a67bac  clean up some merges (xingyaoww, Nov 26, 2025)
ab8961a  simplify local convo and remove streaming event since that's probably… (xingyaoww, Nov 26, 2025)
fa57f08  update the right init (xingyaoww, Nov 26, 2025)
66e2092  rm streaming visualizer (xingyaoww, Nov 26, 2025)
9d1914c  some attempt to simplify (xingyaoww, Nov 26, 2025)
5 changes: 5 additions & 0 deletions FACTS.txt
@@ -0,0 +1,5 @@
1. The OpenHands Software Agent SDK is a set of Python and REST APIs for building agents that work with code, supporting tasks from simple README generation to complex multi-agent refactors and rewrites.

2. The SDK supports multiple workspace environments - agents can either use the local machine as their workspace or run inside ephemeral workspaces (e.g., in Docker or Kubernetes) using the Agent Server.

3. The project is organized into multiple sub-packages including openhands-sdk, openhands-tools, openhands-workspace, and openhands-agent-server, and powers production applications like the OpenHands CLI and OpenHands Cloud.
115 changes: 115 additions & 0 deletions examples/01_standalone_sdk/29_responses_streaming.py
@@ -0,0 +1,115 @@
"""Streaming Responses API example.

This demonstrates how to enable token streaming for the Responses API path,
log streaming deltas to ``./logs/stream/`` as JSON, and print the streamed text
incrementally to the terminal.
"""

from __future__ import annotations

import datetime as _dt
import json
import os
from pathlib import Path
from typing import Any

from pydantic import SecretStr

from openhands.sdk import (
Conversation,
ConversationCallbackType,
LLMStreamChunk,
get_logger,
)
from openhands.sdk.conversation.streaming_visualizer import create_streaming_visualizer
from openhands.sdk.conversation.visualizer import DefaultConversationVisualizer
from openhands.sdk.llm import LLM
from openhands.tools.preset.default import get_default_agent


PRINT_STREAM_TO_STDOUT = False


logger = get_logger(__name__)
LOG_DIR = Path("logs/stream")


def _serialize_event(event: LLMStreamChunk) -> dict[str, Any]:
record = {
"type": event.type,
"part_kind": event.part_kind,
"text": event.text_delta,
"arguments": event.arguments_delta,
"output_index": event.output_index,
"content_index": event.content_index,
"item_id": event.item_id,
"response_id": event.response_id,
"is_final": event.is_final,
}
return record


def main() -> None:
api_key = os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY")
if not api_key:
raise RuntimeError("Set LLM_API_KEY or OPENAI_API_KEY in your environment.")

model = os.getenv("LLM_MODEL", "openhands/gpt-5-codex")
base_url = os.getenv("LLM_BASE_URL")

llm = LLM(
model=model,
api_key=SecretStr(api_key),
base_url=base_url,
usage_id="stream-demo",
)

agent = get_default_agent(llm=llm, cli_mode=True)

timestamp = _dt.datetime.utcnow().strftime("%Y%m%d-%H%M%S")
LOG_DIR.mkdir(parents=True, exist_ok=True)
log_path = LOG_DIR / f"responses_stream_{timestamp}.jsonl"

def on_token(event: LLMStreamChunk) -> None:
record = _serialize_event(event)
with log_path.open("a", encoding="utf-8") as fp:
fp.write(json.dumps(record) + "\n")

delta = event.text_delta or event.arguments_delta
if delta and PRINT_STREAM_TO_STDOUT:
print(delta, end="", flush=True)
if event.is_final and event.part_kind == "status" and PRINT_STREAM_TO_STDOUT:
print("\n--- stream complete ---")

callbacks: list[ConversationCallbackType] = []
if not PRINT_STREAM_TO_STDOUT:
streaming_visualizer = create_streaming_visualizer()
callbacks.append(streaming_visualizer.on_event)

conversation = Conversation(
agent=agent,
workspace=os.getcwd(),
token_callbacks=[on_token],
callbacks=callbacks or None,
visualizer=None if callbacks else DefaultConversationVisualizer,
)

story_prompt = (
"Tell me a long story about LLM streaming, make sure it has multiple "
"paragraphs. Then write it on disk using a tool call."
)
conversation.send_message(story_prompt)
conversation.run()

cleanup_prompt = (
"Thank you. Please delete the streaming story file now that I've read it, "
"then confirm the deletion."
)
conversation.send_message(cleanup_prompt)
conversation.run()

logger.info("Stream log written to %s", log_path)


if __name__ == "__main__":
main()
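
For orientation, the essential wiring is much smaller than the full example. A minimal sketch under the same assumed public API (Conversation, LLMStreamChunk, and an agent built as above; logging and error handling elided):

    from openhands.sdk import Conversation, LLMStreamChunk

    def on_token(chunk: LLMStreamChunk) -> None:
        # text_delta carries assistant prose; arguments_delta carries tool-call JSON.
        delta = chunk.text_delta or chunk.arguments_delta
        if delta:
            print(delta, end="", flush=True)

    # `agent` would come from get_default_agent(llm=llm, cli_mode=True) as in the example.
    conversation = Conversation(agent=agent, workspace=".", token_callbacks=[on_token])
    conversation.send_message("Stream a two-line haiku about tokens.")
    conversation.run()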
10 changes: 10 additions & 0 deletions openhands-sdk/openhands/sdk/__init__.py
@@ -14,18 +14,24 @@
    RemoteConversation,
)
from openhands.sdk.conversation.conversation_stats import ConversationStats
from openhands.sdk.conversation.visualizer import (
    ConversationVisualizerBase,
    DefaultConversationVisualizer,
)
from openhands.sdk.event import Event, LLMConvertibleEvent
from openhands.sdk.event.llm_convertible import MessageEvent
from openhands.sdk.io import FileStore, LocalFileStore
from openhands.sdk.llm import (
    LLM,
    ImageContent,
    LLMRegistry,
    LLMStreamChunk,
    Message,
    RedactedThinkingBlock,
    RegistryEvent,
    TextContent,
    ThinkingBlock,
    TokenCallbackType,
)
from openhands.sdk.logger import get_logger
from openhands.sdk.mcp import (
@@ -58,7 +64,11 @@
__all__ = [
    "LLM",
    "LLMRegistry",
    "LLMStreamChunk",
    "TokenCallbackType",
    "ConversationStats",
    "ConversationVisualizerBase",
    "DefaultConversationVisualizer",
    "RegistryEvent",
    "Message",
    "TextContent",
7 changes: 6 additions & 1 deletion openhands-sdk/openhands/sdk/agent/agent.py
@@ -13,6 +13,7 @@
from openhands.sdk.conversation import (
    ConversationCallbackType,
    ConversationState,
    ConversationTokenCallbackType,
    LocalConversation,
)
from openhands.sdk.conversation.state import ConversationExecutionStatus
@@ -135,6 +136,7 @@ def step(
        self,
        conversation: LocalConversation,
        on_event: ConversationCallbackType,
        on_token: ConversationTokenCallbackType | None = None,
    ) -> None:
        state = conversation.state
        # Check for pending actions (implicit confirmation)
@@ -167,7 +169,10 @@
        try:
            llm_response = make_llm_completion(
                self.llm, _messages, tools=list(self.tools_map.values())
                self.llm,
                _messages,
                tools=list(self.tools_map.values()),
                on_token=on_token,
            )
        except FunctionCallValidationError as e:
            logger.warning(f"LLM generated malformed function call: {e}")
9 changes: 8 additions & 1 deletion openhands-sdk/openhands/sdk/agent/base.py
@@ -20,7 +20,10 @@

if TYPE_CHECKING:
    from openhands.sdk.conversation import ConversationState, LocalConversation
    from openhands.sdk.conversation.types import ConversationCallbackType
    from openhands.sdk.conversation.types import (
        ConversationCallbackType,
        ConversationTokenCallbackType,
    )


logger = get_logger(__name__)
@@ -239,6 +242,7 @@ def step(
        self,
        conversation: "LocalConversation",
        on_event: "ConversationCallbackType",
        on_token: "ConversationTokenCallbackType | None" = None,
    ) -> None:
        """Taking a step in the conversation.

@@ -250,6 +254,9 @@
        4.1 If conversation is finished, set state.execution_status to FINISHED
        4.2 Otherwise, just return, Conversation will kick off the next step

        If the underlying LLM supports streaming, partial deltas are forwarded to
        ``on_token`` before the full response is returned.

        NOTE: state will be mutated in-place.
        """
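
Because on_token defaults to None, existing AgentBase subclasses keep working; an override only needs to accept and forward the new keyword. A sketch of the expected shape (the body is illustrative, not taken from this diff):

    class MyAgent(AgentBase):
        def step(
            self,
            conversation: "LocalConversation",
            on_event: "ConversationCallbackType",
            on_token: "ConversationTokenCallbackType | None" = None,
        ) -> None:
            # Thread on_token through to the LLM call so deltas stream as they arrive.
            messages = self.build_messages(conversation)  # hypothetical helper
            llm_response = make_llm_completion(
                self.llm,
                messages,
                tools=list(self.tools_map.values()),
                on_token=on_token,
            )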
5 changes: 5 additions & 0 deletions openhands-sdk/openhands/sdk/agent/utils.py
@@ -12,6 +12,7 @@

from openhands.sdk.context.condenser.base import CondenserBase
from openhands.sdk.context.view import View
from openhands.sdk.conversation.types import ConversationTokenCallbackType
from openhands.sdk.event.base import Event, LLMConvertibleEvent
from openhands.sdk.event.condenser import Condensation
from openhands.sdk.llm import LLM, LLMResponse, Message
@@ -182,13 +183,15 @@ def make_llm_completion(
    llm: LLM,
    messages: list[Message],
    tools: list[ToolDefinition] | None = None,
    on_token: ConversationTokenCallbackType | None = None,
) -> LLMResponse:
    """Make an LLM completion call with the provided messages and tools.

    Args:
        llm: The LLM instance to use for completion
        messages: The messages to send to the LLM
        tools: Optional list of tools to provide to the LLM
        on_token: Optional callback for streaming token updates

    Returns:
        LLMResponse from the LLM completion call
@@ -200,10 +203,12 @@
            include=None,
            store=False,
            add_security_risk_prediction=True,
            on_token=on_token,
        )
    else:
        return llm.completion(
            messages=messages,
            tools=tools or [],
            add_security_risk_prediction=True,
            on_token=on_token,
        )
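
The same callback is forwarded whether the call routes through llm.responses or llm.completion, so callers can stay path-agnostic. A sketch of a call site that buffers deltas (assumes the LLMStreamChunk fields shown in the example earlier):

    buffered: list[str] = []

    def collect(chunk) -> None:
        # Only text deltas are buffered here; tool-call argument deltas are ignored.
        if chunk.text_delta:
            buffered.append(chunk.text_delta)

    response = make_llm_completion(llm, messages, tools=tools, on_token=collect)
    streamed_text = "".join(buffered)  # incremental view of the response text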
6 changes: 5 additions & 1 deletion openhands-sdk/openhands/sdk/conversation/__init__.py
@@ -11,7 +11,10 @@
    ConversationState,
)
from openhands.sdk.conversation.stuck_detector import StuckDetector
from openhands.sdk.conversation.types import ConversationCallbackType
from openhands.sdk.conversation.types import (
    ConversationCallbackType,
    ConversationTokenCallbackType,
)
from openhands.sdk.conversation.visualizer import (
    ConversationVisualizerBase,
    DefaultConversationVisualizer,
@@ -24,6 +27,7 @@
    "ConversationState",
    "ConversationExecutionStatus",
    "ConversationCallbackType",
    "ConversationTokenCallbackType",
    "DefaultConversationVisualizer",
    "ConversationVisualizerBase",
    "SecretRegistry",
21 changes: 15 additions & 6 deletions openhands-sdk/openhands/sdk/conversation/base.py
@@ -1,12 +1,16 @@
from abc import ABC, abstractmethod
from collections.abc import Iterable, Mapping
from pathlib import Path
from typing import TYPE_CHECKING, Protocol
from typing import TYPE_CHECKING, Protocol, TypeVar, cast

from openhands.sdk.conversation.conversation_stats import ConversationStats
from openhands.sdk.conversation.events_list_base import EventsListBase
from openhands.sdk.conversation.secret_registry import SecretValue
from openhands.sdk.conversation.types import ConversationCallbackType, ConversationID
from openhands.sdk.conversation.types import (
    ConversationCallbackType,
    ConversationID,
    ConversationTokenCallbackType,
)
from openhands.sdk.llm.llm import LLM
from openhands.sdk.llm.message import Message
from openhands.sdk.observability.laminar import (
@@ -27,6 +31,13 @@
    from openhands.sdk.conversation.state import ConversationExecutionStatus


CallbackType = TypeVar(
    "CallbackType",
    ConversationCallbackType,
    ConversationTokenCallbackType,
)


class ConversationStateProtocol(Protocol):
    """Protocol defining the interface for conversation state objects."""

@@ -235,9 +246,7 @@ def ask_agent(self, question: str) -> str:
        ...

    @staticmethod
    def compose_callbacks(
        callbacks: Iterable[ConversationCallbackType],
    ) -> ConversationCallbackType:
    def compose_callbacks(callbacks: Iterable[CallbackType]) -> CallbackType:
        """Compose multiple callbacks into a single callback function.

        Args:
@@ -252,4 +261,4 @@ def composed(event) -> None:
            if cb:
                cb(event)

        return composed
        return cast(CallbackType, composed)
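
With the TypeVar, compose_callbacks composes event callbacks and token callbacks alike while preserving the callback type. A sketch of the fan-out behavior (both callbacks are hypothetical):

    def log_chunk(chunk: LLMStreamChunk) -> None:
        logger.debug("delta: %r", chunk.text_delta)

    def echo_chunk(chunk: LLMStreamChunk) -> None:
        print(chunk.text_delta or "", end="", flush=True)

    on_token = BaseConversation.compose_callbacks([log_chunk, echo_chunk])
    # on_token(chunk) calls log_chunk(chunk), then echo_chunk(chunk); None entries are skipped.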
11 changes: 10 additions & 1 deletion openhands-sdk/openhands/sdk/conversation/conversation.py
@@ -4,7 +4,11 @@
from openhands.sdk.agent.base import AgentBase
from openhands.sdk.conversation.base import BaseConversation
from openhands.sdk.conversation.secret_registry import SecretValue
from openhands.sdk.conversation.types import ConversationCallbackType, ConversationID
from openhands.sdk.conversation.types import (
    ConversationCallbackType,
    ConversationID,
    ConversationTokenCallbackType,
)
from openhands.sdk.conversation.visualizer import (
    ConversationVisualizerBase,
    DefaultConversationVisualizer,
@@ -49,6 +53,7 @@ def __new__(
        persistence_dir: str | Path | None = None,
        conversation_id: ConversationID | None = None,
        callbacks: list[ConversationCallbackType] | None = None,
        token_callbacks: list[ConversationTokenCallbackType] | None = None,
        max_iteration_per_run: int = 500,
        stuck_detection: bool = True,
        visualizer: (
@@ -65,6 +70,7 @@
        workspace: RemoteWorkspace,
        conversation_id: ConversationID | None = None,
        callbacks: list[ConversationCallbackType] | None = None,
        token_callbacks: list[ConversationTokenCallbackType] | None = None,
        max_iteration_per_run: int = 500,
        stuck_detection: bool = True,
        visualizer: (
@@ -81,6 +87,7 @@
        persistence_dir: str | Path | None = None,
        conversation_id: ConversationID | None = None,
        callbacks: list[ConversationCallbackType] | None = None,
        token_callbacks: list[ConversationTokenCallbackType] | None = None,
        max_iteration_per_run: int = 500,
        stuck_detection: bool = True,
        visualizer: (
@@ -104,6 +111,7 @@
            agent=agent,
            conversation_id=conversation_id,
            callbacks=callbacks,
            token_callbacks=token_callbacks,
            max_iteration_per_run=max_iteration_per_run,
            stuck_detection=stuck_detection,
            visualizer=visualizer,
@@ -115,6 +123,7 @@
            agent=agent,
            conversation_id=conversation_id,
            callbacks=callbacks,
            token_callbacks=token_callbacks,
            max_iteration_per_run=max_iteration_per_run,
            stuck_detection=stuck_detection,
            visualizer=visualizer,