Commit db1963e

Add streaming support for Responses API
1 parent 4ffaa97 commit db1963e

File tree: 18 files changed, +824 −22 lines

18 files changed

+824
-22
lines changed

README.md

Lines changed: 25 additions & 0 deletions

@@ -141,6 +141,31 @@ registry.add("default", llm)
 llm = registry.get("default")
 ```
 
+### Streaming Responses
+
+You can receive incremental deltas from the Responses API by supplying a token
+callback when constructing a conversation. Each callback receives an
+``LLMStreamEvent`` describing the delta.
+
+```python
+from pathlib import Path
+from openhands.sdk import Conversation, LLMStreamEvent
+
+log_dir = Path("logs/stream")
+log_dir.mkdir(parents=True, exist_ok=True)
+
+def on_token(event: LLMStreamEvent) -> None:
+    print(event.text or event.arguments or "", end="", flush=True)
+
+conversation = Conversation(agent=agent, token_callbacks=[on_token])
+conversation.send_message("Summarize the benefits of token streaming.")
+conversation.run()
+```
+
+See `examples/01_standalone_sdk/24_responses_streaming.py` for a complete
+example that also persists each delta as JSON in `./logs/stream/`.
+
 ### Tools
 
 Tools provide agents with capabilities to interact with the environment. The SDK includes several built-in tools:
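
Beyond the token-callback path shown in the README, this commit also routes every delta through the conversation's regular event stream: `local_conversation.py` (below) wraps each `LLMStreamEvent` in a `StreamingDeltaEvent` before dispatching it via `_on_event`. A minimal sketch of consuming deltas that way, assuming `StreamingDeltaEvent` exposes the wrapped delta as its `stream_event` field, as the constructor call in this commit suggests:

```python
from openhands.sdk import Conversation
from openhands.sdk.event import StreamingDeltaEvent

def on_event(event) -> None:
    # Ordinary conversation callbacks see every event; filter for deltas.
    if isinstance(event, StreamingDeltaEvent):
        delta = event.stream_event  # the wrapped LLMStreamEvent
        print(delta.text or delta.arguments or "", end="", flush=True)

conversation = Conversation(agent=agent, callbacks=[on_event])
```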
examples/01_standalone_sdk/24_responses_streaming.py

Lines changed: 98 additions & 0 deletions

@@ -0,0 +1,98 @@
+"""Streaming Responses API example.
+
+This demonstrates how to enable token streaming for the Responses API path,
+log streaming deltas to ``./logs/stream/`` as JSON, and print the streamed text
+incrementally to the terminal.
+"""
+
+from __future__ import annotations
+
+import datetime as _dt
+import json
+import os
+from pathlib import Path
+from typing import Any
+
+from pydantic import SecretStr
+
+from openhands.sdk import Conversation, LLMStreamEvent, get_logger
+from openhands.sdk.llm import LLM
+from openhands.tools.preset.default import get_default_agent
+
+
+logger = get_logger(__name__)
+LOG_DIR = Path("logs/stream")
+
+
+def _serialize_event(event: LLMStreamEvent) -> dict[str, Any]:
+    record = {
+        "type": event.type,
+        "text": event.text,
+        "arguments": event.arguments,
+        "output_index": event.output_index,
+        "content_index": event.content_index,
+        "item_id": event.item_id,
+        "is_final": event.is_final,
+    }
+    return record
+
+
+def main() -> None:
+    api_key = os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY")
+    if not api_key:
+        raise RuntimeError("Set LLM_API_KEY or OPENAI_API_KEY in your environment.")
+
+    model = os.getenv("LLM_MODEL", "openhands/gpt-5-codex")
+    base_url = os.getenv("LLM_BASE_URL")
+
+    llm = LLM(
+        model=model,
+        api_key=SecretStr(api_key),
+        base_url=base_url,
+        service_id="stream-demo",
+    )
+
+    agent = get_default_agent(llm=llm, cli_mode=True)
+
+    timestamp = _dt.datetime.utcnow().strftime("%Y%m%d-%H%M%S")
+    LOG_DIR.mkdir(parents=True, exist_ok=True)
+    log_path = LOG_DIR / f"responses_stream_{timestamp}.jsonl"
+
+    def on_token(event: LLMStreamEvent) -> None:
+        record = _serialize_event(event)
+        with log_path.open("a", encoding="utf-8") as fp:
+            fp.write(json.dumps(record) + "\n")
+
+        stream_chunk = event.text or event.arguments
+        if stream_chunk:
+            print(stream_chunk, end="", flush=True)
+        if event.is_final:
+            print("\n--- stream complete ---")
+
+    conversation = Conversation(
+        agent=agent,
+        workspace=os.getcwd(),
+        token_callbacks=[on_token],
+    )
+
+    story_prompt = (
+        "Compose a vivid, many-paragraph story about a developer discovering "
+        "the value of streaming token updates. Stream the narrative as you "
+        "write it, and when you complete the story, save the full text to a "
+        "file named 'streaming_story.md'."
+    )
+    conversation.send_message(story_prompt)
+    conversation.run()
+
+    cleanup_prompt = (
+        "Thank you. Please delete streaming_story.md now that I've read it, "
+        "then confirm the deletion."
+    )
+    conversation.send_message(cleanup_prompt)
+    conversation.run()
+
+    logger.info("Stream log written to %s", log_path)
+
+
+if __name__ == "__main__":
+    main()
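
A usage note on the example above: because each delta is appended as one JSON object per line, the log can be replayed offline. A small sketch, assuming at least one `responses_stream_*.jsonl` file already exists under `logs/stream/` (the keys match `_serialize_event`):

```python
import json
from pathlib import Path

# Pick the most recent stream log produced by the example above.
log_path = sorted(Path("logs/stream").glob("responses_stream_*.jsonl"))[-1]
with log_path.open(encoding="utf-8") as fp:
    for line in fp:
        record = json.loads(line)
        # Each record typically carries either a text delta or a
        # tool-call arguments fragment, mirroring the on_token printer.
        print(record["text"] or record["arguments"] or "", end="", flush=True)
```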

openhands/sdk/__init__.py

Lines changed: 4 additions & 0 deletions

@@ -20,11 +20,13 @@
     LLM,
     ImageContent,
     LLMRegistry,
+    LLMStreamEvent,
     Message,
     RedactedThinkingBlock,
     RegistryEvent,
     TextContent,
     ThinkingBlock,
+    TokenCallbackType,
 )
 from openhands.sdk.logger import get_logger
 from openhands.sdk.mcp import (
@@ -58,6 +60,8 @@
 __all__ = [
     "LLM",
     "LLMRegistry",
+    "LLMStreamEvent",
+    "TokenCallbackType",
     "ConversationStats",
     "RegistryEvent",
     "Message",

openhands/sdk/agent/agent.py

Lines changed: 8 additions & 1 deletion

@@ -5,7 +5,11 @@
 import openhands.sdk.security.risk as risk
 from openhands.sdk.agent.base import AgentBase
 from openhands.sdk.context.view import View
-from openhands.sdk.conversation import ConversationCallbackType, ConversationState
+from openhands.sdk.conversation import (
+    ConversationCallbackType,
+    ConversationState,
+    ConversationTokenCallbackType,
+)
 from openhands.sdk.conversation.state import AgentExecutionStatus
 from openhands.sdk.event import (
     ActionEvent,
@@ -133,6 +137,7 @@ def step(
         self,
         state: ConversationState,
         on_event: ConversationCallbackType,
+        on_token: ConversationTokenCallbackType | None = None,
     ) -> None:
         # Check for pending actions (implicit confirmation)
         # and execute them before sampling new actions.
@@ -182,13 +187,15 @@ def step(
                     store=False,
                     add_security_risk_prediction=self._add_security_risk_prediction,
                     metadata=self.llm.metadata,
+                    on_token=on_token,
                 )
             else:
                 llm_response = self.llm.completion(
                     messages=_messages,
                     tools=list(self.tools_map.values()),
                     extra_body={"metadata": self.llm.metadata},
                     add_security_risk_prediction=self._add_security_risk_prediction,
+                    on_token=on_token,
                 )
         except Exception as e:
             # If there is a condenser registered and the exception is a context window

openhands/sdk/agent/base.py

Lines changed: 8 additions & 1 deletion

@@ -22,7 +22,10 @@
 
 if TYPE_CHECKING:
     from openhands.sdk.conversation.state import ConversationState
-    from openhands.sdk.conversation.types import ConversationCallbackType
+    from openhands.sdk.conversation.types import (
+        ConversationCallbackType,
+        ConversationTokenCallbackType,
+    )
 
 logger = get_logger(__name__)
 
@@ -236,6 +239,7 @@ def step(
         self,
         state: "ConversationState",
         on_event: "ConversationCallbackType",
+        on_token: "ConversationTokenCallbackType | None" = None,
     ) -> None:
         """Taking a step in the conversation.
 
@@ -247,6 +251,9 @@ def step(
         4.1 If conversation is finished, set state.agent_status to FINISHED
         4.2 Otherwise, just return, Conversation will kick off the next step
 
+        If the underlying LLM supports streaming, partial deltas are forwarded to
+        ``on_token`` before the full response is returned.
+
         NOTE: state will be mutated in-place.
         """
openhands/sdk/conversation/__init__.py

Lines changed: 5 additions & 1 deletion

@@ -7,7 +7,10 @@
 from openhands.sdk.conversation.secrets_manager import SecretsManager
 from openhands.sdk.conversation.state import ConversationState
 from openhands.sdk.conversation.stuck_detector import StuckDetector
-from openhands.sdk.conversation.types import ConversationCallbackType
+from openhands.sdk.conversation.types import (
+    ConversationCallbackType,
+    ConversationTokenCallbackType,
+)
 from openhands.sdk.conversation.visualizer import ConversationVisualizer
 
 
@@ -16,6 +19,7 @@
     "BaseConversation",
     "ConversationState",
     "ConversationCallbackType",
+    "ConversationTokenCallbackType",
     "ConversationVisualizer",
     "SecretsManager",
     "StuckDetector",

openhands/sdk/conversation/conversation.py

Lines changed: 10 additions & 1 deletion

@@ -3,7 +3,11 @@
 from openhands.sdk.agent.base import AgentBase
 from openhands.sdk.conversation.base import BaseConversation
 from openhands.sdk.conversation.secrets_manager import SecretValue
-from openhands.sdk.conversation.types import ConversationCallbackType, ConversationID
+from openhands.sdk.conversation.types import (
+    ConversationCallbackType,
+    ConversationID,
+    ConversationTokenCallbackType,
+)
 from openhands.sdk.logger import get_logger
 from openhands.sdk.workspace import LocalWorkspace, RemoteWorkspace
 
@@ -32,6 +36,7 @@ def __new__(
         persistence_dir: str | None = None,
         conversation_id: ConversationID | None = None,
         callbacks: list[ConversationCallbackType] | None = None,
+        token_callbacks: list[ConversationTokenCallbackType] | None = None,
         max_iteration_per_run: int = 500,
         stuck_detection: bool = True,
         visualize: bool = True,
@@ -46,6 +51,7 @@ def __new__(
         workspace: RemoteWorkspace,
         conversation_id: ConversationID | None = None,
         callbacks: list[ConversationCallbackType] | None = None,
+        token_callbacks: list[ConversationTokenCallbackType] | None = None,
         max_iteration_per_run: int = 500,
         stuck_detection: bool = True,
         visualize: bool = True,
@@ -60,6 +66,7 @@ def __new__(
         persistence_dir: str | None = None,
         conversation_id: ConversationID | None = None,
         callbacks: list[ConversationCallbackType] | None = None,
+        token_callbacks: list[ConversationTokenCallbackType] | None = None,
        max_iteration_per_run: int = 500,
         stuck_detection: bool = True,
         visualize: bool = True,
@@ -81,6 +88,7 @@ def __new__(
             agent=agent,
             conversation_id=conversation_id,
             callbacks=callbacks,
+            token_callbacks=token_callbacks,
             max_iteration_per_run=max_iteration_per_run,
             stuck_detection=stuck_detection,
             visualize=visualize,
@@ -92,6 +100,7 @@ def __new__(
             agent=agent,
             conversation_id=conversation_id,
             callbacks=callbacks,
+            token_callbacks=token_callbacks,
             max_iteration_per_run=max_iteration_per_run,
             stuck_detection=stuck_detection,
             visualize=visualize,

openhands/sdk/conversation/impl/local_conversation.py

Lines changed: 38 additions & 3 deletions

@@ -8,14 +8,19 @@
 from openhands.sdk.conversation.state import AgentExecutionStatus, ConversationState
 from openhands.sdk.conversation.stuck_detector import StuckDetector
 from openhands.sdk.conversation.title_utils import generate_conversation_title
-from openhands.sdk.conversation.types import ConversationCallbackType, ConversationID
+from openhands.sdk.conversation.types import (
+    ConversationCallbackType,
+    ConversationID,
+    ConversationTokenCallbackType,
+)
 from openhands.sdk.conversation.visualizer import create_default_visualizer
 from openhands.sdk.event import (
     MessageEvent,
     PauseEvent,
+    StreamingDeltaEvent,
     UserRejectObservation,
 )
-from openhands.sdk.llm import LLM, Message, TextContent
+from openhands.sdk.llm import LLM, LLMStreamEvent, Message, TextContent
 from openhands.sdk.llm.llm_registry import LLMRegistry
 from openhands.sdk.logger import get_logger
 from openhands.sdk.security.confirmation_policy import (
@@ -35,6 +40,7 @@ def __init__(
         persistence_dir: str | None = None,
         conversation_id: ConversationID | None = None,
         callbacks: list[ConversationCallbackType] | None = None,
+        token_callbacks: list[ConversationTokenCallbackType] | None = None,
         max_iteration_per_run: int = 500,
         stuck_detection: bool = True,
         visualize: bool = True,
@@ -110,6 +116,31 @@ def _default_callback(e):
         for llm in list(self.agent.get_all_llms()):
             self.llm_registry.add(llm)
 
+        def _compose_token_callbacks(
+            callbacks: list[ConversationTokenCallbackType],
+        ) -> ConversationTokenCallbackType:
+            def _composed(event):
+                for cb in callbacks:
+                    cb(event)
+
+            return _composed
+
+        user_token_callback = (
+            _compose_token_callbacks(token_callbacks) if token_callbacks else None
+        )
+
+        def _handle_stream_event(stream_event: LLMStreamEvent) -> None:
+            try:
+                self._on_event(
+                    StreamingDeltaEvent(source="agent", stream_event=stream_event)
+                )
+            except Exception:
+                logger.exception("stream_event_processing_error", exc_info=True)
+            if user_token_callback:
+                user_token_callback(stream_event)
+
+        self._on_token = _handle_stream_event
+
         # Initialize secrets if provided
         if secrets:
             # Convert dict[str, str] to dict[str, SecretValue]
@@ -242,7 +273,11 @@ def run(self) -> None:
                 self._state.agent_status = AgentExecutionStatus.RUNNING
 
                 # step must mutate the SAME state object
-                self.agent.step(self._state, on_event=self._on_event)
+                self.agent.step(
+                    self._state,
+                    on_event=self._on_event,
+                    on_token=self._on_token,
+                )
                 iteration += 1
 
                 # Check for non-finished terminal conditions
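
`_compose_token_callbacks` above fans a single stream event out to every registered token callback in registration order. A standalone sketch of that pattern, with `str` events standing in for `LLMStreamEvent`:

```python
from typing import Callable

def compose(callbacks: list[Callable[[str], None]]) -> Callable[[str], None]:
    def composed(event: str) -> None:
        # Invoke every callback with the same event, preserving order.
        for cb in callbacks:
            cb(event)
    return composed

seen: list[str] = []
fanout = compose([lambda e: seen.append(f"first:{e}"),
                  lambda e: seen.append(f"second:{e}")])
fanout("delta")
assert seen == ["first:delta", "second:delta"]
```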
