Closed

Commits (22)
d149370  Add streaming support for Responses API (enyst, Oct 19, 2025)
d331abf  Document LLM streaming refactor plan (enyst, Oct 20, 2025)
e31b728  Refactor streaming chunk model and visualizer (enyst, Oct 20, 2025)
3983ce4  Merge remote-tracking branch 'upstream/main' into streaming-responses (enyst, Oct 21, 2025)
a341d0e  Merge remote-tracking branch 'upstream/main' into streaming-responses (enyst, Oct 21, 2025)
031fcf1  Merge remote-tracking branch 'upstream/main' into streaming-responses (enyst, Oct 23, 2025)
287c9c2  Merge branch 'main' into streaming-responses (enyst, Oct 23, 2025)
21bcaa5  Merge main branch into streaming-responses (openhands-agent, Nov 14, 2025)
f920696  Merge branch 'main' into streaming-responses (enyst, Nov 20, 2025)
a65dbda  Simplify streaming visualizer and always-persist streaming panels (enyst, Nov 20, 2025)
27f9653  Merge main into streaming-responses and resolve conflicts (openhands-agent, Nov 25, 2025)
dbbd0cf  Fix merge conflicts and type errors after merging main (openhands-agent, Nov 25, 2025)
7ac405d  Fix circular import and update tests for streaming API (openhands-agent, Nov 25, 2025)
847eaaa  Trigger CI re-run (openhands-agent, Nov 25, 2025)
80c06f7  remove md (xingyaoww, Nov 26, 2025)
9859171  rename example (xingyaoww, Nov 26, 2025)
71fce09  make LLMStreamChunk a basemodel (xingyaoww, Nov 26, 2025)
6a67bac  clean up some merges (xingyaoww, Nov 26, 2025)
ab8961a  simplify local convo and remove streaming event since that's probably… (xingyaoww, Nov 26, 2025)
fa57f08  update the right init (xingyaoww, Nov 26, 2025)
66e2092  rm streaming visualizer (xingyaoww, Nov 26, 2025)
9d1914c  some attempt to simplify (xingyaoww, Nov 26, 2025)
5 changes: 5 additions & 0 deletions FACTS.txt
@@ -0,0 +1,5 @@
1. The OpenHands Software Agent SDK is a set of Python and REST APIs for building agents that work with code, supporting tasks from simple README generation to complex multi-agent refactors and rewrites.

2. The SDK supports multiple workspace environments - agents can either use the local machine as their workspace or run inside ephemeral workspaces (e.g., in Docker or Kubernetes) using the Agent Server.

3. The project is organized into multiple sub-packages including openhands-sdk, openhands-tools, openhands-workspace, and openhands-agent-server, and powers production applications like the OpenHands CLI and OpenHands Cloud.
115 changes: 115 additions & 0 deletions examples/01_standalone_sdk/29_responses_streaming.py
@@ -0,0 +1,115 @@
"""Streaming Responses API example.

This demonstrates how to enable token streaming for the Responses API path,
log streaming deltas to ``./logs/stream/`` as JSON, and print the streamed text
incrementally to the terminal.
"""

from __future__ import annotations

import datetime as _dt
import json
import os
from pathlib import Path
from typing import Any

from pydantic import SecretStr

from openhands.sdk import (
Conversation,
ConversationCallbackType,
LLMStreamChunk,
get_logger,
)
from openhands.sdk.conversation.streaming_visualizer import create_streaming_visualizer
from openhands.sdk.conversation.visualizer import DefaultConversationVisualizer
from openhands.sdk.llm import LLM
from openhands.tools.preset.default import get_default_agent


PRINT_STREAM_TO_STDOUT = False


logger = get_logger(__name__)
LOG_DIR = Path("logs/stream")


def _serialize_event(event: LLMStreamChunk) -> dict[str, Any]:
record = {
"type": event.type,
"part_kind": event.part_kind,
"text": event.text_delta,
"arguments": event.arguments_delta,
"output_index": event.output_index,
"content_index": event.content_index,
"item_id": event.item_id,
"response_id": event.response_id,
"is_final": event.is_final,
}
return record


def main() -> None:
api_key = os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY")
if not api_key:
raise RuntimeError("Set LLM_API_KEY or OPENAI_API_KEY in your environment.")

model = os.getenv("LLM_MODEL", "openhands/gpt-5-codex")
base_url = os.getenv("LLM_BASE_URL")

llm = LLM(
model=model,
api_key=SecretStr(api_key),
base_url=base_url,
usage_id="stream-demo",
)

agent = get_default_agent(llm=llm, cli_mode=True)

timestamp = _dt.datetime.utcnow().strftime("%Y%m%d-%H%M%S")
LOG_DIR.mkdir(parents=True, exist_ok=True)
log_path = LOG_DIR / f"responses_stream_{timestamp}.jsonl"

def on_token(event: LLMStreamChunk) -> None:
record = _serialize_event(event)
with log_path.open("a", encoding="utf-8") as fp:
fp.write(json.dumps(record) + "\n")

delta = event.text_delta or event.arguments_delta
if delta and PRINT_STREAM_TO_STDOUT:
print(delta, end="", flush=True)
if event.is_final and event.part_kind == "status" and PRINT_STREAM_TO_STDOUT:
print("\n--- stream complete ---")

callbacks: list[ConversationCallbackType] = []
if not PRINT_STREAM_TO_STDOUT:
streaming_visualizer = create_streaming_visualizer()
callbacks.append(streaming_visualizer.on_event)

conversation = Conversation(
agent=agent,
workspace=os.getcwd(),
token_callbacks=[on_token],
callbacks=callbacks or None,
visualizer=None if callbacks else DefaultConversationVisualizer,
)

story_prompt = (
"Tell me a long story about LLM streaming, make sure it has multiple "
"paragraphs. Then write it on disk using a tool call."
)
conversation.send_message(story_prompt)
conversation.run()

cleanup_prompt = (
"Thank you. Please delete the streaming story file now that I've read it, "
"then confirm the deletion."
)
conversation.send_message(cleanup_prompt)
conversation.run()

logger.info("Stream log written to %s", log_path)


if __name__ == "__main__":
main()
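
For orientation, the essential wiring is much smaller than the full example. A minimal sketch under the same assumed public API (Conversation, LLMStreamChunk, and an agent built as above; logging and error handling elided):

    from openhands.sdk import Conversation, LLMStreamChunk

    def on_token(chunk: LLMStreamChunk) -> None:
        # text_delta carries assistant prose; arguments_delta carries tool-call JSON.
        delta = chunk.text_delta or chunk.arguments_delta
        if delta:
            print(delta, end="", flush=True)

    # `agent` would come from get_default_agent(llm=llm, cli_mode=True) as in the example.
    conversation = Conversation(agent=agent, workspace=".", token_callbacks=[on_token])
    conversation.send_message("Stream a two-line haiku about tokens.")
    conversation.run()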
10 changes: 10 additions & 0 deletions openhands-sdk/openhands/sdk/__init__.py
@@ -14,18 +14,24 @@
    RemoteConversation,
)
from openhands.sdk.conversation.conversation_stats import ConversationStats
from openhands.sdk.conversation.visualizer import (
    ConversationVisualizerBase,
    DefaultConversationVisualizer,
)
from openhands.sdk.event import Event, LLMConvertibleEvent
from openhands.sdk.event.llm_convertible import MessageEvent
from openhands.sdk.io import FileStore, LocalFileStore
from openhands.sdk.llm import (
    LLM,
    ImageContent,
    LLMRegistry,
    LLMStreamChunk,
    Message,
    RedactedThinkingBlock,
    RegistryEvent,
    TextContent,
    ThinkingBlock,
    TokenCallbackType,
)
from openhands.sdk.logger import get_logger
from openhands.sdk.mcp import (
@@ -58,7 +64,11 @@
__all__ = [
    "LLM",
    "LLMRegistry",
    "LLMStreamChunk",
    "TokenCallbackType",
    "ConversationStats",
    "ConversationVisualizerBase",
    "DefaultConversationVisualizer",
    "RegistryEvent",
    "Message",
    "TextContent",
7 changes: 6 additions & 1 deletion openhands-sdk/openhands/sdk/agent/agent.py
@@ -13,6 +13,7 @@
from openhands.sdk.conversation import (
    ConversationCallbackType,
    ConversationState,
    ConversationTokenCallbackType,
    LocalConversation,
)
from openhands.sdk.conversation.state import ConversationExecutionStatus
@@ -135,6 +136,7 @@ def step(
        self,
        conversation: LocalConversation,
        on_event: ConversationCallbackType,
        on_token: ConversationTokenCallbackType | None = None,
    ) -> None:
        state = conversation.state
        # Check for pending actions (implicit confirmation)
@@ -167,7 +169,10 @@
        try:
            llm_response = make_llm_completion(
                self.llm, _messages, tools=list(self.tools_map.values())
                self.llm,
                _messages,
                tools=list(self.tools_map.values()),
                on_token=on_token,
            )
        except FunctionCallValidationError as e:
            logger.warning(f"LLM generated malformed function call: {e}")
9 changes: 8 additions & 1 deletion openhands-sdk/openhands/sdk/agent/base.py
@@ -20,7 +20,10 @@

if TYPE_CHECKING:
    from openhands.sdk.conversation import ConversationState, LocalConversation
    from openhands.sdk.conversation.types import ConversationCallbackType
    from openhands.sdk.conversation.types import (
        ConversationCallbackType,
        ConversationTokenCallbackType,
    )


logger = get_logger(__name__)
@@ -239,6 +242,7 @@ def step(
        self,
        conversation: "LocalConversation",
        on_event: "ConversationCallbackType",
        on_token: "ConversationTokenCallbackType | None" = None,
    ) -> None:
        """Taking a step in the conversation.

@@ -250,6 +254,9 @@
        4.1 If conversation is finished, set state.execution_status to FINISHED
        4.2 Otherwise, just return, Conversation will kick off the next step

        If the underlying LLM supports streaming, partial deltas are forwarded to
        ``on_token`` before the full response is returned.

        NOTE: state will be mutated in-place.
        """
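
Because on_token defaults to None, existing AgentBase subclasses keep working; an override only needs to accept and forward the new keyword. A sketch of the expected shape (the body is illustrative, not taken from this diff):

    class MyAgent(AgentBase):
        def step(
            self,
            conversation: "LocalConversation",
            on_event: "ConversationCallbackType",
            on_token: "ConversationTokenCallbackType | None" = None,
        ) -> None:
            # Thread on_token through to the LLM call so deltas stream as they arrive.
            messages = self.build_messages(conversation)  # hypothetical helper
            llm_response = make_llm_completion(
                self.llm,
                messages,
                tools=list(self.tools_map.values()),
                on_token=on_token,
            )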
5 changes: 5 additions & 0 deletions openhands-sdk/openhands/sdk/agent/utils.py
@@ -12,6 +12,7 @@

from openhands.sdk.context.condenser.base import CondenserBase
from openhands.sdk.context.view import View
from openhands.sdk.conversation.types import ConversationTokenCallbackType
from openhands.sdk.event.base import Event, LLMConvertibleEvent
from openhands.sdk.event.condenser import Condensation
from openhands.sdk.llm import LLM, LLMResponse, Message
@@ -182,13 +183,15 @@ def make_llm_completion(
    llm: LLM,
    messages: list[Message],
    tools: list[ToolDefinition] | None = None,
    on_token: ConversationTokenCallbackType | None = None,
) -> LLMResponse:
    """Make an LLM completion call with the provided messages and tools.

    Args:
        llm: The LLM instance to use for completion
        messages: The messages to send to the LLM
        tools: Optional list of tools to provide to the LLM
        on_token: Optional callback for streaming token updates

    Returns:
        LLMResponse from the LLM completion call
@@ -200,10 +203,12 @@
            include=None,
            store=False,
            add_security_risk_prediction=True,
            on_token=on_token,
        )
    else:
        return llm.completion(
            messages=messages,
            tools=tools or [],
            add_security_risk_prediction=True,
            on_token=on_token,
        )
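
The same callback is forwarded whether the call routes through llm.responses or llm.completion, so callers can stay path-agnostic. A sketch of a call site that buffers deltas (assumes the LLMStreamChunk fields shown in the example earlier):

    buffered: list[str] = []

    def collect(chunk) -> None:
        # Only text deltas are buffered here; tool-call argument deltas are ignored.
        if chunk.text_delta:
            buffered.append(chunk.text_delta)

    response = make_llm_completion(llm, messages, tools=tools, on_token=collect)
    streamed_text = "".join(buffered)  # incremental view of the response text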
6 changes: 5 additions & 1 deletion openhands-sdk/openhands/sdk/conversation/__init__.py
@@ -11,7 +11,10 @@
    ConversationState,
)
from openhands.sdk.conversation.stuck_detector import StuckDetector
from openhands.sdk.conversation.types import ConversationCallbackType
from openhands.sdk.conversation.types import (
    ConversationCallbackType,
    ConversationTokenCallbackType,
)
from openhands.sdk.conversation.visualizer import (
    ConversationVisualizerBase,
    DefaultConversationVisualizer,
@@ -24,6 +27,7 @@
    "ConversationState",
    "ConversationExecutionStatus",
    "ConversationCallbackType",
    "ConversationTokenCallbackType",
    "DefaultConversationVisualizer",
    "ConversationVisualizerBase",
    "SecretRegistry",
21 changes: 15 additions & 6 deletions openhands-sdk/openhands/sdk/conversation/base.py
@@ -1,12 +1,16 @@
from abc import ABC, abstractmethod
from collections.abc import Iterable, Mapping
from pathlib import Path
from typing import TYPE_CHECKING, Protocol
from typing import TYPE_CHECKING, Protocol, TypeVar, cast

from openhands.sdk.conversation.conversation_stats import ConversationStats
from openhands.sdk.conversation.events_list_base import EventsListBase
from openhands.sdk.conversation.secret_registry import SecretValue
from openhands.sdk.conversation.types import ConversationCallbackType, ConversationID
from openhands.sdk.conversation.types import (
    ConversationCallbackType,
    ConversationID,
    ConversationTokenCallbackType,
)
from openhands.sdk.llm.llm import LLM
from openhands.sdk.llm.message import Message
from openhands.sdk.observability.laminar import (
@@ -27,6 +31,13 @@
    from openhands.sdk.conversation.state import ConversationExecutionStatus


CallbackType = TypeVar(
    "CallbackType",
    ConversationCallbackType,
    ConversationTokenCallbackType,
)


class ConversationStateProtocol(Protocol):
    """Protocol defining the interface for conversation state objects."""

@@ -235,9 +246,7 @@ def ask_agent(self, question: str) -> str:
        ...

    @staticmethod
    def compose_callbacks(
        callbacks: Iterable[ConversationCallbackType],
    ) -> ConversationCallbackType:
    def compose_callbacks(callbacks: Iterable[CallbackType]) -> CallbackType:
        """Compose multiple callbacks into a single callback function.

        Args:
@@ -252,4 +261,4 @@ def composed(event) -> None:
            if cb:
                cb(event)

        return composed
        return cast(CallbackType, composed)
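
With the TypeVar, compose_callbacks composes event callbacks and token callbacks alike while preserving the callback type. A sketch of the fan-out behavior (both callbacks are hypothetical):

    def log_chunk(chunk: LLMStreamChunk) -> None:
        logger.debug("delta: %r", chunk.text_delta)

    def echo_chunk(chunk: LLMStreamChunk) -> None:
        print(chunk.text_delta or "", end="", flush=True)

    on_token = BaseConversation.compose_callbacks([log_chunk, echo_chunk])
    # on_token(chunk) calls log_chunk(chunk), then echo_chunk(chunk); None entries are skipped.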
11 changes: 10 additions & 1 deletion openhands-sdk/openhands/sdk/conversation/conversation.py
@@ -4,7 +4,11 @@
from openhands.sdk.agent.base import AgentBase
from openhands.sdk.conversation.base import BaseConversation
from openhands.sdk.conversation.secret_registry import SecretValue
from openhands.sdk.conversation.types import ConversationCallbackType, ConversationID
from openhands.sdk.conversation.types import (
    ConversationCallbackType,
    ConversationID,
    ConversationTokenCallbackType,
)
from openhands.sdk.conversation.visualizer import (
    ConversationVisualizerBase,
    DefaultConversationVisualizer,
@@ -49,6 +53,7 @@ def __new__(
        persistence_dir: str | Path | None = None,
        conversation_id: ConversationID | None = None,
        callbacks: list[ConversationCallbackType] | None = None,
        token_callbacks: list[ConversationTokenCallbackType] | None = None,
        max_iteration_per_run: int = 500,
        stuck_detection: bool = True,
        visualizer: (
@@ -65,6 +70,7 @@
        workspace: RemoteWorkspace,
        conversation_id: ConversationID | None = None,
        callbacks: list[ConversationCallbackType] | None = None,
        token_callbacks: list[ConversationTokenCallbackType] | None = None,
        max_iteration_per_run: int = 500,
        stuck_detection: bool = True,
        visualizer: (
@@ -81,6 +87,7 @@
        persistence_dir: str | Path | None = None,
        conversation_id: ConversationID | None = None,
        callbacks: list[ConversationCallbackType] | None = None,
        token_callbacks: list[ConversationTokenCallbackType] | None = None,
        max_iteration_per_run: int = 500,
        stuck_detection: bool = True,
        visualizer: (
@@ -104,6 +111,7 @@
            agent=agent,
            conversation_id=conversation_id,
            callbacks=callbacks,
            token_callbacks=token_callbacks,
            max_iteration_per_run=max_iteration_per_run,
            stuck_detection=stuck_detection,
            visualizer=visualizer,
@@ -115,6 +123,7 @@
            agent=agent,
            conversation_id=conversation_id,
            callbacks=callbacks,
            token_callbacks=token_callbacks,
            max_iteration_per_run=max_iteration_per_run,
            stuck_detection=stuck_detection,
            visualizer=visualizer,