### `llm_streaming_refactor_plan.md` (13 additions, 16 deletions)
```diff
@@ -74,16 +74,12 @@ Keeping the raw LiteLLM payload inside each `LLMStreamChunk` means we do **not**
 
 ## Visualization strategy
 
-1. **Track a hierarchy per conversation event.** When a LiteLLM stream begins we emit a placeholder `MessageEvent` (assistant message) or `ActionEvent` (function call). Each `LLMStreamChunk` should include a `response_id`/`item_id` so we can map to the owning conversation event:
-   - `output_text` → existing `MessageEvent` for the assistant response.
-   - `reasoning_summary_*` → reasoning area inside `MessageEvent`.
-   - `function_call_arguments_*` → arguments area inside `ActionEvent`.
-2. **Use `Live` per section.** For each unique `(conversation_event_id, part_kind, item_id)` create a Rich `Live` instance that updates with concatenated text. When the part is terminal, stop the `Live` and leave the final text in place.
-3. **Avoid newlines unless emitted by the model.** We’ll join chunks using plain string concatenation and only add newline characters when the delta contains `\n` or when we intentionally insert separators (e.g., between tool JSON arguments).
-4. **Segregate sections:**
-   - `Reasoning:` header per `MessageEvent`. Each new reasoning item gets its own `Live` line under that message.
-   - `Assistant:` body for natural language output, appended inside the message panel.
-   - `Function Arguments:` block under each action panel, streaming JSON incrementally.
+We will leave the existing `ConversationVisualizer` untouched for default/legacy usage and introduce a new `StreamingConversationVisualizer` that renders deltas directly inside the final panels:
+
+1. **Create/update per-response panels.** The first chunk for a `(response_id, output_index)` pair creates (or reuses) a panel for the assistant message or tool call and immediately starts streaming into it.
+2. **Route text into semantic sections.** Assistant text, reasoning summaries, function-call arguments, tool output, and refusals each update their own section inside the panel.
+3. **Use Rich `Live` when interactive.** In a real terminal we keep the panel on screen and update it in place; when the console is not interactive (tests, logging) we fall back to static updates.
+4. **Leave the panel in place when finished.** When the final chunk arrives we stop updating but keep the panel visible; the subsequent `MessageEvent`/`ActionEvent` is suppressed to avoid duplicate re-rendering.
 
 ## Implementation roadmap
 
```
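A minimal sketch of how the four new points could hang together, assuming a chunk type shaped roughly like the plan describes (the `LLMStreamChunk` dataclass, `on_chunk`, and `is_final` below are illustrative stand-ins, not settled API):

```python
from dataclasses import dataclass

from rich.console import Console, Group
from rich.live import Live
from rich.panel import Panel
from rich.text import Text


@dataclass
class LLMStreamChunk:
    """Stand-in for the real chunk type; field names are illustrative."""
    response_id: str
    output_index: int
    part_kind: str  # e.g. "output_text", "reasoning_summary", "function_call_arguments"
    delta: str
    is_final: bool = False


class StreamingConversationVisualizer:
    """One panel per (response_id, output_index); one section per part_kind."""

    def __init__(self, console: Console | None = None) -> None:
        self.console = console or Console()
        # (response_id, output_index) -> {part_kind: accumulated text}
        self._sections: dict[tuple[str, int], dict[str, str]] = {}
        # Rich allows one active Live per console, so this sketch assumes
        # a single in-flight response at a time.
        self._live: Live | None = None

    def on_chunk(self, chunk: LLMStreamChunk) -> None:
        key = (chunk.response_id, chunk.output_index)
        sections = self._sections.setdefault(key, {})
        # Plain concatenation: no newlines unless the delta itself contains them.
        sections[chunk.part_kind] = sections.get(chunk.part_kind, "") + chunk.delta

        panel = self._render_panel(key, sections)
        if self.console.is_terminal:
            if self._live is None:
                self._live = Live(panel, console=self.console, refresh_per_second=12)
                self._live.start()
            else:
                self._live.update(panel)
            if chunk.is_final:
                self._live.stop()  # stops refreshing; the final panel stays on screen
                self._live = None
        elif chunk.is_final:
            # Non-interactive console (tests, logging): print the finished panel once.
            self.console.print(panel)

    def _render_panel(self, key: tuple[str, int], sections: dict[str, str]) -> Panel:
        body = Group(
            *(Text.assemble((f"{kind}: ", "bold"), text)
              for kind, text in sections.items())
        )
        return Panel(body, title=f"response {key[0]} / output {key[1]}")
```

Step 4's suppression then reduces to checking whether an arriving `MessageEvent`/`ActionEvent` corresponds to a key this visualizer has already rendered.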
```diff
@@ -95,11 +91,11 @@ Keeping the raw LiteLLM payload inside each `LLMStreamChunk` means we do **not**
    - When we enqueue the initial `MessageEvent`/`ActionEvent`, cache a lookup (e.g., `inflight_streams[(response_id, output_index)] = conversation_event_id`).
    - Update the `LocalConversation` token callback wrapper to attach the resolved conversation event ID onto the `LLMStreamChunk` before emitting/persisting.
 
-3. **Visualizer rewrite**
-   - Maintain `self._stream_views[(conversation_event_id, part_kind, item_id)] = LiveState` where `LiveState` wraps buffer, style, and a `Live` instance.
-   - On streaming updates: update buffer, `live.update(Text(buffer, style=...))` without printing newlines.
-   - On final chunk: stop `Live`, render final static text, and optionally record in conversation state for playback.
-   - Ensure replay (when visualizer processes stored events) converts stored parts into final text as well.
+3. **Streaming visualizer**
+   - Implement `StreamingConversationVisualizer` with lightweight session tracking (keyed by response/output) that owns Rich panels for streaming sections.
+   - Stream updates into the same panel that will remain visible after completion; use `Live` only when running in an interactive terminal.
+   - Suppress duplicate rendering when the final `MessageEvent`/`ActionEvent` arrives, since the streamed panel already contains the content.
+   - Provide a factory helper (e.g., `create_streaming_visualizer`) for callers that want the streaming experience.
 
 4. **Persistence / tests**
    - Update tests to ensure:
```
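Step 2 of the roadmap (the in-flight lookup plus the callback wrapper) is small enough to sketch as well. `StreamRegistry`, `wrap_token_callback`, and the `conversation_event_id` attribute are hypothetical names; only the `inflight_streams`-style `(response_id, output_index)` keying comes from the plan:

```python
import uuid
from typing import Callable


class StreamRegistry:
    """Maps an in-flight (response_id, output_index) to its conversation event ID."""

    def __init__(self) -> None:
        self._inflight: dict[tuple[str, int], str] = {}

    def register(self, response_id: str, output_index: int) -> str:
        # Called when the placeholder MessageEvent/ActionEvent is enqueued.
        event_id = str(uuid.uuid4())
        self._inflight[(response_id, output_index)] = event_id
        return event_id

    def resolve(self, response_id: str, output_index: int) -> str | None:
        return self._inflight.get((response_id, output_index))

    def finish(self, response_id: str, output_index: int) -> None:
        self._inflight.pop((response_id, output_index), None)


def wrap_token_callback(
    registry: StreamRegistry, emit: Callable[["LLMStreamChunk"], None]
) -> Callable[["LLMStreamChunk"], None]:
    """Attach the owning conversation event ID to each chunk before emitting."""

    def callback(chunk: "LLMStreamChunk") -> None:
        # Assumed attribute: the plan only says the ID is attached to the chunk.
        chunk.conversation_event_id = registry.resolve(
            chunk.response_id, chunk.output_index
        )
        emit(chunk)
        if chunk.is_final:
            registry.finish(chunk.response_id, chunk.output_index)

    return callback
```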
```diff
@@ -117,5 +113,6 @@ Keeping the raw LiteLLM payload inside each `LLMStreamChunk` means we do **not**
 - [ ] Refactor classifier to output `LLMStreamChunk` objects with clear `part_kind`.
 - [ ] Track in-flight conversation events so parts know their owner.
 - [ ] Replace print-based visualizer streaming with `Live` blocks per section.
-- [ ] Extend unit tests to cover multiple messages, reasoning segments, and tool calls.
+- [ ] Extend unit tests to cover multiple messages, reasoning segments, tool calls, and the new streaming visualizer.
+- [ ] Update the standalone streaming example to wire in the streaming visualizer helper.
 - [ ] Manually validate with long streaming example to confirm smooth in-place updates.
```
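For the new checklist item about the standalone example, the wiring might look like this, reusing the sketches above; `create_streaming_visualizer` is the factory name the plan proposes, while the surrounding call shape is assumed:

```python
from rich.console import Console


def create_streaming_visualizer(
    console: Console | None = None,
) -> "StreamingConversationVisualizer":
    """Factory helper named in the plan; the signature here is a guess."""
    return StreamingConversationVisualizer(console=console or Console())


# Hypothetical wiring for the standalone streaming example.
visualizer = create_streaming_visualizer()
registry = StreamRegistry()
on_chunk = wrap_token_callback(registry, visualizer.on_chunk)
# `on_chunk` would then be passed wherever LocalConversation accepts its
# token callback, so streamed chunks land in the live panels above.
```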