diff --git a/lib/httpapi/claude.go b/lib/httpapi/claude.go index 641efe47..9ccefdd2 100644 --- a/lib/httpapi/claude.go +++ b/lib/httpapi/claude.go @@ -5,6 +5,11 @@ import ( st "github.com/coder/agentapi/lib/screentracker" ) +// formatPaste wraps message in bracketed paste escape sequences. +// These sequences start with ESC (\x1b), which TUI selection +// widgets (e.g. Claude Code's numbered-choice prompt) interpret +// as "cancel". For selection prompts, callers should use +// MessageTypeRaw to send raw keystrokes directly instead. func formatPaste(message string) []st.MessagePart { return []st.MessagePart{ // Bracketed paste mode start sequence diff --git a/lib/screentracker/pty_conversation.go b/lib/screentracker/pty_conversation.go index cce8c677..80c9b942 100644 --- a/lib/screentracker/pty_conversation.go +++ b/lib/screentracker/pty_conversation.go @@ -3,6 +3,7 @@ package screentracker import ( "context" "encoding/json" + "errors" "fmt" "log/slog" "os" @@ -16,6 +17,26 @@ import ( "golang.org/x/xerrors" ) +const ( + // writeStabilizeEchoTimeout is the timeout for the echo + // detection WaitFor loop in writeStabilize Phase 1. The + // effective ceiling may be slightly longer because the 1s + // stability check inside the condition runs outside + // WaitFor's timeout select. Non-echoing agents (e.g. TUI + // agents using bracketed paste) will hit this timeout, + // which is non-fatal. + // + // TODO: move to PTYConversationConfig if agents need + // different echo detection windows. + writeStabilizeEchoTimeout = 2 * time.Second + + // writeStabilizeProcessTimeout is the maximum time to wait + // for the screen to change after sending a carriage return. + // This detects whether the agent is actually processing the + // input. + writeStabilizeProcessTimeout = 15 * time.Second +) + // A screenSnapshot represents a snapshot of the PTY at a specific time. type screenSnapshot struct { timestamp time.Time @@ -411,7 +432,19 @@ func (c *PTYConversation) sendMessage(ctx context.Context, messageParts ...Messa return nil } -// writeStabilize writes messageParts to the screen and waits for the screen to stabilize after the message is written. +// writeStabilize writes messageParts to the PTY and waits for +// the agent to process them. It operates in two phases: +// +// Phase 1 (echo detection): writes the message text and waits +// for the screen to change and stabilize. This detects agents +// that echo typed input. If the screen doesn't change within +// writeStabilizeEchoTimeout, this is non-fatal — many TUI +// agents buffer bracketed-paste input without rendering it. +// +// Phase 2 (processing detection): writes a carriage return +// and waits for the screen to change, indicating the agent +// started processing. This phase is fatal on timeout — if the +// agent doesn't react to Enter, it's unresponsive. func (c *PTYConversation) writeStabilize(ctx context.Context, messageParts ...MessagePart) error { screenBeforeMessage := c.cfg.AgentIO.ReadScreen() for _, part := range messageParts { @@ -421,7 +454,7 @@ func (c *PTYConversation) writeStabilize(ctx context.Context, messageParts ...Me } // wait for the screen to stabilize after the message is written if err := util.WaitFor(ctx, util.WaitTimeout{ - Timeout: 15 * time.Second, + Timeout: writeStabilizeEchoTimeout, MinInterval: 50 * time.Millisecond, InitialWait: true, Clock: c.cfg.Clock, @@ -441,14 +474,25 @@ func (c *PTYConversation) writeStabilize(ctx context.Context, messageParts ...Me } return false, nil }); err != nil { - return xerrors.Errorf("failed to wait for screen to stabilize: %w", err) + if !errors.Is(err, util.WaitTimedOut) { + // Context cancellation or condition errors are fatal. + return xerrors.Errorf("failed to wait for screen to stabilize: %w", err) + } + // Phase 1 timeout is non-fatal: the agent may not echo + // input (e.g. TUI agents buffer bracketed-paste content + // internally). Proceed to Phase 2 to send the carriage + // return. + c.cfg.Logger.Info( + "echo detection timed out, sending carriage return", + "timeout", writeStabilizeEchoTimeout, + ) } // wait for the screen to change after the carriage return is written screenBeforeCarriageReturn := c.cfg.AgentIO.ReadScreen() lastCarriageReturnTime := time.Time{} if err := util.WaitFor(ctx, util.WaitTimeout{ - Timeout: 15 * time.Second, + Timeout: writeStabilizeProcessTimeout, MinInterval: 25 * time.Millisecond, Clock: c.cfg.Clock, }, func() (bool, error) { diff --git a/lib/screentracker/pty_conversation_test.go b/lib/screentracker/pty_conversation_test.go index 0e7d9635..8d65e064 100644 --- a/lib/screentracker/pty_conversation_test.go +++ b/lib/screentracker/pty_conversation_test.go @@ -8,6 +8,7 @@ import ( "log/slog" "os" "sync" + "sync/atomic" "testing" "time" @@ -447,6 +448,130 @@ func TestMessages(t *testing.T) { c, _, _ := newConversation(context.Background(), t) assert.ErrorIs(t, c.Send(st.MessagePartText{Content: ""}), st.ErrMessageValidationEmpty) }) + + t.Run("send-message-no-echo-agent-reacts", func(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), testTimeout) + t.Cleanup(cancel) + + // Given: an agent that doesn't echo typed input but + // reacts to carriage return by updating the screen. + c, _, mClock := newConversation(ctx, t, func(cfg *st.PTYConversationConfig) { + a := &testAgent{screen: "prompt"} + a.onWrite = func(data []byte) { + if string(data) == "\r" { + a.screen = "processing..." + } + } + cfg.AgentIO = a + }) + advanceFor(ctx, t, mClock, interval*threshold) + + // When: a message is sent. Phase 1 times out (no echo), + // Phase 2 writes \r and the agent reacts. + sendAndAdvance(ctx, t, c, mClock, st.MessagePartText{Content: "hello"}) + + // Then: Send succeeds and the user message is recorded. + msgs := c.Messages() + require.True(t, len(msgs) >= 2) + var foundUserMsg bool + for _, msg := range msgs { + if msg.Role == st.ConversationRoleUser && msg.Message == "hello" { + foundUserMsg = true + break + } + } + assert.True(t, foundUserMsg, "expected user message 'hello' in conversation") + }) + + t.Run("send-message-no-echo-no-react", func(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), testTimeout) + t.Cleanup(cancel) + + // Given: an agent that is completely unresponsive — it + // neither echoes input nor reacts to carriage return. + c, _, mClock := newConversation(ctx, t, func(cfg *st.PTYConversationConfig) { + a := &testAgent{screen: "prompt"} + a.onWrite = func(data []byte) {} + cfg.AgentIO = a + }) + advanceFor(ctx, t, mClock, interval*threshold) + + // When: a message is sent. Both Phase 1 (echo) and + // Phase 2 (processing) time out. + // Note: can't use sendAndAdvance here because it calls + // require.NoError internally. + var sendErr error + var sendDone atomic.Bool + go func() { + sendErr = c.Send(st.MessagePartText{Content: "hello"}) + sendDone.Store(true) + }() + advanceUntil(ctx, t, mClock, func() bool { return sendDone.Load() }) + + // Then: Send fails with a Phase 2 error (not Phase 1). + require.Error(t, sendErr) + assert.Contains(t, sendErr.Error(), "failed to wait for processing to start") + }) + + t.Run("send-message-no-echo-context-cancelled", func(t *testing.T) { + // Given: a non-echoing agent and a cancellable context. + // The onWrite signals when writeStabilize starts writing + // message parts — this is used to synchronize the cancel. + sendCtx, sendCancel := context.WithCancel(context.Background()) + t.Cleanup(sendCancel) + + writeStarted := make(chan struct{}, 1) + c, _, mClock := newConversation(sendCtx, t, func(cfg *st.PTYConversationConfig) { + a := &testAgent{screen: "prompt"} + a.onWrite = func(data []byte) { + select { + case writeStarted <- struct{}{}: + default: + } + } + cfg.AgentIO = a + }) + advanceFor(sendCtx, t, mClock, interval*threshold) + + // When: a message is sent and the context is cancelled + // during Phase 1 (after the message is written to the + // PTY, before echo detection completes). + var sendErr error + var sendDone atomic.Bool + go func() { + sendErr = c.Send(st.MessagePartText{Content: "hello"}) + sendDone.Store(true) + }() + + // Advance tick-by-tick until writeStabilize starts + // (onWrite fires). This gives the send loop goroutine + // scheduling time between advances. + advanceUntil(sendCtx, t, mClock, func() bool { + select { + case <-writeStarted: + return true + default: + return false + } + }) + + // writeStabilize Phase 1 is now running. Its WaitFor is + // blocked on a mock timer sleep select. Cancel: the + // select sees ctx.Done() immediately. + sendCancel() + + // WaitFor returns ctx.Err(). The errors.Is guard in + // Phase 1 propagates it as fatal. Use Eventually since + // the goroutine needs scheduling time. + require.Eventually(t, sendDone.Load, 5*time.Second, 10*time.Millisecond) + + // Then: the error wraps context.Canceled, not a Phase 2 + // timeout. This validates the errors.Is(WaitTimedOut) + // guard. + require.Error(t, sendErr) + assert.ErrorIs(t, sendErr, context.Canceled) + assert.NotContains(t, sendErr.Error(), "failed to wait for processing to start") + }) } func TestStatePersistence(t *testing.T) {