From a245694da284535facdf035fb5b807d94dc793c5 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 18 Apr 2026 00:37:36 +0000
Subject: [PATCH 1/7] test: harden MCP tool stdout handling and add stdout
 pollution coverage

Agent-Logs-Url: https://github.com/github/gh-aw/sessions/c9da255d-4104-43e9-8911-088372c96594

Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>
---
 pkg/cli/mcp_server_stdio_integration_test.go | 173 +++++++++++++++++++
 pkg/cli/mcp_tools_management.go              |  47 ++++-
 pkg/cli/mcp_tools_output_streams_test.go     | 119 +++++++++++++
 pkg/cli/mcp_tools_readonly.go                |  15 +-
 4 files changed, 342 insertions(+), 12 deletions(-)
 create mode 100644 pkg/cli/mcp_tools_output_streams_test.go

diff --git a/pkg/cli/mcp_server_stdio_integration_test.go b/pkg/cli/mcp_server_stdio_integration_test.go
index c01d4ff4891..43dd5651ab5 100644
--- a/pkg/cli/mcp_server_stdio_integration_test.go
+++ b/pkg/cli/mcp_server_stdio_integration_test.go
@@ -3,9 +3,13 @@
 package cli
 
 import (
+	"bufio"
 	"bytes"
 	"context"
+	"encoding/json"
 	"errors"
+	"fmt"
+	"io"
 	"os"
 	"os/exec"
 	"path/filepath"
@@ -84,3 +88,172 @@ engine: copilot
 	}
 
 }
+
+func TestMCPServer_CompileAllWorkflows_StdoutOnlyJSONRPC(t *testing.T) {
+	binaryPath := "../../gh-aw"
+	if _, err := os.Stat(binaryPath); os.IsNotExist(err) {
+		t.Skip("Skipping test: gh-aw binary not found. Run 'make build' first.")
+	}
+
+	tmpDir := testutil.TempDir(t, "mcp-stdio-rpc-*")
+	workflowsDir := filepath.Join(tmpDir, ".github", "workflows")
+	if err := os.MkdirAll(workflowsDir, 0755); err != nil {
+		t.Fatalf("Failed to create workflows directory: %v", err)
+	}
+
+	workflowPath := filepath.Join(workflowsDir, "test.md")
+	workflowContent := `---
+on: push
+engine: copilot
+---
+# Test Workflow
+`
+	if err := os.WriteFile(workflowPath, []byte(workflowContent), 0644); err != nil {
+		t.Fatalf("Failed to write workflow file: %v", err)
+	}
+
+	if err := initTestGitRepo(tmpDir); err != nil {
+		t.Fatalf("Failed to initialize git repository: %v", err)
+	}
+
+	originalDir, err := os.Getwd()
+	if err != nil {
+		t.Fatalf("Failed to get current working directory: %v", err)
+	}
+
+	absBinaryPath := filepath.Join(originalDir, binaryPath)
+	ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
+	defer cancel()
+
+	cmd := exec.CommandContext(ctx, absBinaryPath, "mcp-server", "--cmd", absBinaryPath)
+	cmd.Dir = tmpDir
+	cmd.Env = os.Environ()
+
+	stdin, err := cmd.StdinPipe()
+	if err != nil {
+		t.Fatalf("Failed to open stdin pipe: %v", err)
+	}
+	stdout, err := cmd.StdoutPipe()
+	if err != nil {
+		t.Fatalf("Failed to open stdout pipe: %v", err)
+	}
+	var stderr bytes.Buffer
+	cmd.Stderr = &stderr
+
+	if err := cmd.Start(); err != nil {
+		t.Fatalf("Failed to start MCP server: %v", err)
+	}
+	defer func() {
+		_ = stdin.Close()
+		_ = cmd.Wait()
+	}()
+
+	reader := bufio.NewScanner(stdout)
+	reader.Buffer(make([]byte, 1024), 1024*1024)
+
+	sendJSONRPCMessage(t, stdin, map[string]any{
+		"jsonrpc": "2.0",
+		"id":      1,
+		"method":  "initialize",
+		"params": map[string]any{
+			"protocolVersion": "2025-03-26",
+			"capabilities":    map[string]any{},
+			"clientInfo": map[string]any{
+				"name":    "stdio-test-client",
+				"version": "1.0.0",
+			},
+		},
+	})
+
+	_, _ = waitForJSONRPCResponse(t, reader, 1)
+
+	sendJSONRPCMessage(t, stdin, map[string]any{
+		"jsonrpc": "2.0",
+		"method":  "notifications/initialized",
+		"params":  map[string]any{},
+	})
+
+	sendJSONRPCMessage(t, stdin, map[string]any{
+		"jsonrpc": "2.0",
+		"id":      2,
+		"method":  "tools/call",
+		"params": map[string]any{
+			"name":      "compile",
+			"arguments": map[string]any{},
+		},
+	})
+
+	response, raw := waitForJSONRPCResponse(t, reader, 2)
+	if !strings.Contains(raw, `"jsonrpc":"2.0"`) {
+		t.Fatalf("Expected JSON-RPC response on stdout, got: %s", raw)
+	}
+
+	resultData, ok := response["result"].(map[string]any)
+	if !ok {
+		t.Fatalf("Expected result object in compile response, got: %#v", response["result"])
+	}
+	contentList, ok := resultData["content"].([]any)
+	if !ok || len(contentList) == 0 {
+		t.Fatalf("Expected non-empty content list in compile result, got: %#v", resultData["content"])
+	}
+	firstContent, ok := contentList[0].(map[string]any)
+	if !ok {
+		t.Fatalf("Expected first content item to be object, got: %#v", contentList[0])
+	}
+	text, _ := firstContent["text"].(string)
+	if text == "" {
+		t.Fatal("Expected compile result text content to be non-empty")
+	}
+
+	var compileResult []map[string]any
+	if err := json.Unmarshal([]byte(text), &compileResult); err != nil {
+		t.Fatalf("Expected compile response text to be pure JSON without log pollution, got parse error: %v; text=%q", err, text)
+	}
+
+	sendJSONRPCMessage(t, stdin, map[string]any{
+		"jsonrpc": "2.0",
+		"id":      3,
+		"method":  "shutdown",
+		"params":  map[string]any{},
+	})
+	_, _ = waitForJSONRPCResponse(t, reader, 3)
+}
+
+func sendJSONRPCMessage(t *testing.T, w io.Writer, msg map[string]any) {
+	t.Helper()
+	payload, err := json.Marshal(msg)
+	if err != nil {
+		t.Fatalf("Failed to marshal JSON-RPC message: %v", err)
+	}
+	if _, err := fmt.Fprintf(w, "%s\n", payload); err != nil {
+		t.Fatalf("Failed to write JSON-RPC message: %v", err)
+	}
+}
+
+func waitForJSONRPCResponse(t *testing.T, scanner *bufio.Scanner, expectedID int) (map[string]any, string) {
+	t.Helper()
+
+	for scanner.Scan() {
+		line := scanner.Text()
+		var msg map[string]any
+		if err := json.Unmarshal([]byte(line), &msg); err != nil {
+			t.Fatalf("Non-JSON output detected on MCP stdout (stdout must be JSON-RPC only): %q (err=%v)", line, err)
+		}
+
+		if version, ok := msg["jsonrpc"].(string); !ok || version != "2.0" {
+			t.Fatalf("Non-JSON-RPC output detected on MCP stdout: %q", line)
+		}
+
+		if rawID, ok := msg["id"]; ok {
+			if messageID, ok := rawID.(float64); ok && int(messageID) == expectedID {
+				return msg, line
+			}
+		}
+	}
+
+	if err := scanner.Err(); err != nil {
+		t.Fatalf("Failed while reading MCP stdout: %v", err)
+	}
+	t.Fatalf("Did not receive JSON-RPC response for id=%d", expectedID)
+	return nil, ""
+}
diff --git a/pkg/cli/mcp_tools_management.go b/pkg/cli/mcp_tools_management.go
index 39bf7061ed8..a2c6ee2b497 100644
--- a/pkg/cli/mcp_tools_management.go
+++ b/pkg/cli/mcp_tools_management.go
@@ -2,6 +2,8 @@ package cli
 
 import (
 	"context"
+	"errors"
+	"os/exec"
 	"strconv"
 
 	"github.com/github/gh-aw/pkg/logger"
@@ -59,15 +61,24 @@ func registerAddTool(server *mcp.Server, execCmd execCmdFunc) {
 
 		// Execute the CLI command
 		cmd := execCmd(ctx, cmdArgs...)
-		output, err := cmd.CombinedOutput()
+		stdout, err := cmd.Output()
 
 		if err != nil {
-			return nil, nil, newMCPError(jsonrpc.CodeInternalError, "failed to add workflows", map[string]any{"error": err.Error(), "output": string(output)})
+			var stderr string
+			var exitErr *exec.ExitError
+			if errors.As(err, &exitErr) {
+				stderr = string(exitErr.Stderr)
+			}
+			return nil, nil, newMCPError(jsonrpc.CodeInternalError, "failed to add workflows", map[string]any{
+				"error":  err.Error(),
+				"stdout": string(stdout),
+				"stderr": stderr,
+			})
 		}
 
 		return &mcp.CallToolResult{
 			Content: []mcp.Content{
-				&mcp.TextContent{Text: string(output)},
+				&mcp.TextContent{Text: string(stdout)},
 			},
 		}, nil, nil
 	})
@@ -131,15 +142,24 @@ Returns formatted text output showing:
 
 		// Execute the CLI command
 		cmd := execCmd(ctx, cmdArgs...)
-		output, err := cmd.CombinedOutput()
+		stdout, err := cmd.Output()
 
 		if err != nil {
-			return nil, nil, newMCPError(jsonrpc.CodeInternalError, "failed to update workflows", map[string]any{"error": err.Error(), "output": string(output)})
+			var stderr string
+			var exitErr *exec.ExitError
+			if errors.As(err, &exitErr) {
+				stderr = string(exitErr.Stderr)
+			}
+			return nil, nil, newMCPError(jsonrpc.CodeInternalError, "failed to update workflows", map[string]any{
+				"error":  err.Error(),
+				"stdout": string(stdout),
+				"stderr": stderr,
+			})
 		}
 
 		return &mcp.CallToolResult{
 			Content: []mcp.Content{
-				&mcp.TextContent{Text: string(output)},
+				&mcp.TextContent{Text: string(stdout)},
 			},
 		}, nil, nil
 	})
@@ -212,15 +232,24 @@ Returns formatted text output showing:
 
 		// Execute the CLI command
 		cmd := execCmd(ctx, cmdArgs...)
-		output, err := cmd.CombinedOutput()
+		stdout, err := cmd.Output()
 
 		if err != nil {
-			return nil, nil, newMCPError(jsonrpc.CodeInternalError, "failed to fix workflows", map[string]any{"error": err.Error(), "output": string(output)})
+			var stderr string
+			var exitErr *exec.ExitError
+			if errors.As(err, &exitErr) {
+				stderr = string(exitErr.Stderr)
+			}
+			return nil, nil, newMCPError(jsonrpc.CodeInternalError, "failed to fix workflows", map[string]any{
+				"error":  err.Error(),
+				"stdout": string(stdout),
+				"stderr": stderr,
+			})
 		}
 
 		return &mcp.CallToolResult{
 			Content: []mcp.Content{
-				&mcp.TextContent{Text: string(output)},
+				&mcp.TextContent{Text: string(stdout)},
 			},
 		}, nil, nil
 	})
diff --git a/pkg/cli/mcp_tools_output_streams_test.go b/pkg/cli/mcp_tools_output_streams_test.go
new file mode 100644
index 00000000000..ec1f9b5996c
--- /dev/null
+++ b/pkg/cli/mcp_tools_output_streams_test.go
@@ -0,0 +1,119 @@
+//go:build !integration
+
+package cli
+
+import (
+	"context"
+	"fmt"
+	"os/exec"
+	"testing"
+
+	"github.com/modelcontextprotocol/go-sdk/mcp"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func mockExecWithStdoutAndStderr(stdoutText, stderrText string) execCmdFunc {
+	return func(ctx context.Context, args ...string) *exec.Cmd {
+		script := fmt.Sprintf("printf %q; printf %q 1>&2", stdoutText, stderrText)
+		return exec.CommandContext(ctx, "sh", "-c", script)
+	}
+}
+
+func extractTextResult(t *testing.T, result *mcp.CallToolResult) string {
+	t.Helper()
+	require.NotNil(t, result, "Tool result should not be nil")
+	require.NotEmpty(t, result.Content, "Tool result should contain content")
+
+	textContent, ok := result.Content[0].(*mcp.TextContent)
+	require.True(t, ok, "Tool result content should be text")
+	return textContent.Text
+}
+
+func TestCompileTool_UsesOnlyStdoutOnSuccess(t *testing.T) {
+	const (
+		expectedStdout = `[{"workflow":"test.md","valid":true,"errors":[],"warnings":[]}]`
+		stderrNoise    = "diagnostic noise should not be returned"
+	)
+
+	server := mcp.NewServer(&mcp.Implementation{Name: "test", Version: "1.0"}, nil)
+	err := registerCompileTool(server, mockExecWithStdoutAndStderr(expectedStdout, stderrNoise), "")
+	require.NoError(t, err, "registerCompileTool should succeed")
+
+	session := connectInMemory(t, server)
+	result, err := session.CallTool(context.Background(), &mcp.CallToolParams{
+		Name:      "compile",
+		Arguments: map[string]any{},
+	})
+	require.NoError(t, err, "compile tool call should succeed")
+
+	output := extractTextResult(t, result)
+	assert.Equal(t, expectedStdout, output, "compile tool should return subprocess stdout only")
+	assert.NotContains(t, output, stderrNoise, "compile tool output should not contain stderr noise")
+}
+
+func TestCommandBackedTools_UseOnlyStdoutOnSuccess(t *testing.T) {
+	tests := []struct {
+		name         string
+		toolName     string
+		args         map[string]any
+		expectedOut  string
+		stderrNoise  string
+		registerTool func(server *mcp.Server)
+	}{
+		{
+			name:     "add",
+			toolName: "add",
+			args: map[string]any{
+				"workflows": []string{"owner/repo/workflow"},
+			},
+			expectedOut: "add-stdout",
+			stderrNoise: "add-stderr",
+			registerTool: func(server *mcp.Server) { registerAddTool(server, mockExecWithStdoutAndStderr("add-stdout", "add-stderr")) },
+		},
+		{
+			name:         "update",
+			toolName:     "update",
+			args:         map[string]any{},
+			expectedOut:  "update-stdout",
+			stderrNoise:  "update-stderr",
+			registerTool: func(server *mcp.Server) { registerUpdateTool(server, mockExecWithStdoutAndStderr("update-stdout", "update-stderr")) },
+		},
+		{
+			name:     "fix",
+			toolName: "fix",
+			args: map[string]any{
+				"list_codemods": true,
+			},
+			expectedOut: "fix-stdout",
+			stderrNoise: "fix-stderr",
+			registerTool: func(server *mcp.Server) { registerFixTool(server, mockExecWithStdoutAndStderr("fix-stdout", "fix-stderr")) },
+		},
+		{
+			name:         "mcp-inspect",
+			toolName:     "mcp-inspect",
+			args:         map[string]any{},
+			expectedOut:  "inspect-stdout",
+			stderrNoise:  "inspect-stderr",
+			registerTool: func(server *mcp.Server) { registerMCPInspectTool(server, mockExecWithStdoutAndStderr("inspect-stdout", "inspect-stderr")) },
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			server := mcp.NewServer(&mcp.Implementation{Name: "test", Version: "1.0"}, nil)
+			tt.registerTool(server)
+
+			session := connectInMemory(t, server)
+			result, err := session.CallTool(context.Background(), &mcp.CallToolParams{
+				Name:      tt.toolName,
+				Arguments: tt.args,
+			})
+			require.NoError(t, err, "%s tool call should succeed", tt.toolName)
+
+			output := extractTextResult(t, result)
+			assert.Equal(t, tt.expectedOut, output, "%s tool should return subprocess stdout only", tt.toolName)
+			assert.NotContains(t, output, tt.stderrNoise, "%s tool output should not contain stderr noise", tt.toolName)
+		})
+	}
+}
diff --git a/pkg/cli/mcp_tools_readonly.go b/pkg/cli/mcp_tools_readonly.go
index 7af2c902aef..138fa3978f6 100644
--- a/pkg/cli/mcp_tools_readonly.go
+++ b/pkg/cli/mcp_tools_readonly.go
@@ -326,15 +326,24 @@ Returns formatted text output showing:
 
 		// Execute the CLI command
 		cmd := execCmd(ctx, cmdArgs...)
-		output, err := cmd.CombinedOutput()
+		stdout, err := cmd.Output()
 
 		if err != nil {
-			return nil, nil, newMCPError(jsonrpc.CodeInternalError, "failed to inspect MCP servers", map[string]any{"error": err.Error(), "output": string(output)})
+			var stderr string
+			var exitErr *exec.ExitError
+			if errors.As(err, &exitErr) {
+				stderr = string(exitErr.Stderr)
+			}
+			return nil, nil, newMCPError(jsonrpc.CodeInternalError, "failed to inspect MCP servers", map[string]any{
+				"error":  err.Error(),
+				"stdout": string(stdout),
+				"stderr": stderr,
+			})
 		}
 
 		return &mcp.CallToolResult{
 			Content: []mcp.Content{
-				&mcp.TextContent{Text: string(output)},
+				&mcp.TextContent{Text: string(stdout)},
 			},
 		}, nil, nil
 	})

From 64326d96205d4b515b7915e652f0e763298fe082 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 18 Apr 2026 00:51:10 +0000
Subject: [PATCH 2/7] test: satisfy testifylint in MCP stdout stream tests

Agent-Logs-Url: https://github.com/github/gh-aw/sessions/c9da255d-4104-43e9-8911-088372c96594

Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>
---
 pkg/cli/mcp_tools_output_streams_test.go | 38 ++++++++++++++----------
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/pkg/cli/mcp_tools_output_streams_test.go b/pkg/cli/mcp_tools_output_streams_test.go
index ec1f9b5996c..8fa195229f9 100644
--- a/pkg/cli/mcp_tools_output_streams_test.go
+++ b/pkg/cli/mcp_tools_output_streams_test.go
@@ -48,7 +48,7 @@ func TestCompileTool_UsesOnlyStdoutOnSuccess(t *testing.T) {
 	require.NoError(t, err, "compile tool call should succeed")
 
 	output := extractTextResult(t, result)
-	assert.Equal(t, expectedStdout, output, "compile tool should return subprocess stdout only")
+	assert.JSONEq(t, expectedStdout, output, "compile tool should return subprocess stdout only")
 	assert.NotContains(t, output, stderrNoise, "compile tool output should not contain stderr noise")
 }
 
@@ -69,15 +69,19 @@ func TestCommandBackedTools_UseOnlyStdoutOnSuccess(t *testing.T) {
 			},
 			expectedOut: "add-stdout",
 			stderrNoise: "add-stderr",
-			registerTool: func(server *mcp.Server) { registerAddTool(server, mockExecWithStdoutAndStderr("add-stdout", "add-stderr")) },
+			registerTool: func(server *mcp.Server) {
+				registerAddTool(server, mockExecWithStdoutAndStderr("add-stdout", "add-stderr"))
+			},
 		},
 		{
-			name:         "update",
-			toolName:     "update",
-			args:         map[string]any{},
-			expectedOut:  "update-stdout",
-			stderrNoise:  "update-stderr",
-			registerTool: func(server *mcp.Server) { registerUpdateTool(server, mockExecWithStdoutAndStderr("update-stdout", "update-stderr")) },
+			name:        "update",
+			toolName:    "update",
+			args:        map[string]any{},
+			expectedOut: "update-stdout",
+			stderrNoise: "update-stderr",
+			registerTool: func(server *mcp.Server) {
+				registerUpdateTool(server, mockExecWithStdoutAndStderr("update-stdout", "update-stderr"))
+			},
 		},
 		{
 			name:     "fix",
@@ -87,15 +91,19 @@ func TestCommandBackedTools_UseOnlyStdoutOnSuccess(t *testing.T) {
 			},
 			expectedOut: "fix-stdout",
 			stderrNoise: "fix-stderr",
-			registerTool: func(server *mcp.Server) { registerFixTool(server, mockExecWithStdoutAndStderr("fix-stdout", "fix-stderr")) },
+			registerTool: func(server *mcp.Server) {
+				registerFixTool(server, mockExecWithStdoutAndStderr("fix-stdout", "fix-stderr"))
+			},
 		},
 		{
-			name:         "mcp-inspect",
-			toolName:     "mcp-inspect",
-			args:         map[string]any{},
-			expectedOut:  "inspect-stdout",
-			stderrNoise:  "inspect-stderr",
-			registerTool: func(server *mcp.Server) { registerMCPInspectTool(server, mockExecWithStdoutAndStderr("inspect-stdout", "inspect-stderr")) },
+			name:        "mcp-inspect",
+			toolName:    "mcp-inspect",
+			args:        map[string]any{},
+			expectedOut: "inspect-stdout",
+			stderrNoise: "inspect-stderr",
+			registerTool: func(server *mcp.Server) {
+				registerMCPInspectTool(server, mockExecWithStdoutAndStderr("inspect-stdout", "inspect-stderr"))
+			},
 		},
 	}
 

From 69ebd361bd6b19d131f51715d407220fe49cd938 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 18 Apr 2026 01:03:55 +0000
Subject: [PATCH 3/7] test: refine MCP stdout stream tests from validation
 feedback

Agent-Logs-Url: https://github.com/github/gh-aw/sessions/c9da255d-4104-43e9-8911-088372c96594

Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>
---
 pkg/cli/mcp_server_stdio_integration_test.go |  5 ++++-
 pkg/cli/mcp_tools_output_streams_test.go     | 12 ++++++------
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/pkg/cli/mcp_server_stdio_integration_test.go b/pkg/cli/mcp_server_stdio_integration_test.go
index 43dd5651ab5..bfeb3a88997 100644
--- a/pkg/cli/mcp_server_stdio_integration_test.go
+++ b/pkg/cli/mcp_server_stdio_integration_test.go
@@ -230,7 +230,10 @@ func sendJSONRPCMessage(t *testing.T, w io.Writer, msg map[string]any) {
 	}
 }
 
-func waitForJSONRPCResponse(t *testing.T, scanner *bufio.Scanner, expectedID int) (map[string]any, string) {
+// waitForJSONRPCResponse reads JSON-RPC lines from stdout until it finds the
+// response with the expected ID. It returns the decoded response object and the
+// raw JSON line as read from stdout.
+func waitForJSONRPCResponse(t *testing.T, scanner *bufio.Scanner, expectedID int) (response map[string]any, rawLine string) {
 	t.Helper()
 
 	for scanner.Scan() {
diff --git a/pkg/cli/mcp_tools_output_streams_test.go b/pkg/cli/mcp_tools_output_streams_test.go
index 8fa195229f9..2a4c33d4745 100644
--- a/pkg/cli/mcp_tools_output_streams_test.go
+++ b/pkg/cli/mcp_tools_output_streams_test.go
@@ -13,7 +13,7 @@ import (
 	"github.com/stretchr/testify/require"
 )
 
-func mockExecWithStdoutAndStderr(stdoutText, stderrText string) execCmdFunc {
+func mockCommandWithOutput(stdoutText, stderrText string) execCmdFunc {
 	return func(ctx context.Context, args ...string) *exec.Cmd {
 		script := fmt.Sprintf("printf %q; printf %q 1>&2", stdoutText, stderrText)
 		return exec.CommandContext(ctx, "sh", "-c", script)
@@ -37,7 +37,7 @@ func TestCompileTool_UsesOnlyStdoutOnSuccess(t *testing.T) {
 	)
 
 	server := mcp.NewServer(&mcp.Implementation{Name: "test", Version: "1.0"}, nil)
-	err := registerCompileTool(server, mockExecWithStdoutAndStderr(expectedStdout, stderrNoise), "")
+	err := registerCompileTool(server, mockCommandWithOutput(expectedStdout, stderrNoise), "")
 	require.NoError(t, err, "registerCompileTool should succeed")
 
 	session := connectInMemory(t, server)
@@ -70,7 +70,7 @@ func TestCommandBackedTools_UseOnlyStdoutOnSuccess(t *testing.T) {
 			expectedOut: "add-stdout",
 			stderrNoise: "add-stderr",
 			registerTool: func(server *mcp.Server) {
-				registerAddTool(server, mockExecWithStdoutAndStderr("add-stdout", "add-stderr"))
+				registerAddTool(server, mockCommandWithOutput("add-stdout", "add-stderr"))
 			},
 		},
 		{
@@ -80,7 +80,7 @@ func TestCommandBackedTools_UseOnlyStdoutOnSuccess(t *testing.T) {
 			expectedOut: "update-stdout",
 			stderrNoise: "update-stderr",
 			registerTool: func(server *mcp.Server) {
-				registerUpdateTool(server, mockExecWithStdoutAndStderr("update-stdout", "update-stderr"))
+				registerUpdateTool(server, mockCommandWithOutput("update-stdout", "update-stderr"))
 			},
 		},
 		{
@@ -92,7 +92,7 @@ func TestCommandBackedTools_UseOnlyStdoutOnSuccess(t *testing.T) {
 			expectedOut: "fix-stdout",
 			stderrNoise: "fix-stderr",
 			registerTool: func(server *mcp.Server) {
-				registerFixTool(server, mockExecWithStdoutAndStderr("fix-stdout", "fix-stderr"))
+				registerFixTool(server, mockCommandWithOutput("fix-stdout", "fix-stderr"))
 			},
 		},
 		{
@@ -102,7 +102,7 @@ func TestCommandBackedTools_UseOnlyStdoutOnSuccess(t *testing.T) {
 			expectedOut: "inspect-stdout",
 			stderrNoise: "inspect-stderr",
 			registerTool: func(server *mcp.Server) {
-				registerMCPInspectTool(server, mockExecWithStdoutAndStderr("inspect-stdout", "inspect-stderr"))
+				registerMCPInspectTool(server, mockCommandWithOutput("inspect-stdout", "inspect-stderr"))
 			},
 		},
 	}

From fd1dc631dc615f4a095d4aa2e9865baa607714d2 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 18 Apr 2026 01:31:30 +0000
Subject: [PATCH 4/7] revert: restore mcp_tools_management stream handling

Agent-Logs-Url: https://github.com/github/gh-aw/sessions/6da62d79-7117-4bea-8f71-476d7ab9e44c

Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>
---
 pkg/cli/mcp_tools_management.go          | 47 +++++-------------------
 pkg/cli/mcp_tools_output_streams_test.go | 36 +-----------------
 2 files changed, 10 insertions(+), 73 deletions(-)

diff --git a/pkg/cli/mcp_tools_management.go b/pkg/cli/mcp_tools_management.go
index a2c6ee2b497..39bf7061ed8 100644
--- a/pkg/cli/mcp_tools_management.go
+++ b/pkg/cli/mcp_tools_management.go
@@ -2,8 +2,6 @@ package cli
 
 import (
 	"context"
-	"errors"
-	"os/exec"
 	"strconv"
 
 	"github.com/github/gh-aw/pkg/logger"
@@ -61,24 +59,15 @@ func registerAddTool(server *mcp.Server, execCmd execCmdFunc) {
 
 		// Execute the CLI command
 		cmd := execCmd(ctx, cmdArgs...)
-		stdout, err := cmd.Output()
+		output, err := cmd.CombinedOutput()
 
 		if err != nil {
-			var stderr string
-			var exitErr *exec.ExitError
-			if errors.As(err, &exitErr) {
-				stderr = string(exitErr.Stderr)
-			}
-			return nil, nil, newMCPError(jsonrpc.CodeInternalError, "failed to add workflows", map[string]any{
-				"error":  err.Error(),
-				"stdout": string(stdout),
-				"stderr": stderr,
-			})
+			return nil, nil, newMCPError(jsonrpc.CodeInternalError, "failed to add workflows", map[string]any{"error": err.Error(), "output": string(output)})
 		}
 
 		return &mcp.CallToolResult{
 			Content: []mcp.Content{
-				&mcp.TextContent{Text: string(stdout)},
+				&mcp.TextContent{Text: string(output)},
 			},
 		}, nil, nil
 	})
@@ -142,24 +131,15 @@ Returns formatted text output showing:
 
 		// Execute the CLI command
 		cmd := execCmd(ctx, cmdArgs...)
-		stdout, err := cmd.Output()
+		output, err := cmd.CombinedOutput()
 
 		if err != nil {
-			var stderr string
-			var exitErr *exec.ExitError
-			if errors.As(err, &exitErr) {
-				stderr = string(exitErr.Stderr)
-			}
-			return nil, nil, newMCPError(jsonrpc.CodeInternalError, "failed to update workflows", map[string]any{
-				"error":  err.Error(),
-				"stdout": string(stdout),
-				"stderr": stderr,
-			})
+			return nil, nil, newMCPError(jsonrpc.CodeInternalError, "failed to update workflows", map[string]any{"error": err.Error(), "output": string(output)})
 		}
 
 		return &mcp.CallToolResult{
 			Content: []mcp.Content{
-				&mcp.TextContent{Text: string(stdout)},
+				&mcp.TextContent{Text: string(output)},
 			},
 		}, nil, nil
 	})
@@ -232,24 +212,15 @@ Returns formatted text output showing:
 
 		// Execute the CLI command
 		cmd := execCmd(ctx, cmdArgs...)
-		stdout, err := cmd.Output()
+		output, err := cmd.CombinedOutput()
 
 		if err != nil {
-			var stderr string
-			var exitErr *exec.ExitError
-			if errors.As(err, &exitErr) {
-				stderr = string(exitErr.Stderr)
-			}
-			return nil, nil, newMCPError(jsonrpc.CodeInternalError, "failed to fix workflows", map[string]any{
-				"error":  err.Error(),
-				"stdout": string(stdout),
-				"stderr": stderr,
-			})
+			return nil, nil, newMCPError(jsonrpc.CodeInternalError, "failed to fix workflows", map[string]any{"error": err.Error(), "output": string(output)})
 		}
 
 		return &mcp.CallToolResult{
 			Content: []mcp.Content{
-				&mcp.TextContent{Text: string(stdout)},
+				&mcp.TextContent{Text: string(output)},
 			},
 		}, nil, nil
 	})
diff --git a/pkg/cli/mcp_tools_output_streams_test.go b/pkg/cli/mcp_tools_output_streams_test.go
index 2a4c33d4745..e24d56ecec3 100644
--- a/pkg/cli/mcp_tools_output_streams_test.go
+++ b/pkg/cli/mcp_tools_output_streams_test.go
@@ -52,7 +52,7 @@ func TestCompileTool_UsesOnlyStdoutOnSuccess(t *testing.T) {
 	assert.NotContains(t, output, stderrNoise, "compile tool output should not contain stderr noise")
 }
 
-func TestCommandBackedTools_UseOnlyStdoutOnSuccess(t *testing.T) {
+func TestMCPInspectTool_UsesOnlyStdoutOnSuccess(t *testing.T) {
 	tests := []struct {
 		name         string
 		toolName     string
@@ -61,40 +61,6 @@ func TestCommandBackedTools_UseOnlyStdoutOnSuccess(t *testing.T) {
 		stderrNoise  string
 		registerTool func(server *mcp.Server)
 	}{
-		{
-			name:     "add",
-			toolName: "add",
-			args: map[string]any{
-				"workflows": []string{"owner/repo/workflow"},
-			},
-			expectedOut: "add-stdout",
-			stderrNoise: "add-stderr",
-			registerTool: func(server *mcp.Server) {
-				registerAddTool(server, mockCommandWithOutput("add-stdout", "add-stderr"))
-			},
-		},
-		{
-			name:        "update",
-			toolName:    "update",
-			args:        map[string]any{},
-			expectedOut: "update-stdout",
-			stderrNoise: "update-stderr",
-			registerTool: func(server *mcp.Server) {
-				registerUpdateTool(server, mockCommandWithOutput("update-stdout", "update-stderr"))
-			},
-		},
-		{
-			name:     "fix",
-			toolName: "fix",
-			args: map[string]any{
-				"list_codemods": true,
-			},
-			expectedOut: "fix-stdout",
-			stderrNoise: "fix-stderr",
-			registerTool: func(server *mcp.Server) {
-				registerFixTool(server, mockCommandWithOutput("fix-stdout", "fix-stderr"))
-			},
-		},
 		{
 			name:        "mcp-inspect",
 			toolName:    "mcp-inspect",

From 51c81e1a4aa3dba93e53b8838cd9fb28bec6bc55 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 18 Apr 2026 01:50:18 +0000
Subject: [PATCH 5/7] revert: restore mcp-inspect stream handling in readonly
 tool

Agent-Logs-Url: https://github.com/github/gh-aw/sessions/a4f38db7-d04e-4612-b5e8-df906de5a0fd

Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>
---
 pkg/cli/mcp_tools_output_streams_test.go | 40 ------------------------
 pkg/cli/mcp_tools_readonly.go            | 15 ++-------
 2 files changed, 3 insertions(+), 52 deletions(-)

diff --git a/pkg/cli/mcp_tools_output_streams_test.go b/pkg/cli/mcp_tools_output_streams_test.go
index e24d56ecec3..8ac228ed276 100644
--- a/pkg/cli/mcp_tools_output_streams_test.go
+++ b/pkg/cli/mcp_tools_output_streams_test.go
@@ -51,43 +51,3 @@ func TestCompileTool_UsesOnlyStdoutOnSuccess(t *testing.T) {
 	assert.JSONEq(t, expectedStdout, output, "compile tool should return subprocess stdout only")
 	assert.NotContains(t, output, stderrNoise, "compile tool output should not contain stderr noise")
 }
-
-func TestMCPInspectTool_UsesOnlyStdoutOnSuccess(t *testing.T) {
-	tests := []struct {
-		name         string
-		toolName     string
-		args         map[string]any
-		expectedOut  string
-		stderrNoise  string
-		registerTool func(server *mcp.Server)
-	}{
-		{
-			name:        "mcp-inspect",
-			toolName:    "mcp-inspect",
-			args:        map[string]any{},
-			expectedOut: "inspect-stdout",
-			stderrNoise: "inspect-stderr",
-			registerTool: func(server *mcp.Server) {
-				registerMCPInspectTool(server, mockCommandWithOutput("inspect-stdout", "inspect-stderr"))
-			},
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			server := mcp.NewServer(&mcp.Implementation{Name: "test", Version: "1.0"}, nil)
-			tt.registerTool(server)
-
-			session := connectInMemory(t, server)
-			result, err := session.CallTool(context.Background(), &mcp.CallToolParams{
-				Name:      tt.toolName,
-				Arguments: tt.args,
-			})
-			require.NoError(t, err, "%s tool call should succeed", tt.toolName)
-
-			output := extractTextResult(t, result)
-			assert.Equal(t, tt.expectedOut, output, "%s tool should return subprocess stdout only", tt.toolName)
-			assert.NotContains(t, output, tt.stderrNoise, "%s tool output should not contain stderr noise", tt.toolName)
-		})
-	}
-}
diff --git a/pkg/cli/mcp_tools_readonly.go b/pkg/cli/mcp_tools_readonly.go
index 138fa3978f6..7af2c902aef 100644
--- a/pkg/cli/mcp_tools_readonly.go
+++ b/pkg/cli/mcp_tools_readonly.go
@@ -326,24 +326,15 @@ Returns formatted text output showing:
 
 		// Execute the CLI command
 		cmd := execCmd(ctx, cmdArgs...)
-		stdout, err := cmd.Output()
+		output, err := cmd.CombinedOutput()
 
 		if err != nil {
-			var stderr string
-			var exitErr *exec.ExitError
-			if errors.As(err, &exitErr) {
-				stderr = string(exitErr.Stderr)
-			}
-			return nil, nil, newMCPError(jsonrpc.CodeInternalError, "failed to inspect MCP servers", map[string]any{
-				"error":  err.Error(),
-				"stdout": string(stdout),
-				"stderr": stderr,
-			})
+			return nil, nil, newMCPError(jsonrpc.CodeInternalError, "failed to inspect MCP servers", map[string]any{"error": err.Error(), "output": string(output)})
 		}
 
 		return &mcp.CallToolResult{
 			Content: []mcp.Content{
-				&mcp.TextContent{Text: string(stdout)},
+				&mcp.TextContent{Text: string(output)},
 			},
 		}, nil, nil
 	})

From 0332f9f38416bf698db68a4df8ce175a7903f2d1 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Sat, 18 Apr 2026 02:04:34 +0000
Subject: [PATCH 6/7] docs(adr): add draft ADR-26968 for MCP compile tool
 stdout-only semantics

Generated by Design Decision Gate to document the architectural decision
that the compile MCP tool must source success responses exclusively from
subprocess stdout, with stderr reserved for diagnostics.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 ...mpile-tool-stdout-only-success-response.md | 79 +++++++++++++++++++
 1 file changed, 79 insertions(+)
 create mode 100644 docs/adr/26968-mcp-compile-tool-stdout-only-success-response.md

diff --git a/docs/adr/26968-mcp-compile-tool-stdout-only-success-response.md b/docs/adr/26968-mcp-compile-tool-stdout-only-success-response.md
new file mode 100644
index 00000000000..77084087392
--- /dev/null
+++ b/docs/adr/26968-mcp-compile-tool-stdout-only-success-response.md
@@ -0,0 +1,79 @@
+# ADR-26968: MCP Compile Tool Uses Subprocess Stdout Exclusively for Success Responses
+
+**Date**: 2026-04-18
+**Status**: Draft
+**Deciders**: pelikhan, Copilot
+
+---
+
+## Part 1 — Narrative (Human-Friendly)
+
+### Context
+
+The `gh-aw` binary exposes a Model Context Protocol (MCP) server over stdio, where the JSON-RPC transport channel (stdout) must carry only well-formed JSON-RPC 2.0 messages. The `compile` tool invokes an external subprocess (the `gh-aw` binary itself in compile mode) to validate workflow files and return structured JSON results. Prior to this decision, there was ambiguity about whether subprocess stderr output — used for diagnostics and progress logging — should also be included in the MCP tool response text returned over the JSON-RPC channel. Mixing stderr into the response text caused JSON parsing failures when MCP clients attempted to parse the response as JSON, because diagnostic log lines are plain text, not JSON.
+
+### Decision
+
+We will configure the `compile` MCP tool to source its success response content exclusively from subprocess stdout, discarding stderr output from the tool response body. When the subprocess exits successfully, only the data written to its stdout (valid JSON) will be returned as the tool result text. Subprocess stderr is reserved for diagnostics visible at the process level (e.g., captured by a test harness or container logs) and must not appear in the JSON-RPC response. This invariant is enforced by both a unit test (mocking the subprocess command) and a binary-level integration test (launching the full MCP server and verifying raw stdio traffic).
+
+### Alternatives Considered
+
+#### Alternative 1: Include Both Stdout and Stderr in the Tool Response
+
+Concatenating stdout and stderr into the tool response was considered because it preserves all diagnostic information visible to an MCP client. This was rejected because MCP clients that expect the compile result text to be parseable JSON would encounter parse failures whenever the subprocess emitted any log line to stderr. The MCP protocol treats tool result text as opaque to the transport but clients — including the test harness — depend on the structured JSON content.
+
+#### Alternative 2: Route All Output Through Structured JSON on Stdout
+
+An alternative is to require the subprocess to emit all output (including diagnostics) as structured JSON on stdout, eliminating stderr entirely. This would allow richer error context in the tool response. It was not chosen because it would require invasive changes to the `compile` subcommand's logging infrastructure across all callers, not just the MCP server. The subprocess is also invoked directly by users and other tooling where plain-text stderr diagnostics are desirable.
+
+#### Alternative 3: Capture Stderr and Append It as a Separate Content Item
+
+The MCP tool result `content` array can hold multiple items. Stderr text could be appended as a second `TextContent` item tagged as diagnostic. This was considered but rejected because it complicates client logic (clients must filter by content item index or type to obtain the JSON result), and it leaks internal diagnostic format changes into the public tool response contract.
+
+### Consequences
+
+#### Positive
+- MCP clients receive a clean, parseable JSON response; no interleaved log noise can break JSON parsing.
+- The stdout/stderr split is a well-understood Unix convention; future subprocess implementers can follow it without reading MCP-specific documentation.
+- Binary-level integration tests provide strong regression protection at the transport boundary.
+
+#### Negative
+- Subprocess diagnostic output is invisible to MCP clients; clients that need to surface compiler warnings embedded only in stderr must rely on the subprocess writing those to stdout instead.
+- The unit test uses shell scripts (`sh -c`) to mock subprocess output; this creates a dependency on a POSIX shell being present in the test environment.
+
+#### Neutral
+- The integration test must be run against a pre-built binary (`make build`); it will skip automatically when the binary is absent. Teams must remember to build before running integration tests.
+- Stderr captured during integration tests is collected into a buffer but not asserted on, leaving stderr content unverified at the integration level.
+
+---
+
+## Part 2 — Normative Specification (RFC 2119)
+
+> The key words **MUST**, **MUST NOT**, **REQUIRED**, **SHALL**, **SHALL NOT**, **SHOULD**, **SHOULD NOT**, **RECOMMENDED**, **MAY**, and **OPTIONAL** in this section are to be interpreted as described in [RFC 2119](https://www.rfc-editor.org/rfc/rfc2119).
+
+### MCP Tool Response Content
+
+1. On subprocess success (exit code 0), the `compile` MCP tool **MUST** populate its `TextContent` response exclusively from the subprocess's stdout stream.
+2. The `compile` MCP tool **MUST NOT** include any bytes read from the subprocess's stderr stream in the tool result `content` field.
+3. The tool response text **MUST** be valid JSON as produced by the subprocess; implementations **MUST NOT** append additional text before or after the subprocess stdout output.
+4. Implementations **MAY** log subprocess stderr to a diagnostic sink (e.g., a container log or test buffer) that is separate from the JSON-RPC stdio channel.
+
+### MCP Server Stdio Channel Purity
+
+1. All output written to the MCP server process's stdout **MUST** be valid JSON-RPC 2.0 messages.
+2. The MCP server process **MUST NOT** write any non-JSON-RPC content (log lines, progress messages, debug output) to its stdout.
+3. Diagnostic output from the MCP server process itself **SHOULD** be directed to stderr or a dedicated log file, never to stdout.
+
+### Test Coverage
+
+1. Any change to `compile` tool output stream handling **MUST** be accompanied by a unit test that verifies stdout-only semantics using a mocked subprocess command.
+2. Any change to MCP server stdio stream handling **SHOULD** be covered by a binary-level integration test that validates raw stdout traffic is valid JSON-RPC 2.0 throughout the full initialize–call–shutdown lifecycle.
+3. Integration tests that depend on a compiled binary **MAY** skip (using `t.Skip`) when the binary is not present, but **MUST NOT** silently pass.
+
+### Conformance
+
+An implementation is considered conformant with this ADR if it satisfies all **MUST** and **MUST NOT** requirements above. Specifically: the `compile` tool result text contains only subprocess stdout bytes on success, the MCP server stdout channel carries only valid JSON-RPC 2.0 messages, and both a unit test and an integration test exist to enforce these invariants. Failure to meet any **MUST** or **MUST NOT** requirement constitutes non-conformance.
+
+---
+
+*This is a DRAFT ADR generated by the [Design Decision Gate](https://github.com/github/gh-aw/actions/runs/24594338493) workflow. The PR author must review, complete, and finalize this document before the PR can merge.*

From 4af17a2340e66ba301814c84f5eca0a22266f231 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 18 Apr 2026 02:33:07 +0000
Subject: [PATCH 7/7] test: address review feedback on MCP stdio tests

Agent-Logs-Url: https://github.com/github/gh-aw/sessions/e4f0478b-70ee-4aea-a26a-440476876a8b

Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>
---
 pkg/cli/mcp_server_stdio_integration_test.go | 2 +-
 pkg/cli/mcp_tools_output_streams_test.go     | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/pkg/cli/mcp_server_stdio_integration_test.go b/pkg/cli/mcp_server_stdio_integration_test.go
index bfeb3a88997..fab8841998d 100644
--- a/pkg/cli/mcp_server_stdio_integration_test.go
+++ b/pkg/cli/mcp_server_stdio_integration_test.go
@@ -123,7 +123,6 @@ engine: copilot
 
 	absBinaryPath := filepath.Join(originalDir, binaryPath)
 	ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
-	defer cancel()
 
 	cmd := exec.CommandContext(ctx, absBinaryPath, "mcp-server", "--cmd", absBinaryPath)
 	cmd.Dir = tmpDir
@@ -144,6 +143,7 @@ engine: copilot
 		t.Fatalf("Failed to start MCP server: %v", err)
 	}
 	defer func() {
+		cancel()
 		_ = stdin.Close()
 		_ = cmd.Wait()
 	}()
diff --git a/pkg/cli/mcp_tools_output_streams_test.go b/pkg/cli/mcp_tools_output_streams_test.go
index 8ac228ed276..0d5bb9ad844 100644
--- a/pkg/cli/mcp_tools_output_streams_test.go
+++ b/pkg/cli/mcp_tools_output_streams_test.go
@@ -4,7 +4,6 @@ package cli
 
 import (
 	"context"
-	"fmt"
 	"os/exec"
 	"testing"
 
@@ -15,8 +14,7 @@ import (
 
 func mockCommandWithOutput(stdoutText, stderrText string) execCmdFunc {
 	return func(ctx context.Context, args ...string) *exec.Cmd {
-		script := fmt.Sprintf("printf %q; printf %q 1>&2", stdoutText, stderrText)
-		return exec.CommandContext(ctx, "sh", "-c", script)
+		return exec.CommandContext(ctx, "sh", "-c", `printf '%s' "$1"; printf '%s' "$2" 1>&2`, "sh", stdoutText, stderrText)
 	}
 }