diff --git a/docs/learnings/best-practices/prefer-copilot-sdk-tcp-for-owned-runtimes.md b/docs/learnings/best-practices/prefer-copilot-sdk-tcp-for-owned-runtimes.md new file mode 100644 index 000000000..c465e2efd --- /dev/null +++ b/docs/learnings/best-practices/prefer-copilot-sdk-tcp-for-owned-runtimes.md @@ -0,0 +1,62 @@ +--- +title: "Prefer Copilot SDK TCP transport for owned runtimes" +module: copilot-sdk provider +date: 2026-06-23 +problem_type: best_practice +component: tooling +severity: medium +tags: + - copilot-sdk + - subprocess + - epipe + - runtime-lifecycle + - provider +applies_when: + - Using @github/copilot-sdk to launch a local Copilot runtime process + - Seeing uncaught stdin EPIPE after a Copilot SDK session appears to finish + - Choosing between RuntimeConnection.forTcp and RuntimeConnection.forStdio +--- + +# Prefer Copilot SDK TCP transport for owned runtimes + +## Context + +AgentV's `copilot-sdk` provider can run against an external Copilot runtime URL or own +the local Copilot runtime process for an eval invocation. When AgentV owns the runtime, +`@github/copilot-sdk@1.0.3` offers both stdio and TCP runtime connections. + +During eval smoke testing, the stdio transport could complete the assistant turn and +then crash the Node process with an uncaught child `stdin` `EPIPE`. The upstream SDK has +a matching lifecycle issue: `github/copilot-sdk#1427`. + +## Guidance + +Prefer `RuntimeConnection.forTcp()` when AgentV launches or auto-resolves the local +Copilot runtime. Keep `RuntimeConnection.forStdio()` only as backward compatibility for +older SDK releases that do not expose TCP. + +This is not a reason to add global `uncaughtException` handlers or swallow all `EPIPE` +errors. The narrow fix is to choose the SDK-supported transport that avoids the stdio +stdin lifecycle edge case. + +## Why This Matters + +The provider result is only useful if AgentV can finish writing `index.jsonl`, +per-case artifacts, and benchmark summaries. 
A post-turn stdio `EPIPE` can crash the +process before result finalization even when the model already produced a usable answer. + +TCP also keeps lifecycle handling inside the SDK transport boundary. AgentV should not +reach into private SDK fields such as `forceStopping`, and it should not install +process-wide exception filters for one provider's subprocess pipe behavior. + +## When to Apply + +- Local owned Copilot runtime: use `RuntimeConnection.forTcp({ path, args })`. +- External Copilot runtime URL: use `RuntimeConnection.forUri(url)`. +- Old SDK without TCP support: fall back to `RuntimeConnection.forStdio({ path, args })`. + +## Related + +- `packages/core/src/evaluation/providers/copilot-sdk.ts` — runtime connection selection +- `packages/core/test/evaluation/providers/copilot-sdk.test.ts` — TCP/URI constructor coverage +- Upstream issue: `github/copilot-sdk#1427` diff --git a/package.json b/package.json index 09b0dadac..614cc03ee 100644 --- a/package.json +++ b/package.json @@ -19,6 +19,7 @@ "agentv": "bun apps/cli/src/cli.ts", "agentv:buildrun": "bun run build && bun apps/cli/dist/cli.js", "beads:check": "bun scripts/check-beads-context.ts", + "debug:pi-sdk-tools": "bun scripts/debug-pi-sdk-tools.ts", "validate:examples": "EVAL_CRITERIA=placeholder CUSTOM_SYSTEM_PROMPT=placeholder bun scripts/validate-example-evals.ts", "eval:baseline-check": "bun scripts/check-eval-baselines.ts", "release": "bun scripts/release.ts", diff --git a/packages/core/src/evaluation/providers/copilot-sdk.ts b/packages/core/src/evaluation/providers/copilot-sdk.ts index 3a09dfe60..9a8011a02 100644 --- a/packages/core/src/evaluation/providers/copilot-sdk.ts +++ b/packages/core/src/evaluation/providers/copilot-sdk.ts @@ -356,7 +356,12 @@ export class CopilotSdkProvider implements Provider { } if (!clientOptions.connection && (this.config.cliPath || this.config.args?.length)) { - if (sdk.RuntimeConnection?.forStdio) { + if (sdk.RuntimeConnection?.forTcp) { + 
clientOptions.connection = sdk.RuntimeConnection.forTcp({ + ...(this.config.cliPath ? { path: this.config.cliPath } : {}), + ...(this.config.args?.length ? { args: this.config.args } : {}), + }); + } else if (sdk.RuntimeConnection?.forStdio) { clientOptions.connection = sdk.RuntimeConnection.forStdio({ ...(this.config.cliPath ? { path: this.config.cliPath } : {}), ...(this.config.args?.length ? { args: this.config.args } : {}), @@ -369,7 +374,16 @@ export class CopilotSdkProvider implements Provider { // node:sqlite (unavailable in Bun). Auto-resolve the platform-specific native // binary from @github/copilot-{platform}-{arch} when available. const nativePath = resolvePlatformCliPath(); - if (nativePath && sdk.RuntimeConnection?.forStdio && !clientOptions.connection) { + if (nativePath && sdk.RuntimeConnection?.forTcp && !clientOptions.connection) { + // Prefer the SDK's supported TCP transport for owned runtimes. In + // @github/copilot-sdk@1.0.3 the stdio transport can rethrow child + // stdin EPIPE as an uncaughtException when the runtime exits outside + // forceStop(); see github/copilot-sdk#1427. + clientOptions.connection = sdk.RuntimeConnection.forTcp({ + path: nativePath, + ...(this.config.args?.length ? { args: this.config.args } : {}), + }); + } else if (nativePath && sdk.RuntimeConnection?.forStdio && !clientOptions.connection) { clientOptions.connection = sdk.RuntimeConnection.forStdio({ path: nativePath, ...(this.config.args?.length ? 
{ args: this.config.args } : {}), diff --git a/packages/core/src/evaluation/providers/pi-coding-agent.ts b/packages/core/src/evaluation/providers/pi-coding-agent.ts index 11fb978d6..b45446535 100644 --- a/packages/core/src/evaluation/providers/pi-coding-agent.ts +++ b/packages/core/src/evaluation/providers/pi-coding-agent.ts @@ -44,6 +44,8 @@ let piCodingAgentModule: typeof import('@earendil-works/pi-coding-agent') | null let piAiModule: typeof import('@earendil-works/pi-ai') | null = null; let loadingPromise: Promise | null = null; +const PI_BUILT_IN_TOOL_NAMES = new Set(['read', 'bash', 'edit', 'write', 'grep', 'find', 'ls']); + async function promptInstall(): Promise { if (!process.stdout.isTTY) return false; const rl = createInterface({ input: process.stdin, output: process.stdout }); @@ -240,19 +242,8 @@ async function loadSdkModules() { // After doLoadSdkModules resolves, both modules are guaranteed non-null. const piSdk = piCodingAgentModule as NonNullable; const piAi = piAiModule as NonNullable; - const toolMap: Record = { - read: piSdk.readTool, - bash: piSdk.bashTool, - edit: piSdk.editTool, - write: piSdk.writeTool, - grep: piSdk.grepTool, - find: piSdk.findTool, - ls: piSdk.lsTool, - }; return { createAgentSession: piSdk.createAgentSession, - codingTools: piSdk.codingTools, - toolMap, SessionManager: piSdk.SessionManager, getModel: piAi.getModel, // biome-ignore lint/suspicious/noExplicitAny: registerBuiltInApiProviders exists at runtime but not in type defs @@ -337,7 +328,7 @@ export class PiCodingAgentProvider implements Provider { } // Select tools based on config - const tools = this.resolveTools(sdk); + const tools = resolvePiToolNames(this.config.tools); // Create agent session using the SDK const { session } = await sdk.createAgentSession({ @@ -575,22 +566,6 @@ export class PiCodingAgentProvider implements Provider { return process.cwd(); } - private resolveTools(sdk: Awaited>) { - if (!this.config.tools) { - return sdk.codingTools; - } - - const 
toolNames = this.config.tools.split(',').map((t) => t.trim().toLowerCase()); - const selected = []; - for (const name of toolNames) { - if (name in sdk.toolMap) { - selected.push(sdk.toolMap[name]); - } - } - // biome-ignore lint/suspicious/noExplicitAny: tools are typed dynamically from SDK - return selected.length > 0 ? (selected as any[]) : sdk.codingTools; - } - private resolveLogDirectory(request: ProviderRequest): string | undefined { if (this.config.logDir) { return path.resolve(this.config.logDir); @@ -691,6 +666,18 @@ class PiStreamLogger { } } +function resolvePiToolNames(configTools?: string): readonly string[] | undefined { + if (!configTools) return undefined; + + const selected = configTools + .split(',') + .map((tool) => tool.trim().toLowerCase()) + .filter((tool) => PI_BUILT_IN_TOOL_NAMES.has(tool)); + + // Passing undefined lets the SDK use its default built-ins. + return selected.length > 0 ? selected : undefined; +} + function summarizeSdkEvent(event: unknown): string | undefined { if (!event || typeof event !== 'object') return undefined; const record = event as Record; @@ -846,4 +833,5 @@ export const _internal = { findAgentvRoot, findManagedSdkInstallRoot, resolveGlobalNpmRoot, + resolvePiToolNames, }; diff --git a/packages/core/test/evaluation/providers/copilot-sdk.test.ts b/packages/core/test/evaluation/providers/copilot-sdk.test.ts index c730df94f..10e7f5ba6 100644 --- a/packages/core/test/evaluation/providers/copilot-sdk.test.ts +++ b/packages/core/test/evaluation/providers/copilot-sdk.test.ts @@ -80,6 +80,20 @@ function mockCopilotSdk(client: MockClient) { CopilotClient: mock(function CopilotClient() { return client; }), + RuntimeConnection: { + forTcp: mock((options?: Record) => ({ + kind: 'tcp', + ...options, + })), + forStdio: mock((options?: Record) => ({ + kind: 'stdio', + ...options, + })), + forUri: mock((url: string) => ({ + kind: 'uri', + url, + })), + }, }; } @@ -144,17 +158,21 @@ describe('CopilotSdkProvider', () => { 
expect(sessionOptions.model).toBe('gpt-5'); }); - it('passes cliUrl to CopilotClient constructor', async () => { + it('passes cliUrl through RuntimeConnection.forUri', async () => { const session = createMockSession({ events: [{ type: 'assistant.message', data: { content: 'response' } }], }); const client = createMockClient(session); + const forUri = mock((url: string) => ({ kind: 'uri', url })); const CopilotClientMock = mock(function CopilotClient() { return client; }); mock.module('@github/copilot-sdk', () => ({ CopilotClient: CopilotClientMock, + RuntimeConnection: { + forUri, + }, })); const { CopilotSdkProvider } = await import('../../../src/evaluation/providers/copilot-sdk.js'); @@ -166,20 +184,29 @@ describe('CopilotSdkProvider', () => { await provider.invoke({ question: 'Test' }); const constructorArgs = CopilotClientMock.mock.calls[0][0]; - expect(constructorArgs.cliUrl).toBe('http://localhost:9999'); + expect(forUri).toHaveBeenCalledWith('http://localhost:9999'); + expect(constructorArgs.connection).toEqual({ + kind: 'uri', + url: 'http://localhost:9999', + }); + expect(session.disconnect).toHaveBeenCalledTimes(1); }); - it('passes args as cliArgs to CopilotClient constructor', async () => { + it('passes args to the local TCP runtime and legacy cliArgs constructor option', async () => { const session = createMockSession({ events: [{ type: 'assistant.message', data: { content: 'response' } }], }); const client = createMockClient(session); + const forTcp = mock((options?: Record) => ({ kind: 'tcp', ...options })); const CopilotClientMock = mock(function CopilotClient() { return client; }); mock.module('@github/copilot-sdk', () => ({ CopilotClient: CopilotClientMock, + RuntimeConnection: { + forTcp, + }, })); const { CopilotSdkProvider } = await import('../../../src/evaluation/providers/copilot-sdk.js'); @@ -191,6 +218,11 @@ describe('CopilotSdkProvider', () => { await provider.invoke({ question: 'Test' }); const constructorArgs = 
CopilotClientMock.mock.calls[0][0]; + expect(forTcp).toHaveBeenCalledWith({ args: ['--verbose', 'enabled'] }); + expect(constructorArgs.connection).toEqual({ + kind: 'tcp', + args: ['--verbose', 'enabled'], + }); expect(constructorArgs.cliArgs).toEqual(['--verbose', 'enabled']); }); @@ -199,12 +231,16 @@ describe('CopilotSdkProvider', () => { events: [{ type: 'assistant.message', data: { content: 'response' } }], }); const client = createMockClient(session); + const forTcp = mock((options?: Record) => ({ kind: 'tcp', ...options })); const CopilotClientMock = mock(function CopilotClient() { return client; }); mock.module('@github/copilot-sdk', () => ({ CopilotClient: CopilotClientMock, + RuntimeConnection: { + forTcp, + }, })); const { CopilotSdkProvider } = await import('../../../src/evaluation/providers/copilot-sdk.js'); @@ -219,6 +255,10 @@ describe('CopilotSdkProvider', () => { // cwd is set so the subprocess resolves relative paths itself — args are NOT pre-resolved expect(constructorArgs.cwd).toBe(path.resolve(fixturesRoot)); expect(constructorArgs.workingDirectory).toBe(path.resolve(fixturesRoot)); + expect(constructorArgs.connection).toEqual({ + kind: 'tcp', + args: ['--plugin-dir', './plugins', '--shared-dir', '../shared', '--mode', 'agent'], + }); expect(constructorArgs.cliArgs).toEqual([ '--plugin-dir', './plugins', @@ -302,22 +342,28 @@ describe('CopilotSdkProvider', () => { ); }); - it('reuses client across multiple invocations', async () => { + it('reuses external client across multiple invocations', async () => { const session = createMockSession({ events: [{ type: 'assistant.message', data: { content: 'response' } }], }); const client = createMockClient(session); + const forUri = mock((url: string) => ({ kind: 'uri', url })); const CopilotClientMock = mock(function CopilotClient() { return client; }); mock.module('@github/copilot-sdk', () => ({ CopilotClient: CopilotClientMock, + RuntimeConnection: { + forUri, + }, })); const { CopilotSdkProvider } = 
await import('../../../src/evaluation/providers/copilot-sdk.js'); - const provider = new CopilotSdkProvider('test-target', {}); + const provider = new CopilotSdkProvider('test-target', { + cliUrl: 'http://localhost:9999', + }); await provider.invoke({ question: 'First' }); await provider.invoke({ question: 'Second' }); @@ -326,16 +372,26 @@ describe('CopilotSdkProvider', () => { expect(CopilotClientMock).toHaveBeenCalledTimes(1); // But createSession should be called twice (fresh session per invocation) expect(client.createSession).toHaveBeenCalledTimes(2); + expect(session.disconnect).toHaveBeenCalledTimes(2); + expect(forUri).toHaveBeenCalledTimes(1); }); - it('creates fresh session per invocation', async () => { + it('reuses local TCP client across multiple invocations', async () => { const session = createMockSession({ events: [{ type: 'assistant.message', data: { content: 'response' } }], }); const client = createMockClient(session); - const sdkMock = mockCopilotSdk(client); + const forTcp = mock((options?: Record) => ({ kind: 'tcp', ...options })); - mock.module('@github/copilot-sdk', () => sdkMock); + const CopilotClientMock = mock(function CopilotClient() { + return client; + }); + mock.module('@github/copilot-sdk', () => ({ + CopilotClient: CopilotClientMock, + RuntimeConnection: { + forTcp, + }, + })); const { CopilotSdkProvider } = await import('../../../src/evaluation/providers/copilot-sdk.js'); const provider = new CopilotSdkProvider('test-target', {}); @@ -343,12 +399,14 @@ describe('CopilotSdkProvider', () => { await provider.invoke({ question: 'First' }); await provider.invoke({ question: 'Second' }); - // Session should be disconnected after each invocation - expect(session.disconnect).toHaveBeenCalledTimes(2); + expect(CopilotClientMock).toHaveBeenCalledTimes(1); expect(client.createSession).toHaveBeenCalledTimes(2); + expect(session.disconnect).toHaveBeenCalledTimes(2); + expect(forTcp).toHaveBeenCalledTimes(1); + 
expect(CopilotClientMock.mock.calls[0][0].connection.kind).toBe('tcp'); }); - it('falls back to destroy for older SDK sessions', async () => { + it('falls back to destroy for older external SDK sessions', async () => { const session = createMockSession({ events: [{ type: 'assistant.message', data: { content: 'response' } }], legacyDestroyOnly: true, @@ -359,7 +417,9 @@ describe('CopilotSdkProvider', () => { mock.module('@github/copilot-sdk', () => sdkMock); const { CopilotSdkProvider } = await import('../../../src/evaluation/providers/copilot-sdk.js'); - const provider = new CopilotSdkProvider('test-target', {}); + const provider = new CopilotSdkProvider('test-target', { + cliUrl: 'http://localhost:9999', + }); await provider.invoke({ question: 'Test' }); diff --git a/packages/core/test/evaluation/providers/pi-coding-agent.test.ts b/packages/core/test/evaluation/providers/pi-coding-agent.test.ts index bd43a4d59..3329eab94 100644 --- a/packages/core/test/evaluation/providers/pi-coding-agent.test.ts +++ b/packages/core/test/evaluation/providers/pi-coding-agent.test.ts @@ -80,4 +80,15 @@ describe('PiCodingAgentProvider', () => { const { sep } = require('node:path'); expect(_internal.findAgentvRoot().endsWith(`packages${sep}core`)).toBe(true); }); + + it('passes PI built-in tool allowlists as SDK tool names', () => { + expect(_internal.resolvePiToolNames(' read, BASH, edit, write ')).toEqual([ + 'read', + 'bash', + 'edit', + 'write', + ]); + expect(_internal.resolvePiToolNames('unknown, nope')).toBeUndefined(); + expect(_internal.resolvePiToolNames()).toBeUndefined(); + }); }); diff --git a/scripts/debug-pi-sdk-tools.ts b/scripts/debug-pi-sdk-tools.ts new file mode 100644 index 000000000..a9d016e5e --- /dev/null +++ b/scripts/debug-pi-sdk-tools.ts @@ -0,0 +1,309 @@ +#!/usr/bin/env bun +import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import path from 'node:path'; + +import { PiCodingAgentProvider } from 
'../packages/core/src/evaluation/providers/pi-coding-agent.js'; +import type { + Message, + ProviderStreamCallbacks, +} from '../packages/core/src/evaluation/providers/types.js'; + +interface Options { + readonly apiKey?: string; + readonly baseUrl?: string; + readonly logDir?: string; + readonly logFormat: 'summary' | 'json'; + readonly model: string; + readonly subprovider: string; + readonly thinking: string; + readonly timeoutMs: number; + readonly workspace?: string; +} + +interface ToolEvent { + readonly toolName: string; + readonly toolCallId?: string; + readonly durationMs?: number; +} + +const DEFAULT_MODEL = + process.env.AGENTV_PI_DEBUG_MODEL ?? process.env.AGENTV_CODEX_MODEL ?? 'gpt-5.3-codex-spark'; +const DEFAULT_BASE_URL = + process.env.AGENTV_PI_DEBUG_BASE_URL ?? + process.env.AGENTV_OPENAI_BASE_URL ?? + process.env.OPENAI_BASE_URL ?? + process.env.OPENAI_ENDPOINT; +const DEFAULT_API_KEY = + process.env.AGENTV_PI_DEBUG_API_KEY ?? + process.env.AGENTV_OPENAI_API_KEY ?? + process.env.OPENAI_API_KEY ?? + (DEFAULT_BASE_URL ? 'agentv-local-debug-key' : undefined); + +function usage(): string { + return [ + 'Usage: bun run debug:pi-sdk-tools [options]', + '', + 'Runs the PI coding-agent SDK provider directly against a temp workspace with a', + 'tool-heavy prompt. 
Use this to iterate on PI provider fixes without a full eval.', + '', + 'Options:', + ' --model <id> Model id (default: AGENTV_PI_DEBUG_MODEL, AGENTV_CODEX_MODEL, or gpt-5.3-codex-spark)', + ' --base-url <url> OpenAI-compatible base URL (default: AGENTV_PI_DEBUG_BASE_URL, AGENTV_OPENAI_BASE_URL, OPENAI_BASE_URL, or OPENAI_ENDPOINT)', + ' --api-key <key> API key (default: AGENTV_PI_DEBUG_API_KEY, AGENTV_OPENAI_API_KEY, OPENAI_API_KEY, or dummy key with base URL)', + ' --subprovider <name> PI SDK subprovider (default: openai)', + ' --thinking <level> off|minimal|low|medium|high|xhigh (default: low)', + ' --timeout-ms <ms> Provider timeout (default: 240000)', + ' --workspace <dir> Reuse/create a workspace instead of mkdtemp', + ' --log-dir <dir> Provider stream log directory (default: <workspace>/.agentv-debug/logs)', + ' --json-log Write raw JSON event logs instead of summary logs', + ' --help Show this help', + ].join('\n'); +} + +function parseArgs(argv: readonly string[]): Options { + const options: { + apiKey?: string; + baseUrl?: string; + logDir?: string; + logFormat: 'summary' | 'json'; + model: string; + subprovider: string; + thinking: string; + timeoutMs: number; + workspace?: string; + } = { + apiKey: DEFAULT_API_KEY, + baseUrl: DEFAULT_BASE_URL, + logFormat: 'summary', + model: DEFAULT_MODEL, + subprovider: process.env.AGENTV_PI_DEBUG_SUBPROVIDER ?? 'openai', + thinking: process.env.AGENTV_PI_DEBUG_THINKING ?? 'low', + timeoutMs: Number(process.env.AGENTV_PI_DEBUG_TIMEOUT_MS ?? 
240000), + }; + + for (let i = 0; i < argv.length; i++) { + const arg = argv[i]; + switch (arg) { + case '--help': + case '-h': + console.log(usage()); + process.exit(0); + return options; + case '--json-log': + options.logFormat = 'json'; + break; + case '--api-key': + options.apiKey = readRequiredValue(argv, ++i, arg); + break; + case '--base-url': + options.baseUrl = readRequiredValue(argv, ++i, arg); + break; + case '--log-dir': + options.logDir = readRequiredValue(argv, ++i, arg); + break; + case '--model': + options.model = readRequiredValue(argv, ++i, arg); + break; + case '--subprovider': + options.subprovider = readRequiredValue(argv, ++i, arg); + break; + case '--thinking': + options.thinking = readRequiredValue(argv, ++i, arg); + break; + case '--timeout-ms': { + const value = Number(readRequiredValue(argv, ++i, arg)); + if (!Number.isFinite(value) || value <= 0) { + throw new Error(`Invalid --timeout-ms value: ${argv[i]}`); + } + options.timeoutMs = value; + break; + } + case '--workspace': + options.workspace = readRequiredValue(argv, ++i, arg); + break; + default: + throw new Error(`Unknown option: ${arg}\n\n${usage()}`); + } + } + + if (!Number.isFinite(options.timeoutMs) || options.timeoutMs <= 0) { + throw new Error(`Invalid AGENTV_PI_DEBUG_TIMEOUT_MS value: ${options.timeoutMs}`); + } + + return options; +} + +function readRequiredValue(argv: readonly string[], index: number, option: string): string { + const value = argv[index]; + if (!value || value.startsWith('--')) { + throw new Error(`Missing value for ${option}`); + } + return value; +} + +async function prepareWorkspace(workspace?: string): Promise<string> { + const root = workspace + ? 
path.resolve(workspace) + : await mkdtemp(path.join(tmpdir(), 'agentv-pi-sdk-tools-')); + await mkdir(path.join(root, 'data'), { recursive: true }); + await mkdir(path.join(root, 'scripts'), { recursive: true }); + await mkdir(path.join(root, 'src'), { recursive: true }); + + await writeFile( + path.join(root, 'README.md'), + [ + '# PI SDK Tool Loop Debug Workspace', + '', + 'This temporary workspace is generated by AgentV to reproduce PI SDK tool-heavy provider runs.', + 'The agent should read files, write a summary, edit it, and run a noisy local command.', + '', + ].join('\n'), + ); + await writeFile( + path.join(root, 'data', 'input.txt'), + [ + 'alpha: read this line', + 'beta: preserve this line in the generated summary', + 'gamma: run the local noisy command before finishing', + '', + ].join('\n'), + ); + await writeFile( + path.join(root, 'scripts', 'noisy-output.mjs'), + [ + 'for (let i = 0; i < 400; i += 1) {', + " console.log(`debug-line-${i.toString().padStart(3, '0')}`);", + '}', + ].join('\n'), + ); + + return root; +} + +function countToolCalls(messages: readonly Message[] | undefined): number { + return messages?.reduce((count, message) => count + (message.toolCalls?.length ?? 0), 0) ?? 0; +} + +function summarizeLastAssistant(messages: readonly Message[] | undefined): string { + const assistant = messages + ?.slice() + .reverse() + .find((message) => message.role === 'assistant' && message.content !== undefined); + if (!assistant) return ''; + if (typeof assistant.content === 'string') return assistant.content.slice(0, 800); + return JSON.stringify(assistant.content).slice(0, 800); +} + +async function main(): Promise<void> { + const options = parseArgs(process.argv.slice(2)); + const workspace = await prepareWorkspace(options.workspace); + const logDir = path.resolve(options.logDir ?? 
path.join(workspace, '.agentv-debug', 'logs')); + await mkdir(logDir, { recursive: true }); + + const toolEvents: ToolEvent[] = []; + const callbacks: ProviderStreamCallbacks = { + onToolCallStart(toolName, toolCallId) { + toolEvents.push({ toolName, toolCallId }); + console.error(`[pi-debug] tool:start ${toolName}${toolCallId ? ` ${toolCallId}` : ''}`); + }, + onToolCallEnd(toolName, _input, _output, durationMs, toolCallId) { + toolEvents.push({ toolName, toolCallId, durationMs }); + console.error( + `[pi-debug] tool:end ${toolName}${toolCallId ? ` ${toolCallId}` : ''} ${durationMs}ms`, + ); + }, + onLlmCallEnd(model, tokenUsage) { + const usage = tokenUsage + ? ` input=${tokenUsage.input} output=${tokenUsage.output}` + : ' usage=unknown'; + console.error(`[pi-debug] llm:end ${model}${usage}`); + }, + }; + + const provider = new PiCodingAgentProvider('debug-pi-sdk-tools', { + apiKey: options.apiKey, + baseUrl: options.baseUrl, + cwd: workspace, + logDir, + logFormat: options.logFormat, + model: options.model, + streamLog: options.logFormat === 'json' ? 'raw' : 'summary', + subprovider: options.subprovider, + thinking: options.thinking, + timeoutMs: options.timeoutMs, + tools: 'read,bash,edit,write', + }); + + const startedAt = Date.now(); + console.error(`[pi-debug] workspace: ${workspace}`); + console.error(`[pi-debug] log dir: ${logDir}`); + console.error(`[pi-debug] model: ${options.subprovider}/${options.model}`); + + const response = await provider.invoke({ + attempt: 0, + cwd: workspace, + evalCaseId: 'debug-pi-sdk-tools', + question: [ + 'You are running a short diagnostic of the PI coding-agent tool loop.', + 'Work only in this repository and do not install dependencies.', + '', + 'Complete these steps in order:', + '1. Use bash to print the current directory and list the top-level files.', + '2. Read README.md and data/input.txt.', + '3. Write src/summary.txt with one bullet for alpha, beta, and gamma.', + '4. 
Use the edit tool to append a line that says "status: edited".', + '5. Run: node scripts/noisy-output.mjs | tail -n 5', + '6. Reply with DONE and a one-sentence summary.', + ].join('\n'), + streamCallbacks: callbacks, + }); + + const summaryPath = path.join(workspace, 'src', 'summary.txt'); + const summaryFile = await readFile(summaryPath, 'utf8').catch(() => undefined); + const durationMs = Date.now() - startedAt; + + console.log( + JSON.stringify( + { + duration_ms: durationMs, + log_dir: logDir, + message_count: response.output?.length ?? 0, + model: options.model, + summary_file_present: summaryFile !== undefined, + tool_call_count: countToolCalls(response.output), + tool_events: toolEvents, + workspace, + }, + null, + 2, + ), + ); + const finalText = summarizeLastAssistant(response.output); + if (finalText) { + console.log('\nFinal assistant excerpt:\n'); + console.log(finalText); + } + if (summaryFile) { + console.log('\nsrc/summary.txt:\n'); + console.log(summaryFile); + } +} + +process.once('uncaughtException', (error) => { + console.error('[pi-debug] uncaughtException'); + console.error(error); + process.exit(1); +}); + +process.once('unhandledRejection', (error) => { + console.error('[pi-debug] unhandledRejection'); + console.error(error); + process.exit(1); +}); + +main().catch((error) => { + console.error('[pi-debug] failed'); + console.error(error); + process.exit(1); +});