diff --git a/.gitignore b/.gitignore index 539bf374..050e46a2 100644 --- a/.gitignore +++ b/.gitignore @@ -42,6 +42,7 @@ apps/pi-extension/review-core.ts # Claude Code session-local runtime state (lock files, scheduled-task state). # Machine-specific; never belongs in the repo. .claude/ +.playwright-cli/ *.ntvs* *.njsproj *.sln diff --git a/apps/codex/README.md b/apps/codex/README.md index c9283a90..47c54aba 100644 --- a/apps/codex/README.md +++ b/apps/codex/README.md @@ -1,6 +1,8 @@ # Plannotator for Codex -Code review and markdown annotation are supported today. Plan mode is not yet supported — it requires hooks to intercept the agent's plan submission, which Codex does not currently expose. +Code review, markdown annotation, and plan review are supported in Codex. + +Plan review uses Codex's experimental `Stop` hook. This is a post-render review flow: when a turn stops, Plannotator reads the current rollout transcript, extracts the latest plan, and opens the normal plan review UI. If you deny the plan, Plannotator returns continuation feedback so Codex revises the plan in the same turn. ## Install @@ -16,8 +18,61 @@ curl -fsSL https://plannotator.ai/install.sh | bash irm https://plannotator.ai/install.ps1 | iex ``` +## Enable Codex hooks + +Codex hooks are currently experimental and require a feature flag. + +Add this to `~/.codex/config.toml` or `<repo>/.codex/config.toml`: + +```toml +[features] +codex_hooks = true +``` + +Then create `~/.codex/hooks.json` or `<repo>/.codex/hooks.json`: + +```json +{ + "hooks": { + "Stop": [ + { + "hooks": [ + { + "type": "command", + "command": "plannotator", + "timeout": 345600 + } + ] + } + ] + } +} +``` + +Notes: + +- Codex loads `hooks.json` next to active config layers, so either the global `~/.codex` or repo-local `.codex` location works. +- This currently depends on Codex hooks, which are experimental and disabled on Windows in the current official docs. 
+- Because this uses `Stop`, the review happens after Codex renders the plan turn, not at a dedicated `ExitPlanMode` interception point. + ## Usage +### Plan Review + +Once hooks are enabled, plan review opens automatically whenever a Codex turn ends with a plan. Approving keeps the turn completed. Sending feedback returns a `Stop` continuation reason so Codex revises the plan and Plannotator shows version history and diffs across revisions. + +### Local End-to-End Harness + +From the repo root, you can run a disposable local E2E flow against a real Codex session: + +```bash +./tests/manual/local/test-codex-plan-review-e2e.sh --keep +``` + +This uses a temporary `HOME`, sample git repo, repo-local Codex CLI, and repo-local `plannotator` wrapper so it +doesn't modify your installed Codex or Plannotator state. If you want to automate the opened review UI with Playwright, +set `PLANNOTATOR_BROWSER=/usr/bin/true` before running the script. + ### Code Review Run `!plannotator review` to open the code review UI for your current changes: diff --git a/apps/hook/server/codex-session.test.ts b/apps/hook/server/codex-session.test.ts index 03121ace..e737147e 100644 --- a/apps/hook/server/codex-session.test.ts +++ b/apps/hook/server/codex-session.test.ts @@ -6,11 +6,11 @@ * Uses synthetic JSONL fixtures matching the real Codex rollout format. 
*/ -import { describe, expect, test, beforeEach, afterEach } from "bun:test"; -import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from "node:fs"; +import { describe, expect, test, afterEach } from "bun:test"; +import { mkdtempSync, writeFileSync, rmSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; -import { getLastCodexMessage } from "./codex-session"; +import { getLastCodexMessage, getLatestCodexPlan } from "./codex-session"; // --- Fixture Helpers --- @@ -86,6 +86,57 @@ function eventMsg(type: string): string { }); } +function turnStarted(turnId: string): string { + return JSON.stringify({ + timestamp: new Date().toISOString(), + type: "event_msg", + payload: { + type: "task_started", + turn_id: turnId, + }, + }); +} + +function turnCompleted(turnId: string): string { + return JSON.stringify({ + timestamp: new Date().toISOString(), + type: "event_msg", + payload: { + type: "task_complete", + turn_id: turnId, + }, + }); +} + +function completedPlanItem(text: string, turnId: string): string { + return JSON.stringify({ + timestamp: new Date().toISOString(), + type: "event_msg", + payload: { + type: "item_completed", + turn_id: turnId, + item: { + type: "Plan", + id: `plan_${crypto.randomUUID().slice(0, 12)}`, + text, + }, + }, + }); +} + +function hookPrompt(text: string): string { + return rolloutLine("response_item", { + type: "message", + role: "user", + content: [ + { + type: "input_text", + text: `${text}`, + }, + ], + }); +} + function buildRollout(...lines: string[]): string { return lines.join("\n"); } @@ -243,3 +294,147 @@ describe("getLastCodexMessage", () => { expect(result!.text).toBe("Valid message"); }); }); + +describe("getLatestCodexPlan", () => { + test("prefers the latest persisted plan item for the current turn", () => { + const turnId = "turn-plan-item"; + const path = writeTempRollout( + buildRollout( + sessionMeta(), + turnStarted(turnId), + assistantMessage("\nFallback text\n"), + 
completedPlanItem("Authoritative plan item", turnId) + ) + ); + + const result = getLatestCodexPlan(path, { turnId }); + expect(result).toEqual({ + text: "Authoritative plan item", + source: "plan-item", + }); + }); + + test("falls back to raw proposed_plan blocks for plan-only assistant replies", () => { + const turnId = "turn-plan-only"; + const path = writeTempRollout( + buildRollout( + sessionMeta(), + turnStarted(turnId), + assistantMessage("\n- First\n- Second\n") + ) + ); + + const result = getLatestCodexPlan(path, { turnId }); + expect(result).toEqual({ + text: "- First\n- Second", + source: "assistant-message", + }); + }); + + test("extracts plan blocks surrounded by assistant prose", () => { + const turnId = "turn-prose"; + const path = writeTempRollout( + buildRollout( + sessionMeta(), + turnStarted(turnId), + assistantMessage( + [ + "Here is the plan I recommend.", + "", + "", + "1. Inspect hook payloads", + "2. Launch Plannotator", + "", + "", + "I can revise it if needed.", + ].join("\n") + ) + ) + ); + + const result = getLatestCodexPlan(path, { turnId }); + expect(result).toEqual({ + text: "1. Inspect hook payloads\n2. 
Launch Plannotator", + source: "assistant-message", + }); + }); + + test("ignores plans from older turns when the current turn has none", () => { + const oldTurnId = "turn-old"; + const currentTurnId = "turn-current"; + const path = writeTempRollout( + buildRollout( + sessionMeta(), + turnStarted(oldTurnId), + completedPlanItem("Old plan", oldTurnId), + turnCompleted(oldTurnId), + turnStarted(currentTurnId), + assistantMessage("Just answering a regular question.") + ) + ); + + const result = getLatestCodexPlan(path, { turnId: currentTurnId }); + expect(result).toBeNull(); + }); + + test("returns null when Stop re-entry has no revised plan after the hook prompt", () => { + const turnId = "turn-stop-no-revision"; + const path = writeTempRollout( + buildRollout( + sessionMeta(), + turnStarted(turnId), + completedPlanItem("Original plan", turnId), + hookPrompt("Please revise the plan."), + assistantMessage("I will think through the feedback.") + ) + ); + + const result = getLatestCodexPlan(path, { + turnId, + stopHookActive: true, + }); + expect(result).toBeNull(); + }); + + test("returns null when Stop re-entry repeats the same plan", () => { + const turnId = "turn-stop-duplicate"; + const path = writeTempRollout( + buildRollout( + sessionMeta(), + turnStarted(turnId), + completedPlanItem("Original plan", turnId), + hookPrompt("Please revise the plan."), + completedPlanItem("Original plan", turnId) + ) + ); + + const result = getLatestCodexPlan(path, { + turnId, + stopHookActive: true, + }); + expect(result).toBeNull(); + }); + + test("returns the revised plan after a denied Stop review", () => { + const turnId = "turn-stop-revised"; + const path = writeTempRollout( + buildRollout( + sessionMeta(), + turnStarted(turnId), + completedPlanItem("Original plan", turnId), + hookPrompt("Please revise the plan."), + assistantMessage("\nRevised fallback plan\n"), + completedPlanItem("Revised authoritative plan", turnId) + ) + ); + + const result = getLatestCodexPlan(path, { + 
turnId, + stopHookActive: true, + }); + expect(result).toEqual({ + text: "Revised authoritative plan", + source: "plan-item", + }); + }); +}); diff --git a/apps/hook/server/codex-session.ts b/apps/hook/server/codex-session.ts index bcb3c2ea..fd615be9 100644 --- a/apps/hook/server/codex-session.ts +++ b/apps/hook/server/codex-session.ts @@ -18,6 +18,8 @@ import { homedir } from "node:os"; // --- Types --- +type CodexPlanSource = "plan-item" | "assistant-message"; + interface RolloutEntry { timestamp?: string; type: string; @@ -25,10 +27,35 @@ interface RolloutEntry { type?: string; role?: string; content?: { type: string; text?: string }[]; + turn_id?: string; + item?: { + type?: string; + text?: string; + [key: string]: unknown; + }; [key: string]: unknown; }; } +interface CodexPlanCandidate { + index: number; + text: string; + source: CodexPlanSource; +} + +export interface CodexPlanResult { + text: string; + source: CodexPlanSource; +} + +export interface GetLatestCodexPlanOptions { + turnId?: string; + stopHookActive?: boolean; +} + +const TURN_START_TYPES = new Set(["task_started", "turn_started"]); +const PROPOSED_PLAN_RE = /([\s\S]*?)<\/proposed_plan>/gi; + // --- Rollout File Discovery --- /** @@ -84,6 +111,175 @@ function isDir(path: string): boolean { // --- Message Extraction --- +function parseRolloutEntries(rolloutPath: string): RolloutEntry[] { + const content = readFileSync(rolloutPath, "utf-8"); + if (!content.trim()) return []; + + return content + .split(/\r?\n/) + .filter(Boolean) + .flatMap((line) => { + try { + return [JSON.parse(line) as RolloutEntry]; + } catch { + return []; + } + }); +} + +function getMessageText(entry: RolloutEntry): string | null { + if (entry.type !== "response_item") return null; + if (entry.payload?.type !== "message") return null; + + const contentBlocks = entry.payload?.content; + if (!Array.isArray(contentBlocks)) return null; + + const textParts = contentBlocks + .map((block) => (typeof block.text === "string" ? 
block.text.trim() : "")) + .filter(Boolean); + + if (textParts.length === 0) return null; + + return textParts.join("\n"); +} + +function extractLastProposedPlan(text: string): string | null { + const matches = Array.from(text.matchAll(PROPOSED_PLAN_RE)); + const latest = matches.at(-1)?.[1]?.trim(); + return latest || null; +} + +function normalizePlan(text: string): string { + return text.replace(/\r\n/g, "\n").trim(); +} + +function findLastIndex( + entries: RolloutEntry[], + predicate: (entry: RolloutEntry) => boolean +): number { + for (let i = entries.length - 1; i >= 0; i--) { + if (predicate(entries[i])) return i; + } + return -1; +} + +function findTurnStartIndex(entries: RolloutEntry[], turnId?: string): number { + const matchingTurnStart = findLastIndex( + entries, + (entry) => + entry.type === "event_msg" && + TURN_START_TYPES.has(entry.payload?.type || "") && + (!turnId || entry.payload?.turn_id === turnId) + ); + if (matchingTurnStart !== -1) return matchingTurnStart; + + const matchingTurnContext = findLastIndex( + entries, + (entry) => + entry.type === "turn_context" && + (!turnId || entry.payload?.turn_id === turnId) + ); + if (matchingTurnContext !== -1) return matchingTurnContext; + + const lastTurnStart = findLastIndex( + entries, + (entry) => + entry.type === "event_msg" && + TURN_START_TYPES.has(entry.payload?.type || "") + ); + if (lastTurnStart !== -1) return lastTurnStart; + + const lastTurnContext = findLastIndex( + entries, + (entry) => entry.type === "turn_context" + ); + return lastTurnContext === -1 ? 
0 : lastTurnContext; +} + +function isHookPromptMessage(entry: RolloutEntry): boolean { + if (entry.type !== "response_item") return false; + if (entry.payload?.type !== "message") return false; + if (entry.payload?.role !== "user") return false; + + const messageText = getMessageText(entry); + return !!messageText?.includes("= Math.max(startIndex, 0); i--) { + if (isHookPromptMessage(entries[i])) return i; + } + return -1; +} + +function getPlanItemText( + entry: RolloutEntry, + turnId?: string +): string | null { + if (entry.type !== "event_msg") return null; + if (entry.payload?.type !== "item_completed") return null; + if (turnId && entry.payload?.turn_id !== turnId) return null; + + const itemType = entry.payload?.item?.type; + if (itemType !== "Plan" && itemType !== "plan") return null; + + const text = entry.payload?.item?.text; + return typeof text === "string" && text.trim() ? text.trim() : null; +} + +function getAssistantProposedPlanText(entry: RolloutEntry): string | null { + if (entry.type !== "response_item") return null; + if (entry.payload?.type !== "message") return null; + if (entry.payload?.role !== "assistant") return null; + + const messageText = getMessageText(entry); + if (!messageText) return null; + + return extractLastProposedPlan(messageText); +} + +function collectPlanCandidates( + entries: RolloutEntry[], + startIndex: number, + turnId?: string +): CodexPlanCandidate[] { + const candidates: CodexPlanCandidate[] = []; + + for (let i = Math.max(startIndex, 0); i < entries.length; i++) { + const entry = entries[i]; + + const planItemText = getPlanItemText(entry, turnId); + if (planItemText) { + candidates.push({ index: i, text: planItemText, source: "plan-item" }); + } + + const assistantPlanText = getAssistantProposedPlanText(entry); + if (assistantPlanText) { + candidates.push({ + index: i, + text: assistantPlanText, + source: "assistant-message", + }); + } + } + + return candidates; +} + +function pickLatestPreferredPlan( + candidates: 
CodexPlanCandidate[] +): CodexPlanCandidate | null { + const latestPlanItem = [...candidates] + .reverse() + .find((candidate) => candidate.source === "plan-item"); + if (latestPlanItem) return latestPlanItem; + + return candidates.at(-1) || null; +} + /** * Extract the last assistant message from a Codex rollout file. * @@ -97,33 +293,87 @@ function isDir(path: string): boolean { export function getLastCodexMessage( rolloutPath: string ): { text: string } | null { - const content = readFileSync(rolloutPath, "utf-8"); - const lines = content.trim().split("\n"); + const entries = parseRolloutEntries(rolloutPath); // Walk backward - for (let i = lines.length - 1; i >= 0; i--) { - let entry: RolloutEntry; - try { - entry = JSON.parse(lines[i]); - } catch { - continue; - } - + for (let i = entries.length - 1; i >= 0; i--) { + const entry = entries[i]; if (entry.type !== "response_item") continue; if (entry.payload?.type !== "message") continue; if (entry.payload?.role !== "assistant") continue; - const contentBlocks = entry.payload?.content; - if (!Array.isArray(contentBlocks)) continue; + const messageText = getMessageText(entry); + if (messageText) return { text: messageText }; + } - const textParts = contentBlocks - .filter((b) => b.type === "output_text" && b.text?.trim()) - .map((b) => b.text!); + return null; +} + +/** + * Extract the latest Codex plan from a rollout file. + * + * Primary source: persisted completed TurnItem::Plan events. + * Fallback source: raw assistant response_item messages that still contain a + * block in the rollout transcript. 
+ * + * When stopHookActive is true, this only returns a changed post-feedback plan: + * - no plan after the last hook prompt => null + * - identical plan after the last hook prompt => null + */ +export function getLatestCodexPlan( + rolloutPath: string, + options: GetLatestCodexPlanOptions = {} +): CodexPlanResult | null { + const entries = parseRolloutEntries(rolloutPath); + if (entries.length === 0) return null; - if (textParts.length === 0) continue; + const turnStartIndex = findTurnStartIndex(entries, options.turnId); + const candidates = collectPlanCandidates( + entries, + turnStartIndex, + options.turnId + ); + if (candidates.length === 0) return null; - return { text: textParts.join("\n") }; + if (!options.stopHookActive) { + const latestPlan = pickLatestPreferredPlan(candidates); + return latestPlan + ? { text: latestPlan.text, source: latestPlan.source } + : null; } - return null; + const lastHookPromptIndex = findLastHookPromptIndex(entries, turnStartIndex); + + if (lastHookPromptIndex === -1) { + const latestPlan = pickLatestPreferredPlan(candidates); + return latestPlan + ? 
{ text: latestPlan.text, source: latestPlan.source } + : null; + } + + const plansAfterHookPrompt = candidates.filter( + (candidate) => candidate.index > lastHookPromptIndex + ); + if (plansAfterHookPrompt.length === 0) return null; + + const latestAfterHookPrompt = pickLatestPreferredPlan(plansAfterHookPrompt); + if (!latestAfterHookPrompt) return null; + + const plansBeforeHookPrompt = candidates.filter( + (candidate) => candidate.index < lastHookPromptIndex + ); + const latestBeforeHookPrompt = pickLatestPreferredPlan(plansBeforeHookPrompt); + + if ( + latestBeforeHookPrompt && + normalizePlan(latestBeforeHookPrompt.text) === + normalizePlan(latestAfterHookPrompt.text) + ) { + return null; + } + + return { + text: latestAfterHookPrompt.text, + source: latestAfterHookPrompt.source, + }; } diff --git a/apps/hook/server/index.ts b/apps/hook/server/index.ts index 89284c5e..35fcd423 100644 --- a/apps/hook/server/index.ts +++ b/apps/hook/server/index.ts @@ -1,10 +1,10 @@ /** - * Plannotator CLI for Claude Code & Copilot CLI + * Plannotator CLI for Claude Code, Codex, Gemini CLI, and Copilot CLI * * Supports eight modes: * * 1. 
Plan Review (default, no args): - * - Spawned by ExitPlanMode hook (Claude Code) + * - Spawned by Claude/Gemini/Codex hook entrypoints * - Reads hook event from stdin, extracts plan content * - Serves UI, returns approve/deny decision to stdout * @@ -82,7 +82,7 @@ import { planDenyFeedback } from "@plannotator/shared/feedback-templates"; import { readImprovementHook } from "@plannotator/shared/improvement-hooks"; import type { Origin } from "@plannotator/shared/agents"; import { findSessionLogsForCwd, resolveSessionLogByPpid, findSessionLogsByAncestorWalk, getLastRenderedMessage, type RenderedMessage } from "./session-log"; -import { findCodexRolloutByThreadId, getLastCodexMessage } from "./codex-session"; +import { findCodexRolloutByThreadId, getLastCodexMessage, getLatestCodexPlan } from "./codex-session"; import { findCopilotPlanContent, findCopilotSessionForCwd, getLastCopilotMessage } from "./copilot-session"; import { formatInteractiveNoArgClarification, @@ -957,35 +957,101 @@ if (args[0] === "sessions") { // Read hook event from stdin const eventJson = await Bun.stdin.text(); - let planContent = ""; - let permissionMode = "default"; - let isGemini = false; - let planFilename = ""; let event: Record; try { event = JSON.parse(eventJson); + } catch (e: any) { + console.error(`Failed to parse hook event from stdin: ${e?.message || e}`); + process.exit(1); + } + + if (event.hook_event_name === "Stop") { + const rolloutPath = + (typeof event.transcript_path === "string" && event.transcript_path) || + (process.env.CODEX_THREAD_ID + ? 
findCodexRolloutByThreadId(process.env.CODEX_THREAD_ID) + : null); - // Detect harness: Gemini sends plan_filename (file on disk), Claude Code sends plan (inline) - planFilename = event.tool_input?.plan_filename || event.tool_input?.plan_path || ""; - isGemini = !!planFilename; - - if (isGemini) { - // Reconstruct full plan path from transcript_path and session_id: - // transcript_path = /chats/session-...json - // plan lives at = //plans/ - const projectTempDir = path.dirname(path.dirname(event.transcript_path)); - const planFilePath = path.join(projectTempDir, event.session_id, "plans", planFilename); - planContent = await Bun.file(planFilePath).text(); + if (!rolloutPath || !existsSync(rolloutPath)) { + process.exit(0); + } + + const latestPlan = getLatestCodexPlan(rolloutPath, { + turnId: typeof event.turn_id === "string" ? event.turn_id : undefined, + stopHookActive: !!event.stop_hook_active, + }); + + if (!latestPlan?.text) { + process.exit(0); + } + + const planProject = (await detectProjectName()) ?? 
"_unknown"; + const server = await startPlannotatorServer({ + plan: latestPlan.text, + origin: "codex", + sharingEnabled, + shareBaseUrl, + pasteApiUrl, + htmlContent: planHtmlContent, + onReady: async (url, isRemote, port) => { + handleServerReady(url, isRemote, port); + + if (isRemote && sharingEnabled) { + await writeRemoteShareLink(latestPlan.text, shareBaseUrl, "review the plan", "plan only").catch(() => {}); + } + }, + }); + + registerSession({ + pid: process.pid, + port: server.port, + url: server.url, + mode: "plan", + project: planProject, + startedAt: new Date().toISOString(), + label: `plan-${planProject}`, + }); + + const result = await server.waitForDecision(); + await Bun.sleep(1500); + server.stop(); + + if (result.approved) { + console.log("{}"); } else { - planContent = event.tool_input?.plan || ""; + console.log( + JSON.stringify({ + decision: "block", + reason: planDenyFeedback(result.feedback || "", "Stop"), + }) + ); } - permissionMode = event.permission_mode || "default"; - } catch (e: any) { - console.error(`Failed to parse hook event from stdin: ${e?.message || e}`); - process.exit(1); + process.exit(0); + } + + let planContent = ""; + let permissionMode = "default"; + let isGemini = false; + let planFilename = ""; + + // Detect harness: Gemini sends plan_filename (file on disk), Claude Code sends plan (inline) + planFilename = event.tool_input?.plan_filename || event.tool_input?.plan_path || ""; + isGemini = !!planFilename; + + if (isGemini) { + // Reconstruct full plan path from transcript_path and session_id: + // transcript_path = /chats/session-...json + // plan lives at = //plans/ + const projectTempDir = path.dirname(path.dirname(event.transcript_path)); + const planFilePath = path.join(projectTempDir, event.session_id, "plans", planFilename); + planContent = await Bun.file(planFilePath).text(); + } else { + planContent = event.tool_input?.plan || ""; } + permissionMode = event.permission_mode || "default"; + if (!planContent) { 
console.error("No plan content in hook event"); process.exit(1); diff --git a/apps/marketing/src/content/docs/commands/plan-review.md b/apps/marketing/src/content/docs/commands/plan-review.md index 3fb1be93..63f8750a 100644 --- a/apps/marketing/src/content/docs/commands/plan-review.md +++ b/apps/marketing/src/content/docs/commands/plan-review.md @@ -1,12 +1,12 @@ --- title: "Plan Review" -description: "The core plan review flow — how Plannotator intercepts ExitPlanMode and presents the annotation UI." +description: "The core plan review flow across Claude Code, Codex, and other supported agent hosts." sidebar: order: 10 section: "Commands" --- -Plan review is the core Plannotator workflow. It's not a slash command — it fires automatically when your agent calls `ExitPlanMode`. +Plan review is the core Plannotator workflow. It's not a slash command. Plannotator opens automatically when the host agent reaches its plan handoff point. ## How it works @@ -29,6 +29,32 @@ Agent resubmits → Plan Diff shows what changed The hook configuration lives at `apps/hook/hooks/hooks.json` and matches the `ExitPlanMode` tool name. +## Codex flow + +Codex does not expose a dedicated `ExitPlanMode` interception point. Instead, Plannotator integrates through Codex's experimental `Stop` hook. + +``` +Codex turn stops + ↓ +Stop hook fires + ↓ +Plannotator reads transcript_path rollout + ↓ +Latest completed plan item is extracted +fallback: raw <proposed_plan> block from assistant response + ↓ +Browser opens with the normal review UI + ↓ +Approve → turn stays completed +Deny → Stop hook returns continuation feedback + ↓ +Codex revises the plan in the same turn + ↓ +Plannotator reopens only if the revised plan actually changed +``` + +This means Codex plan review is post-render rather than pre-submit, but you still get the same annotations, plan history, diff view, and revision loop. 
+ ## Annotation types When you select text in the plan, the annotation toolbar appears with these options: diff --git a/apps/marketing/src/content/docs/getting-started/installation.md b/apps/marketing/src/content/docs/getting-started/installation.md index 61b78e3a..9687646e 100644 --- a/apps/marketing/src/content/docs/getting-started/installation.md +++ b/apps/marketing/src/content/docs/getting-started/installation.md @@ -122,13 +122,49 @@ Coming soon. ## Codex -Plan mode is not yet supported. +Codex plan review is supported through the experimental `Stop` hook. -Install the binary, then use it directly: +This is a post-render review flow: when a Codex turn stops, Plannotator reads the current transcript, extracts the latest plan, and opens the same plan review UI used by the other integrations. If you deny the plan, Plannotator returns a `Stop` continuation reason so Codex can revise the plan in the same turn. +Enable hooks in `~/.codex/config.toml` or `<repo>/.codex/config.toml`: + +```toml +[features] +codex_hooks = true +``` + +Then add `hooks.json` next to that config layer: + +```json +{ + "hooks": { + "Stop": [ + { + "hooks": [ + { + "type": "command", + "command": "plannotator", + "timeout": 345600 + } + ] + } + ] + } +} ``` -!plannotator review # Code review for current changes -!plannotator annotate file.md # Annotate a markdown file + +Notes: + +- Codex discovers hooks from `~/.codex/hooks.json` and `<repo>/.codex/hooks.json`, and loads all matching files. +- Codex hooks are currently experimental. +- The current official Codex hooks docs say hooks are disabled on Windows, so this flow is currently macOS/Linux/WSL only. + +You can still use the direct commands at any time: + +```bash +!plannotator review +!plannotator annotate file.md +!plannotator last ``` ## Pi diff --git a/tests/README.md b/tests/README.md index 93eef4a0..72effb19 100644 --- a/tests/README.md +++ b/tests/README.md @@ -11,6 +11,7 @@ These scripts test the UI components and require a browser. 
```bash ./tests/manual/local/test-hook.sh # Claude Code simulation ./tests/manual/local/test-hook-2.sh # OpenCode origin badge test +./tests/manual/local/test-codex-plan-review-e2e.sh # Real Codex Stop-hook flow in disposable HOME ``` **Code review UI:** @@ -58,6 +59,23 @@ Options: - `--keep`: Don't clean up sandbox on exit - `--no-git`: Skip git initialization (tests non-git fallback) +**Codex Stop-hook end-to-end harness:** + +```bash +./tests/manual/local/test-codex-plan-review-e2e.sh [--keep] [--skip-build] +``` + +Builds the hook and review apps, creates a disposable `HOME` plus sample git repo, copies your Codex `auth.json`, +enables `codex_hooks`, and runs a real `codex exec` against the sample project. The script writes logs, rollout paths, +history indices, and session URLs into an artifact directory under the temp root. + +Tips: + +- Set `PLANNOTATOR_BROWSER=/usr/bin/true` when you want to drive the opened plan-review session with Playwright + instead of auto-opening a browser. +- The validated workflow is: run the script in one terminal, then point Playwright at the printed session URL from a + second terminal. + **Obsidian utility:** ```bash diff --git a/tests/UI-TESTING.md b/tests/UI-TESTING.md index e01879fa..c61f3a86 100644 --- a/tests/UI-TESTING.md +++ b/tests/UI-TESTING.md @@ -157,6 +157,7 @@ UI test scripts simulate plugin behavior locally: # Plan review UI tests ./tests/manual/local/test-hook.sh # Claude Code simulation ./tests/manual/local/test-hook-2.sh # OpenCode origin badge test +./tests/manual/local/test-codex-plan-review-e2e.sh # Real Codex Stop-hook E2E # Code review UI test ./tests/manual/local/test-opencode-review.sh # Code review UI test @@ -187,6 +188,19 @@ UI test scripts simulate plugin behavior locally: 4. Verifies "OpenCode" badge + "Send Feedback" button (not "Copy Feedback") 5. Tests feedback submission flow +**`test-codex-plan-review-e2e.sh`** + +1. Builds the hook + review apps (unless `--skip-build`) +2. 
Creates a disposable `HOME` and sample git repo +3. Copies your Codex auth into the disposable config +4. Enables `codex_hooks` and registers a `Stop` hook pointing at the local Plannotator entrypoint +5. Runs a real `codex exec` prompt that returns only a `<proposed_plan>` block +6. Leaves behind rollout logs, Plannotator history, plan files, and session URLs in an artifact directory + +This is the best harness when you want to verify the full Codex deny/revise/approve loop instead of simulating hook +payloads. For browser automation, set `PLANNOTATOR_BROWSER=/usr/bin/true`, keep the script running in one terminal, +and drive the printed session URL with Playwright from another terminal. + See [tests/README.md](../tests/README.md) for additional integration and utility test scripts. ### Manual Testing Workflow diff --git a/tests/manual/artifacts/codex-plan-review/archive-sidebar.png b/tests/manual/artifacts/codex-plan-review/archive-sidebar.png new file mode 100644 index 00000000..f346b891 Binary files /dev/null and b/tests/manual/artifacts/codex-plan-review/archive-sidebar.png differ diff --git a/tests/manual/artifacts/codex-plan-review/diff-view.png b/tests/manual/artifacts/codex-plan-review/diff-view.png new file mode 100644 index 00000000..efed6461 Binary files /dev/null and b/tests/manual/artifacts/codex-plan-review/diff-view.png differ diff --git a/tests/manual/artifacts/codex-plan-review/feedback-comment.png b/tests/manual/artifacts/codex-plan-review/feedback-comment.png new file mode 100644 index 00000000..c6f27fbb Binary files /dev/null and b/tests/manual/artifacts/codex-plan-review/feedback-comment.png differ diff --git a/tests/manual/artifacts/codex-plan-review/initial-plan.png b/tests/manual/artifacts/codex-plan-review/initial-plan.png new file mode 100644 index 00000000..4a6ea2ef Binary files /dev/null and b/tests/manual/artifacts/codex-plan-review/initial-plan.png differ diff --git a/tests/manual/artifacts/codex-plan-review/revised-plan.png 
b/tests/manual/artifacts/codex-plan-review/revised-plan.png new file mode 100644 index 00000000..54ed08e7 Binary files /dev/null and b/tests/manual/artifacts/codex-plan-review/revised-plan.png differ diff --git a/tests/manual/artifacts/codex-plan-review/versions-sidebar.png b/tests/manual/artifacts/codex-plan-review/versions-sidebar.png new file mode 100644 index 00000000..9c508f6a Binary files /dev/null and b/tests/manual/artifacts/codex-plan-review/versions-sidebar.png differ diff --git a/tests/manual/local/test-codex-plan-review-e2e.sh b/tests/manual/local/test-codex-plan-review-e2e.sh new file mode 100755 index 00000000..3dfbd8ed --- /dev/null +++ b/tests/manual/local/test-codex-plan-review-e2e.sh @@ -0,0 +1,406 @@ +#!/bin/bash +# End-to-end Codex Stop-hook test harness for Plannotator. +# +# Creates a disposable HOME and sample workspace, enables Codex hooks there, +# runs a real `codex exec` plan-only prompt, and leaves behind artifacts that +# make it easy to inspect rollout files, Plannotator history, and active URLs. +# +# Usage: +# ./tests/manual/local/test-codex-plan-review-e2e.sh [--keep] [--detach] +# [--skip-build] [--root-dir DIR] [--model MODEL] [--sandbox MODE] +# [--codex-bin PATH] [--prompt-file FILE] + +set -euo pipefail + +usage() { + cat <<'EOF' +Usage: ./tests/manual/local/test-codex-plan-review-e2e.sh [options] + +Runs a real Codex exec in a disposable HOME/workspace with Plannotator Stop hooks enabled. 
+ +Options: + --keep Keep the sandbox directory after exit + --detach Best-effort background launch; foreground mode is the validated path + --skip-build Reuse existing build artifacts + --root-dir DIR Use DIR instead of a temp sandbox root + --model MODEL Codex model to use (default: gpt-5.4-mini) + --sandbox MODE Codex sandbox for `exec` (default: read-only) + --codex-bin PATH Override the Codex CLI binary or codex.js path + --prompt-file FILE Use a custom prompt file instead of the built-in sample prompt + --help Show this help + +Environment: + PLANNOTATOR_BROWSER Passed through to the disposable Codex run. Set to + /usr/bin/true when you want to drive the review with + Playwright from another terminal instead of an auto-opened browser. + CODEX_AUTH_JSON Override the auth.json copied into the disposable HOME. +EOF +} + +require_cmd() { + if ! command -v "$1" >/dev/null 2>&1; then + echo "Missing required command: $1" >&2 + exit 1 + fi +} + +resolve_cmd() { + local name="$1" + local fallback="${2:-}" + if command -v "$name" >/dev/null 2>&1; then + command -v "$name" + return + fi + if [[ -n "$fallback" && -x "$fallback" ]]; then + printf '%s\n' "$fallback" + return + fi + echo "Missing required command: $name" >&2 + exit 1 +} + +KEEP_SANDBOX=false +DETACH=false +SKIP_BUILD=false +ROOT_DIR="" +MODEL="${PLANNOTATOR_CODEX_MODEL:-gpt-5.4-mini}" +SANDBOX_MODE="${PLANNOTATOR_CODEX_SANDBOX:-read-only}" +CODEX_BIN="${CODEX_BIN:-}" +PROMPT_FILE="" +ORIGINAL_HOME="$HOME" + +while [[ $# -gt 0 ]]; do + case "$1" in + --keep) + KEEP_SANDBOX=true + ;; + --detach) + DETACH=true + KEEP_SANDBOX=true + ;; + --skip-build) + SKIP_BUILD=true + ;; + --root-dir) + ROOT_DIR="$2" + shift + ;; + --model) + MODEL="$2" + shift + ;; + --sandbox) + SANDBOX_MODE="$2" + shift + ;; + --codex-bin) + CODEX_BIN="$2" + shift + ;; + --prompt-file) + PROMPT_FILE="$2" + shift + ;; + --help) + usage + exit 0 + ;; + *) + echo "Unknown argument: $1" >&2 + echo >&2 + usage >&2 + exit 1 + ;; + esac + shift 
+done + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)" +BUN_BIN="$(resolve_cmd bun "$ORIGINAL_HOME/.bun/bin/bun")" +GIT_BIN="$(resolve_cmd git)" +NODE_BIN="$(resolve_cmd node)" +export PATH="$(dirname "$BUN_BIN"):$PATH" + +resolve_codex_js() { + find "$PROJECT_ROOT/node_modules" -path '*/@openai/codex/bin/codex.js' | sort | head -n 1 +} + +declare -a CODEX_CMD=() +if [[ -n "$CODEX_BIN" ]]; then + if [[ "$CODEX_BIN" == *.js ]]; then + CODEX_CMD=("$NODE_BIN" "$CODEX_BIN") + else + CODEX_CMD=("$CODEX_BIN") + fi +else + REPO_CODEX_JS="$(resolve_codex_js)" + if [[ -n "$REPO_CODEX_JS" ]]; then + CODEX_CMD=("$NODE_BIN" "$REPO_CODEX_JS") + CODEX_BIN="$REPO_CODEX_JS" + elif command -v codex >/dev/null 2>&1; then + CODEX_CMD=("$(command -v codex)") + CODEX_BIN="${CODEX_CMD[0]}" + else + echo "Could not find a Codex CLI. Install dependencies or pass --codex-bin PATH." >&2 + exit 1 + fi +fi + +if [[ -n "$PROMPT_FILE" && ! -f "$PROMPT_FILE" ]]; then + echo "Prompt file not found: $PROMPT_FILE" >&2 + exit 1 +fi + +if [[ -z "$ROOT_DIR" ]]; then + ROOT_DIR="$(mktemp -d -t plannotator-codex-stop-e2e-XXXXXX)" +else + mkdir -p "$ROOT_DIR" + ROOT_DIR="$(cd "$ROOT_DIR" && pwd)" +fi + +TEMP_HOME="$ROOT_DIR/home" +WORKSPACE_DIR="$ROOT_DIR/workspace/sample-app" +BIN_DIR="$ROOT_DIR/bin" +ARTIFACTS_DIR="$ROOT_DIR/artifacts" +CODEX_LOG="$ARTIFACTS_DIR/codex-output.log" +METADATA_FILE="$ARTIFACTS_DIR/metadata.env" +PROMPT_PATH="$ARTIFACTS_DIR/prompt.txt" +RUNNER_SCRIPT="$BIN_DIR/run-codex-e2e" + +cleanup() { + local exit_code=$? 
+ echo + if [[ "$KEEP_SANDBOX" == "true" || $exit_code -ne 0 ]]; then + echo "Sandbox preserved at: $ROOT_DIR" + if [[ -f "$METADATA_FILE" ]]; then + echo "Artifact metadata: $METADATA_FILE" + fi + return + fi + echo "Cleaning up sandbox: $ROOT_DIR" + rm -rf "$ROOT_DIR" +} +trap cleanup EXIT + +mkdir -p "$TEMP_HOME/.codex" "$WORKSPACE_DIR/src" "$BIN_DIR" "$ARTIFACTS_DIR" + +AUTH_SRC="${CODEX_AUTH_JSON:-$ORIGINAL_HOME/.codex/auth.json}" +if [[ ! -f "$AUTH_SRC" ]]; then + echo "Codex auth file not found: $AUTH_SRC" >&2 + echo "Set CODEX_AUTH_JSON or run codex login first." >&2 + exit 1 +fi + +cp "$AUTH_SRC" "$TEMP_HOME/.codex/auth.json" +if [[ -f "$ORIGINAL_HOME/.codex/installation_id" ]]; then + cp "$ORIGINAL_HOME/.codex/installation_id" "$TEMP_HOME/.codex/installation_id" +fi + +cat > "$TEMP_HOME/.codex/config.toml" <<'EOF' +[features] +codex_hooks = true +EOF + +cat > "$TEMP_HOME/.codex/hooks.json" <<'EOF' +{ + "hooks": { + "Stop": [ + { + "hooks": [ + { + "type": "command", + "command": "plannotator", + "timeout": 345600 + } + ] + } + ] + } +} +EOF + +cat > "$BIN_DIR/plannotator" < "$WORKSPACE_DIR/package.json" <<'EOF' +{ + "name": "sample-app", + "private": true, + "type": "module", + "scripts": { + "test": "echo \"No tests yet\"" + } +} +EOF + +cat > "$WORKSPACE_DIR/README.md" <<'EOF' +# Sample App + +Tiny TypeScript app for exercising Codex plan review through Plannotator. +EOF + +cat > "$WORKSPACE_DIR/src/index.ts" <<'EOF' +export function greet(name: string): string { + return `Hello, ${name}!`; +} + +console.log(greet("World")); +EOF + +( + cd "$WORKSPACE_DIR" + "$GIT_BIN" init -q -b master + "$GIT_BIN" config user.email "test@example.com" + "$GIT_BIN" config user.name "Test User" + "$GIT_BIN" add -A + "$GIT_BIN" commit -q -m "Initial commit" +) + +if [[ -n "$PROMPT_FILE" ]]; then + cp "$PROMPT_FILE" "$PROMPT_PATH" +else + cat > "$PROMPT_PATH" <<'EOF' +Produce a concise implementation plan for adding theme support, tests, and docs to this sample app. 
Return your final answer ONLY as a ... block and do not implement anything. +EOF +fi + +if [[ "$SKIP_BUILD" != "true" ]]; then + echo "Building hook + review apps..." + ( + cd "$PROJECT_ROOT" + "$BUN_BIN" run build:review >/dev/null + "$BUN_BIN" run build:hook >/dev/null + ) +fi + +echo "Recording Codex metadata..." +env HOME="$TEMP_HOME" "${CODEX_CMD[@]}" --version > "$ARTIFACTS_DIR/codex-version.txt" 2>&1 +env HOME="$TEMP_HOME" "${CODEX_CMD[@]}" features list > "$ARTIFACTS_DIR/codex-features.txt" 2>&1 +env HOME="$TEMP_HOME" "${CODEX_CMD[@]}" login status > "$ARTIFACTS_DIR/codex-login-status.txt" 2>&1 || true + +if ! grep -q 'codex_hooks' "$ARTIFACTS_DIR/codex-features.txt"; then + echo "Selected Codex CLI does not expose codex_hooks." >&2 + echo "See: $ARTIFACTS_DIR/codex-features.txt" >&2 + exit 1 +fi + +cat > "$METADATA_FILE" < "$RUNNER_SCRIPT" +chmod +x "$RUNNER_SCRIPT" + +echo "=== Plannotator Codex Stop-hook E2E ===" +echo "Sandbox root: $ROOT_DIR" +echo "Workspace: $WORKSPACE_DIR" +echo "Artifacts: $ARTIFACTS_DIR" +echo "Codex binary: $CODEX_BIN" +echo "Model: $MODEL" +echo + +if [[ "$DETACH" == "true" ]]; then + nohup "$RUNNER_SCRIPT" >"$CODEX_LOG" 2>&1 < /dev/null & +else + "$RUNNER_SCRIPT" >"$CODEX_LOG" 2>&1 & +fi +CODEX_PID=$! 
# Record the PID of the backgrounded Codex runner so it can be inspected or
# killed from another terminal.
echo "$CODEX_PID" > "$ARTIFACTS_DIR/codex.pid"

# read_json_field FILE KEY
#
# Print the value of top-level KEY from the JSON document in FILE to stdout.
# Prints nothing when KEY is absent. Returns non-zero (node's exit status)
# when FILE cannot be read or is not valid JSON.
read_json_field() {
  "$NODE_BIN" -e 'const fs=require("fs"); const [file,key]=process.argv.slice(1); const data=JSON.parse(fs.readFileSync(file,"utf8")); const value=data[key]; if (value !== undefined) process.stdout.write(String(value));' "$1" "$2"
}

# Poll (up to 240 seconds) for the first Plannotator session file written
# inside the disposable HOME. Stop early if the Codex runner exits.
FIRST_SESSION_FILE=""
FIRST_SESSION_URL=""
deadline=$((SECONDS + 240))
while (( SECONDS < deadline )); do
  if compgen -G "$TEMP_HOME/.plannotator/sessions/*.json" >/dev/null; then
    candidate="$(find "$TEMP_HOME/.plannotator/sessions" -maxdepth 1 -type f -name '*.json' | sort | tail -n 1)"
    # The session file is written by another process, so it may be observed
    # while still empty or partially written. Running the parse as an `if`
    # condition keeps a transient failure from aborting the harness under
    # errexit; the next iteration simply retries.
    if session_url="$(read_json_field "$candidate" url 2>/dev/null)"; then
      FIRST_SESSION_FILE="$candidate"
      FIRST_SESSION_URL="$session_url"
      echo "$FIRST_SESSION_FILE" > "$ARTIFACTS_DIR/first-session-file.txt"
      printf '%s\n' "$FIRST_SESSION_URL" > "$ARTIFACTS_DIR/first-session-url.txt"
      echo "First Plannotator session: $FIRST_SESSION_URL"
      break
    fi
  fi
  if ! kill -0 "$CODEX_PID" 2>/dev/null; then
    break
  fi
  sleep 1
done

# Surface the "nothing showed up" case instead of continuing silently.
if [[ -z "$FIRST_SESSION_FILE" ]]; then
  echo "No readable Plannotator session file appeared under $TEMP_HOME/.plannotator/sessions." >&2
  echo "Check the Codex log: $CODEX_LOG" >&2
fi

# Detached mode hands control back to the user only while Codex is actually
# alive. If the runner has already exited, fall through to the normal
# wait/report path so its real exit status is recorded and propagated rather
# than claiming it is still running.
if [[ "$DETACH" == "true" ]] && kill -0 "$CODEX_PID" 2>/dev/null; then
  echo
  echo "Codex is still running in the background."
  echo "PID: $CODEX_PID"
  echo "Codex log: $CODEX_LOG"
  echo "Metadata: $METADATA_FILE"
  echo
  echo "To inspect active Plannotator sessions inside the sandbox:"
  echo "  HOME=\"$TEMP_HOME\" PATH=\"$BIN_DIR:\$PATH\" plannotator sessions"
  exit 0
fi

# Collect the runner's exit status. errexit is suspended so a non-zero status
# from `wait` does not terminate the script before the status is captured.
set +e
wait "$CODEX_PID"
CODEX_EXIT=$?
# Re-enable errexit and persist the Codex exit status with the other artifacts.
set -e
echo "$CODEX_EXIT" > "$ARTIFACTS_DIR/codex-exit-code.txt"

# write_file_index DIR INDEX_FILE
#
# Write a sorted list of every regular file under DIR to INDEX_FILE.
# Does nothing (and creates no index) when DIR does not exist.
write_file_index() {
  local source_dir="$1"
  local index_file="$2"
  if [[ ! -d "$source_dir" ]]; then
    return 0
  fi
  find "$source_dir" -type f | sort > "$index_file"
}

# Pick the rollout transcript that sorts last by path; stays empty when none
# is found (the trailing `|| true` keeps a failing pipeline from aborting).
ROLLOUT_PATH="$(
  find "$TEMP_HOME/.codex/sessions" -type f -name 'rollout-*.jsonl' \
    | sort \
    | tail -n 1 \
    || true
)"
if [[ -n "$ROLLOUT_PATH" ]]; then
  printf '%s\n' "$ROLLOUT_PATH" > "$ARTIFACTS_DIR/rollout-path.txt"
fi

write_file_index "$TEMP_HOME/.plannotator/history" "$ARTIFACTS_DIR/plannotator-history-files.txt"
write_file_index "$TEMP_HOME/.plannotator/plans" "$ARTIFACTS_DIR/plannotator-plan-files.txt"

# Final summary: always show the exit code and log, then each optional
# artifact only if it was actually produced.
echo
echo "Codex exit code: $CODEX_EXIT"
echo "Codex log: $CODEX_LOG"
[[ -z "$ROLLOUT_PATH" ]] || echo "Rollout: $ROLLOUT_PATH"
[[ ! -f "$ARTIFACTS_DIR/plannotator-history-files.txt" ]] || echo "History index: $ARTIFACTS_DIR/plannotator-history-files.txt"
[[ ! -f "$ARTIFACTS_DIR/plannotator-plan-files.txt" ]] || echo "Plan index: $ARTIFACTS_DIR/plannotator-plan-files.txt"

# Propagate the Codex runner's status as the harness's own exit status.
exit "$CODEX_EXIT"