From 4f73e764de2d0b8a7cef54f61ebd346e667e7214 Mon Sep 17 00:00:00 2001 From: MrFlounder Date: Tue, 17 Mar 2026 21:11:51 -0700 Subject: [PATCH 1/2] feat: default crabcode to codex --- .ralph/PROMPT.md | 2 +- README.md | 35 +-- docs/index.html | 12 +- docs/llms.txt | 13 +- docs/promptfoo-plugin.md | 2 +- examples/nodejs-monorepo.yaml | 2 +- examples/promptfoo-cloud.yaml | 2 +- examples/python-project.yaml | 2 +- plugins/promptfoo/src/agent/providers.ts | 2 +- plugins/promptfoo/src/serve.ts | 2 +- src/crabcode | 385 ++++++++++++++--------- tests/unit/test_agent_helpers.bats | 46 ++- 12 files changed, 317 insertions(+), 188 deletions(-) diff --git a/.ralph/PROMPT.md b/.ralph/PROMPT.md index 2224124..28bafca 100644 --- a/.ralph/PROMPT.md +++ b/.ralph/PROMPT.md @@ -94,7 +94,7 @@ layout: - name: server command: pnpm dev - name: main - command: claude --dangerously-skip-permissions + command: codex --full-auto env_sync: files: diff --git a/README.md b/README.md index 29b568c..93169a8 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ < > ``` -A lightning-fast tmux-based workspace manager for multi-repo development. Agent-agnostic — works with both [Claude Code](https://claude.ai/code) and [Codex CLI](https://github.com/openai/codex). Manage multiple projects, start full dev environments in seconds. +A lightning-fast tmux-based workspace manager for multi-repo development, built around [Codex CLI](https://github.com/openai/codex). Manage multiple projects, start full dev environments in seconds. ## Quick Start @@ -260,9 +260,9 @@ crab court 3230 # Judge + 2 reviewers ``` **Court Review** uses the judge pattern: -- **Judge (Claude)**: Orchestrates, verifies findings, delivers verdict -- **Reviewer A (Claude teammate)**: Independent code review -- **Reviewer B (Codex)**: Independent code review +- **Judge (Codex)**: Orchestrates, verifies findings, delivers verdict +- **Reviewer A (teammate pass)**: Independent code review +- **Reviewer B (Codex)**: Secondary review pass The judge traces every finding to actual code, resolves disagreements, and produces a verdict with zero false positives. @@ -286,14 +286,14 @@ crab session delete "feature-x" # Delete a session ### Agent Sync (`crab agent`) -Sync user-level configurations (MCP servers, custom agents/skills) between Claude Code and Codex CLI. Useful when switching a project's agent or maintaining parity across both. +Sync user-level Codex configuration, MCP servers, and skills. ```bash crab agent status # Audit what's configured on each side -crab agent sync mcp --from claude # Preview MCP server sync (dry run) -crab agent sync mcp --from claude --apply # Sync MCP servers Claude → Codex -crab agent sync agents --from claude # Preview agent → skill rewrites (dry run) -crab agent sync agents --from claude --apply # Rewrite Claude agents as Codex skills (LLM-assisted) +crab agent sync mcp # Preview MCP server sync (dry run) +crab agent sync mcp --apply # Apply MCP server sync +crab agent sync agents # Preview skill sync (dry run) +crab agent sync agents --apply # Apply skill sync crab agent sync all --apply # Sync everything both directions ``` @@ -407,7 +407,7 @@ Per-project config (`~/.crabcode/projects/.yaml`): ```yaml session_name: pf -agent: claude # or "codex" — defaults to claude if omitted +agent: codex # defaults to codex if omitted workspace_base: ~/Dev/my-project-workspaces main_repo: ~/Dev/my-project @@ -430,7 +430,7 @@ layout: - name: server command: pnpm dev - name: main - command: claude --dangerously-skip-permissions # or: codex --full-auto + command: "" # defaults to codex --full-auto; override directly if needed # Optional: persistent storage across resets shared_volume: @@ -457,18 +457,17 @@ See `examples/` for more configuration examples. - `OPENAI_API_KEY` or `ANTHROPIC_API_KEY` (for `crab pf`) - Slack bot token (for `crab pf serve`) -**For AI agents (pick one or both):** -- [Claude Code](https://claude.ai/code): `npm install -g @anthropic-ai/claude-code` +**For AI agents:** - [Codex CLI](https://github.com/openai/codex): `npm install -g @openai/codex` **For PR reviews (`crab review`, `crab court`):** - [gh](https://cli.github.com/) -- Claude Code and/or Codex CLI (court review uses both) +- Codex CLI ```bash # macOS brew install tmux yq zip gh -npm install -g @anthropic-ai/claude-code # and/or @openai/codex +npm install -g @openai/codex ``` ## Installation @@ -506,7 +505,7 @@ git pull origin main ┌─────────────────────────┬─────────────────────────┐ │ terminal │ │ │ (shell) │ main │ -├─────────────────────────┤ (claude/codex/editor) │ +├─────────────────────────┤ (codex/editor) │ │ server │ │ │ (pnpm dev) │ │ └─────────────────────────┴─────────────────────────┘ @@ -573,9 +572,9 @@ The restore agent walks through each phase — installing tools, restoring confi 6. **Edit config for your project:** ```bash # Set your layout commands in ~/.crabcode/projects/.yaml - # - agent: claude or codex (defaults to claude) + # - agent: codex (defaults to codex) # - server pane: your dev server (e.g., pnpm dev) - # - main pane: your agent command (e.g., claude --dangerously-skip-permissions, codex --full-auto) + # - main pane: optional override (defaults to codex --full-auto) ``` 7. **Start working:** diff --git a/docs/index.html b/docs/index.html index 1705e10..fa8f43b 100644 --- a/docs/index.html +++ b/docs/index.html @@ -1599,7 +1599,7 @@

Multi-Agent Code Review Tribunal

$ crab court 3230
⚖️ Assembling review tribunal...
-
✓ Reviewer A (Claude): analyzing architecture impact
+
✓ Reviewer A (Teammate): analyzing architecture impact
✓ Reviewer B (Codex): analyzing implementation correctness
✓ Judge: cross-referencing findings against source...
✓ Verdict delivered. 0 false positives. Justice served. 🦀
@@ -1609,7 +1609,7 @@

Multi-Agent Code Review Tribunal

🎫

Ticket-Driven Development

-

Connect workspaces directly to Linear tickets for automatic context injection. Branch naming, ticket metadata, and Claude context — all provisioned from a single identifier. Reduces ticket-to-first-commit latency by 76%.

+

Connect workspaces directly to Linear tickets for automatic context injection. Branch naming, ticket metadata, and agent context are all provisioned from a single identifier. Reduces ticket-to-first-commit latency by 76%.

@@ -1708,7 +1708,7 @@

Optimized Developer Experience

main
- claude >
+ codex >

AI pair programming
session operationalized. 🦀 @@ -1996,7 +1996,7 @@

Extensibility & Customization

crab session resume <name> -

Restore named Claude session with full context continuity

+

Restore named agent session with full context continuity

@@ -2473,7 +2473,7 @@

Extensibility & Customization

lines: [ { type: 'typed', text: '$ crab review 3230' }, { type: 'output', text: '📋 Fetching PR context...' }, - { type: 'output', text: '🤖 Claude reviewing...' }, + { type: 'output', text: '🤖 Codex reviewing...' }, { type: 'output', text: '✓ Review saved!' } ] }, @@ -2490,7 +2490,7 @@

Extensibility & Customization

{ type: 'typed', text: '$ crab ticket ENG-123' }, { type: 'output', text: '🎫 Fetching ticket context...' }, { type: 'output', text: '→ Workspace 4 provisioned' }, - { type: 'output', text: '✓ Claude has ticket context' } + { type: 'output', text: '✓ Codex has ticket context' } ] }, 'crab projects': { diff --git a/docs/llms.txt b/docs/llms.txt index cfad4af..9ba1017 100644 --- a/docs/llms.txt +++ b/docs/llms.txt @@ -82,7 +82,7 @@ crab review https://github.com/... # Full URL crab court 3230 # Judge + 2 reviewers ``` -Court review uses the judge pattern: a Judge (Claude) orchestrates two independent reviewers (Claude teammate + Codex), verifies every finding against actual code, resolves disagreements, and delivers a verdict with zero false positives. +Court review uses the judge pattern: a Codex-led judge by default orchestrates two independent review passes, verifies every finding against actual code, resolves disagreements, and delivers a verdict with zero false positives. ```bash crab review ls # List review sessions @@ -151,7 +151,7 @@ crab alias rm rr # Remove alias ### Session Management -Track and resume named Claude conversations: +Track and resume named agent conversations: ```bash crab session ls # List sessions @@ -190,6 +190,7 @@ Per-project config (`~/.crabcode/projects/.yaml`): ```yaml session_name: pf +agent: codex workspace_base: ~/Dev/my-project-workspaces main_repo: ~/Dev/my-project @@ -213,7 +214,7 @@ layout: - name: server command: pnpm dev - name: main - command: claude + command: "" # defaults to codex --full-auto # Persistent storage across workspace resets shared_volume: @@ -243,7 +244,7 @@ slack: ┌─────────────────────────┬─────────────────────────┐ │ terminal │ │ │ (shell) │ main │ -├─────────────────────────┤ (claude/editor) │ +├─────────────────────────┤ (codex/editor) │ │ server │ │ │ (pnpm dev) │ │ └─────────────────────────┴─────────────────────────┘ @@ -259,9 +260,7 @@ Keybindings (prefix: `Ctrl+a`): **Core:** bash, tmux, git, [yq](https://github.com/mikefarah/yq), zip -**For PR reviews:** [gh](https://cli.github.com/) (GitHub CLI), [Claude Code](https://claude.ai/code) (`claude` CLI) - -**Optional:** [Codex CLI](https://github.com/openai/codex) (for court review with Codex reviewer) +**For PR reviews:** [gh](https://cli.github.com/) (GitHub CLI), [Codex CLI](https://github.com/openai/codex) (`codex` CLI) ## Links diff --git a/docs/promptfoo-plugin.md b/docs/promptfoo-plugin.md index 0297f79..c373106 100644 --- a/docs/promptfoo-plugin.md +++ b/docs/promptfoo-plugin.md @@ -91,7 +91,7 @@ promptfoo eval |------|-------------| | `--file`, `-f` | Input file path | | `--output`, `-o` | Output directory (default: current dir) | -| `--provider` | LLM provider (default: `openai:gpt-4o`) | +| `--provider` | LLM provider (default: `openai:gpt-5`) | | `--verbose`, `-v` | Show detailed agent output | | `--max-turns` | Max agent iterations (default: 30) | diff --git a/examples/nodejs-monorepo.yaml b/examples/nodejs-monorepo.yaml index d62732a..aac34db 100644 --- a/examples/nodejs-monorepo.yaml +++ b/examples/nodejs-monorepo.yaml @@ -23,7 +23,7 @@ layout: - name: server command: pnpm dev - name: main - command: claude --dangerously-skip-permissions + command: codex --full-auto env_sync: files: diff --git a/examples/promptfoo-cloud.yaml b/examples/promptfoo-cloud.yaml index 3a1da98..c0647af 100644 --- a/examples/promptfoo-cloud.yaml +++ b/examples/promptfoo-cloud.yaml @@ -23,7 +23,7 @@ layout: - name: server command: pnpm dev - name: main - command: claude --dangerously-skip-permissions --chrome + command: codex --full-auto env_sync: files: diff --git a/examples/python-project.yaml b/examples/python-project.yaml index 3356a21..7e65a4a 100644 --- a/examples/python-project.yaml +++ b/examples/python-project.yaml @@ -23,7 +23,7 @@ layout: - name: server command: uvicorn app.main:app --reload --port $API_PORT - name: main - command: claude --dangerously-skip-permissions + command: codex --full-auto env_sync: files: diff --git a/plugins/promptfoo/src/agent/providers.ts b/plugins/promptfoo/src/agent/providers.ts index 9b21b62..7cb04c6 100644 --- a/plugins/promptfoo/src/agent/providers.ts +++ b/plugins/promptfoo/src/agent/providers.ts @@ -46,7 +46,7 @@ export class OpenAIProvider implements LLMProvider { constructor(options: { apiKey?: string; model?: string; baseUrl?: string; reasoningEffort?: string }) { this.apiKey = options.apiKey || process.env.OPENAI_API_KEY || ''; - this.model = options.model || 'gpt-4o'; + this.model = options.model || 'gpt-5'; this.baseUrl = options.baseUrl || 'https://api.openai.com/v1'; this.reasoningEffort = options.reasoningEffort; diff --git a/plugins/promptfoo/src/serve.ts b/plugins/promptfoo/src/serve.ts index 824724c..c65e3b2 100644 --- a/plugins/promptfoo/src/serve.ts +++ b/plugins/promptfoo/src/serve.ts @@ -469,7 +469,7 @@ Usage: Options: --setup One-time configuration - --provider LLM provider (default: from config or openai:gpt-4o) + --provider LLM provider (default: from config or openai:gpt-5) --interval Poll interval in ms (default: 5000) --verbose, -v Show detailed output --help, -h Show this help diff --git a/src/crabcode b/src/crabcode index 0a8ef00..faef762 100755 --- a/src/crabcode +++ b/src/crabcode @@ -79,54 +79,142 @@ is_negative_response() { # ============================================================================= # Agent Abstraction Layer # ============================================================================= -# Crabcode supports multiple coding agents (claude, codex, etc.) +# Crabcode supports multiple coding agents (codex, claude, etc.) # The agent is configured per-project via the `agent:` field in project YAML. -# Get configured agent type: "claude" or "codex" +# Get configured agent type: "codex" or "claude" get_agent_type() { - local agent=$(yq -r '.agent // "claude"' "$CONFIG_FILE" 2>/dev/null) - echo "${agent:-claude}" + local agent="" + + if [ -f "$CONFIG_FILE" ]; then + agent=$(yq -r '.agent // ""' "$CONFIG_FILE" 2>/dev/null) + [ "$agent" = "null" ] && agent="" + + case "${agent:-}" in + codex|claude) + echo "$agent" + return + ;; + esac + + # Backward compat: infer the agent from an explicitly configured main pane command. + local main_cmd=$(yq -r '.layout.panes[]? | select(.name == "main") | .command // ""' "$CONFIG_FILE" 2>/dev/null | head -1) + case "$main_cmd" in + *claude*) echo "claude" ; return ;; + *codex*) echo "codex" ; return ;; + esac + fi + + echo "codex" } -# Get the base agent command (interactive mode, autonomous permissions) -get_agent_base_cmd() { - local agent=$(get_agent_type) +agent_base_cmd_for_type() { + local agent="${1:-codex}" case "$agent" in - codex) echo "codex --full-auto" ;; - claude|*) echo "claude --dangerously-skip-permissions" ;; + claude) echo "claude --dangerously-skip-permissions" ;; + codex|*) echo "codex --full-auto --skip-git-repo-check" ;; esac } -# Get the agent command in safe/interactive mode (user approves each action) -get_agent_safe_cmd() { - local agent=$(get_agent_type) +agent_safe_cmd_for_type() { + local agent="${1:-codex}" case "$agent" in - codex) echo "codex" ;; - claude|*) echo "claude" ;; + claude) echo "claude" ;; + codex|*) echo "codex --skip-git-repo-check" ;; esac } -# Build agent command with "continue last session" semantics -agent_cmd_continue() { - local cmd="$1" - local agent=$(get_agent_type) +agent_cmd_continue_for_type() { + local agent="${1:-codex}" + local cmd="$2" case "$agent" in - codex) echo "codex resume --last --full-auto" ;; - claude|*) echo "$cmd --continue" ;; + claude) echo "$cmd --continue" ;; + codex|*) echo "codex resume --last --full-auto" ;; esac } -# Build agent command with "resume specific session" semantics -agent_cmd_resume() { - local cmd="$1" - local session_id="$2" - local agent=$(get_agent_type) +agent_cmd_resume_for_type() { + local agent="${1:-codex}" + local cmd="$2" + local session_id="$3" case "$agent" in - codex) echo "codex resume $session_id --full-auto" ;; - claude|*) echo "$cmd --resume $session_id" ;; + claude) echo "$cmd --resume $session_id" ;; + codex|*) echo "codex resume $session_id --full-auto" ;; esac } +agent_print_cmd_for_type() { + local agent="${1:-codex}" + case "$agent" in + claude) echo "claude --print" ;; + codex|*) echo "codex exec" ;; + esac +} + +agent_display_name_for_type() { + local agent="${1:-codex}" + case "$agent" in + claude) echo "Claude" ;; + codex|*) echo "Codex" ;; + esac +} + +agent_resume_file_for_type() { + local agent="${1:-codex}" + local dir="$2" + case "$agent" in + claude) echo "$dir/.claude-resume-session" ;; + codex|*) echo "$dir/.codex-resume-session" ;; + esac +} + +agent_session_dir_for_type() { + local agent="${1:-codex}" + local workspace_dir="$2" + case "$agent" in + claude) echo "$HOME/.claude/projects/$(echo "$workspace_dir" | tr '/.' '--')" ;; + codex|*) echo "$HOME/.codex" ;; + esac +} + +agent_type_from_metadata() { + local metadata="$1" + local saved_agent=$(grep -o '"agent"[[:space:]]*:[[:space:]]*"[^"]*"' "$metadata" | sed 's/"agent"[[:space:]]*:[[:space:]]*"//' | sed 's/"$//') + + case "${saved_agent:-}" in + codex|claude) + echo "$saved_agent" + ;; + *) + if grep -q '"claude_session"' "$metadata" 2>/dev/null; then + echo "claude" + else + echo "codex" + fi + ;; + esac +} + +# Get the base agent command (interactive mode, autonomous permissions) +get_agent_base_cmd() { + agent_base_cmd_for_type "$(get_agent_type)" +} + +# Get the agent command in safe/interactive mode (user approves each action) +get_agent_safe_cmd() { + agent_safe_cmd_for_type "$(get_agent_type)" +} + +# Build agent command with "continue last session" semantics +agent_cmd_continue() { + agent_cmd_continue_for_type "$(get_agent_type)" "$1" +} + +# Build agent command with "resume specific session" semantics +agent_cmd_resume() { + agent_cmd_resume_for_type "$(get_agent_type)" "$1" "$2" +} + # Append prompt to agent command agent_cmd_with_prompt() { local cmd="$1" @@ -138,11 +226,7 @@ agent_cmd_with_prompt() { # Get the non-interactive print command for the agent agent_print_cmd() { - local agent=$(get_agent_type) - case "$agent" in - codex) echo "codex exec" ;; - claude|*) echo "claude --print" ;; - esac + agent_print_cmd_for_type "$(get_agent_type)" } # Run agent non-interactively with a prompt (stdin) @@ -150,8 +234,8 @@ agent_run_print() { local prompt="$1" local agent=$(get_agent_type) case "$agent" in - codex) echo "$prompt" | timeout 30 codex exec 2>/dev/null || echo "" ;; - claude|*) echo "$prompt" | timeout 30 claude --print 2>/dev/null || echo "" ;; + claude) echo "$prompt" | timeout 30 claude --print 2>/dev/null || echo "" ;; + codex|*) echo "$prompt" | timeout 30 codex exec --skip-git-repo-check -C "$(pwd)" 2>/dev/null || echo "" ;; esac } @@ -159,38 +243,24 @@ agent_run_print() { agent_cli_exists() { local agent=$(get_agent_type) case "$agent" in - codex) command_exists codex ;; - claude|*) command_exists claude ;; + claude) command_exists claude ;; + codex|*) command_exists codex ;; esac } # Get agent display name (for user-facing messages) agent_display_name() { - local agent=$(get_agent_type) - case "$agent" in - codex) echo "Codex" ;; - claude|*) echo "Claude" ;; - esac + agent_display_name_for_type "$(get_agent_type)" } # Get agent resume file name (per-workspace) agent_resume_file() { - local dir="$1" - local agent=$(get_agent_type) - case "$agent" in - codex) echo "$dir/.codex-resume-session" ;; - claude|*) echo "$dir/.claude-resume-session" ;; - esac + agent_resume_file_for_type "$(get_agent_type)" "$1" } # Get agent session history directory agent_session_dir() { - local workspace_dir="$1" - local agent=$(get_agent_type) - case "$agent" in - codex) echo "$HOME/.codex" ;; - claude|*) echo "$HOME/.claude/projects/$(echo "$workspace_dir" | tr '/.' '--')" ;; - esac + agent_session_dir_for_type "$(get_agent_type)" "$1" } # Get the agent system prompt file path and ensure it exists @@ -198,18 +268,7 @@ agent_ensure_system_prompt() { local dir="$1" local agent=$(get_agent_type) case "$agent" in - codex) - local agents_file="$dir/AGENTS.md" - if ! grep -q "^## Team Mode$" "$agents_file" 2>/dev/null; then - cat >> "$agents_file" << 'AGENTSEOF' - -## Team Mode - -You can spawn agent teammates for complex tasks. Create specialized agents (researcher, implementer, reviewer, debugger) that work in parallel. Coordinate the team, assign tasks, and synthesize results. Only spawn teams when the task benefits from parallel work. -AGENTSEOF - fi - ;; - claude|*) + claude) local team_file="$dir/.claude/CLAUDE.md" mkdir -p "$dir/.claude" if ! grep -q "^## Team Mode$" "$team_file" 2>/dev/null; then @@ -221,6 +280,17 @@ You can spawn agent teammates for complex tasks. Use the Task tool to create spe CLAUDEEOF fi ;; + codex|*) + local agents_file="$dir/AGENTS.md" + if ! grep -q "^## Team Mode$" "$agents_file" 2>/dev/null; then + cat >> "$agents_file" << 'AGENTSEOF' + +## Team Mode + +You can spawn agent teammates for complex tasks. Create specialized agents (researcher, implementer, reviewer, debugger) that work in parallel. Coordinate the team, assign tasks, and synthesize results. Only spawn teams when the task benefits from parallel work. +AGENTSEOF + fi + ;; esac } @@ -229,7 +299,15 @@ agent_capture_session_id() { local dir="$1" local agent=$(get_agent_type) case "$agent" in - codex) + claude) + local claude_project_dir="$HOME/.claude/projects/$(echo "$dir" | tr '/.' '--')" + if [ -d "$claude_project_dir" ]; then + ls -t "$claude_project_dir"/*.jsonl 2>/dev/null | head -1 | xargs -I{} basename {} .jsonl 2>/dev/null || echo "" + else + echo "" + fi + ;; + codex|*) # Codex stores sessions in ~/.codex/sessions/YYYY/MM/DD/rollout--.jsonl local codex_sessions="$HOME/.codex/sessions" if [ -d "$codex_sessions" ]; then @@ -244,14 +322,6 @@ agent_capture_session_id() { echo "" fi ;; - claude|*) - local claude_project_dir="$HOME/.claude/projects/$(echo "$dir" | tr '/.' '--')" - if [ -d "$claude_project_dir" ]; then - ls -t "$claude_project_dir"/*.jsonl 2>/dev/null | head -1 | xargs -I{} basename {} .jsonl 2>/dev/null || echo "" - else - echo "" - fi - ;; esac } @@ -272,8 +342,8 @@ agent_cmd_with_context() { local context_file="$2" local agent=$(get_agent_type) case "$agent" in - codex) echo "$cmd \"$(cat "$context_file" 2>/dev/null | head -c 4000)\"" ;; - claude|*) echo "$cmd \"$context_file\"" ;; + claude) echo "$cmd \"$context_file\"" ;; + codex|*) echo "$cmd \"$(cat "$context_file" 2>/dev/null | head -c 4000)\"" ;; esac } @@ -282,12 +352,12 @@ agent_generate_summary() { local prompt="$1" local agent=$(get_agent_type) case "$agent" in - codex) - codex exec "$prompt" 2>/dev/null | tail -1 || echo "" - ;; - claude|*) + claude) claude --continue --print -p "$prompt" 2>/dev/null | tail -1 || echo "" ;; + codex|*) + codex exec --skip-git-repo-check -C "$(pwd)" "$prompt" 2>/dev/null | tail -1 || echo "" + ;; esac } @@ -2432,11 +2502,21 @@ get_pane_command() { for ((i=0; i/dev/null) if [ "$name" = "$pane_name" ]; then - yq -r ".layout.panes[$i].command // \"\"" "$CONFIG_FILE" 2>/dev/null + local cmd=$(yq -r ".layout.panes[$i].command // \"\"" "$CONFIG_FILE" 2>/dev/null) + if [ "$pane_name" = "main" ] && { [ -z "$cmd" ] || [ "$cmd" = "null" ]; }; then + get_agent_base_cmd + else + echo "$cmd" + fi return fi done - echo "" + + if [ "$pane_name" = "main" ]; then + get_agent_base_cmd + else + echo "" + fi } # Check and setup workspace (dependencies, .env sync, shared volume) @@ -3198,9 +3278,8 @@ wip_list() { local cs=$(grep -o '"agent_session"[[:space:]]*:[[:space:]]*"[^"]*"' "$metadata" | sed 's/"agent_session"[[:space:]]*:[[:space:]]*"//' | sed 's/"$//') # Backward compat: also check claude_session [ -z "$cs" ] && cs=$(grep -o '"claude_session"[[:space:]]*:[[:space:]]*"[^"]*"' "$metadata" | sed 's/"claude_session"[[:space:]]*:[[:space:]]*"//' | sed 's/"$//') - local saved_agent=$(grep -o '"agent"[[:space:]]*:[[:space:]]*"[^"]*"' "$metadata" | sed 's/"agent"[[:space:]]*:[[:space:]]*"//' | sed 's/"$//') - [ -z "$saved_agent" ] && saved_agent="claude" - local agent_label=$(echo "$saved_agent" | awk '{print toupper(substr($0,1,1)) substr($0,2)}') + local saved_agent=$(agent_type_from_metadata "$metadata") + local agent_label=$(agent_display_name_for_type "$saved_agent") [ -n "$cs" ] && session_tag=" ${CYAN}${agent_label}: saved${NC}" echo -e " ${GREEN}[$i]${NC} $name" @@ -3299,9 +3378,8 @@ wip_list_global() { local session_tag="" local agent_sess=$(grep -o '"agent_session"[[:space:]]*:[[:space:]]*"[^"]*"' "$metadata" | sed 's/"agent_session"[[:space:]]*:[[:space:]]*"//' | sed 's/"$//') [ -z "$agent_sess" ] && agent_sess=$(grep -o '"claude_session"[[:space:]]*:[[:space:]]*"[^"]*"' "$metadata" | sed 's/"claude_session"[[:space:]]*:[[:space:]]*"//' | sed 's/"$//') - local saved_agent=$(grep -o '"agent"[[:space:]]*:[[:space:]]*"[^"]*"' "$metadata" | sed 's/"agent"[[:space:]]*:[[:space:]]*"//' | sed 's/"$//') - [ -z "$saved_agent" ] && saved_agent="claude" - local agent_label=$(echo "$saved_agent" | awk '{print toupper(substr($0,1,1)) substr($0,2)}') + local saved_agent=$(agent_type_from_metadata "$metadata") + local agent_label=$(agent_display_name_for_type "$saved_agent") [ -n "$agent_sess" ] && session_tag=" ${CYAN}${agent_label}: saved${NC}" echo -e " ${GRAY}Workspace: ${NC}$ws_num ${GRAY}Branch: ${NC}$branch ${GRAY}Files: ${NC}$file_count patches${session_tag}" @@ -3794,19 +3872,35 @@ _restore_wip() { success "WIP restored: $wip_name" + local restored_saved_agent="" + # Write agent session resume file if available if [ -f "$metadata" ]; then local agent_sess=$(grep -o '"agent_session"[[:space:]]*:[[:space:]]*"[^"]*"' "$metadata" | sed 's/"agent_session"[[:space:]]*:[[:space:]]*"//' | sed 's/"$//') # Backward compat: also check claude_session [ -z "$agent_sess" ] && agent_sess=$(grep -o '"claude_session"[[:space:]]*:[[:space:]]*"[^"]*"' "$metadata" | sed 's/"claude_session"[[:space:]]*:[[:space:]]*"//' | sed 's/"$//') - agent_write_resume_file "$dir" "$agent_sess" + restored_saved_agent=$(agent_type_from_metadata "$metadata") + + if [ -n "$agent_sess" ] && [ "$agent_sess" != "" ]; then + if [ "$restored_saved_agent" = "$(get_agent_type)" ]; then + agent_write_resume_file "$dir" "$agent_sess" + else + echo "$agent_sess" > "$(agent_resume_file_for_type "$restored_saved_agent" "$dir")" + echo -e " ${YELLOW}Saved conversation belongs to $(agent_display_name_for_type "$restored_saved_agent").${NC}" + echo -e " ${YELLOW}Current project agent is $(agent_display_name), so it will only resume after switching the project agent.${NC}" + fi + fi fi # Relaunch the workspace tmux window with agent session resume if [ "$open_after" = "true" ]; then echo "" echo " Relaunching workspace $num..." - continue_workspace "$num" + if [ -n "$restored_saved_agent" ] && [ "$restored_saved_agent" != "$(get_agent_type)" ]; then + open_workspace "$num" + else + continue_workspace "$num" + fi else echo "" local resume_file=$(agent_resume_file "$dir") @@ -4966,7 +5060,7 @@ layout: - name: server command: pnpm dev - name: main - command: claude --dangerously-skip-permissions --chrome + command: codex --full-auto install_command: pnpm install install_env: PROMPTFOO_NODE_MODULES_CACHED=true @@ -5297,7 +5391,7 @@ layout: # - name: main # command: "" # Set via 'agent:' field, or override directly # -# agent: claude # or "codex" - configures the coding agent for this project +# agent: codex # or "claude" - defaults to codex if omitted # # shared_volume: # enabled: true @@ -5319,7 +5413,7 @@ EOF echo " 1. Run 'crab @$alias_input config scan' to detect .env files and ports" echo " 2. Edit $CONFIG_FILE to set your layout commands:" echo " - server pane: your dev server command (e.g., pnpm dev)" - echo " - main pane: your coding agent (e.g., claude, codex)" + echo " - main pane: your coding agent (e.g., codex, claude)" echo " 3. Run 'crab @$alias_input ws 1' to create your first workspace" echo "" } @@ -6971,7 +7065,7 @@ create_handoff() { fi fi ;; - claude|*) + claude) local claude_history="$HOME/.claude/projects" if [ -d "$claude_history" ]; then local project_dir=$(find "$claude_history" -type d -name "*$(basename "$dir")*" 2>/dev/null | head -1) @@ -7958,7 +8052,7 @@ show_cheat() { ║ crab wip restore Interactive restore from all WIPs ║ ║ crab wip restore Restore WIP #N to original workspace ║ ║ crab wip restore --to Restore to different workspace ║ -║ crab wip restore --open Restore and open workspace with claude ║ +║ crab wip restore --open Restore and open workspace with configured agent ║ ║ crab wip --continue Restore most recent WIP (current workspace) ║ ║ crab wip delete Delete a saved WIP state ║ ║ ║ @@ -8434,7 +8528,7 @@ pf_help() { echo "" echo -e "${BOLD}Options:${NC}" echo " --output, -o Output directory (default: current dir)" - echo " --provider LLM provider (default: openai:gpt-4o)" + echo " --provider LLM provider (default: openai:gpt-5)" echo " --verbose, -v Show detailed output" echo "" echo -e "${BOLD}Supported formats:${NC}" @@ -9020,8 +9114,8 @@ session_start() { local agent=$(get_agent_type) if [ -f "$context_file" ]; then case "$agent" in - codex) $base_cmd "$(cat "$context_file" | head -c 4000)" ;; - claude|*) $base_cmd "$context_file" ;; + claude) $base_cmd "$context_file" ;; + codex|*) $base_cmd "$(cat "$context_file" | head -c 4000)" ;; esac else $base_cmd @@ -9051,7 +9145,13 @@ session_resume() { return 1 fi - echo -e "${CYAN}Resuming session: $name${NC}" + local session_agent=$(yq -r '.agent // ""' "$session_file" 2>/dev/null) + case "${session_agent:-}" in + codex|claude) ;; + *) session_agent="$(get_agent_type)" ;; + esac + + echo -e "${CYAN}Resuming session: $name ($(agent_display_name_for_type "$session_agent"))${NC}" local summary=$(session_get "$name" "summary") [ -n "$summary" ] && [ "$summary" != "null" ] && echo -e " ${GRAY}$summary${NC}" @@ -9066,8 +9166,8 @@ session_resume() { # Resume agent from session directory cd "$session_dir" - local base_cmd=$(get_agent_base_cmd) - local resume_cmd=$(agent_cmd_continue "$base_cmd") + local base_cmd=$(agent_base_cmd_for_type "$session_agent") + local resume_cmd=$(agent_cmd_continue_for_type "$session_agent" "$base_cmd") $resume_cmd } @@ -9234,7 +9334,7 @@ get_court_instructions() { ## Court Review Protocol -You are the **JUDGE** in a code review court. You will orchestrate two reviewers (Claude teammate + Codex) and deliver a final verdict. +You are **Codex acting as the JUDGE** in a code review court. You will orchestrate two independent review passes and deliver a final verdict. ### Your Role as Judge: - **Orchestrate** the review process @@ -9244,13 +9344,14 @@ You are the **JUDGE** in a code review court. You will orchestrate two reviewers ### Phase 1: Empanel the Reviewers -Spawn two reviewer agents to analyze the PR independently: +Run two independent review passes: -**Reviewer A (Claude):** +**Reviewer A (Teammate Review):** ``` -Use Task tool: - subagent_type: "general-purpose" - prompt: "You are Reviewer A. Review this PR for bugs, security issues, and code quality. Be thorough but avoid false positives. Structure findings as Critical/Warning/Suggestion with file:line references. +Use your available teammate/subagent capability if present. If you do not have one, do a separate first-pass review yourself and label it Reviewer A. + +Review brief: +You are Reviewer A. Review this PR for bugs, security issues, and code quality. Be thorough but avoid false positives. Structure findings as Critical/Warning/Suggestion with file:line references. Beyond standard review, pay special attention to these high-risk patterns: @@ -9262,25 +9363,23 @@ Beyond standard review, pay special attention to these high-risk patterns: 4. **Data contract completeness at merge boundaries**: When code spreads/merges one data source onto another (e.g. {...existing, ...fresh}), verify the fresh source returns ALL expected fields. Missing fields silently zero out or null existing data. -Here is the context: [include PR diff]" +Here is the context: [include PR diff] ``` -**Reviewer B (Codex):** +**Reviewer B (Secondary Codex Pass):** ``` -Use Task tool: - subagent_type: "Bash" - prompt: "Run: codex --print -p 'You are Reviewer B. Review this code for bugs, security issues, and quality problems. Structure as Critical/Warning/Suggestion with file:line. +Run a separate Codex pass via shell from the session directory: + +codex exec --skip-git-repo-check -C . "Read ./context.md, then review this PR for bugs, security issues, and quality problems. Structure findings as Critical/Warning/Suggestion with file:line references. Beyond standard review, check for these high-risk patterns: 1. Called-but-not-changed code: If the diff calls existing functions (especially data mutation), verify those functions return/do what the caller assumes. 2. Mutations on read paths: Flag any DB write or cache mutation triggered by a GET/read endpoint. Fire-and-forget data rewrites are high risk. 3. Blast radius from initial state: If new code processes records conditionally (refresh if stale), check if null/default initial state means ALL existing records get hit at once. 4. Data contract completeness: When code spreads/merges data sources ({...existing, ...fresh}), verify the fresh source returns ALL expected fields. Missing fields silently zero out existing data. - -Diff: [include relevant sections]'" ``` -Wait for BOTH reviewers to complete before proceeding. +Wait for BOTH review passes to complete before proceeding. ### Phase 2: Collect Testimony @@ -9526,8 +9625,8 @@ EOF local base_cmd=$(get_agent_base_cmd) local agent=$(get_agent_type) case "$agent" in - codex) $base_cmd "$(cat context.md | head -c 4000)" ;; - claude|*) $base_cmd "context.md" ;; + claude) $base_cmd "context.md" ;; + codex|*) $base_cmd "$(cat context.md | head -c 4000)" ;; esac # Prompt for summary after agent exits @@ -9639,8 +9738,8 @@ EOF local base_cmd=$(get_agent_base_cmd) local agent=$(get_agent_type) case "$agent" in - codex) $base_cmd "$(cat context.md | head -c 4000)" ;; - claude|*) $base_cmd "context.md" ;; + claude) $base_cmd "context.md" ;; + codex|*) $base_cmd "$(cat context.md | head -c 4000)" ;; esac # Prompt for summary after agent exits @@ -9675,7 +9774,7 @@ _show_court_intro() { echo -e " ${DIM}\"Order in the court!\"${RST}" echo "" echo -e " ${C2}┌─────────────┐${RST} ${C3}┌─────────────┐${RST}" - echo -e " ${C2}│ Claude │${RST} vs ${C3}│ Codex │${RST}" + echo -e " ${C2}│ Teammate │${RST} vs ${C3}│ Codex │${RST}" echo -e " ${C2}│ Reviewer A │${RST} ${C3}│ Reviewer B │${RST}" echo -e " ${C2}└─────────────┘${RST} ${C3}└─────────────┘${RST}" echo "" @@ -9692,7 +9791,7 @@ _show_court_intro() { echo "" } -# Court review - Judge pattern with Claude + Codex reviewers +# Court review - Codex judge plus independent reviewer passes review_court() { local pr_id="$1" @@ -9702,9 +9801,8 @@ review_court() { # Check if codex CLI is available if ! command_exists codex; then - warn "Codex CLI not found. Install with: npm install -g @openai/codex" - echo "Court review will proceed with Claude teammate only." - echo "" + error "Codex CLI required for crab court. Install with: npm install -g @openai/codex" + return 1 fi local parsed=($(parse_pr_identifier "$pr_id")) @@ -9753,7 +9851,7 @@ name: $name project: ${PROJECT_ALIAS:-default} created: $(date -u +"%Y-%m-%dT%H:%M:%SZ") last_accessed: $(date -u +"%Y-%m-%dT%H:%M:%SZ") -agent: $(get_agent_type) +agent: codex agent_session_id: "" summary: "" type: court @@ -9766,12 +9864,13 @@ EOF echo -e "${GREEN}Court review session created: $name${NC}" echo "" - # Start Claude as the judge + # Start Codex as the judge session_update "$name" "last_accessed" "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" cd "$session_dir" - claude --dangerously-skip-permissions --chrome "context.md" + local judge_cmd=$(agent_base_cmd_for_type "codex") + $judge_cmd "$(cat context.md | head -c 4000)" - # Prompt for summary after Claude exits + # Prompt for summary after the judge exits _prompt_review_summary "$name" } @@ -9855,7 +9954,7 @@ handle_review_command() { cat "$output_file" else error "No saved review output for '$name'" - echo "The review hasn't been saved yet. Resume the review and ask Claude to save findings." + echo "The review hasn't been saved yet. Resume the review and ask the active agent to save findings." fi ;; *) @@ -9879,9 +9978,8 @@ court_new_interactive() { # Check if codex CLI is available if ! command_exists codex; then - warn "Codex CLI not found. Install with: npm install -g @openai/codex" - echo "Court review will proceed with Claude teammate only." - echo "" + error "Codex CLI required for crab court. Install with: npm install -g @openai/codex" + return 1 fi # Collect PRs @@ -9957,7 +10055,7 @@ name: $name project: ${PROJECT_ALIAS:-default} created: $(date -u +"%Y-%m-%dT%H:%M:%SZ") last_accessed: $(date -u +"%Y-%m-%dT%H:%M:%SZ") -agent: $(get_agent_type) +agent: codex agent_session_id: "" summary: "" type: court @@ -9972,14 +10070,15 @@ EOF echo -e "${GREEN}Court review session created: $name${NC}" echo "" - # Start Claude + # Start Codex as the judge read -p "Start court session now? [Y/n] " start_now if [[ ! "$start_now" =~ ^[Nn]$ ]]; then session_update "$name" "last_accessed" "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" cd "$session_dir" - claude --dangerously-skip-permissions --chrome "context.md" + local judge_cmd=$(agent_base_cmd_for_type "codex") + $judge_cmd "$(cat context.md | head -c 4000)" - # Prompt for summary after Claude exits + # Prompt for summary after the judge exits _prompt_review_summary "$name" fi } @@ -9994,9 +10093,9 @@ handle_court_command() { error "Usage: crab court or crab court new" echo "" echo "Court review: thorough multi-agent review with:" - echo " - Judge (Claude) - orchestrates, verifies, delivers verdict" - echo " - Reviewer A (Claude teammate) - independent review" - echo " - Reviewer B (Codex) - independent review" + echo " - Judge (Codex) - orchestrates, verifies, delivers verdict" + echo " - Reviewer A (teammate/independent pass) - parallel review" + echo " - Reviewer B (Codex) - secondary Codex review pass" echo "" echo "Commands:" echo " crab court Quick court review for single PR" @@ -11369,8 +11468,8 @@ PROMPT_EOF local agent_cmd=$(get_agent_base_cmd) local agent=$(get_agent_type) case "$agent" in - codex) $agent_cmd --skip-git-repo-check -C "$staging_dir" "$(cat "$prompt_file")" ;; - claude|*) $agent_cmd "$prompt_file" ;; + claude) $agent_cmd "$prompt_file" ;; + codex|*) $agent_cmd -C "$staging_dir" "$(cat "$prompt_file")" ;; esac rm -f "$prompt_file" @@ -11545,8 +11644,8 @@ RESTORE_EOF local agent_cmd=$(get_agent_safe_cmd) local agent=$(get_agent_type) case "$agent" in - codex) $agent_cmd --skip-git-repo-check -C "$restore_dir" "$(cat "$prompt_file")" ;; - claude|*) $agent_cmd "$prompt_file" ;; + claude) $agent_cmd "$prompt_file" ;; + codex|*) $agent_cmd -C "$restore_dir" "$(cat "$prompt_file")" ;; esac rm -f "$prompt_file" diff --git a/tests/unit/test_agent_helpers.bats b/tests/unit/test_agent_helpers.bats index 4d0eaa9..ec65008 100644 --- a/tests/unit/test_agent_helpers.bats +++ b/tests/unit/test_agent_helpers.bats @@ -48,6 +48,17 @@ EOF # Config with no agent field (tests default behavior) cat > "${HOME}/.crabcode/projects/no-agent-project.yaml" << 'EOF' session_name: no-agent-test +layout: + panes: + - name: terminal + command: "" + - name: main + command: "" +EOF + + # Backward compat: infer Claude when old configs omitted agent but hardcoded the main command + cat > "${HOME}/.crabcode/projects/inferred-claude-project.yaml" << 'EOF' +session_name: inferred-claude-test layout: panes: - name: terminal @@ -79,29 +90,36 @@ teardown() { assert_output "claude" } -@test "get_agent_type: defaults to claude when agent field missing" { +@test "get_agent_type: defaults to codex when agent field missing and main pane is blank" { CONFIG_FILE="${HOME}/.crabcode/projects/no-agent-project.yaml" run get_agent_type assert_success + assert_output "codex" +} + +@test "get_agent_type: infers claude from main pane command when agent field is missing" { + CONFIG_FILE="${HOME}/.crabcode/projects/inferred-claude-project.yaml" + run get_agent_type + assert_success assert_output "claude" } -@test "get_agent_type: defaults to claude when config file missing" { +@test "get_agent_type: defaults to codex when config file missing" { CONFIG_FILE="/nonexistent/path.yaml" run get_agent_type assert_success - assert_output "claude" + assert_output "codex" } # ============================================================================= # get_agent_base_cmd # ============================================================================= -@test "get_agent_base_cmd: codex returns codex --full-auto" { +@test "get_agent_base_cmd: codex returns codex --full-auto --skip-git-repo-check" { CONFIG_FILE="${HOME}/.crabcode/projects/codex-project.yaml" run get_agent_base_cmd assert_success - assert_output "codex --full-auto" + assert_output "codex --full-auto --skip-git-repo-check" } @test "get_agent_base_cmd: claude returns claude --dangerously-skip-permissions" { @@ -176,11 +194,11 @@ teardown() { assert_output "Claude" } -@test "agent_display_name: defaults to Claude" { +@test "agent_display_name: defaults to Codex" { CONFIG_FILE="${HOME}/.crabcode/projects/no-agent-project.yaml" run agent_display_name assert_success - assert_output "Claude" + assert_output "Codex" } # ============================================================================= @@ -378,6 +396,20 @@ teardown() { [ ! -f "${ws_dir}/.codex-resume-session" ] } +@test "get_pane_command: main defaults to codex base command when blank" { + CONFIG_FILE="${HOME}/.crabcode/projects/no-agent-project.yaml" + run get_pane_command "main" + assert_success + assert_output "codex --full-auto --skip-git-repo-check" +} + +@test "get_pane_command: main preserves explicit claude command for old configs" { + CONFIG_FILE="${HOME}/.crabcode/projects/inferred-claude-project.yaml" + run get_pane_command "main" + assert_success + assert_output "claude --dangerously-skip-permissions" +} + # ============================================================================= # agent_session_dir # ============================================================================= From 47e9bcf21d6e4ea82118c6789b4e67d10bd2d934 Mon Sep 17 00:00:00 2001 From: MrFlounder Date: Tue, 17 Mar 2026 21:19:55 -0700 Subject: [PATCH 2/2] chore: align release metadata to 0.13.0 --- .release-please-manifest.json | 2 +- CHANGELOG.md | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 8032c17..ed21d28 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.12.0" + ".": "0.13.0" } diff --git a/CHANGELOG.md b/CHANGELOG.md index c4b9835..91e5bdf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,11 @@ ## Unreleased -## [0.12.0] - 2026-02-20 +### Changed + +- Codex is now the default agent when `agent:` is omitted, `crab court` uses a Codex judge, and the top-level docs/examples reflect Codex-first defaults + +## [0.13.0] - 2026-03-11 ### Added