From 6ad2f8968647c54d21739cf44e67819a5c7a513b Mon Sep 17 00:00:00 2001 From: Drew Miller Date: Thu, 12 Feb 2026 14:13:55 -0500 Subject: [PATCH 1/6] feat: rewrite /deepen-plan with context-managed map-reduce architecture (v3) Replace unbounded v1 agent output with phased file-based map-reduce pattern that keeps parent context under ~12k tokens. Adds plan manifest analysis, validation, judge phase with source attribution priority, and preservation checking. Aligns with plugin ecosystem conventions. --- .../.claude-plugin/plugin.json | 2 +- plugins/compound-engineering/CHANGELOG.md | 18 + .../commands/deepen-plan.md | 1052 +++++++++++------ 3 files changed, 706 insertions(+), 366 deletions(-) diff --git a/plugins/compound-engineering/.claude-plugin/plugin.json b/plugins/compound-engineering/.claude-plugin/plugin.json index a74039ac..9b35c5a7 100644 --- a/plugins/compound-engineering/.claude-plugin/plugin.json +++ b/plugins/compound-engineering/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "compound-engineering", - "version": "2.33.0", + "version": "2.34.0", "description": "AI-powered development tools. 29 agents, 22 commands, 19 skills, 1 MCP server for code review, research, design, and workflow automation.", "author": { "name": "Kieran Klaassen", diff --git a/plugins/compound-engineering/CHANGELOG.md b/plugins/compound-engineering/CHANGELOG.md index b80621c6..a397d9fa 100644 --- a/plugins/compound-engineering/CHANGELOG.md +++ b/plugins/compound-engineering/CHANGELOG.md @@ -5,6 +5,24 @@ All notable changes to the compound-engineering plugin will be documented in thi The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [2.34.0] - 2026-02-12 + +### Changed + +- **`/deepen-plan` command** — Rewritten with **phased file-based map-reduce** architecture (v3) to prevent context overflow + - Sub-agents write full analysis JSON to `.deepen/` on disk and return only ~100 token summaries to parent + - Parent context stays under ~12k tokens regardless of agent count (vs unbounded in v1) + - New phases: **Plan Manifest Analysis** (structured context for all agents), **Validation** (catches silent failures + hallucination flagging), **Judge** (dedup + conflict resolution with source attribution priority), **Preservation Check** (catches rewrite-instead-of-append errors) + - Smart agent selection: always-run cross-cutting agents (security, architecture, performance) + manifest-matched domain agents + - Skill agents now read `references/`, `assets/`, and `templates/` subdirectories for deeper context + - Compound insights option (Step 9d) now uses `compound-docs` skill YAML schema for properly validated learning files + - Cross-platform safe: uses project-relative `.deepen/` instead of `/tmp/` (fixes Windows path issues) + - Uses Node.js for validation scripts (Python3 may not be installed on all platforms) + - Next steps offer `/plan_review` (not `/workflows:review` which is for code, not plans) + - Correct agent name references matching actual plugin agent filenames + +--- + ## [2.33.0] - 2026-02-12 ### Added diff --git a/plugins/compound-engineering/commands/deepen-plan.md b/plugins/compound-engineering/commands/deepen-plan.md index a7054764..2138da64 100644 --- a/plugins/compound-engineering/commands/deepen-plan.md +++ b/plugins/compound-engineering/commands/deepen-plan.md @@ -4,543 +4,865 @@ description: Enhance a plan with parallel research agents for each section to ad 
argument-hint: "[path to plan file]" --- -# Deepen Plan - Power Enhancement Mode +# Deepen Plan (v3 — Production-Hardened) + +**Note: The current year is 2026.** Use this when searching for recent documentation and best practices. + +Take an existing plan (from `/workflows:plan`) and enhance each section with parallel research, skill application, and review agents — using file-based synthesis to prevent context overflow while maximizing depth. ## Introduction -**Note: The current year is 2026.** Use this when searching for recent documentation and best practices. +Senior Technical Research Lead with expertise in architecture, best practices, and production-ready implementation patterns -This command takes an existing plan (from `/workflows:plan`) and enhances each section with parallel research agents. Each major element gets its own dedicated research sub-agent to find: -- Best practices and industry patterns -- Performance optimizations -- UI/UX improvements (if applicable) -- Quality enhancements and edge cases -- Real-world implementation examples +## Architecture: Why This Command Works -The result is a deeply grounded, production-ready plan with concrete implementation details. +This command uses the **Phased File-Based Map-Reduce** pattern — the same architecture as the review command, adapted for research and plan enhancement: + +1. **Analyze Phase** (sequential) — Parse the plan into a structured manifest of sections, technologies, and domains. All agents receive this manifest. +2. **Discover Phase** (parent) — Find all available skills, learnings, and agents using native Glob/Read tools. Match against the manifest. Skip clearly irrelevant ones. +3. **Research Phase** (parallel) — Matched agents write structured recommendations to `.deepen/`, return only a single sentence to the parent. +4. **Validate** — Verify all expected agent files exist, conform to schema, and flag zero-tool-use hallucination risk. +5. **Judge Phase** — A judge agent deduplicates, resolves conflicts with source attribution priority, groups by plan section, assigns impact levels, and ranks. +6. **Judge Validation** — Verify judge output references real manifest sections. +7. **Enhance Phase** — A synthesis agent reads the consolidated recommendations + original plan and writes the enhanced version. +8. **Preservation Check** — Verify the enhanced plan still contains every original section. +9. **Present** — Parent reads only the enhancement summary and presents next steps. + +This keeps the parent context under ~15k tokens of agent output regardless of how many research agents run. ## Plan File #$ARGUMENTS **If the plan path above is empty:** -1. Check for recent plans: `ls -la docs/plans/` -2. Ask the user: "Which plan would you like to deepen? Please provide the path (e.g., `docs/plans/2026-01-15-feat-my-feature-plan.md`)." +1. Check for recent plans: `ls -la plans/` +2. Ask the user: "Which plan would you like to deepen? Please provide the path." Do not proceed until you have a valid plan file path. ## Main Tasks -### 1. Parse and Analyze Plan Structure +### 1. Prepare the Scratchpad Directory - -First, read and parse the plan to identify each major section that can be enhanced with research. - + +Use a project-relative path, NOT /tmp/. The /tmp/ path causes two problems: +1. Claude Code's Read tool and MCP filesystem tools cannot access /tmp/ (outside allowed directories) +2. 
On Windows, /tmp/ resolves to different locations depending on the subprocess (MSYS2 vs literal C:\tmp), splitting agent files across directories -**Read the plan file and extract:** -- [ ] Overview/Problem Statement -- [ ] Proposed Solution sections -- [ ] Technical Approach/Architecture -- [ ] Implementation phases/steps -- [ ] Code examples and file references -- [ ] Acceptance criteria -- [ ] Any UI/UX components mentioned -- [ ] Technologies/frameworks mentioned (Rails, React, Python, TypeScript, etc.) -- [ ] Domain areas (data models, APIs, UI, security, performance, etc.) +Using .deepen/ inside the project avoids both issues. All tools (Read, Write, Bash, MCP) can access project-relative paths reliably. + -**Create a section manifest:** -``` -Section 1: [Title] - [Brief description of what to research] -Section 2: [Title] - [Brief description of what to research] -... +```bash +# Create the deepen session directory (project-relative, cross-platform safe) +DEEPEN_DIR=".deepen" +rm -rf "$DEEPEN_DIR" +mkdir -p "$DEEPEN_DIR" +grep -qxF '.deepen/' .gitignore 2>/dev/null || echo '.deepen/' >> .gitignore + +# Copy the plan to the scratchpad for agents to reference +cp "$DEEPEN_DIR/original_plan.md" ``` -### 2. Discover and Apply Available Skills +All references below use `.deepen/` — this is the ONLY path agents should use. Do not use `/tmp/` anywhere. - -Dynamically discover all available skills and match them to plan sections. Don't assume what skills exist - discover them at runtime. - +### 2. Analyze Plan Structure (Phase 0 — Sequential) -**Step 1: Discover ALL available skills from ALL sources** + +Run this BEFORE discovering or launching any agents. This produces the structured manifest that drives intelligent agent selection and gives every agent shared context about the plan. + -```bash -# 1. Project-local skills (highest priority - project-specific) -ls .claude/skills/ - -# 2. User's global skills (~/.claude/) -ls ~/.claude/skills/ +``` +Task plan-analyzer(" +You are a Plan Structure Analyzer. Parse a development plan into a structured manifest that research agents will use for targeted enhancement. + +## Instructions: +1. Read .deepen/original_plan.md +2. Write your analysis to .deepen/PLAN_MANIFEST.json using this EXACT schema: + +{ + \"plan_title\": \"\", + \"plan_path\": \"<original file path>\", + \"technologies\": [\"Rails\", \"React\", \"TypeScript\", \"Redis\", ...], + \"domains\": [\"authentication\", \"caching\", \"API design\", \"UI/UX\", ...], + \"sections\": [ + { + \"id\": 1, + \"title\": \"<section title>\", + \"summary\": \"<what this section covers, 1-2 sentences>\", + \"technologies\": [\"subset relevant to this section\"], + \"domains\": [\"subset relevant to this section\"], + \"has_code_examples\": true|false, + \"has_ui_components\": true|false, + \"has_data_models\": true|false, + \"has_api_design\": true|false, + \"has_security_concerns\": true|false, + \"has_performance_concerns\": true|false, + \"has_testing_strategy\": true|false, + \"has_deployment_concerns\": true|false, + \"enhancement_opportunities\": \"<what kind of research would improve this section>\" + } + ], + \"frameworks_with_versions\": {\"React\": \"19\", \"Next.js\": \"15\", \"Rails\": \"8.0\", ...}, + \"overall_risk_areas\": [\"<area 1>\", \"<area 2>\"], + \"acceptance_criteria_count\": <number>, + \"implementation_phases_count\": <number> +} + +3. 
Also write a human-readable summary to .deepen/PLAN_MANIFEST.md (max 300 words) covering: + - What the plan is about + - Key technical decisions + - Areas that would benefit most from deeper research + - Technologies involved + +4. Return to parent: 'Plan analysis complete. <N> sections identified across <M> technologies. Written to .deepen/PLAN_MANIFEST.json' +") +``` -# 3. compound-engineering plugin skills -ls ~/.claude/plugins/cache/*/compound-engineering/*/skills/ +Wait for this to complete before proceeding. -# 4. ALL other installed plugins - check every plugin for skills -find ~/.claude/plugins/cache -type d -name "skills" 2>/dev/null +### 3. Discover Available Skills, Learnings, and Agents -# 5. Also check installed_plugins.json for all plugin locations -cat ~/.claude/plugins/installed_plugins.json -``` +<critical_instruction> +This step runs in the PARENT context. It's a discovery phase — the parent reads directory listings and frontmatter, NOT full file contents. Keep this lightweight. +</critical_instruction> -**Important:** Check EVERY source. Don't assume compound-engineering is the only plugin. Use skills from ANY installed plugin that's relevant. +#### Step 3a: Discover Skills -**Step 2: For each discovered skill, read its SKILL.md to understand what it does** +Use Claude Code's native tools for cross-platform compatibility (bash `find`/`head` fails on Windows): -```bash -# For each skill directory found, read its documentation -cat [skill-path]/SKILL.md ``` +# Use Glob tool to discover skill directories +Glob: .claude/skills/*/SKILL.md +Glob: ~/.claude/skills/*/SKILL.md +Glob: ~/.claude/plugins/cache/**/skills/*/SKILL.md -**Step 3: Match skills to plan content** +# For each discovered SKILL.md, use Read tool to get first 10 lines (frontmatter/description only) +Read: [skill-path]/SKILL.md (first 10 lines) +``` -For each skill discovered: -- Read its SKILL.md description -- Check if any plan sections match the skill's domain -- If there's a match, spawn a sub-agent to apply that skill's knowledge +Do NOT read full skill contents into parent context — only descriptions for matching. -**Step 4: Spawn a sub-agent for EVERY matched skill** +#### Step 3b: Discover Learnings -**CRITICAL: For EACH skill that matches, spawn a separate sub-agent and instruct it to USE that skill.** +``` +# Use Glob to find all learning files +Glob: docs/solutions/**/*.md -For each matched skill: +# For each found file, use Read tool to get first 15 lines (frontmatter only) +Read: [learning-path] (first 15 lines) ``` -Task general-purpose: "You have the [skill-name] skill available at [skill-path]. -YOUR JOB: Use this skill on the plan. +Each learning file has YAML frontmatter with tags, category, and module — use these for filtering. -1. Read the skill: cat [skill-path]/SKILL.md -2. Follow the skill's instructions exactly -3. Apply the skill to this content: +#### Step 3c: Discover Review/Research Agents -[relevant plan section or full plan] +``` +# Use Glob to find all agent files from all sources +Glob: .claude/agents/*.md +Glob: ~/.claude/agents/*.md +Glob: ~/.claude/plugins/cache/**/agents/**/*.md +``` -4. Return the skill's full output +For compound-engineering plugin agents: +- USE: `agents/review/*`, `agents/research/*`, `agents/design/*`, `agents/docs/*` +- SKIP: `agents/workflow/*` (workflow orchestrators, not reviewers) -The skill tells you what to do - follow it. Execute the skill completely." 
-``` +#### Step 3d: Match Against Manifest -**Spawn ALL skill sub-agents in PARALLEL:** -- 1 sub-agent per matched skill -- Each sub-agent reads and uses its assigned skill -- All run simultaneously -- 10, 20, 30 skill sub-agents is fine +Read `.deepen/PLAN_MANIFEST.json` and match discovered resources: -**Each sub-agent:** -1. Reads its skill's SKILL.md -2. Follows the skill's workflow/instructions -3. Applies the skill to the plan -4. Returns whatever the skill produces (code, recommendations, patterns, reviews, etc.) +**Skills** — Match if skill's domain overlaps with any plan technology or domain. Common skill-to-domain mappings: +- Rails plans → `dhh-rails-style` +- Ruby gem plans → `andrew-kane-gem-writer` +- Frontend/UI plans → `frontend-design` +- AI/agent plans → `agent-native-architecture` +- LLM integration plans → `dspy-ruby` +- Documentation-heavy plans → `every-style-editor`, `compound-docs` +- Skill creation plans → `create-agent-skills` +- Security-sensitive plans → any security-related skills -**Example spawns:** -``` -Task general-purpose: "Use the dhh-rails-style skill at ~/.claude/plugins/.../dhh-rails-style. Read SKILL.md and apply it to: [Rails sections of plan]" +**Important:** Skills may have `references/` subdirectories with additional context files. When spawning skill agents in Step 4, instruct them to also check for and read files in `references/`, `assets/`, and `templates/` directories within the skill path. -Task general-purpose: "Use the frontend-design skill at ~/.claude/plugins/.../frontend-design. Read SKILL.md and apply it to: [UI sections of plan]" +**Learnings** — Match if learning's tags, category, or module overlaps with plan technologies, domains, or modules being changed. -Task general-purpose: "Use the agent-native-architecture skill at ~/.claude/plugins/.../agent-native-architecture. Read SKILL.md and apply it to: [agent/tool sections of plan]" +**Agents** — Two tiers: -Task general-purpose: "Use the security-patterns skill at ~/.claude/skills/security-patterns. Read SKILL.md and apply it to: [full plan]" -``` +**Always run (cross-cutting — these catch what you don't expect):** +- Security agents (security-sentinel, any security reviewer) +- Architecture agents (architecture-strategist) +- Performance agents (performance-oracle) -**No limit on skill sub-agents. Spawn one for every skill that could possibly be relevant.** +These run regardless of manifest matching because their domains are relevant to virtually every plan. A security agent catching a data exposure risk in a "simple UI plan" is exactly the kind of cross-cutting insight that makes deepening valuable. -### 3. Discover and Apply Learnings/Solutions +**Manifest-matched (run if domain overlap):** +- Framework-specific review agents (dhh-rails-reviewer for Rails, kieran-rails-reviewer for Rails, kieran-typescript-reviewer for TypeScript, kieran-python-reviewer for Python) +- Domain-specific research agents (data-integrity-guardian for database plans, deployment-verification-agent for deployment plans) +- Frontend agents (julik-frontend-races-reviewer for JS/Stimulus, design agents for UI plans) +- Code quality agents (code-simplicity-reviewer, pattern-recognition-specialist) +- Agent-native reviewer (agent-native-reviewer for plans involving agent/tool features) -<thinking> -Check for documented learnings from /workflows:compound. These are solved problems stored as markdown files. Spawn a sub-agent for each learning to check if it's relevant. 
-</thinking> +#### Learnings Filtering Examples -**LEARNINGS LOCATION - Check these exact folders:** +Given 12 learning files and a plan about "Rails API caching with Redis": +**SPAWN (likely relevant):** ``` -docs/solutions/ <-- PRIMARY: Project-level learnings (created by /workflows:compound) -├── performance-issues/ -│ └── *.md -├── debugging-patterns/ -│ └── *.md -├── configuration-fixes/ -│ └── *.md -├── integration-issues/ -│ └── *.md -├── deployment-issues/ -│ └── *.md -└── [other-categories]/ - └── *.md +docs/solutions/performance-issues/n-plus-one-queries.md # tags: [activerecord] — matches Rails +docs/solutions/performance-issues/redis-cache-stampede.md # tags: [caching, redis] — exact match +docs/solutions/configuration-fixes/redis-connection-pool.md # tags: [redis] — matches Redis +docs/solutions/integration-issues/api-versioning-gotcha.md # tags: [api, rails] — matches API ``` -**Step 1: Find ALL learning markdown files** +**SKIP (clearly not applicable):** +``` +docs/solutions/deployment-issues/heroku-memory-quota.md # plan has no deployment concerns +docs/solutions/frontend-issues/stimulus-race-condition.md # plan is API, not frontend +docs/solutions/authentication-issues/jwt-expiry.md # plan has no auth +``` -Run these commands to get every learning file: +When in doubt, spawn it. A learning agent that returns "Not applicable" wastes one context window. A missed learning that would have prevented a production bug wastes days. -```bash -# PRIMARY LOCATION - Project learnings -find docs/solutions -name "*.md" -type f 2>/dev/null +#### Handling Sparse Discovery -# If docs/solutions doesn't exist, check alternate locations: -find .claude/docs -name "*.md" -type f 2>/dev/null -find ~/.claude/docs -name "*.md" -type f 2>/dev/null -``` +If discovery finds few or no matched skills/learnings (e.g., a plan for a technology stack with no installed skills), the command still works — the 3 always-run cross-cutting agents plus per-technology docs-researchers provide meaningful enhancement. Acknowledge this in the summary: "Limited institutional knowledge available for [technology]. Enhancement based primarily on framework documentation and cross-cutting analysis. Consider running `/deepen-plan` again after building project-specific skills." -**Step 2: Read frontmatter of each learning to filter** +Write the matched resources list to `.deepen/MATCHED_RESOURCES.md` for reference. -Each learning file has YAML frontmatter with metadata. Read the first ~20 lines of each file to get: +### 4. Launch Research Agents (Parallel) -```yaml ---- -title: "N+1 Query Fix for Briefs" -category: performance-issues -tags: [activerecord, n-plus-one, includes, eager-loading] -module: Briefs -symptom: "Slow page load, multiple queries in logs" -root_cause: "Missing includes on association" ---- -``` +<critical_instruction> +EVERY agent prompt MUST include these output constraints. This is what prevents context overflow. -**For each .md file, quickly scan its frontmatter:** +Append this to EVERY agent spawn prompt: -```bash -# Read first 20 lines of each learning (frontmatter + summary) -head -20 docs/solutions/**/*.md ``` +## SHARED CONTEXT +Read .deepen/PLAN_MANIFEST.md first for plan overview, technologies, and risk areas. +Read .deepen/original_plan.md for the full plan content. + +## OUTPUT RULES (MANDATORY) +1. Write your FULL analysis as JSON to .deepen/{your_agent_name}.json +2. 
Use this EXACT schema (hard caps enforced): + { + "agent_type": "skill|learning|research|review", + "agent_name": "<your name>", + "source_type": "skill|documented-learning|official-docs|community-web", + "summary": "<500 chars max — your key contribution to this plan>", + "tools_used": ["read_file:path/to/file", "web_search:query-terms", "mcp:context7:query-docs", ...], + "recommendations": [ + { + "section_id": <which plan section this applies to, from manifest>, + "type": "best-practice|edge-case|anti-pattern|performance|security|code-example|architecture|ux|testing", + "title": "<100 chars>", + "recommendation": "<500 chars — the actual advice>", + "code_example": "<optional — concrete code snippet, max 800 chars>", + "references": ["<URL or doc reference>"], + "priority": "high|medium|low", + "confidence": 0.0-1.0 + } + ] + } +3. Max 8 recommendations per agent. Prioritize by impact on plan quality. +4. Only include recommendations with confidence >= 0.6. +5. Every recommendation MUST reference a specific section_id from the plan manifest. +6. Code examples are ENCOURAGED — concrete implementation details make the plan actionable. +7. The tools_used field is MANDATORY. List every tool call you made (file reads, web searches, MCP queries). If you did not use any tools, your recommendations are based on training data alone — set confidence to 0.5 max. +8. Return ONLY this to the parent (do NOT return the full analysis): + "Research complete. Wrote <N> recommendations for <M> sections to .deepen/{agent_name}.json. Key contribution: <1 sentence>" +``` +</critical_instruction> -**Step 3: Filter - only spawn sub-agents for LIKELY relevant learnings** - -Compare each learning's frontmatter against the plan: -- `tags:` - Do any tags match technologies/patterns in the plan? -- `category:` - Is this category relevant? (e.g., skip deployment-issues if plan is UI-only) -- `module:` - Does the plan touch this module? -- `symptom:` / `root_cause:` - Could this problem occur with the plan? - -**SKIP learnings that are clearly not applicable:** -- Plan is frontend-only → skip `database-migrations/` learnings -- Plan is Python → skip `rails-specific/` learnings -- Plan has no auth → skip `authentication-issues/` learnings +#### Launch All Matched Agents in Parallel -**SPAWN sub-agents for learnings that MIGHT apply:** -- Any tag overlap with plan technologies -- Same category as plan domain -- Similar patterns or concerns +**For each matched SKILL:** +``` +Task skill-agent(" +You have the [skill-name] skill available at [skill-path]. +1. Read the skill: Read [skill-path]/SKILL.md +2. Check for additional skill resources: + - Glob [skill-path]/references/*.md — read any reference files for deeper context + - Glob [skill-path]/assets/* — check for templates or examples + - Glob [skill-path]/templates/* — check for code templates +3. Read the plan context from .deepen/ +4. Apply the skill's expertise to the plan +5. Write recommendations following the OUTPUT RULES +" + SHARED_CONTEXT + OUTPUT_RULES) +``` -**Step 4: Spawn sub-agents for filtered learnings** +**For each matched LEARNING:** +``` +Task learning-agent(" +Read this learning file completely: [path to learning .md] +This documents a previously solved problem. Check if it applies to the plan. +If relevant: write specific recommendations about how to avoid this problem. 
+If not relevant after analysis: write an empty recommendations array with summary 'Not applicable: [reason]' +" + SHARED_CONTEXT + OUTPUT_RULES) +``` -For each learning that passes the filter: +**For each matched REVIEW/RESEARCH AGENT:** +``` +Task [agent-name](" +Review this plan using your expertise. Focus on your domain. +" + SHARED_CONTEXT + OUTPUT_RULES) +``` +**For each technology in the manifest, spawn a dedicated docs-researcher:** +``` +Task docs-researcher-[technology](" +Research current (2025-2026) best practices for [technology] [version from manifest if available]. + +## Documentation Research Steps: +1. Query Context7 MCP for official framework documentation: + - First: mcp__plugin_compound-engineering_context7__resolve-library-id for '[technology]' + - Then: mcp__plugin_compound-engineering_context7__query-docs with the resolved library ID for patterns relevant to this plan +2. Web search for recent (2025-2026) articles, migration guides, and changelog notes +3. Search for version-specific changes if manifest includes a version (e.g., React 19 vs 18, Rails 8 vs 7) +4. Find concrete code patterns and configuration recommendations + +Focus on areas the plan manifest identifies as enhancement opportunities for this technology. +Budget: 3-5 searches per technology for thorough coverage. +" + SHARED_CONTEXT + OUTPUT_RULES) ``` -Task general-purpose: " -LEARNING FILE: [full path to .md file] -1. Read this learning file completely -2. This learning documents a previously solved problem +Example: For a plan using React 19, TypeScript 5.5, and PostgreSQL 17, spawn three separate agents — one per technology. Each gets a full context window to research deeply. -Check if this learning applies to this plan: +Wait for ALL agents to complete. ---- -[full plan content] ---- +<late_notification_handling> +Late agent completion notifications are expected and harmless. The Task tool reports completions asynchronously — you may receive "Agent completed" messages after you've already proceeded to Step 5 or even Step 6. If you've already moved past the research phase, ignore late notifications. The agent's JSON file is already written to `.deepen/` and will be picked up by validation. +</late_notification_handling> -If relevant: -- Explain specifically how it applies -- Quote the key insight or solution -- Suggest where/how to incorporate it +### 5. Verify and Validate Agent Outputs -If NOT relevant after deeper analysis: -- Say 'Not applicable: [reason]' -" -``` +#### Step 5a: Verify All Expected Files Exist -**Example filtering:** +```bash +# List expected agent files based on what was launched in Step 4 +EXPECTED_AGENTS="<list of agent names you launched>" + +MISSING="" +for agent in $EXPECTED_AGENTS; do + if ! ls .deepen/${agent}*.json 1>/dev/null 2>&1; then + MISSING="$MISSING $agent" + echo "MISSING: $agent — no output file found in .deepen/" + fi +done + +if [ -n "$MISSING" ]; then + echo "⚠️ Missing agent files:$MISSING" + echo "Re-run these agents before proceeding to judge." +fi ``` -# Found 15 learning files, plan is about "Rails API caching" -# SPAWN (likely relevant): -docs/solutions/performance-issues/n-plus-one-queries.md # tags: [activerecord] ✓ -docs/solutions/performance-issues/redis-cache-stampede.md # tags: [caching, redis] ✓ -docs/solutions/configuration-fixes/redis-connection-pool.md # tags: [redis] ✓ +If any agent file is missing, re-launch that agent before proceeding. 
-# SKIP (clearly not applicable): -docs/solutions/deployment-issues/heroku-memory-quota.md # not about caching -docs/solutions/frontend-issues/stimulus-race-condition.md # plan is API, not frontend -docs/solutions/authentication-issues/jwt-expiry.md # plan has no auth -``` +#### Step 5b: Validate JSON Schema and Flag Hallucination Risk + +<critical_instruction> +Use Node.js for validation — any project using Claude Code has Node available. Python3 may not be installed and bash `python3 -c` fails on some Windows environments. +</critical_instruction> -**Spawn sub-agents in PARALLEL for all filtered learnings.** +```bash +node -e " +const fs = require('fs'); +const path = require('path'); +const files = fs.readdirSync('.deepen').filter(f => f.endsWith('.json') && f !== 'PLAN_MANIFEST.json'); +for (const file of files) { + const fp = path.join('.deepen', file); + try { + const data = JSON.parse(fs.readFileSync(fp, 'utf8')); + if (!Array.isArray(data.recommendations)) throw new Error('recommendations not an array'); + if (data.recommendations.length > 8) throw new Error('too many recommendations: ' + data.recommendations.length); + for (let i = 0; i < data.recommendations.length; i++) { + const rec = data.recommendations[i]; + if (rec.section_id == null) throw new Error('rec ' + i + ': missing section_id'); + if (!rec.type) throw new Error('rec ' + i + ': missing type'); + if (!rec.recommendation) throw new Error('rec ' + i + ': missing recommendation'); + } + const tools = data.tools_used || []; + if (tools.length === 0) { + console.log('⚠️ NO TOOLS USED: ' + file + ' — recommendations may be hallucinated (training data only)'); + } else { + console.log('VALID: ' + file + ' - ' + data.recommendations.length + ' recs, ' + tools.length + ' tools used'); + } + } catch (e) { + console.log('INVALID: ' + file + ' — ' + e.message + ' — removing'); + fs.unlinkSync(fp); + } +} +" +``` -**These learnings are institutional knowledge - applying them prevents repeating past mistakes.** +Agents with empty `tools_used` are not removed — their recommendations may still be valid — but they're flagged so the judge can weight them lower. -### 4. Launch Per-Section Research Agents +**Checkpoint:** Every launched agent should have a valid JSON file in `.deepen/`. If not, re-run missing/invalid agents. -<thinking> -For each major section in the plan, spawn dedicated sub-agents to research improvements. Use the Explore agent type for open-ended research. -</thinking> +### 6. Judge Phase — Deduplicate, Group, and Rank -**For each identified section, launch parallel research:** +<critical_instruction> +Do NOT read individual agent JSON files into your context. Launch a JUDGE agent that reads them in its own context window. +</critical_instruction> ``` -Task Explore: "Research best practices, patterns, and real-world examples for: [section topic]. -Find: -- Industry standards and conventions -- Performance considerations -- Common pitfalls and how to avoid them -- Documentation and tutorials -Return concrete, actionable recommendations." +Task judge-recommendations(" +You are a Plan Enhancement Judge. Consolidate recommendations from multiple research agents into a single, organized, high-quality enhancement plan. + +## Instructions: +1. Read .deepen/PLAN_MANIFEST.json for plan structure +2. Read ALL JSON files in .deepen/*.json (skip PLAN_MANIFEST.json) +3. Collect all recommendations across agents + +4. EVIDENCE CHECK: For each agent, check its tools_used field. 
If tools_used is empty AND source_type is NOT 'skill' (skill agents read files but may not log them as tool calls), downweight their confidence by 0.2 (e.g., 0.8 → 0.6). This prevents hallucinated web-research claims from ranking above grounded work. + +5. GROUP by section_id — organize all recommendations under the plan section they target + +6. Within each section group: + a. DEDUPLICATE: Remove semantically similar recommendations (keep the higher-confidence one) + b. RESOLVE CONFLICTS: If agents contradict each other, prefer the source with higher attribution priority (see below) + c. RANK by: source_type priority FIRST, then priority (high > medium > low), then confidence score + d. SELECT top 8 recommendations per section maximum + +**Source Attribution Priority (highest to lowest):** +- `skill` — Institutional knowledge, curated patterns specific to this project/team +- `documented-learning` — Previously solved problems from docs/solutions/ +- `official-docs` — Framework documentation via Context7 or official sites +- `community-web` — Blog posts, tutorials, community articles + +When two recommendations conflict, the higher-source-type wins. A skill-based recommendation that says "use pattern X" outranks a blog post that says "use pattern Y." + +7. For recommendations with code_example fields, preserve them — these are high-value + +8. Assign an impact level to each final recommendation: + - `must_change` — Plan has a gap that will cause failures if not addressed + - `should_change` — Significant improvement to plan quality + - `consider` — Valuable enhancement worth evaluating + - `informational` — Context or reference that deepens understanding + +9. Write the consolidated report to .deepen/JUDGED_RECOMMENDATIONS.json: + +{ + \"plan_title\": \"<from manifest>\", + \"total_raw_recommendations\": <count across all agents>, + \"duplicates_removed\": <count>, + \"conflicts_resolved\": <count>, + \"low_evidence_downweighted\": <count of recs from agents with empty tools_used>, + \"sections\": [ + { + \"section_id\": 1, + \"section_title\": \"<from manifest>\", + \"recommendations\": [ + { + \"id\": 1, + \"type\": \"best-practice|edge-case|...\", + \"impact\": \"must_change|should_change|consider|informational\", + \"title\": \"<100 chars>\", + \"recommendation\": \"<500 chars>\", + \"code_example\": \"<preserved from agent, or null>\", + \"references\": [\"...\"], + \"priority\": \"high|medium|low\", + \"confidence\": 0.0-1.0, + \"source_agents\": [\"agent1\", \"agent2\"] + } + ] + } + ], + \"cross_cutting_concerns\": [ + { + \"title\": \"<concern that spans multiple sections>\", + \"description\": \"<explanation>\", + \"affected_sections\": [1, 3, 5] + } + ], + \"agent_summaries\": [ + {\"agent\": \"name\", \"summary\": \"<their 500-char summary>\"} + ] +} + +10. Return to parent: 'Judging complete. <X> raw recommendations consolidated to <Y> across <Z> sections. Written to .deepen/JUDGED_RECOMMENDATIONS.json' +") ``` -**Also use Context7 MCP for framework documentation:** +#### Step 6b: Validate Judge Output -For any technologies/frameworks mentioned in the plan, query Context7: -``` -mcp__plugin_compound-engineering_context7__resolve-library-id: Find library ID for [framework] -mcp__plugin_compound-engineering_context7__query-docs: Query documentation for specific patterns +<critical_instruction> +The judge is the highest-leverage agent — if its output is malformed, the enhancer reads garbage. Spot-check before proceeding. 
+</critical_instruction> + +```bash +node -e " +const fs = require('fs'); +try { + const judged = JSON.parse(fs.readFileSync('.deepen/JUDGED_RECOMMENDATIONS.json', 'utf8')); + const manifest = JSON.parse(fs.readFileSync('.deepen/PLAN_MANIFEST.json', 'utf8')); + const manifestIds = new Set(manifest.sections.map(s => s.id)); + + if (!Array.isArray(judged.sections)) throw new Error('sections is not an array'); + if (judged.sections.length === 0) throw new Error('sections is empty — judge produced no output'); + + let totalRecs = 0; + for (const section of judged.sections) { + if (!manifestIds.has(section.section_id)) { + console.log('⚠️ Section ID ' + section.section_id + ' not in manifest — may be hallucinated'); + } + totalRecs += section.recommendations.length; + for (const rec of section.recommendations) { + if (!rec.recommendation) throw new Error('Empty recommendation in section ' + section.section_id); + } + } + console.log('JUDGE OUTPUT VALID: ' + judged.sections.length + ' sections, ' + totalRecs + ' recommendations'); +} catch (e) { + console.log('❌ JUDGE OUTPUT INVALID: ' + e.message); + console.log('Re-run the judge agent before proceeding.'); +} +" ``` -**Use WebSearch for current best practices:** +If judge output is invalid, re-run the judge. Do not proceed to enhancement with malformed data. -Search for recent (2024-2026) articles, blog posts, and documentation on topics in the plan. +### 7. Enhance the Plan (Synthesis Phase) -### 5. Discover and Run ALL Review Agents +<critical_instruction> +Do NOT read the judged recommendations into parent context. Launch a SYNTHESIS agent that reads both the original plan and the judged recommendations in its own context window and writes the enhanced plan directly. +</critical_instruction> -<thinking> -Dynamically discover every available agent and run them ALL against the plan. Don't filter, don't skip, don't assume relevance. 40+ parallel agents is fine. Use everything available. -</thinking> +``` +Task plan-enhancer(" +You are a Plan Enhancement Writer. Your job is to merge research recommendations into the original plan, producing an implementation-ready enhanced version that an AI developer (Claude Code) can execute directly. -**Step 1: Discover ALL available agents from ALL sources** +## Instructions: +1. Read .deepen/original_plan.md — this is the source plan to enhance +2. Read .deepen/JUDGED_RECOMMENDATIONS.json — these are the consolidated research findings +3. Read .deepen/PLAN_MANIFEST.json — for section structure reference -```bash -# 1. Project-local agents (highest priority - project-specific) -find .claude/agents -name "*.md" 2>/dev/null +## Enhancement Rules: -# 2. User's global agents (~/.claude/) -find ~/.claude/agents -name "*.md" 2>/dev/null +### Preservation — Mode-Switched by Content Type -# 3. compound-engineering plugin agents (all subdirectories) -find ~/.claude/plugins/cache/*/compound-engineering/*/agents -name "*.md" 2>/dev/null +**For prose sections (architecture decisions, descriptions, rationale):** +- Preserve the original text exactly — never rewrite the user's words +- Append a `### Research Insights` block AFTER the original prose +- If you find yourself editing the user's original sentences, STOP -# 4. 
ALL other installed plugins - check every plugin for agents -find ~/.claude/plugins/cache -path "*/agents/*.md" 2>/dev/null +**For code blocks (implementation examples, configuration, schemas):** +- When a `must_change` or `should_change` recommendation modifies a code block, merge it DIRECTLY into the code +- Produce one final code block with all enhancements applied inline +- Mark each enhancement with a `// ENHANCED: <reason>` comment +- REPLACE the original code block — do NOT show original and enhanced side-by-side +- This eliminates the two-pass problem where a developer reads the plan once for structure and again for changes -# 5. Check installed_plugins.json to find all plugin locations -cat ~/.claude/plugins/installed_plugins.json +**For all sections:** +- Preserve original section structure and ordering +- Preserve all acceptance criteria -# 6. For local plugins (isLocal: true), check their source directories -# Parse installed_plugins.json and find local plugin paths -``` +### Enhancement Format — Per-Section -**Important:** Check EVERY source. Include agents from: -- Project `.claude/agents/` -- User's `~/.claude/agents/` -- compound-engineering plugin (but SKIP workflow/ agents - only use review/, research/, design/, docs/) -- ALL other installed plugins (agent-sdk-dev, frontend-design, etc.) -- Any local plugins +**For sections with code blocks that have recommendations:** -**For compound-engineering plugin specifically:** -- USE: `agents/review/*` (all reviewers) -- USE: `agents/research/*` (all researchers) -- USE: `agents/design/*` (design agents) -- USE: `agents/docs/*` (documentation agents) -- SKIP: `agents/workflow/*` (these are workflow orchestrators, not reviewers) +```[language] +// Original code preserved where unchanged +const config = { + staleTime: 5 * 60 * 1000, + // ENHANCED: Add retry with backoff — prevents cascade failures on transient network issues + retry: 3, + retryDelay: (attempt) => Math.min(1000 * 2 ** attempt, 30000), + // ENHANCED: Disable refetch on focus — reduces unnecessary requests for stable data + refetchOnWindowFocus: false, +}; +``` -**Step 2: For each discovered agent, read its description** +**For prose sections with recommendations:** -Read the first few lines of each agent file to understand what it reviews/analyzes. +```markdown +### Research Insights -**Step 3: Launch ALL agents in parallel** +**Best Practices:** +- [Concrete recommendation with rationale] -For EVERY agent discovered, launch a Task in parallel: +**Edge Cases & Pitfalls:** +- [Edge case and how to handle it] -``` -Task [agent-name]: "Review this plan using your expertise. Apply all your checks and patterns. Plan content: [full plan content]" +**References:** +- [URL or documentation link] ``` -**CRITICAL RULES:** -- Do NOT filter agents by "relevance" - run them ALL -- Do NOT skip agents because they "might not apply" - let them decide -- Launch ALL agents in a SINGLE message with multiple Task tool calls -- 20, 30, 40 parallel agents is fine - use everything -- Each agent may catch something others miss -- The goal is MAXIMUM coverage, not efficiency +Only include subsections that have actual recommendations. Do NOT include empty subsections. -**Step 4: Also discover and run research agents** +### Action Classification -Research agents (like `best-practices-researcher`, `framework-docs-researcher`, `git-history-analyzer`, `repo-research-analyst`) should also be run for relevant plan sections. +Classify every recommendation into one of three buckets. 
Do NOT interleave them — group clearly: -### 6. Wait for ALL Agents and Synthesize Everything +**`implement`** — Code changes to make. These go directly into code blocks (for code sections) or into Research Insights (for prose sections). -<thinking> -Wait for ALL parallel agents to complete - skills, research agents, review agents, everything. Then synthesize all findings into a comprehensive enhancement. -</thinking> +**`verify`** — Checks or tests to run BEFORE implementing certain changes. Examples: 'confirm API supports batch mode before switching to batch implementation', 'verify session format matches expected pattern'. These go into the Pre-Implementation Verification section. -**Collect outputs from ALL sources:** +**`defer`** — Items explicitly out of scope for this plan. `consider` and `informational` impact items from the judge typically land here. These go into the Deferred section. -1. **Skill-based sub-agents** - Each skill's full output (code examples, patterns, recommendations) -2. **Learnings/Solutions sub-agents** - Relevant documented learnings from /workflows:compound -3. **Research agents** - Best practices, documentation, real-world examples -4. **Review agents** - All feedback from every reviewer (architecture, security, performance, simplicity, etc.) -5. **Context7 queries** - Framework documentation and patterns -6. **Web searches** - Current best practices and articles +### Sequencing -**For each agent's findings, extract:** -- [ ] Concrete recommendations (actionable items) -- [ ] Code patterns and examples (copy-paste ready) -- [ ] Anti-patterns to avoid (warnings) -- [ ] Performance considerations (metrics, benchmarks) -- [ ] Security considerations (vulnerabilities, mitigations) -- [ ] Edge cases discovered (handling strategies) -- [ ] Documentation links (references) -- [ ] Skill-specific patterns (from matched skills) -- [ ] Relevant learnings (past solutions that apply - prevent repeating mistakes) +When two fixes have a dependency relationship, state the sequence explicitly: +- 'Fix X must be implemented before Fix Y because Y depends on X's output' +- 'Fix X → deploy → observe metrics → then decide on Fix Y' +- 'Fix X and Fix Y are independent — implement both regardless' -**Deduplicate and prioritize:** -- Merge similar recommendations from multiple agents -- Prioritize by impact (high-value improvements first) -- Flag conflicting advice for human review -- Group by plan section +### Enhancement Summary -### 7. Enhance Plan Sections +Add this block at the TOP of the plan (before the first section): -<thinking> -Merge research findings back into the plan, adding depth without changing the original structure. -</thinking> +```markdown +## Enhancement Summary -**Enhancement format for each section:** +**Deepened on:** [today's date] +**Sections enhanced:** [count] of [total] +**Research agents used:** [count] +**Total recommendations applied:** [count] -```markdown -## [Original Section Title] +### Pre-Implementation Verification +Tasks to check BEFORE writing code: +1. [ ] [Verification task — what to check and why] +2. [ ] [Verification task] -[Original content preserved] +### Implementation Sequence +Order of operations when fixes have dependencies: +1. [Fix/enhancement] — implement first because [reason] +2. [Fix/enhancement] — depends on #1's output +3. [Fix/enhancement] — independent, implement anytime -### Research Insights +If no dependencies exist, state: 'All enhancements are independent — implement in any order.' 
-**Best Practices:** -- [Concrete recommendation 1] -- [Concrete recommendation 2] +### Key Improvements +1. [Most impactful improvement] +2. [Second most impactful] +3. [Third most impactful] + +### New Considerations Discovered +- [Important finding that wasn't in the original plan] +- [Risk or edge case not previously considered] -**Performance Considerations:** -- [Optimization opportunity] -- [Benchmark or metric to target] +### Cross-Cutting Concerns +- [Concern spanning multiple sections, if any] -**Implementation Details:** -```[language] -// Concrete code example from research +### Deferred to Future Work +Items out of scope for this plan: +- [CONSIDER/INFORMATIONAL item] — why it's deferred ``` -**Edge Cases:** -- [Edge case 1 and how to handle] -- [Edge case 2 and how to handle] +### Content Rules +- Code examples are high-value — merge them into code blocks wherever possible. +- Keep enhancement text concise and actionable — no filler prose. +- If multiple agents recommended the same thing, that's a strong signal — note it. +- If agents identified cross-cutting concerns, add a dedicated section at the end. +- Every `must_change` recommendation MUST appear in the enhanced plan — either merged into code or in Research Insights. Do not drop them. -**References:** -- [Documentation URL 1] -- [Documentation URL 2] +4. Write the enhanced plan to .deepen/ENHANCED_PLAN.md +5. Return to parent: 'Enhancement complete. Enhanced <N> of <M> sections with <X> recommendations (<Y> implemented, <Z> deferred). Written to .deepen/ENHANCED_PLAN.md' +") ``` -### 8. Add Enhancement Summary +### 8. Verify Enhanced Plan Integrity -At the top of the plan, add a summary section: +<critical_instruction> +Verify the enhancer preserved the original plan structure. If sections are missing, the enhancer rewrote instead of appending. +</critical_instruction> -```markdown -## Enhancement Summary +```bash +node -e " +const fs = require('fs'); +const manifest = JSON.parse(fs.readFileSync('.deepen/PLAN_MANIFEST.json', 'utf8')); +const enhanced = fs.readFileSync('.deepen/ENHANCED_PLAN.md', 'utf8'); + +let missing = []; +for (const section of manifest.sections) { + // Check that each original section title still appears in the enhanced plan + if (!enhanced.includes(section.title)) { + missing.push(section.title); + } +} + +if (missing.length > 0) { + console.log('❌ PRESERVATION FAILURE — these original sections are missing from the enhanced plan:'); + missing.forEach(t => console.log(' - ' + t)); + console.log('The enhancer may have rewritten the plan instead of appending. Re-run the enhancer.'); +} else { + console.log('✅ All ' + manifest.sections.length + ' original sections preserved in enhanced plan.'); +} +" +``` -**Deepened on:** [Date] -**Sections enhanced:** [Count] -**Research agents used:** [List] +If sections are missing, re-run the enhancer with stronger preservation instructions. Do not overwrite the original plan with a broken enhancement. -### Key Improvements -1. [Major improvement 1] -2. [Major improvement 2] -3. [Major improvement 3] +### 9. Present Enhanced Plan -### New Considerations Discovered -- [Important finding 1] -- [Important finding 2] +<critical_instruction> +NOW read `.deepen/ENHANCED_PLAN.md` — or rather, copy it to the original location and present the summary. 
+</critical_instruction> + +#### Step 9a: Copy Enhanced Plan to Final Location + +```bash +# Option A: Update in place (default) +cp .deepen/ENHANCED_PLAN.md <original_plan_path> + +# Option B: Create separate file (if user prefers) +# cp .deepen/ENHANCED_PLAN.md <plan_path_with_-deepened_suffix> ``` -### 9. Update Plan File +#### Step 9b: Read the Enhancement Summary -**Write the enhanced plan:** -- Preserve original filename -- Add `-deepened` suffix if user prefers a new file -- Update any timestamps or metadata +Read ONLY the Enhancement Summary block from the top of the enhanced plan (first ~30 lines). Do NOT read the entire enhanced plan into parent context — the user can read the file directly. -## Output Format +#### Step 9c: Present Summary -Update the plan file in place (or if user requests a separate file, append `-deepened` after `-plan`, e.g., `2026-01-15-feat-auth-plan-deepened.md`). +```markdown +## Plan Deepened -## Quality Checks +**Plan:** [plan title] +**File:** [path to enhanced plan] -Before finalizing: -- [ ] All original content preserved -- [ ] Research insights clearly marked and attributed -- [ ] Code examples are syntactically correct -- [ ] Links are valid and relevant -- [ ] No contradictions between sections -- [ ] Enhancement summary accurately reflects changes +### Enhancement Summary: +- **Sections Enhanced:** [N] of [M] +- **Research Agents Used:** [count] +- **Total Recommendations Applied:** [count] +- **Duplicates Removed:** [count] -## Post-Enhancement Options +### Key Improvements: +1. [Most impactful improvement] +2. [Second most impactful] +3. [Third most impactful] -After writing the enhanced plan, use the **AskUserQuestion tool** to present these options: +### New Considerations Discovered: +- [Finding 1] +- [Finding 2] +``` -**Question:** "Plan deepened at `[plan_path]`. What would you like to do next?" +#### Step 9d: Offer Next Steps -**Options:** -1. **View diff** - Show what was added/changed -2. **Run `/technical_review`** - Get feedback from reviewers on enhanced plan -3. **Start `/workflows:work`** - Begin implementing this enhanced plan -4. **Deepen further** - Run another round of research on specific sections -5. **Revert** - Restore original plan (if backup exists) +Ask the user: -Based on selection: -- **View diff** → Run `git diff [plan_path]` or show before/after -- **`/technical_review`** → Call the /technical_review command with the plan file path -- **`/workflows:work`** → Call the /workflows:work command with the plan file path -- **Deepen further** → Ask which sections need more research, then re-run those agents -- **Revert** → Restore from git or backup +**"Plan deepened. What would you like to do next?"** -## Example Enhancement +1. **View diff** — `git diff <plan_path>` +2. **Run `/plan_review`** — Get review agents' feedback on enhanced plan +3. **Start `/workflows:work`** — Begin implementing the enhanced plan +4. **Deepen further** — Run another round on specific sections +5. **Revert** — `git checkout <plan_path>` +6. **Compound insights** — Extract novel patterns discovered during deepening into `docs/solutions/` for future sessions -**Before (from /workflows:plan):** -```markdown -## Technical Approach +If user selects option 6: +- Read `.deepen/JUDGED_RECOMMENDATIONS.json` +- Identify recommendations that represent novel discoveries (not already in `docs/solutions/`) +- For each novel finding, use the `compound-docs` skill to create a properly validated learning file: + 1. 
Read the compound-docs skill at the plugin's skills path for the full YAML schema and template + 2. Create files in `docs/solutions/[category]/` using the skill's required YAML frontmatter fields (module, date, problem_type, component, symptoms, root_cause, resolution_type, severity, tags) + 3. Use the category mapping from the skill's yaml-schema reference to determine the correct subdirectory +- This closes the compound engineering loop — future `/deepen-plan` runs will discover these learnings and apply them automatically -Use React Query for data fetching with optimistic updates. -``` +--- -**After (from /workflows:deepen-plan):** -```markdown -## Technical Approach +## Appendix: Token Budget Reference + +**Parent context (what matters for avoiding overflow):** + +| Component | Token Budget | Notes | +|-----------|-------------|-------| +| Plan manifest analysis return | ~100 | One sentence confirmation | +| Discovery (directory listings) | ~1,000-2,000 | File lists, frontmatter scans | +| Matched resources list | ~500 | Names and paths only | +| Per-agent summary returned to parent | ~100-150 | One sentence + counts (10-20 agents) | +| Validation script | ~0 | Bash, no LLM tokens | +| Judge return | ~100 | One sentence + counts | +| Enhancement return | ~100 | One sentence confirmation | +| Enhancement summary (top of plan) | ~500 | Read only the summary block | +| Parent orchestrator overhead | ~5,000 | Instructions, synthesis, report | +| **Total parent context from agents** | **~8,000-12,000** | **vs unbounded in v1** | + +**Sub-agent spawns:** + +| Agent | Context Cost | Purpose | +|-------|-------------|---------| +| Plan analyzer | 1 window | Structured manifest for all agents | +| 3 always-run agents (security, arch, perf) | 3 windows | Cross-cutting analysis | +| 5-15 matched skill/learning/review agents | 5-15 windows | Domain-specific recommendations | +| 2-5 per-technology docs researchers | 2-5 windows | Deep framework/library research via Context7 + web | +| Judge | 1 window | Dedup, group by section, rank with source priority | +| Plan enhancer | 1 window | Writes the final enhanced plan | +| **Total** | **13-26 windows** | **Each isolated, parent stays lean** | + +The key insight: sub-agent context windows are independent and disposable. Only what they *return* to the parent matters for overflow. Every sub-agent returns ~100 tokens. The parent reads only the enhancement summary (~500 tokens). The full enhanced plan lives on disk at the original file path. -Use React Query for data fetching with optimistic updates. 
+--- -### Research Insights +## Example Enhancements -**Best Practices:** -- Configure `staleTime` and `cacheTime` based on data freshness requirements -- Use `queryKey` factories for consistent cache invalidation -- Implement error boundaries around query-dependent components +### Example 1: Code Block — Merge Mode + +**Before (from `/workflows:plan`):** +```markdown +## Query Configuration + +```typescript +const queryClient = new QueryClient({ + defaultOptions: { + queries: { + staleTime: 5 * 60 * 1000, + }, + }, +}); +``` +``` -**Performance Considerations:** -- Enable `refetchOnWindowFocus: false` for stable data to reduce unnecessary requests -- Use `select` option to transform and memoize data at query level -- Consider `placeholderData` for instant perceived loading +**After (from `/deepen-plan`):** +```markdown +## Query Configuration -**Implementation Details:** ```typescript -// Recommended query configuration const queryClient = new QueryClient({ defaultOptions: { queries: { - staleTime: 5 * 60 * 1000, // 5 minutes - retry: 2, + staleTime: 5 * 60 * 1000, + // ENHANCED: Add retry with exponential backoff — prevents cascade failures on transient network issues + retry: 3, + retryDelay: (attempt) => Math.min(1000 * 2 ** attempt, 30000), + // ENHANCED: Disable refetch on focus — reduces unnecessary requests for stable data refetchOnWindowFocus: false, }, }, }); + +// ENHANCED: Query key factory for consistent cache invalidation across components +const productKeys = { + all: ['products'] as const, + lists: () => [...productKeys.all, 'list'] as const, + detail: (id: string) => [...productKeys.all, 'detail', id] as const, +}; +``` ``` -**Edge Cases:** -- Handle race conditions with `cancelQueries` on component unmount -- Implement retry logic for transient network failures -- Consider offline support with `persistQueryClient` +Note: The code block is replaced, not duplicated. `// ENHANCED:` comments mark what was added and why. An AI developer can implement this as-written. + +### Example 2: Prose Section — Append Mode + +**Before (from `/workflows:plan`):** +```markdown +## Technical Approach + +Use React Query for data fetching with optimistic updates. The cart state will be managed in Zustand with SSE providing real-time sync. +``` + +**After (from `/deepen-plan`):** +```markdown +## Technical Approach + +Use React Query for data fetching with optimistic updates. The cart state will be managed in Zustand with SSE providing real-time sync. + +### Research Insights + +**Edge Cases & Pitfalls:** +- Handle race conditions with `cancelQueries` on component unmount — stale SSE responses can overwrite fresh optimistic data +- Zustand store should validate SSE payloads before writing (untrusted data boundary) **References:** - https://tanstack.com/query/latest/docs/react/guides/optimistic-updates - https://tkdodo.eu/blog/practical-react-query ``` -NEVER CODE! Just research and enhance the plan. +Note: Original prose is untouched. Research insights are appended after. From 52c60f3ebbfafe7c9f156314f5121379c5be7610 Mon Sep 17 00:00:00 2001 From: Drew Miller <drew@drewx.design> Date: Thu, 12 Feb 2026 14:22:44 -0500 Subject: [PATCH 2/6] fix: delegate compound insights to /workflows:compound instead of reimplementing The compound-docs skill already has a validated YAML schema and 7-step process. Instead of reimplementing it inside deepen-plan, offer the user the option to run /workflows:compound themselves. 
--- .../compound-engineering/commands/deepen-plan.md | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/plugins/compound-engineering/commands/deepen-plan.md b/plugins/compound-engineering/commands/deepen-plan.md index 2138da64..c53fb6a7 100644 --- a/plugins/compound-engineering/commands/deepen-plan.md +++ b/plugins/compound-engineering/commands/deepen-plan.md @@ -745,16 +745,13 @@ Ask the user: 3. **Start `/workflows:work`** — Begin implementing the enhanced plan 4. **Deepen further** — Run another round on specific sections 5. **Revert** — `git checkout <plan_path>` -6. **Compound insights** — Extract novel patterns discovered during deepening into `docs/solutions/` for future sessions +6. **Compound insights** — Run `/workflows:compound` to extract novel patterns into `docs/solutions/` If user selects option 6: -- Read `.deepen/JUDGED_RECOMMENDATIONS.json` -- Identify recommendations that represent novel discoveries (not already in `docs/solutions/`) -- For each novel finding, use the `compound-docs` skill to create a properly validated learning file: - 1. Read the compound-docs skill at the plugin's skills path for the full YAML schema and template - 2. Create files in `docs/solutions/[category]/` using the skill's required YAML frontmatter fields (module, date, problem_type, component, symptoms, root_cause, resolution_type, severity, tags) - 3. Use the category mapping from the skill's yaml-schema reference to determine the correct subdirectory -- This closes the compound engineering loop — future `/deepen-plan` runs will discover these learnings and apply them automatically +- Summarize the key novel discoveries from the deepening session (findings that aren't already in `docs/solutions/`) +- Tell the user to run `/workflows:compound` which will capture these insights using the proper `compound-docs` skill with validated YAML schema +- The user can run `/workflows:compound` once per novel finding, or describe multiple findings and let the command handle them +- This closes the compound engineering loop — future `/deepen-plan` runs will discover these learnings in Step 3b and apply them automatically --- From 7a9c9c870fd86db792dbd225d9842b046ee79b23 Mon Sep 17 00:00:00 2001 From: Drew Miller <drew@drewx.design> Date: Thu, 12 Feb 2026 15:40:27 -0500 Subject: [PATCH 3/6] feat: add architectural decision challenge phase to deepen-plan The deepen-plan command deepens decisions but never challenges them. Real reviewer feedback showed it misses redundant tool params, YAGNI violations built despite being flagged, and misplaced business logic. Adds two new always-run agents: - agent-native-architecture-reviewer: routes to skill checklist, anti-patterns, and reference files (not generic prompt) - project-architecture-challenger: reads CLAUDE.md and challenges every decision against project-specific principles Also injects PROJECT ARCHITECTURE CONTEXT into all review/research agent prompts so they evaluate against project conventions. 
--- plugins/compound-engineering/CHANGELOG.md | 4 ++ .../commands/deepen-plan.md | 72 ++++++++++++++++++- 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/plugins/compound-engineering/CHANGELOG.md b/plugins/compound-engineering/CHANGELOG.md index a397d9fa..8ef15d08 100644 --- a/plugins/compound-engineering/CHANGELOG.md +++ b/plugins/compound-engineering/CHANGELOG.md @@ -20,6 +20,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Uses Node.js for validation scripts (Python3 may not be installed on all platforms) - Next steps offer `/plan_review` (not `/workflows:review` which is for code, not plans) - Correct agent name references matching actual plugin agent filenames + - **New: Architectural Decision Challenge phase** — Two new always-run agents that challenge plan decisions (not just deepen them): + - `agent-native-architecture-reviewer`: Properly routes to the `agent-native-architecture` skill's checklist, anti-patterns, and reference files (`from-primitives-to-domain-tools.md`, `mcp-tool-design.md`, `refactoring-to-prompt-native.md`) + - `project-architecture-challenger`: Reads CLAUDE.md and challenges every decision against the project's own architectural principles (redundancy, layer placement, YAGNI enforcement, convention drift) + - Review/research agents now receive `## PROJECT ARCHITECTURE CONTEXT` — they read the project's CLAUDE.md to evaluate against project-specific principles, not just generic best practices --- diff --git a/plugins/compound-engineering/commands/deepen-plan.md b/plugins/compound-engineering/commands/deepen-plan.md index c53fb6a7..5211f11a 100644 --- a/plugins/compound-engineering/commands/deepen-plan.md +++ b/plugins/compound-engineering/commands/deepen-plan.md @@ -183,6 +183,8 @@ Read `.deepen/PLAN_MANIFEST.json` and match discovered resources: **Important:** Skills may have `references/` subdirectories with additional context files. When spawning skill agents in Step 4, instruct them to also check for and read files in `references/`, `assets/`, and `templates/` directories within the skill path. +**Special routing — `agent-native-architecture` skill:** This skill is interactive with a routing table. Do NOT use the generic skill template. Use the dedicated template in Step 4 that routes to the Architecture Review Checklist, Anti-Patterns sections, and specific reference files (`from-primitives-to-domain-tools.md`, `mcp-tool-design.md`, `refactoring-to-prompt-native.md`). + **Learnings** — Match if learning's tags, category, or module overlaps with plan technologies, domains, or modules being changed. **Agents** — Two tiers: @@ -191,8 +193,9 @@ Read `.deepen/PLAN_MANIFEST.json` and match discovered resources: - Security agents (security-sentinel, any security reviewer) - Architecture agents (architecture-strategist) - Performance agents (performance-oracle) +- **Project Architecture Challenger** (dedicated agent — see Step 4 template) -These run regardless of manifest matching because their domains are relevant to virtually every plan. A security agent catching a data exposure risk in a "simple UI plan" is exactly the kind of cross-cutting insight that makes deepening valuable. +These run regardless of manifest matching because their domains are relevant to virtually every plan. A security agent catching a data exposure risk in a "simple UI plan" is exactly the kind of cross-cutting insight that makes deepening valuable. 
The Project Architecture Challenger specifically questions whether the plan's decisions align with the project's own CLAUDE.md principles — catching redundant features, YAGNI violations, and logic in the wrong layer. **Manifest-matched (run if domain overlap):** - Framework-specific review agents (dhh-rails-reviewer for Rails, kieran-rails-reviewer for Rails, kieran-typescript-reviewer for TypeScript, kieran-python-reviewer for Python) @@ -303,6 +306,9 @@ If not relevant after analysis: write an empty recommendations array with summar ``` Task [agent-name](" Review this plan using your expertise. Focus on your domain. + +## PROJECT ARCHITECTURE CONTEXT +Read the project's CLAUDE.md (or .claude/CLAUDE.md) for project-specific architectural principles, patterns, and conventions. Evaluate the plan against THESE principles, not just generic best practices. If no CLAUDE.md exists, skip this step. " + SHARED_CONTEXT + OUTPUT_RULES) ``` @@ -326,6 +332,70 @@ Budget: 3-5 searches per technology for thorough coverage. Example: For a plan using React 19, TypeScript 5.5, and PostgreSQL 17, spawn three separate agents — one per technology. Each gets a full context window to research deeply. +**SPECIAL: `agent-native-architecture` skill (if matched):** + +Do NOT use the generic skill template. This skill has an interactive routing table that doesn't work with "read and follow." Instead, route directly to the architectural review references: + +``` +Task agent-native-architecture-reviewer(" +You are an Agent-Native Architecture Reviewer. Your job is to evaluate whether this plan's tool design, feature decisions, and state management align with agent-native principles. + +## Instructions: +1. Read the agent-native-architecture skill: [skill-path]/SKILL.md + - Focus on: <architecture_checklist>, <anti_patterns>, and <core_principles> sections +2. Read these SPECIFIC reference files for deeper context: + - [skill-path]/references/from-primitives-to-domain-tools.md — when to add domain tools vs stay with primitives + - [skill-path]/references/mcp-tool-design.md — tools should be primitives, not workflows + - [skill-path]/references/refactoring-to-prompt-native.md — moving logic from code to prompts +3. Read the project's CLAUDE.md for project-specific architectural principles +4. Read the plan from .deepen/ + +## Apply these checks to EVERY tool change and new feature in the plan: +- Does a new tool parameter duplicate capability already available through another tool? +- Does the tool encode business logic (judgment/decisions) that should live in the agent prompt? +- Are there two ways to accomplish the same outcome? (redundancy — confuses LLMs about which path to choose) +- Is logic placed in the right layer? (backend tool vs frontend vs agent prompt vs skill guidance) +- Do hardcoded values belong in skills, or should the agent discover them from data? (emergent capability) +- Are features truly needed now, or are they YAGNI? If the plan flags something as YAGNI but builds it anyway, that's a finding. +- Does the Architecture Review Checklist pass? (Parity, Granularity, Composability, Emergent Capability) + +Write findings as recommendations following the OUTPUT RULES. Use type 'architecture' or 'anti-pattern' for findings. +" + SHARED_CONTEXT + OUTPUT_RULES) +``` + +**ALWAYS RUN: Project Architecture Challenger** + +<critical_instruction> +This agent runs on EVERY plan, regardless of manifest matching. 
It is the architectural decision challenge phase — it questions whether the plan's decisions should exist, not just whether they're implemented correctly. This catches the class of issues that deepening misses: redundant features, logic in the wrong layer, YAGNI violations built despite being acknowledged, and drift from project conventions. +</critical_instruction> + +``` +Task project-architecture-challenger(" +You are a Project Architecture Challenger. Your job is NOT to deepen the plan — it's to CHALLENGE the plan's decisions against the project's own architectural principles. + +## Instructions: +1. Read the project's CLAUDE.md (or .claude/CLAUDE.md) — extract the architectural principles, patterns, and conventions this project follows +2. Read .deepen/original_plan.md — the plan to challenge +3. Read .deepen/PLAN_MANIFEST.md — for context + +## For each major decision in the plan, ask: +- **Redundancy**: Does this new feature/tool/parameter duplicate something that already exists? Can the same outcome be achieved with existing tools via composition? +- **Layer placement**: Is business logic in the right place? Frontend should be a thin rendering layer. Tools should be atomic primitives. Judgment belongs in prompts/skills, not code. +- **YAGNI enforcement**: Does the plan acknowledge something as YAGNI but build it anyway? Flag these — the plan is contradicting itself. +- **Hardcoded vs emergent**: Are mappings/values hardcoded that the agent could discover from data? Skill guidance should teach patterns, not enumerate values. +- **Convention drift**: Does any decision contradict the project's stated conventions in CLAUDE.md? +- **Complexity budget**: Does each feature earn its complexity? Three similar lines are better than a premature abstraction. + +## What to produce: +- Recommendations with type 'architecture' for structural concerns +- Recommendations with type 'anti-pattern' for agent-native violations +- High confidence (0.8+) when CLAUDE.md explicitly contradicts the plan +- Medium confidence (0.6-0.7) when the concern is a judgment call + +Be specific. Quote the principle from CLAUDE.md that the plan violates. Name the alternative approach. +" + SHARED_CONTEXT + OUTPUT_RULES) +``` + Wait for ALL agents to complete. <late_notification_handling> From 520071f9bc08e3acbe78776f582fa674f0a5495b Mon Sep 17 00:00:00 2001 From: Drew Miller <drew@drewx.design> Date: Sat, 14 Feb 2026 14:52:02 -0500 Subject: [PATCH 4/6] Update deepen-plan to v3.3 with crash prevention and quality improvements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Validated across two real-world pipeline runs. 
Key changes: - Batched agent launches (max 4 pending) to prevent context overflow crashes - 200-char return cap on agent messages (all analysis in JSON files) - Version grounding: lockfile > package.json > plan text priority - Per-section judge parallelization (~21 min -> ~8-10 min) - Two-part output: Decision Record (reviewers) + Implementation Spec (developers) - Quality review phase (CoVe pattern) catches self-contradictions and code gaps - Enhancer resolves conditionals, verifies API versions, checks accessibility - fast_follow classification bucket for ticketable items - Convergence signals with [Strong Signal] markers - Task() failure recovery (retry once on infrastructure errors) - truncated_count field for judge convergence weighting - Pipeline checkpoint logging for diagnostics 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> --- .claude-plugin/marketplace.json | 2 +- plugins/compound-engineering/CHANGELOG.md | 63 +- .../commands/deepen-plan.md | 1182 ++++++++++------- 3 files changed, 754 insertions(+), 493 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 563dfcd8..a1b7be99 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -12,7 +12,7 @@ { "name": "compound-engineering", "description": "AI-powered development tools that get smarter with every use. Make each unit of engineering work easier than the last. Includes 29 specialized agents, 22 commands, and 19 skills.", - "version": "2.33.0", + "version": "2.34.0", "author": { "name": "Kieran Klaassen", "url": "https://github.com/kieranklaassen", diff --git a/plugins/compound-engineering/CHANGELOG.md b/plugins/compound-engineering/CHANGELOG.md index 8ef15d08..bce7f8d7 100644 --- a/plugins/compound-engineering/CHANGELOG.md +++ b/plugins/compound-engineering/CHANGELOG.md @@ -5,25 +5,56 @@ All notable changes to the compound-engineering plugin will be documented in thi The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## [2.34.0] - 2026-02-12
+## [2.34.0] - 2026-02-13

### Changed

-- **`/deepen-plan` command** — Rewritten with **phased file-based map-reduce** architecture (v3) to prevent context overflow
-  - Sub-agents write full analysis JSON to `.deepen/` on disk and return only ~100 token summaries to parent
-  - Parent context stays under ~12k tokens regardless of agent count (vs unbounded in v1)
-  - New phases: **Plan Manifest Analysis** (structured context for all agents), **Validation** (catches silent failures + hallucination flagging), **Judge** (dedup + conflict resolution with source attribution priority), **Preservation Check** (catches rewrite-instead-of-append errors)
-  - Smart agent selection: always-run cross-cutting agents (security, architecture, performance) + manifest-matched domain agents
-  - Skill agents now read `references/`, `assets/`, and `templates/` subdirectories for deeper context
-  - Compound insights option (Step 9d) now uses `compound-docs` skill YAML schema for properly validated learning files
-  - Cross-platform safe: uses project-relative `.deepen/` instead of `/tmp/` (fixes Windows path issues)
-  - Uses Node.js for validation scripts (Python3 may not be installed on all platforms)
-  - Next steps offer `/plan_review` (not `/workflows:review` which is for code, not plans)
-  - Correct agent name references matching actual plugin agent filenames
-  - **New: Architectural Decision Challenge phase** — Two new always-run agents that challenge plan decisions (not just deepen them):
-    - `agent-native-architecture-reviewer`: Properly routes to the `agent-native-architecture` skill's checklist, anti-patterns, and reference files (`from-primitives-to-domain-tools.md`, `mcp-tool-design.md`, `refactoring-to-prompt-native.md`)
-    - `project-architecture-challenger`: Reads CLAUDE.md and challenges every decision against the project's own architectural principles (redundancy, layer placement, YAGNI enforcement, convention drift)
-  - Review/research agents now receive `## PROJECT ARCHITECTURE CONTEXT` — they read the project's CLAUDE.md to evaluate against project-specific principles, not just generic best practices
+- **`/deepen-plan` command** — Complete rewrite with **context-managed map-reduce** architecture to prevent context overflow. Validated across multiple real-world runs.
+
+  **Architecture:**
+  - Sub-agents write full analysis JSON to `.deepen/` on disk, return only a ~200-char completion signal to parent
+  - Parent context stays under ~13k tokens regardless of agent count (vs unbounded in v1)
+  - 10-phase pipeline: Analyze → Discover → Research (batched) → Validate → Judge (parallel per-section + merge) → Judge Validation → Enhance → Quality Review → Preservation Check → Present
+
+  **Context Overflow Prevention (crash-tested):**
+  - **Batched agent launches** — Max 4 Task() agents pending simultaneously. Prevents session crash from simultaneous returns (anthropics/claude-code#11280, #8136)
+  - **200-char return cap** — Hard limit on agent return messages. All analysis lives in JSON files on disk
+  - **Task() failure recovery** — Retry once on silent infrastructure errors (`[Tool result missing due to internal error]`)
+
+  **Version Grounding:**
+  - Plan-analyzer reads lockfile → package.json → plan text (priority order) to resolve actual framework versions
+  - Prevents downstream agents from researching wrong library versions (e.g., MUI 5 when project uses MUI 7)
+  - `version_mismatches` field flags discrepancies between plan text and actual dependencies
+
+  **Per-Section Judge Parallelization:**
+  - Replaces single monolithic judge with parallel per-section judges + merge judge
+  - Section judges run in parallel (batched max 4), each deduplicates and ranks within its section
+  - Merge judge resolves cross-section conflicts, identifies cross-section convergence
+  - Reduced judge time from ~21 min to ~8-10 min in testing
+
+  **Two-Part Output Structure:**
+  - **Decision Record** (reviewer-facing): Enhancement summary, agent consensus, research insights, strong signal markers, fast follow items, verification checklist
+  - **Implementation Spec** (developer-facing): Clean, linear implementation guidance with ready-to-copy code blocks — no `// ENHANCED:` annotations or `(Rec #X)` references
+
+  **Quality Review (CoVe Pattern):**
+  - Post-enhancement agent checks for self-contradictions, PR scope assessment, defensive stacking, code completeness (undefined references), integration test gap detection, deferred items needing bridge mitigations
+  - Runs in isolated context — does not inherit enhancer's perspective
+
+  **Enhancer Improvements:**
+  - **Resolve conditionals** — Reads codebase to determine which implementation path applies, eliminates "if X use A, if Y use B" forks
+  - **Version verification** — Checks `frameworks_with_versions` before suggesting APIs (prevents ES2023+ suggestions for ES2022 targets)
+  - **Accessibility verification** — Ensures `prefers-reduced-motion` fallbacks don't leave permanent visual artifacts
+  - **Convergence signals** — `[Strong Signal — N agents]` markers when 3+ agents independently flag same concern
+  - **`fast_follow` classification** — Fourth action bucket for items with real UX impact but out of PR scope (must be ticketed before merge)
+
+  **Other Improvements:**
+  - `truncated_count` required field — Agents report omitted recommendations beyond 8-cap; judge weights convergence accordingly
+  - `learnings-researcher` integration — Single dedicated agent replaces N per-file learning agents
+  - Pipeline checkpoint logging to `.deepen/PIPELINE_LOG.md` for diagnostics
+  - Cross-platform safe: project-relative `.deepen/`, Node.js validation (no Python3 dependency)
+  - Architectural Decision Challenge phase with `project-architecture-challenger` agent
+  - `agent-native-architecture-reviewer` with dedicated skill routing
+  - PROJECT ARCHITECTURE CONTEXT block for all review/research agents

---

diff --git a/plugins/compound-engineering/commands/deepen-plan.md b/plugins/compound-engineering/commands/deepen-plan.md
index 5211f11a..f0e3f24a 100644
--- a/plugins/compound-engineering/commands/deepen-plan.md
+++ b/plugins/compound-engineering/commands/deepen-plan.md
@@ -4,7 +4,7 @@
argument-hint: "[path to plan file]"
---

-# Deepen Plan (v3 — Production-Hardened)
+# Deepen Plan (v3 — Context-Managed Map-Reduce)

**Note: The current year is 2026.** Use this when searching for recent documentation and best practices.
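
The version-grounding rule described in the changelog above (lockfile over dependency file over plan text) reduces to a small lookup. A minimal Node sketch, assuming npm's package-lock.json v2+ layout; `resolveVersion` is a hypothetical helper, and other ecosystems such as Gemfile.lock or poetry.lock would need their own parsers:

```javascript
const fs = require('fs');

function resolveVersion(pkg) {
  // Highest trust: exact versions pinned in the lockfile.
  try {
    const lock = JSON.parse(fs.readFileSync('package-lock.json', 'utf8'));
    const entry = lock.packages && lock.packages[`node_modules/${pkg}`];
    if (entry && entry.version) {
      return { version: entry.version, source: 'lockfile' };
    }
  } catch (_) { /* no lockfile; fall through */ }
  // Next: semver ranges declared in package.json.
  try {
    const manifest = JSON.parse(fs.readFileSync('package.json', 'utf8'));
    const deps = { ...manifest.dependencies, ...manifest.devDependencies };
    if (deps[pkg]) return { version: deps[pkg], source: 'dependency_file' };
  } catch (_) { /* no manifest either */ }
  // Lowest trust: whatever the plan text claims, flagged as unverified.
  return { version: null, source: 'plan_text_unverified' };
}
```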
@@ -14,21 +14,26 @@ argument-hint: "[path to plan file]" <role>Senior Technical Research Lead with expertise in architecture, best practices, and production-ready implementation patterns</role> -## Architecture: Why This Command Works +## Architecture: Phased File-Based Map-Reduce -This command uses the **Phased File-Based Map-Reduce** pattern — the same architecture as the review command, adapted for research and plan enhancement: - -1. **Analyze Phase** (sequential) — Parse the plan into a structured manifest of sections, technologies, and domains. All agents receive this manifest. -2. **Discover Phase** (parent) — Find all available skills, learnings, and agents using native Glob/Read tools. Match against the manifest. Skip clearly irrelevant ones. -3. **Research Phase** (parallel) — Matched agents write structured recommendations to `.deepen/`, return only a single sentence to the parent. -4. **Validate** — Verify all expected agent files exist, conform to schema, and flag zero-tool-use hallucination risk. -5. **Judge Phase** — A judge agent deduplicates, resolves conflicts with source attribution priority, groups by plan section, assigns impact levels, and ranks. +1. **Analyze Phase** (sequential) — Parse plan into structured manifest. **Grounds versions in lockfile/package.json**, not plan text. +2. **Discover Phase** (parent) — Find available skills, learnings, agents using Glob/Read. Match against manifest. +3. **Research Phase** (batched parallel) — Matched agents write structured recommendations to `.deepen/`, return only a completion signal. Agents report `truncated_count` when capped. +4. **Validate** — Verify all expected agent files exist, conform to schema (including required `truncated_count`), flag zero-tool-use hallucination risk. +5. **Judge Phase** (parallel per-section + merge) — Per-section judges run in parallel (batched, max 4). Each deduplicates and ranks within its section. Merge judge then resolves cross-section conflicts/convergence and produces final consolidated output. 6. **Judge Validation** — Verify judge output references real manifest sections. -7. **Enhance Phase** — A synthesis agent reads the consolidated recommendations + original plan and writes the enhanced version. -8. **Preservation Check** — Verify the enhanced plan still contains every original section. -9. **Present** — Parent reads only the enhancement summary and presents next steps. +7. **Enhance Phase** — Synthesis agent reads consolidated recommendations + original plan, writes enhanced version. **Verifies APIs exist in resolved versions before suggesting code.** Classifies items as implement/verify/fast_follow/defer. Two-part output: Decision Record + Implementation Spec. +8. **Quality Review** — CoVe-pattern agent checks enhanced plan for self-contradictions, PR scope, defensive stacking, deferred items needing bridge mitigations. +9. **Preservation Check** — Single-pass verification that enhanced plan contains every original section. +10. **Present** — Parent reads enhancement summary + quality review and presents next steps. + +Parent context stays under ~15k tokens of agent output regardless of agent count. -This keeps the parent context under ~15k tokens of agent output regardless of how many research agents run. +## Task() Failure Recovery + +<critical_instruction> +If any Task() call returns an error, empty result, or `[Tool result missing due to internal error]`, retry ONCE with identical parameters before failing the phase. 
This is a known Claude Code infrastructure issue — the subprocess can silently fail due to timeout, OOM, or connection drop. The retry almost always succeeds. Log the failure and retry in the pipeline log. +</critical_instruction> ## Plan File @@ -40,6 +45,21 @@ This keeps the parent context under ~15k tokens of agent output regardless of ho Do not proceed until you have a valid plan file path. +## Checkpoint Logging + +<critical_instruction> +After EVERY phase, write a checkpoint to `.deepen/PIPELINE_LOG.md`. This is diagnostic — report these results back. + +Format each checkpoint as: +``` +## Phase N: [Name] — [PASS/FAIL/PARTIAL] +- Started: [timestamp from date command] +- Completed: [timestamp] +- Notes: [what happened, any issues] +- Files created: [list] +``` +</critical_instruction> + ## Main Tasks ### 1. Prepare the Scratchpad Directory @@ -47,50 +67,64 @@ Do not proceed until you have a valid plan file path. <critical_instruction> Use a project-relative path, NOT /tmp/. The /tmp/ path causes two problems: 1. Claude Code's Read tool and MCP filesystem tools cannot access /tmp/ (outside allowed directories) -2. On Windows, /tmp/ resolves to different locations depending on the subprocess (MSYS2 vs literal C:\tmp), splitting agent files across directories - -Using .deepen/ inside the project avoids both issues. All tools (Read, Write, Bash, MCP) can access project-relative paths reliably. +2. On Windows, /tmp/ resolves to different locations depending on the subprocess </critical_instruction> ```bash -# Create the deepen session directory (project-relative, cross-platform safe) DEEPEN_DIR=".deepen" rm -rf "$DEEPEN_DIR" mkdir -p "$DEEPEN_DIR" grep -qxF '.deepen/' .gitignore 2>/dev/null || echo '.deepen/' >> .gitignore -# Copy the plan to the scratchpad for agents to reference cp <plan_path> "$DEEPEN_DIR/original_plan.md" -``` -All references below use `.deepen/` — this is the ONLY path agents should use. Do not use `/tmp/` anywhere. +# Initialize pipeline log +echo "# Deepen Plan Pipeline Log" > "$DEEPEN_DIR/PIPELINE_LOG.md" +echo "" >> "$DEEPEN_DIR/PIPELINE_LOG.md" +echo "## Phase 0: Setup — PASS" >> "$DEEPEN_DIR/PIPELINE_LOG.md" +echo "- Started: $(date -u +%H:%M:%S)" >> "$DEEPEN_DIR/PIPELINE_LOG.md" +echo "- Plan copied to .deepen/original_plan.md" >> "$DEEPEN_DIR/PIPELINE_LOG.md" +echo "" >> "$DEEPEN_DIR/PIPELINE_LOG.md" +``` -### 2. Analyze Plan Structure (Phase 0 — Sequential) +### 2. Analyze Plan Structure (Phase 1 — Sequential) <critical_instruction> -Run this BEFORE discovering or launching any agents. This produces the structured manifest that drives intelligent agent selection and gives every agent shared context about the plan. +Run this BEFORE discovering or launching any agents. This produces the structured manifest that drives intelligent agent selection. </critical_instruction> ``` Task plan-analyzer(" -You are a Plan Structure Analyzer. Parse a development plan into a structured manifest that research agents will use for targeted enhancement. +You are a Plan Structure Analyzer. Parse a development plan into a structured manifest. ## Instructions: 1. Read .deepen/original_plan.md -2. Write your analysis to .deepen/PLAN_MANIFEST.json using this EXACT schema: + +2. **GROUND versions in actual dependency files — do NOT trust plan text for versions.** + Resolve framework/library versions using this priority order (highest trust first): + a. **Lockfile** (exact resolved versions): Glob for package-lock.json, yarn.lock, pnpm-lock.yaml, Gemfile.lock, poetry.lock. 
Read the relevant entries. + b. **Dependency file** (semver ranges): Read package.json, Gemfile, pyproject.toml, etc. Extract version ranges. + c. **Plan text** (lowest trust): Only use versions stated in the plan if no dependency file exists. Mark as unverified. + + For each technology, record: + - The resolved version from the lockfile/dependency file + - Whether the plan text stated a different version (version mismatch) + - The source: \"lockfile\", \"dependency_file\", or \"plan_text_unverified\" + +3. Write your analysis to .deepen/PLAN_MANIFEST.json using this EXACT schema: { \"plan_title\": \"<title>\", \"plan_path\": \"<original file path>\", - \"technologies\": [\"Rails\", \"React\", \"TypeScript\", \"Redis\", ...], - \"domains\": [\"authentication\", \"caching\", \"API design\", \"UI/UX\", ...], + \"technologies\": [\"Rails\", \"React\", \"TypeScript\", ...], + \"domains\": [\"authentication\", \"caching\", \"API design\", ...], \"sections\": [ { \"id\": 1, \"title\": \"<section title>\", - \"summary\": \"<what this section covers, 1-2 sentences>\", - \"technologies\": [\"subset relevant to this section\"], - \"domains\": [\"subset relevant to this section\"], + \"summary\": \"<1-2 sentences>\", + \"technologies\": [\"subset\"], + \"domains\": [\"subset\"], \"has_code_examples\": true|false, \"has_ui_components\": true|false, \"has_data_models\": true|false, @@ -99,65 +133,75 @@ You are a Plan Structure Analyzer. Parse a development plan into a structured ma \"has_performance_concerns\": true|false, \"has_testing_strategy\": true|false, \"has_deployment_concerns\": true|false, - \"enhancement_opportunities\": \"<what kind of research would improve this section>\" + \"enhancement_opportunities\": \"<what research would improve this section>\" + } + ], + \"frameworks_with_versions\": { + \"React\": {\"version\": \"19.1.0\", \"source\": \"lockfile\"}, + \"MUI\": {\"version\": \"7.3.7\", \"source\": \"lockfile\"} + }, + \"version_mismatches\": [ + { + \"technology\": \"MUI\", + \"plan_stated\": \"5\", + \"actual_resolved\": \"7.3.7\", + \"source\": \"lockfile\", + \"impact\": \"All MUI API recommendations must target v7, not v5\" } ], - \"frameworks_with_versions\": {\"React\": \"19\", \"Next.js\": \"15\", \"Rails\": \"8.0\", ...}, - \"overall_risk_areas\": [\"<area 1>\", \"<area 2>\"], + \"overall_risk_areas\": [\"<area>\"], \"acceptance_criteria_count\": <number>, \"implementation_phases_count\": <number> } -3. Also write a human-readable summary to .deepen/PLAN_MANIFEST.md (max 300 words) covering: - - What the plan is about - - Key technical decisions - - Areas that would benefit most from deeper research - - Technologies involved +4. Also write a human-readable summary to .deepen/PLAN_MANIFEST.md (max 300 words). If version mismatches were found, list them prominently at the top. -4. Return to parent: 'Plan analysis complete. <N> sections identified across <M> technologies. Written to .deepen/PLAN_MANIFEST.json' +5. Return to parent: 'Plan analysis complete. <N> sections identified across <M> technologies. [X version mismatches found.] Written to .deepen/PLAN_MANIFEST.json' ") ``` -Wait for this to complete before proceeding. +Wait for completion. Then log checkpoint: -### 3. 
Discover Available Skills, Learnings, and Agents +```bash +echo "## Phase 1: Plan Analysis — $([ -f .deepen/PLAN_MANIFEST.json ] && echo 'PASS' || echo 'FAIL')" >> .deepen/PIPELINE_LOG.md +echo "- Completed: $(date -u +%H:%M:%S)" >> .deepen/PIPELINE_LOG.md +echo "- Files: $(ls .deepen/PLAN_MANIFEST.* 2>/dev/null)" >> .deepen/PIPELINE_LOG.md +echo "" >> .deepen/PIPELINE_LOG.md +``` + +### 3. Discover Available Skills, Learnings, and Agents (Phase 2) <critical_instruction> -This step runs in the PARENT context. It's a discovery phase — the parent reads directory listings and frontmatter, NOT full file contents. Keep this lightweight. +This step runs in the PARENT context. Discovery only — read directory listings and frontmatter, NOT full file contents. Keep lightweight. </critical_instruction> #### Step 3a: Discover Skills -Use Claude Code's native tools for cross-platform compatibility (bash `find`/`head` fails on Windows): +Use Claude Code's native tools: ``` -# Use Glob tool to discover skill directories Glob: .claude/skills/*/SKILL.md Glob: ~/.claude/skills/*/SKILL.md Glob: ~/.claude/plugins/cache/**/skills/*/SKILL.md - -# For each discovered SKILL.md, use Read tool to get first 10 lines (frontmatter/description only) -Read: [skill-path]/SKILL.md (first 10 lines) ``` -Do NOT read full skill contents into parent context — only descriptions for matching. +For each discovered SKILL.md, Read first 10 lines only (frontmatter/description). #### Step 3b: Discover Learnings +**Preferred method:** If the compound-engineering plugin's `learnings-researcher` agent is available (check `~/.claude/plugins/cache/**/agents/research/learnings-researcher.md`), use it as a single dedicated agent in Step 4 instead of spawning per-file learning agents. It searches `docs/solutions/` by frontmatter metadata — one specialized agent replaces N generic ones with better quality. + +**Fallback (no learnings-researcher available):** + ``` -# Use Glob to find all learning files Glob: docs/solutions/**/*.md - -# For each found file, use Read tool to get first 15 lines (frontmatter only) -Read: [learning-path] (first 15 lines) ``` -Each learning file has YAML frontmatter with tags, category, and module — use these for filtering. +For each found file, Read first 15 lines (frontmatter only). #### Step 3c: Discover Review/Research Agents ``` -# Use Glob to find all agent files from all sources Glob: .claude/agents/*.md Glob: ~/.claude/agents/*.md Glob: ~/.claude/plugins/cache/**/agents/**/*.md @@ -171,38 +215,20 @@ For compound-engineering plugin agents: Read `.deepen/PLAN_MANIFEST.json` and match discovered resources: -**Skills** — Match if skill's domain overlaps with any plan technology or domain. 
Common skill-to-domain mappings: -- Rails plans → `dhh-rails-style` -- Ruby gem plans → `andrew-kane-gem-writer` -- Frontend/UI plans → `frontend-design` -- AI/agent plans → `agent-native-architecture` -- LLM integration plans → `dspy-ruby` -- Documentation-heavy plans → `every-style-editor`, `compound-docs` -- Skill creation plans → `create-agent-skills` -- Security-sensitive plans → any security-related skills +**Skills** — Match if skill's domain overlaps with any plan technology or domain: +- Rails plans -> `dhh-rails-style` +- Ruby gem plans -> `andrew-kane-gem-writer` +- Frontend/UI plans -> `frontend-design` +- AI/agent plans -> `agent-native-architecture` +- LLM integration plans -> `dspy-ruby` +- Documentation plans -> `every-style-editor`, `compound-docs` +- Skill creation plans -> `create-agent-skills` -**Important:** Skills may have `references/` subdirectories with additional context files. When spawning skill agents in Step 4, instruct them to also check for and read files in `references/`, `assets/`, and `templates/` directories within the skill path. +**Important:** Skills may have `references/` subdirectories. Instruct skill agents to also check `references/`, `assets/`, `templates/` directories within the skill path. -**Special routing — `agent-native-architecture` skill:** This skill is interactive with a routing table. Do NOT use the generic skill template. Use the dedicated template in Step 4 that routes to the Architecture Review Checklist, Anti-Patterns sections, and specific reference files (`from-primitives-to-domain-tools.md`, `mcp-tool-design.md`, `refactoring-to-prompt-native.md`). +**Special routing — `agent-native-architecture` skill:** This skill is interactive with a routing table. Do NOT use the generic skill template. Use the dedicated template in Step 4. -**Learnings** — Match if learning's tags, category, or module overlaps with plan technologies, domains, or modules being changed. - -**Agents** — Two tiers: - -**Always run (cross-cutting — these catch what you don't expect):** -- Security agents (security-sentinel, any security reviewer) -- Architecture agents (architecture-strategist) -- Performance agents (performance-oracle) -- **Project Architecture Challenger** (dedicated agent — see Step 4 template) - -These run regardless of manifest matching because their domains are relevant to virtually every plan. A security agent catching a data exposure risk in a "simple UI plan" is exactly the kind of cross-cutting insight that makes deepening valuable. The Project Architecture Challenger specifically questions whether the plan's decisions align with the project's own CLAUDE.md principles — catching redundant features, YAGNI violations, and logic in the wrong layer. - -**Manifest-matched (run if domain overlap):** -- Framework-specific review agents (dhh-rails-reviewer for Rails, kieran-rails-reviewer for Rails, kieran-typescript-reviewer for TypeScript, kieran-python-reviewer for Python) -- Domain-specific research agents (data-integrity-guardian for database plans, deployment-verification-agent for deployment plans) -- Frontend agents (julik-frontend-races-reviewer for JS/Stimulus, design agents for UI plans) -- Code quality agents (code-simplicity-reviewer, pattern-recognition-specialist) -- Agent-native reviewer (agent-native-reviewer for plans involving agent/tool features) +**Learnings** — Match if tags, category, or module overlaps with plan technologies/domains. 

#### Learnings Filtering Examples

@@ -225,67 +251,143 @@ docs/solutions/authentication-issues/jwt-expiry.md # plan has no auth

When in doubt, spawn it. A learning agent that returns "Not applicable" wastes one context window. A missed learning that would have prevented a production bug wastes days.

+**Agents** — Two tiers:
+
+**Always run (cross-cutting):**
+- Security agents (security-sentinel)
+- Architecture agents (architecture-strategist)
+- Performance agents (performance-oracle)
+- Project Architecture Challenger (see Step 4)
+
+**Manifest-matched (run if domain overlap):**
+- Framework-specific reviewers (dhh-rails-reviewer, kieran-rails-reviewer, kieran-typescript-reviewer, kieran-python-reviewer)
+- Domain-specific agents (data-integrity-guardian, deployment-verification-agent)
+- Frontend agents (julik-frontend-races-reviewer, design agents)
+- Code quality agents (code-simplicity-reviewer, pattern-recognition-specialist)
+- Agent-native reviewer (for plans involving agent/tool features)
+
#### Handling Sparse Discovery

-If discovery finds few or no matched skills/learnings (e.g., a plan for a technology stack with no installed skills), the command still works — the 3 always-run cross-cutting agents plus per-technology docs-researchers provide meaningful enhancement. Acknowledge this in the summary: "Limited institutional knowledge available for [technology]. Enhancement based primarily on framework documentation and cross-cutting analysis. Consider running `/deepen-plan` again after building project-specific skills."
+If few/no matched skills/learnings found, acknowledge: "Limited institutional knowledge available. Enhancement based primarily on framework documentation and cross-cutting analysis."
+
+Write matched resources list to `.deepen/MATCHED_RESOURCES.md`.
+
+Log checkpoint:
+
+```bash
+echo "## Phase 2: Discovery — PASS" >> .deepen/PIPELINE_LOG.md
+echo "- Completed: $(date -u +%H:%M:%S)" >> .deepen/PIPELINE_LOG.md
+echo "- Skills found: $(grep -c 'skill' .deepen/MATCHED_RESOURCES.md 2>/dev/null)" >> .deepen/PIPELINE_LOG.md
+echo "- Learnings found: $(grep -c 'learning' .deepen/MATCHED_RESOURCES.md 2>/dev/null)" >> .deepen/PIPELINE_LOG.md
+echo "- Agents found: $(grep -c 'agent' .deepen/MATCHED_RESOURCES.md 2>/dev/null)" >> .deepen/PIPELINE_LOG.md
+echo "" >> .deepen/PIPELINE_LOG.md
+```
+
+### 4. Launch Research Agents (Phase 3 — Batched Parallel)

-Write the matched resources list to `.deepen/MATCHED_RESOURCES.md` for reference.
+<critical_instruction>
+KNOWN ISSUE: When 10+ Task() agents return simultaneously, they can dump ~100-200K tokens into the parent context at once. Claude Code's compaction triggers too late (~98-99% usage) and the session locks up (anthropics/claude-code#11280, #8136).
+
+MITIGATION: Launch agents in BATCHES of 3-4. Wait for each batch to complete before launching the next. This caps simultaneous returns and gives compaction room to fire between batches.

-### 4. Launch Research Agents (Parallel)
+Batch order:
+- **Batch 1:** Always-run cross-cutting agents (security-sentinel, architecture-strategist, performance-oracle, project-architecture-challenger)
+- **Batch 2:** Manifest-matched review agents (framework reviewers, domain agents, code quality)
+- **Batch 3:** Skill agents + learnings-researcher
+- **Batch 4:** Docs-researchers (one per technology)
+
+Wait for each batch to fully complete before starting the next. Between batches, log a checkpoint. 
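
The mitigation is plain bounded fan-out. A minimal sketch of the batching loop, treating Task() as an ordinary async function (an assumption made for illustration; the real tool is Claude Code's sub-agent call, not JavaScript):

```javascript
async function runInBatches(agentPrompts, batchSize = 4) {
  const signals = [];
  for (let i = 0; i < agentPrompts.length; i += batchSize) {
    const batch = agentPrompts.slice(i, i + batchSize);
    // Never more than batchSize agents pending at once, so simultaneous
    // returns stay capped and compaction has room to fire between batches.
    signals.push(...(await Promise.all(batch.map((p) => Task(p)))));
  }
  return signals;
}
```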
+
</critical_instruction>

<critical_instruction>
-EVERY agent prompt MUST include these output constraints. This is what prevents context overflow.
+EVERY agent prompt MUST include these output constraints. This prevents context overflow.

-Append this to EVERY agent spawn prompt:
+Append this SHARED_CONTEXT + OUTPUT_RULES block to EVERY agent spawn prompt:

```
## SHARED CONTEXT
Read .deepen/PLAN_MANIFEST.md first for plan overview, technologies, and risk areas.
Read .deepen/original_plan.md for the full plan content.

-## OUTPUT RULES (MANDATORY)
+## OUTPUT RULES (MANDATORY — VIOLATION CAUSES SESSION CRASH)
1. Write your FULL analysis as JSON to .deepen/{your_agent_name}.json
-2. Use this EXACT schema (hard caps enforced):
+2. Use this EXACT schema:
   {
     "agent_type": "skill|learning|research|review",
     "agent_name": "<your name>",
     "source_type": "skill|documented-learning|official-docs|community-web",
-    "summary": "<500 chars max — your key contribution to this plan>",
-    "tools_used": ["read_file:path/to/file", "web_search:query-terms", "mcp:context7:query-docs", ...],
+    "summary": "<500 chars max>",
+    "tools_used": ["read_file:path", "web_search:query", ...],
     "recommendations": [
       {
-        "section_id": <which plan section this applies to, from manifest>,
+        "section_id": <from manifest>,
         "type": "best-practice|edge-case|anti-pattern|performance|security|code-example|architecture|ux|testing",
         "title": "<100 chars>",
-        "recommendation": "<500 chars — the actual advice>",
-        "code_example": "<optional — concrete code snippet, max 800 chars>",
-        "references": ["<URL or doc reference>"],
+        "recommendation": "<500 chars>",
+        "code_example": "<optional, max 800 chars>",
+        "references": ["<URL or doc>"],
         "priority": "high|medium|low",
         "confidence": 0.0-1.0
       }
-    ]
+    ],
+    "truncated_count": 0
   }
-3. Max 8 recommendations per agent. Prioritize by impact on plan quality.
+3. Max 8 recommendations per agent. Prioritize by impact.
4. Only include recommendations with confidence >= 0.6.
5. Every recommendation MUST reference a specific section_id from the plan manifest.
-6. Code examples are ENCOURAGED — concrete implementation details make the plan actionable.
-7. The tools_used field is MANDATORY. List every tool call you made (file reads, web searches, MCP queries). If you did not use any tools, your recommendations are based on training data alone — set confidence to 0.5 max.
-8. Return ONLY this to the parent (do NOT return the full analysis):
-   "Research complete. Wrote <N> recommendations for <M> sections to .deepen/{agent_name}.json. Key contribution: <1 sentence>"
+6. Code examples are ENCOURAGED.
+7. tools_used is MANDATORY. If empty, set confidence to 0.5 max.
+8. **truncated_count is REQUIRED (default 0).** If you had more recommendations beyond the 8 cap, set this to the number you omitted. Example: you found 12 relevant issues but only wrote the top 8 → truncated_count: 4. The judge uses this to weight convergence signals.
+9. **CRITICAL — YOUR RETURN MESSAGE TO PARENT MUST BE UNDER 200 CHARACTERS.**
+   Return ONLY this exact format:
+   "Done. <N> recs for <M> sections in .deepen/{agent_name}.json"
+   Do NOT return recommendations, analysis, code, or explanations to the parent.
+   Do NOT summarize your findings in the return message.
+   ALL analysis goes in the JSON file. The return message is just a completion signal.
+   If you return more than 200 characters, you risk crashing the parent session. 
``` </critical_instruction> -#### Launch All Matched Agents in Parallel +#### Batch Execution + +<critical_instruction> +DO NOT launch all agents at once. Follow this batch sequence: + +**BATCH 1 — Cross-cutting (always-run):** Launch these 3-4 agents in parallel. Wait for ALL to complete. +- security-sentinel +- architecture-strategist +- performance-oracle +- project-architecture-challenger + +Log: `echo "## Phase 3a: Batch 1 (cross-cutting) — PASS" >> .deepen/PIPELINE_LOG.md` + +**BATCH 2 — Manifest-matched reviewers:** Launch matched review agents in parallel (max 4). Wait for ALL to complete. +- Framework reviewers, domain agents, code quality agents, agent-native reviewer + +Log: `echo "## Phase 3b: Batch 2 (reviewers) — PASS" >> .deepen/PIPELINE_LOG.md` + +**BATCH 3 — Skills + Learnings:** Launch matched skill agents + learnings-researcher in parallel (max 4). Wait for ALL to complete. + +Log: `echo "## Phase 3c: Batch 3 (skills+learnings) — PASS" >> .deepen/PIPELINE_LOG.md` + +**BATCH 4 — Docs researchers:** Launch per-technology docs researchers in parallel (max 4). Wait for ALL to complete. + +Log: `echo "## Phase 3d: Batch 4 (docs) — PASS" >> .deepen/PIPELINE_LOG.md` + +If a batch has more than 4 agents, split it into sub-batches of 4. Never have more than 4 Task() calls pending simultaneously. +</critical_instruction> + +#### Agent Templates **For each matched SKILL:** ``` Task skill-agent(" -You have the [skill-name] skill available at [skill-path]. +You have the [skill-name] skill at [skill-path]. 1. Read the skill: Read [skill-path]/SKILL.md -2. Check for additional skill resources: - - Glob [skill-path]/references/*.md — read any reference files for deeper context - - Glob [skill-path]/assets/* — check for templates or examples - - Glob [skill-path]/templates/* — check for code templates +2. Check for additional resources: + - Glob [skill-path]/references/*.md + - Glob [skill-path]/assets/* + - Glob [skill-path]/templates/* 3. Read the plan context from .deepen/ 4. Apply the skill's expertise to the plan 5. Write recommendations following the OUTPUT RULES @@ -295,10 +397,10 @@ You have the [skill-name] skill available at [skill-path]. **For each matched LEARNING:** ``` Task learning-agent(" -Read this learning file completely: [path to learning .md] +Read this learning file completely: [path] This documents a previously solved problem. Check if it applies to the plan. -If relevant: write specific recommendations about how to avoid this problem. -If not relevant after analysis: write an empty recommendations array with summary 'Not applicable: [reason]' +If relevant: write specific recommendations. +If not relevant: write empty recommendations array with summary 'Not applicable: [reason]' " + SHARED_CONTEXT + OUTPUT_RULES) ``` @@ -308,128 +410,118 @@ Task [agent-name](" Review this plan using your expertise. Focus on your domain. ## PROJECT ARCHITECTURE CONTEXT -Read the project's CLAUDE.md (or .claude/CLAUDE.md) for project-specific architectural principles, patterns, and conventions. Evaluate the plan against THESE principles, not just generic best practices. If no CLAUDE.md exists, skip this step. +Read the project's CLAUDE.md for project-specific architectural principles. Evaluate the plan against THESE principles. 
" + SHARED_CONTEXT + OUTPUT_RULES) ``` -**For each technology in the manifest, spawn a dedicated docs-researcher:** +**For each technology in the manifest, spawn a docs-researcher:** ``` Task docs-researcher-[technology](" -Research current (2025-2026) best practices for [technology] [version from manifest if available]. +Research current (2025-2026) best practices for [technology] [version if available]. ## Documentation Research Steps: 1. Query Context7 MCP for official framework documentation: - First: mcp__plugin_compound-engineering_context7__resolve-library-id for '[technology]' - - Then: mcp__plugin_compound-engineering_context7__query-docs with the resolved library ID for patterns relevant to this plan -2. Web search for recent (2025-2026) articles, migration guides, and changelog notes -3. Search for version-specific changes if manifest includes a version (e.g., React 19 vs 18, Rails 8 vs 7) + - Then: mcp__plugin_compound-engineering_context7__query-docs with the resolved ID +2. Web search for recent (2025-2026) articles, migration guides, changelog notes +3. Search for version-specific changes if manifest includes a version 4. Find concrete code patterns and configuration recommendations -Focus on areas the plan manifest identifies as enhancement opportunities for this technology. -Budget: 3-5 searches per technology for thorough coverage. +Budget: 3-5 searches per technology. " + SHARED_CONTEXT + OUTPUT_RULES) ``` -Example: For a plan using React 19, TypeScript 5.5, and PostgreSQL 17, spawn three separate agents — one per technology. Each gets a full context window to research deeply. - **SPECIAL: `agent-native-architecture` skill (if matched):** - -Do NOT use the generic skill template. This skill has an interactive routing table that doesn't work with "read and follow." Instead, route directly to the architectural review references: - ``` Task agent-native-architecture-reviewer(" -You are an Agent-Native Architecture Reviewer. Your job is to evaluate whether this plan's tool design, feature decisions, and state management align with agent-native principles. +You are an Agent-Native Architecture Reviewer. ## Instructions: -1. Read the agent-native-architecture skill: [skill-path]/SKILL.md - - Focus on: <architecture_checklist>, <anti_patterns>, and <core_principles> sections -2. Read these SPECIFIC reference files for deeper context: - - [skill-path]/references/from-primitives-to-domain-tools.md — when to add domain tools vs stay with primitives - - [skill-path]/references/mcp-tool-design.md — tools should be primitives, not workflows - - [skill-path]/references/refactoring-to-prompt-native.md — moving logic from code to prompts -3. Read the project's CLAUDE.md for project-specific architectural principles -4. Read the plan from .deepen/ - -## Apply these checks to EVERY tool change and new feature in the plan: -- Does a new tool parameter duplicate capability already available through another tool? -- Does the tool encode business logic (judgment/decisions) that should live in the agent prompt? -- Are there two ways to accomplish the same outcome? (redundancy — confuses LLMs about which path to choose) -- Is logic placed in the right layer? (backend tool vs frontend vs agent prompt vs skill guidance) -- Do hardcoded values belong in skills, or should the agent discover them from data? (emergent capability) -- Are features truly needed now, or are they YAGNI? If the plan flags something as YAGNI but builds it anyway, that's a finding. 
-- Does the Architecture Review Checklist pass? (Parity, Granularity, Composability, Emergent Capability) - -Write findings as recommendations following the OUTPUT RULES. Use type 'architecture' or 'anti-pattern' for findings. +1. Read [skill-path]/SKILL.md — focus on <architecture_checklist>, <anti_patterns>, <core_principles> +2. Read these reference files: + - [skill-path]/references/from-primitives-to-domain-tools.md + - [skill-path]/references/mcp-tool-design.md + - [skill-path]/references/refactoring-to-prompt-native.md +3. Read project's CLAUDE.md +4. Read .deepen/ plan context + +## Apply these checks: +- Does a new tool duplicate existing capability? +- Does the tool encode business logic that should live in the agent prompt? +- Are there two ways to accomplish the same outcome? +- Is logic in the right layer? +- Do hardcoded values belong in skills? +- Are features truly needed now, or YAGNI? +- Does the Architecture Review Checklist pass? + +Use type 'architecture' or 'anti-pattern' for findings. " + SHARED_CONTEXT + OUTPUT_RULES) ``` **ALWAYS RUN: Project Architecture Challenger** - -<critical_instruction> -This agent runs on EVERY plan, regardless of manifest matching. It is the architectural decision challenge phase — it questions whether the plan's decisions should exist, not just whether they're implemented correctly. This catches the class of issues that deepening misses: redundant features, logic in the wrong layer, YAGNI violations built despite being acknowledged, and drift from project conventions. -</critical_instruction> - ``` Task project-architecture-challenger(" -You are a Project Architecture Challenger. Your job is NOT to deepen the plan — it's to CHALLENGE the plan's decisions against the project's own architectural principles. +You are a Project Architecture Challenger. Your job is to CHALLENGE the plan's decisions against the project's own architectural principles. ## Instructions: -1. Read the project's CLAUDE.md (or .claude/CLAUDE.md) — extract the architectural principles, patterns, and conventions this project follows -2. Read .deepen/original_plan.md — the plan to challenge -3. Read .deepen/PLAN_MANIFEST.md — for context - -## For each major decision in the plan, ask: -- **Redundancy**: Does this new feature/tool/parameter duplicate something that already exists? Can the same outcome be achieved with existing tools via composition? -- **Layer placement**: Is business logic in the right place? Frontend should be a thin rendering layer. Tools should be atomic primitives. Judgment belongs in prompts/skills, not code. -- **YAGNI enforcement**: Does the plan acknowledge something as YAGNI but build it anyway? Flag these — the plan is contradicting itself. -- **Hardcoded vs emergent**: Are mappings/values hardcoded that the agent could discover from data? Skill guidance should teach patterns, not enumerate values. -- **Convention drift**: Does any decision contradict the project's stated conventions in CLAUDE.md? -- **Complexity budget**: Does each feature earn its complexity? Three similar lines are better than a premature abstraction. - -## What to produce: -- Recommendations with type 'architecture' for structural concerns -- Recommendations with type 'anti-pattern' for agent-native violations -- High confidence (0.8+) when CLAUDE.md explicitly contradicts the plan -- Medium confidence (0.6-0.7) when the concern is a judgment call - -Be specific. Quote the principle from CLAUDE.md that the plan violates. Name the alternative approach. +1. 
Read project's CLAUDE.md — extract architectural principles, patterns, conventions +2. Read .deepen/original_plan.md +3. Read .deepen/PLAN_MANIFEST.md + +## For each major decision, ask: +- **Redundancy**: Does this duplicate something existing? +- **Layer placement**: Is business logic in the right place? +- **YAGNI enforcement**: Does the plan acknowledge YAGNI but build it anyway? +- **Hardcoded vs emergent**: Are values hardcoded that could be discovered? +- **Convention drift**: Does any decision contradict CLAUDE.md? +- **Complexity budget**: Does each feature earn its complexity? + +High confidence (0.8+) when CLAUDE.md explicitly contradicts the plan. +Medium confidence (0.6-0.7) for judgment calls. " + SHARED_CONTEXT + OUTPUT_RULES) ``` -Wait for ALL agents to complete. +After ALL batches complete, log the overall checkpoint: + +```bash +AGENT_COUNT=$(ls .deepen/*.json 2>/dev/null | grep -v PLAN_MANIFEST | wc -l) +echo "## Phase 3: Research Agents (All Batches) — $([ $AGENT_COUNT -gt 0 ] && echo 'PASS' || echo 'FAIL')" >> .deepen/PIPELINE_LOG.md +echo "- Completed: $(date -u +%H:%M:%S)" >> .deepen/PIPELINE_LOG.md +echo "- Agent JSON files written: $AGENT_COUNT" >> .deepen/PIPELINE_LOG.md +echo "- Files: $(ls .deepen/*.json 2>/dev/null | grep -v PLAN_MANIFEST)" >> .deepen/PIPELINE_LOG.md +echo "" >> .deepen/PIPELINE_LOG.md +``` <late_notification_handling> -Late agent completion notifications are expected and harmless. The Task tool reports completions asynchronously — you may receive "Agent completed" messages after you've already proceeded to Step 5 or even Step 6. If you've already moved past the research phase, ignore late notifications. The agent's JSON file is already written to `.deepen/` and will be picked up by validation. +Late agent completion notifications are expected and harmless. Because agents are batched, late notifications should be rare — but if you receive one after moving to Step 5+, ignore it. The agent's JSON file is already on disk. </late_notification_handling> -### 5. Verify and Validate Agent Outputs +### 5. Verify and Validate Agent Outputs (Phase 4) -#### Step 5a: Verify All Expected Files Exist +#### Step 5a: Verify Expected Files Exist ```bash -# List expected agent files based on what was launched in Step 4 EXPECTED_AGENTS="<list of agent names you launched>" MISSING="" for agent in $EXPECTED_AGENTS; do if ! ls .deepen/${agent}*.json 1>/dev/null 2>&1; then MISSING="$MISSING $agent" - echo "MISSING: $agent — no output file found in .deepen/" + echo "MISSING: $agent" fi done if [ -n "$MISSING" ]; then - echo "⚠️ Missing agent files:$MISSING" - echo "Re-run these agents before proceeding to judge." + echo "WARNING: Missing agent files:$MISSING" fi ``` -If any agent file is missing, re-launch that agent before proceeding. +Re-launch missing agents before proceeding. #### Step 5b: Validate JSON Schema and Flag Hallucination Risk <critical_instruction> -Use Node.js for validation — any project using Claude Code has Node available. Python3 may not be installed and bash `python3 -c` fails on some Windows environments. +Use Node.js for validation — Python3 may not be installed on Windows. 
</critical_instruction> ```bash @@ -437,12 +529,14 @@ node -e " const fs = require('fs'); const path = require('path'); const files = fs.readdirSync('.deepen').filter(f => f.endsWith('.json') && f !== 'PLAN_MANIFEST.json'); +let valid = 0, invalid = 0, noTools = 0, totalTruncated = 0; for (const file of files) { const fp = path.join('.deepen', file); try { const data = JSON.parse(fs.readFileSync(fp, 'utf8')); if (!Array.isArray(data.recommendations)) throw new Error('recommendations not an array'); if (data.recommendations.length > 8) throw new Error('too many recommendations: ' + data.recommendations.length); + if (typeof data.truncated_count !== 'number') throw new Error('missing required field: truncated_count'); for (let i = 0; i < data.recommendations.length; i++) { const rec = data.recommendations[i]; if (rec.section_id == null) throw new Error('rec ' + i + ': missing section_id'); @@ -450,113 +544,173 @@ for (const file of files) { if (!rec.recommendation) throw new Error('rec ' + i + ': missing recommendation'); } const tools = data.tools_used || []; + const truncNote = data.truncated_count > 0 ? ' (truncated ' + data.truncated_count + ')' : ''; if (tools.length === 0) { - console.log('⚠️ NO TOOLS USED: ' + file + ' — recommendations may be hallucinated (training data only)'); + console.log('WARNING NO TOOLS: ' + file + truncNote); + noTools++; } else { - console.log('VALID: ' + file + ' - ' + data.recommendations.length + ' recs, ' + tools.length + ' tools used'); + console.log('VALID: ' + file + ' - ' + data.recommendations.length + ' recs, ' + tools.length + ' tools' + truncNote); } + totalTruncated += data.truncated_count; + valid++; } catch (e) { - console.log('INVALID: ' + file + ' — ' + e.message + ' — removing'); + console.log('INVALID: ' + file + ' -- ' + e.message + ' -- removing'); fs.unlinkSync(fp); + invalid++; } } +console.log('Summary: ' + valid + ' valid, ' + invalid + ' invalid, ' + noTools + ' no-tools-used, ' + totalTruncated + ' total truncated recs'); " ``` -Agents with empty `tools_used` are not removed — their recommendations may still be valid — but they're flagged so the judge can weight them lower. +Log checkpoint: -**Checkpoint:** Every launched agent should have a valid JSON file in `.deepen/`. If not, re-run missing/invalid agents. +```bash +echo "## Phase 4: Validation — PASS" >> .deepen/PIPELINE_LOG.md +echo "- Completed: $(date -u +%H:%M:%S)" >> .deepen/PIPELINE_LOG.md +echo "" >> .deepen/PIPELINE_LOG.md +``` -### 6. Judge Phase — Deduplicate, Group, and Rank +### 6. Judge Phase — Per-Section Parallel Judging + Merge (Phase 5) <critical_instruction> -Do NOT read individual agent JSON files into your context. Launch a JUDGE agent that reads them in its own context window. +Do NOT read individual agent JSON files into parent context. Launch PARALLEL per-section JUDGE agents that each read them in their own context windows. + +The judge phase has two steps: +1. **Section Judges** (parallel, batched) — One judge per manifest section. Each deduplicates, ranks, and assigns convergence signals for its section only. +2. **Merge Judge** (sequential) — Reads all section judgments, resolves cross-section conflicts, identifies cross-section convergence, produces final consolidated output. + +This replaces the single monolithic judge, cutting judge time from ~21 min to ~8-10 min. </critical_instruction> +#### Step 6a: Read section count and plan batching + +Read `.deepen/PLAN_MANIFEST.json` to get the section count. 
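A minimal sketch of the batch arithmetic (assumes only the manifest's top-level `sections` array; `BATCH_SIZE` mirrors the max-4 rule stated next):

```bash
node -e "
const fs = require('fs');
// Illustrative only: derive the judge batch plan from the plan manifest.
const sections = JSON.parse(fs.readFileSync('.deepen/PLAN_MANIFEST.json', 'utf8')).sections;
const BATCH_SIZE = 4; // max section judges in flight per batch
const batches = Math.ceil(sections.length / BATCH_SIZE);
console.log(sections.length + ' sections -> ' + batches + ' judge batch(es) of up to ' + BATCH_SIZE);
"
```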
Calculate how many judge batches are needed (max 4 per batch). + +#### Step 6b: Launch Per-Section Judges (batched) + +For each section in the manifest, launch a section judge. Batch in groups of max 4, wait for each batch to complete. + ``` -Task judge-recommendations(" -You are a Plan Enhancement Judge. Consolidate recommendations from multiple research agents into a single, organized, high-quality enhancement plan. +Task judge-section-N(" +You are a Section Judge for section N: '[section_title]'. Consolidate recommendations targeting THIS section only. ## Instructions: -1. Read .deepen/PLAN_MANIFEST.json for plan structure -2. Read ALL JSON files in .deepen/*.json (skip PLAN_MANIFEST.json) -3. Collect all recommendations across agents - -4. EVIDENCE CHECK: For each agent, check its tools_used field. If tools_used is empty AND source_type is NOT 'skill' (skill agents read files but may not log them as tool calls), downweight their confidence by 0.2 (e.g., 0.8 → 0.6). This prevents hallucinated web-research claims from ranking above grounded work. +1. Read .deepen/PLAN_MANIFEST.json for section N's structure +2. Read ALL JSON files in .deepen/*.json (skip PLAN_MANIFEST.json, skip JUDGED_*.json) +3. Collect ONLY recommendations where section_id == N -5. GROUP by section_id — organize all recommendations under the plan section they target +4. EVIDENCE CHECK: If tools_used is empty AND source_type is NOT 'skill', downweight confidence by 0.2. -6. Within each section group: - a. DEDUPLICATE: Remove semantically similar recommendations (keep the higher-confidence one) - b. RESOLVE CONFLICTS: If agents contradict each other, prefer the source with higher attribution priority (see below) - c. RANK by: source_type priority FIRST, then priority (high > medium > low), then confidence score - d. SELECT top 8 recommendations per section maximum +5. Within this section's recommendations: + a. DEDUPLICATE: Remove semantically similar recs (keep higher-confidence) + b. RESOLVE CONFLICTS: Prefer higher attribution priority source + c. RANK by: source_type priority FIRST, then priority, then confidence + d. SELECT top 8 maximum **Source Attribution Priority (highest to lowest):** -- `skill` — Institutional knowledge, curated patterns specific to this project/team -- `documented-learning` — Previously solved problems from docs/solutions/ -- `official-docs` — Framework documentation via Context7 or official sites -- `community-web` — Blog posts, tutorials, community articles +- skill — Institutional knowledge +- documented-learning — Previously solved problems +- official-docs — Framework documentation +- community-web — Blog posts, tutorials -When two recommendations conflict, the higher-source-type wins. A skill-based recommendation that says "use pattern X" outranks a blog post that says "use pattern Y." +6. Preserve code_example fields -7. For recommendations with code_example fields, preserve them — these are high-value +7. Assign impact level: + - must_change — Plan has gap causing failures if not addressed + - should_change — Significant improvement + - consider — Valuable enhancement worth evaluating + - informational — Context or reference -8. Assign an impact level to each final recommendation: - - `must_change` — Plan has a gap that will cause failures if not addressed - - `should_change` — Significant improvement to plan quality - - `consider` — Valuable enhancement worth evaluating - - `informational` — Context or reference that deepens understanding +8. 
CONVERGENCE SIGNAL: If 3+ agents independently flagged the same concern, mark with convergence_count. TRUNCATION-AWARE: If an agent has truncated_count > 0, it may have had additional matching recommendations. If 2 agents converge AND both were truncated, treat as 3-agent strength. -9. Write the consolidated report to .deepen/JUDGED_RECOMMENDATIONS.json: +9. DEFENSIVE STACKING CHECK: If multiple recommendations add validation for the same data at different layers, flag as a cross-cutting concern. + +10. Write to .deepen/JUDGED_SECTION_N.json: { - \"plan_title\": \"<from manifest>\", - \"total_raw_recommendations\": <count across all agents>, + \"section_id\": N, + \"section_title\": \"<from manifest>\", + \"raw_count\": <recs targeting this section>, \"duplicates_removed\": <count>, \"conflicts_resolved\": <count>, - \"low_evidence_downweighted\": <count of recs from agents with empty tools_used>, - \"sections\": [ + \"recommendations\": [ { - \"section_id\": 1, - \"section_title\": \"<from manifest>\", - \"recommendations\": [ - { - \"id\": 1, - \"type\": \"best-practice|edge-case|...\", - \"impact\": \"must_change|should_change|consider|informational\", - \"title\": \"<100 chars>\", - \"recommendation\": \"<500 chars>\", - \"code_example\": \"<preserved from agent, or null>\", - \"references\": [\"...\"], - \"priority\": \"high|medium|low\", - \"confidence\": 0.0-1.0, - \"source_agents\": [\"agent1\", \"agent2\"] - } - ] + \"id\": 1, + \"type\": \"best-practice|...\", + \"impact\": \"must_change|should_change|consider|informational\", + \"title\": \"<100 chars>\", + \"recommendation\": \"<500 chars>\", + \"code_example\": \"<or null>\", + \"references\": [\"...\"], + \"priority\": \"high|medium|low\", + \"confidence\": 0.0-1.0, + \"source_agents\": [\"agent1\", \"agent2\"], + \"convergence_count\": <number> } ], + \"section_concerns\": [\"<any defensive stacking or within-section issues>\"] +} + +11. Return to parent: 'Section N judged. <X> raw -> <Y> after dedup. Written to .deepen/JUDGED_SECTION_N.json' +") +``` + +Log checkpoint per batch: +```bash +echo "## Phase 5a: Section Judges Batch [B] — PASS" >> .deepen/PIPELINE_LOG.md +echo "- Completed: $(date -u +%H:%M:%S)" >> .deepen/PIPELINE_LOG.md +echo "" >> .deepen/PIPELINE_LOG.md +``` + +#### Step 6c: Merge Judge (sequential) + +After ALL section judges complete, launch a single merge judge to handle cross-section concerns. + +``` +Task judge-merge(" +You are the Merge Judge. Combine per-section judgments into the final consolidated report. + +## Instructions: +1. Read .deepen/PLAN_MANIFEST.json for plan structure +2. Read ALL .deepen/JUDGED_SECTION_*.json files +3. Read ALL agent JSON files in .deepen/*.json (skip PLAN_MANIFEST.json, JUDGED_*.json) — ONLY to extract agent_summaries (agent_name + summary field) + +## Cross-Section Analysis (the merge judge's unique job): +4. CROSS-SECTION CONFLICTS: Check if any recommendation in Section A contradicts one in Section C (e.g., same file referenced with conflicting guidance on where logic should live). Flag conflicts with both section IDs and a resolution recommendation. + +5. CROSS-SECTION CONVERGENCE: Check if different sections independently recommend the same pattern (e.g., Section 1 recommends typed filterContext AND Section 3 recommends deriving from typed context). This strengthens both signals — note the cross-section reinforcement. + +6. RENUMBER recommendation IDs sequentially across all sections (1, 2, 3... not per-section). + +7. 
Write to .deepen/JUDGED_RECOMMENDATIONS.json: + +{ + \"plan_title\": \"<from manifest>\", + \"total_raw_recommendations\": <sum of raw_count from all section judges>, + \"duplicates_removed\": <sum of duplicates_removed>, + \"conflicts_resolved\": <sum of section conflicts + cross-section conflicts>, + \"low_evidence_downweighted\": <count>, + \"sections\": [ + <each section's recommendations from JUDGED_SECTION_*.json, with renumbered IDs> + ], \"cross_cutting_concerns\": [ { - \"title\": \"<concern that spans multiple sections>\", - \"description\": \"<explanation>\", + \"title\": \"<concern spanning multiple sections>\", + \"description\": \"<explanation including cross-section conflict/convergence analysis>\", \"affected_sections\": [1, 3, 5] } ], \"agent_summaries\": [ - {\"agent\": \"name\", \"summary\": \"<their 500-char summary>\"} + {\"agent\": \"name\", \"summary\": \"<500 chars>\"} ] } -10. Return to parent: 'Judging complete. <X> raw recommendations consolidated to <Y> across <Z> sections. Written to .deepen/JUDGED_RECOMMENDATIONS.json' +8. Return to parent: 'Merge complete. <X> total recs across <Y> sections. <Z> cross-section concerns. Written to .deepen/JUDGED_RECOMMENDATIONS.json' ") ``` -#### Step 6b: Validate Judge Output - -<critical_instruction> -The judge is the highest-leverage agent — if its output is malformed, the enhancer reads garbage. Spot-check before proceeding. -</critical_instruction> +#### Step 6d: Validate Judge Output ```bash node -e " @@ -566,219 +720,384 @@ try { const manifest = JSON.parse(fs.readFileSync('.deepen/PLAN_MANIFEST.json', 'utf8')); const manifestIds = new Set(manifest.sections.map(s => s.id)); - if (!Array.isArray(judged.sections)) throw new Error('sections is not an array'); - if (judged.sections.length === 0) throw new Error('sections is empty — judge produced no output'); + if (!Array.isArray(judged.sections)) throw new Error('sections not array'); + if (judged.sections.length === 0) throw new Error('sections empty'); let totalRecs = 0; for (const section of judged.sections) { if (!manifestIds.has(section.section_id)) { - console.log('⚠️ Section ID ' + section.section_id + ' not in manifest — may be hallucinated'); + console.log('WARNING: Section ID ' + section.section_id + ' not in manifest'); } totalRecs += section.recommendations.length; - for (const rec of section.recommendations) { - if (!rec.recommendation) throw new Error('Empty recommendation in section ' + section.section_id); - } } - console.log('JUDGE OUTPUT VALID: ' + judged.sections.length + ' sections, ' + totalRecs + ' recommendations'); + console.log('JUDGE VALID: ' + judged.sections.length + ' sections, ' + totalRecs + ' recommendations'); } catch (e) { - console.log('❌ JUDGE OUTPUT INVALID: ' + e.message); - console.log('Re-run the judge agent before proceeding.'); + console.log('JUDGE INVALID: ' + e.message); } " ``` -If judge output is invalid, re-run the judge. Do not proceed to enhancement with malformed data. +Log checkpoint: + +```bash +echo "## Phase 5: Judge (all sections + merge) — $([ -f .deepen/JUDGED_RECOMMENDATIONS.json ] && echo 'PASS' || echo 'FAIL')" >> .deepen/PIPELINE_LOG.md +echo "- Completed: $(date -u +%H:%M:%S)" >> .deepen/PIPELINE_LOG.md +echo "" >> .deepen/PIPELINE_LOG.md +``` -### 7. Enhance the Plan (Synthesis Phase) +### 7. Enhance the Plan (Phase 6 — Synthesis) <critical_instruction> -Do NOT read the judged recommendations into parent context. 
Launch a SYNTHESIS agent that reads both the original plan and the judged recommendations in its own context window and writes the enhanced plan directly. +Do NOT read judged recommendations into parent context. Launch a SYNTHESIS agent. </critical_instruction> ``` Task plan-enhancer(" -You are a Plan Enhancement Writer. Your job is to merge research recommendations into the original plan, producing an implementation-ready enhanced version that an AI developer (Claude Code) can execute directly. +You are a Plan Enhancement Writer. Merge research recommendations into the original plan. ## Instructions: -1. Read .deepen/original_plan.md — this is the source plan to enhance -2. Read .deepen/JUDGED_RECOMMENDATIONS.json — these are the consolidated research findings -3. Read .deepen/PLAN_MANIFEST.json — for section structure reference +1. Read .deepen/original_plan.md — source plan +2. Read .deepen/JUDGED_RECOMMENDATIONS.json — consolidated findings +3. Read .deepen/PLAN_MANIFEST.json — section structure ## Enhancement Rules: -### Preservation — Mode-Switched by Content Type +### Output Structure — Two Audiences, Two Sections + +The enhanced plan MUST have two clearly separated parts: + +**PART 1: Decision Record** (top of file) +This section is for reviewers and future-you. It explains WHAT changed from the original plan and WHY. It contains: +- Enhancement Summary (counts, agents, dates) +- Pre-Implementation Verification checklist +- Key Improvements with agent consensus signals and [Strong Signal] markers +- Research Insights (consolidated from all sections — NOT interleaved in the spec) +- New Considerations Discovered +- Fast Follow items +- Cross-Cutting Concerns +- Deferred items + +**PART 2: Implementation Spec** (rest of file) +This section is for the developer implementing the plan. It is a clean, linear 'do this, then this, then this' document. It contains: +- The original plan structure with enhancements merged seamlessly +- Clean code blocks ready to copy — NO `// ENHANCED: <reason>` annotations, NO `(Rec #X, Y agents)` references +- No Research Insights blocks interrupting the flow +- Clear marking of code snippets: add `<!-- ready-to-copy -->` before code blocks that are final, add `<!-- illustrative -->` before code blocks that are pseudocode or depend on project-specific details + +Separate the two parts with: +``` +--- +# Implementation Spec +--- +``` -**For prose sections (architecture decisions, descriptions, rationale):** -- Preserve the original text exactly — never rewrite the user's words -- Append a `### Research Insights` block AFTER the original prose -- If you find yourself editing the user's original sentences, STOP +### Preservation -**For code blocks (implementation examples, configuration, schemas):** -- When a `must_change` or `should_change` recommendation modifies a code block, merge it DIRECTLY into the code -- Produce one final code block with all enhancements applied inline -- Mark each enhancement with a `// ENHANCED: <reason>` comment -- REPLACE the original code block — do NOT show original and enhanced side-by-side -- This eliminates the two-pass problem where a developer reads the plan once for structure and again for changes +**All sections:** Preserve original section structure, ordering, and acceptance criteria. -**For all sections:** -- Preserve original section structure and ordering -- Preserve all acceptance criteria +**Prose sections:** Preserve original text exactly. 
If a recommendation changes the guidance, rewrite the prose to incorporate the improvement naturally — do NOT append a separate 'Research Insights' block. The developer should read one coherent document, not an original + annotations. -### Enhancement Format — Per-Section +**Code blocks:** When must_change or should_change recommendations modify a code block, produce the FINAL corrected version. Do not annotate what changed — the Decision Record covers that. The developer should be able to copy the code block directly. -**For sections with code blocks that have recommendations:** +### Convergence Signals -```[language] -// Original code preserved where unchanged -const config = { - staleTime: 5 * 60 * 1000, - // ENHANCED: Add retry with backoff — prevents cascade failures on transient network issues - retry: 3, - retryDelay: (attempt) => Math.min(1000 * 2 ** attempt, 30000), - // ENHANCED: Disable refetch on focus — reduces unnecessary requests for stable data - refetchOnWindowFocus: false, -}; -``` +When a recommendation has convergence_count >= 3, prefix it with **[Strong Signal — N agents]**. This means multiple independent agents flagged the same concern. Strong signals should: +- Be given elevated visibility in the enhanced plan +- Trigger a PR scope question: 'If this strong signal represents a standalone fix (e.g., type consolidation, performance fix), recommend it as a separate prerequisite PR rather than bundling into this feature PR.' -**For prose sections with recommendations:** +### Action Classification -```markdown -### Research Insights +Classify every recommendation into one of FOUR buckets: -**Best Practices:** -- [Concrete recommendation with rationale] +**implement** — Code changes for this PR. Go into code blocks or Research Insights. +**verify** — Checks before implementing. Go into Pre-Implementation Verification section. +**fast_follow** — Out of scope for this PR but with real user-facing impact. These are NOT generic deferrals — they are specific, actionable items that should be ticketed before merge. Examples: type consolidation that multiple agents flagged, performance fixes unrelated to the feature, cleanup work that reduces technical debt. Go into Fast Follow section. +**defer** — Lower-priority items or nice-to-haves. Go into Deferred section. -**Edge Cases & Pitfalls:** -- [Edge case and how to handle it] +The difference between fast_follow and defer: fast_follow items have real UX or reliability impact and MUST be ticketed. Deferred items are genuine nice-to-haves. -**References:** -- [URL or documentation link] -``` +### Sequencing -Only include subsections that have actual recommendations. Do NOT include empty subsections. +State dependency relationships explicitly: +- 'Fix X must be implemented before Fix Y because...' +- 'Fix X and Fix Y are independent' -### Action Classification +### Resolve Conditionals — Do Not Leave Forks for the Developer -Classify every recommendation into one of three buckets. Do NOT interleave them — group clearly: +If the plan provides alternative implementations contingent on codebase state (e.g., "if computeScopedFilterCounts is in-memory, use approach A; if DB-based, use approach B"), READ the actual codebase to determine which applies. Include ONLY the applicable approach in the Implementation Spec. Note the discarded alternative briefly in the Decision Record. -**`implement`** — Code changes to make. These go directly into code blocks (for code sections) or into Research Insights (for prose sections). 
+Do NOT leave "if X, do A; if Y, do B" in the Implementation Spec. The developer should never have to stop implementing to investigate which branch applies — that's the enhancer's job. If the codebase state genuinely cannot be determined (e.g., the file doesn't exist yet), state the assumption explicitly and pick one path. -**`verify`** — Checks or tests to run BEFORE implementing certain changes. Examples: 'confirm API supports batch mode before switching to batch implementation', 'verify session format matches expected pattern'. These go into the Pre-Implementation Verification section. +### Version Verification -**`defer`** — Items explicitly out of scope for this plan. `consider` and `informational` impact items from the judge typically land here. These go into the Deferred section. +BEFORE suggesting any code change, check PLAN_MANIFEST.json's `frameworks_with_versions` for the resolved version. Do NOT suggest APIs that don't exist in the installed version: +- If the manifest says React 19, verify the API exists in React 19 (not just React 18 or 20) +- If the manifest says ES2022 target (check tsconfig.json if available), do NOT use ES2023+ APIs like Array.findLast +- If the manifest has `version_mismatches`, use the ACTUAL resolved version, not what the plan text stated +- When suggesting library APIs, verify they exist in the specific major version -### Sequencing +This single check prevents the most common category of enhancer-introduced bugs. -When two fixes have a dependency relationship, state the sequence explicitly: -- 'Fix X must be implemented before Fix Y because Y depends on X's output' -- 'Fix X → deploy → observe metrics → then decide on Fix Y' -- 'Fix X and Fix Y are independent — implement both regardless' +### Accessibility Verification -### Enhancement Summary +When suggesting CSS animations or transitions: +- Verify `prefers-reduced-motion` fallbacks do NOT leave permanent visual artifacts (stuck opacity, stuck transforms, permanent overlays). Reduced-motion alternatives must be time-bounded or produce no visual change. +- Verify `aria-live` regions are pre-mounted in the DOM, not conditionally rendered — screen readers silently drop announcements from newly mounted live regions. -Add this block at the TOP of the plan (before the first section): +### Self-Consistency Check -```markdown -## Enhancement Summary +BEFORE writing the final output, review your own enhancement for internal contradictions: +- If you say content should go in 'primacy position', verify it actually IS placed early in the file, not at the bottom +- If you describe something as 'ephemeral', verify no other section assumes it persists +- If you recommend a validation layer, check you haven't already recommended the same validation at another boundary +- If two sections give conflicting guidance on where logic should live, resolve the conflict explicitly + +Flag any contradictions you catch as a note: '**Self-check:** [what was caught and resolved]' + +### Decision Record (PART 1) -**Deepened on:** [today's date] +Add this block at the TOP of the plan. This is the reviewer-facing section. + +# Decision Record + +**Deepened on:** [date] **Sections enhanced:** [count] of [total] **Research agents used:** [count] -**Total recommendations applied:** [count] +**Total recommendations applied:** [count] ([N] implement, [M] fast_follow, [P] defer) -### Pre-Implementation Verification -Tasks to check BEFORE writing code: -1. [ ] [Verification task — what to check and why] -2. 
[ ] [Verification task] +## Pre-Implementation Verification +Run these checks BEFORE writing any code: +1. [ ] [Verification task — e.g., confirm library version, check existing types] -### Implementation Sequence -Order of operations when fixes have dependencies: -1. [Fix/enhancement] — implement first because [reason] -2. [Fix/enhancement] — depends on #1's output -3. [Fix/enhancement] — independent, implement anytime +**IMPORTANT:** This is the ONLY location for the verification checklist. Do NOT repeat or duplicate this list in the Implementation Spec. The Implementation Spec should open with: "Run the Pre-Implementation Verification in the Decision Record above before starting." -If no dependencies exist, state: 'All enhancements are independent — implement in any order.' +## Implementation Sequence +1. [Fix] — implement first because [reason] -### Key Improvements -1. [Most impactful improvement] +## Key Improvements +1. [Most impactful] [Strong Signal — N agents] if applicable 2. [Second most impactful] 3. [Third most impactful] -### New Considerations Discovered -- [Important finding that wasn't in the original plan] -- [Risk or edge case not previously considered] +## Research Insights +Consolidated findings from all research agents. Organized by theme, not by plan section. -### Cross-Cutting Concerns -- [Concern spanning multiple sections, if any] +### [Theme 1 — e.g., State Management] +- [Insight with source attribution] +- [Insight with source attribution] -### Deferred to Future Work -Items out of scope for this plan: -- [CONSIDER/INFORMATIONAL item] — why it's deferred -``` +### [Theme 2 — e.g., Accessibility] +- [Insight with source attribution] + +## New Considerations Discovered +- [Finding not in original plan] + +## Fast Follow (ticket before merge) +Items out of this PR's scope but with real user-facing impact: +- [ ] [Item] — why it matters, suggested ticket scope + +## Cross-Cutting Concerns +- [Concern spanning multiple sections] + +## Deferred to Future Work +- [Item] — why deferred (low impact, speculative, or blocked) + +--- +# Implementation Spec +--- + +[The clean, implementation-ready plan follows here] ### Content Rules -- Code examples are high-value — merge them into code blocks wherever possible. -- Keep enhancement text concise and actionable — no filler prose. -- If multiple agents recommended the same thing, that's a strong signal — note it. -- If agents identified cross-cutting concerns, add a dedicated section at the end. -- Every `must_change` recommendation MUST appear in the enhanced plan — either merged into code or in Research Insights. Do not drop them. - -4. Write the enhanced plan to .deepen/ENHANCED_PLAN.md -5. Return to parent: 'Enhancement complete. Enhanced <N> of <M> sections with <X> recommendations (<Y> implemented, <Z> deferred). Written to .deepen/ENHANCED_PLAN.md' +- The Decision Record is for reviewers. The Implementation Spec is for developers. Do not mix audiences. +- In the Implementation Spec: NO `// ENHANCED:` comments, NO `(Rec #X, Y agents)` references, NO `### Research Insights` blocks. Just clean, implementable guidance. +- In the Decision Record: agent consensus signals, strong signal markers, and research attribution ARE appropriate. +- Mark code blocks: `<!-- ready-to-copy -->` for final code, `<!-- illustrative -->` for pseudocode that depends on project-specific details. +- Every must_change recommendation MUST appear in the Implementation Spec (merged naturally into the plan content). 
+- Strong signal items (3+ agents) get **[Strong Signal]** prefix in the Decision Record and PR scope assessment. +- When deferring an item that has UX consequences, add a bridge mitigation: a lightweight prompt-level or code-level workaround that partially addresses the gap until the full fix ships. + +4. Write to .deepen/ENHANCED_PLAN.md +5. Return to parent: 'Enhancement complete. Enhanced <N> of <M> sections with <X> recommendations (<Y> implement, <Z> fast_follow). Written to .deepen/ENHANCED_PLAN.md' ") ``` -### 8. Verify Enhanced Plan Integrity +Log checkpoint: + +```bash +echo "## Phase 6: Enhancement — $([ -f .deepen/ENHANCED_PLAN.md ] && echo 'PASS' || echo 'FAIL')" >> .deepen/PIPELINE_LOG.md +echo "- Completed: $(date -u +%H:%M:%S)" >> .deepen/PIPELINE_LOG.md +echo "" >> .deepen/PIPELINE_LOG.md +``` + +### 7b. Quality Review (Phase 6b — CoVe Pattern) <critical_instruction> -Verify the enhancer preserved the original plan structure. If sections are missing, the enhancer rewrote instead of appending. +This is a POST-ENHANCEMENT verification agent. It reads ONLY the enhanced plan — NOT the intermediate recommendations. This context isolation prevents the reviewer from inheriting the enhancer's perspective. </critical_instruction> +``` +Task quality-reviewer(" +You are a Plan Quality Reviewer using the Chain-of-Verification (CoVe) pattern. Your job is to find problems in the ENHANCED plan that the enhancement process may have introduced. + +## Instructions: +1. Read .deepen/ENHANCED_PLAN.md — the enhanced plan to review +2. Read .deepen/original_plan.md — the original for comparison +3. Read .deepen/PLAN_MANIFEST.json — section structure + +## Step 1: Extract Claims +List every concrete claim or instruction the enhanced plan makes. Focus on: +- Where it says content should be placed (file, section, position) +- What it describes as ephemeral vs persistent +- What validation/checking layers it adds +- What it says is in/out of scope +- Sequencing dependencies between items + +## Step 2: Verification Questions +For each claim, form a verification question: +- 'The plan says X should go in primacy position — is it actually placed at the top of the file?' +- 'The plan says suggestions are ephemeral — does any other section assume they persist?' +- 'The plan adds validation at layer A — does it also add the same validation at layer B and C?' + +## Step 3: Code Block Completeness Check + +For every constant, type, function, or import referenced in `<!-- ready-to-copy -->` code blocks: +- Verify it is EITHER: (a) defined elsewhere in the plan, (b) listed in Pre-Implementation Verification as something to check/confirm, OR (c) a standard library/framework API +- Flag any undefined references as 'undefined_references' in the output. Example: a code block uses `FILTER_KEY_TO_PRODUCT_FIELD[key]` but this constant is never defined in the plan and not in the verification checklist. + +## Step 4: Integration Test Coverage Check + +If the plan describes N interconnected layers or components of a feature (e.g., "three layers: delta counts + conversational repair + visual brushing"), verify there is at least ONE test that exercises all N layers end-to-end for the same user action. Flag missing cross-layer integration tests. 
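Example (illustrative): for the three-layer case above, a sufficient test is 'user applies a filter → delta counts update AND a repair message posts AND the brushed region highlights'. Three separate per-layer unit tests do NOT satisfy this check; one test must drive all layers from the same user action.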
+ +## Step 5: Check and Report + +Write to .deepen/QUALITY_REVIEW.json: + +{ + \"self_contradictions\": [ + { + \"claim_a\": \"<what the plan says in one place>\", + \"claim_b\": \"<what the plan says elsewhere that contradicts>\", + \"severity\": \"high|medium|low\", + \"suggested_resolution\": \"<which claim should win and why>\" + } + ], + \"pr_scope_assessment\": { + \"recommended_split\": true|false, + \"reason\": \"<why split or not>\", + \"suggested_prs\": [ + { + \"title\": \"<PR title>\", + \"scope\": \"<what it contains>\", + \"rationale\": \"<why separate>\" + } + ] + }, + \"defensive_stacking\": [ + { + \"what\": \"<data being validated>\", + \"layers\": [\"schema\", \"backend\", \"frontend\"], + \"recommendation\": \"<which layers to keep and which are redundant>\" + } + ], + \"deferred_without_mitigation\": [ + { + \"item\": \"<what was deferred>\", + \"ux_consequence\": \"<what users will experience>\", + \"bridge_mitigation\": \"<lightweight workaround to add now>\" + } + ], + \"undefined_references\": [ + { + \"code_block_location\": \"<which section/commit the code block is in>\", + \"reference\": \"<the constant/type/function used but not defined>\", + \"suggestion\": \"<define it, add to verification checklist, or confirm it exists in codebase>\" + } + ], + \"missing_integration_tests\": [ + { + \"layers\": [\"<layer 1>\", \"<layer 2>\", \"<layer 3>\"], + \"missing_test\": \"<description of the end-to-end test that should exist>\", + \"user_action\": \"<the user action that should trigger all layers>\" + } + ], + \"overall_quality\": \"good|needs_revision|major_issues\", + \"summary\": \"<200 chars — overall assessment>\" +} + +4. Return to parent: 'Quality review complete. [overall_quality]. [count] self-contradictions, PR split: [yes/no], [count] defensive stacking issues. Written to .deepen/QUALITY_REVIEW.json' +") +``` + +Log checkpoint: + +```bash +echo "## Phase 6b: Quality Review — $([ -f .deepen/QUALITY_REVIEW.json ] && echo 'PASS' || echo 'FAIL')" >> .deepen/PIPELINE_LOG.md +echo "- Completed: $(date -u +%H:%M:%S)" >> .deepen/PIPELINE_LOG.md +echo "" >> .deepen/PIPELINE_LOG.md +``` + +### 8. Verify Enhanced Plan Integrity (Phase 7) + ```bash node -e " const fs = require('fs'); const manifest = JSON.parse(fs.readFileSync('.deepen/PLAN_MANIFEST.json', 'utf8')); const enhanced = fs.readFileSync('.deepen/ENHANCED_PLAN.md', 'utf8'); +const enhancedLower = enhanced.toLowerCase(); -let missing = []; +let found = 0, missing = []; for (const section of manifest.sections) { - // Check that each original section title still appears in the enhanced plan - if (!enhanced.includes(section.title)) { + // Try exact match first, then case-insensitive substring + if (enhanced.includes(section.title)) { + found++; + } else if (enhancedLower.includes(section.title.toLowerCase())) { + found++; + console.log('FUZZY MATCH: \"' + section.title + '\" (case mismatch but present)'); + } else { missing.push(section.title); } } if (missing.length > 0) { - console.log('❌ PRESERVATION FAILURE — these original sections are missing from the enhanced plan:'); + console.log('PRESERVATION FAILURE — missing ' + missing.length + ' of ' + manifest.sections.length + ' sections:'); missing.forEach(t => console.log(' - ' + t)); - console.log('The enhancer may have rewritten the plan instead of appending. 
Re-run the enhancer.'); } else { - console.log('✅ All ' + manifest.sections.length + ' original sections preserved in enhanced plan.'); + console.log('ALL ' + manifest.sections.length + ' sections preserved (' + found + ' found).'); } " ``` -If sections are missing, re-run the enhancer with stronger preservation instructions. Do not overwrite the original plan with a broken enhancement. +Log checkpoint (single entry — do NOT run preservation check twice): -### 9. Present Enhanced Plan +```bash +PRES_RESULT=$(node -e " +const fs = require('fs'); +const m = JSON.parse(fs.readFileSync('.deepen/PLAN_MANIFEST.json', 'utf8')); +const e = fs.readFileSync('.deepen/ENHANCED_PLAN.md', 'utf8').toLowerCase(); +const missing = m.sections.filter(s => !e.includes(s.title.toLowerCase())); +console.log(missing.length === 0 ? 'PASS' : 'PARTIAL'); +") +echo "## Phase 7: Preservation Check — $PRES_RESULT" >> .deepen/PIPELINE_LOG.md +echo "- Completed: $(date -u +%H:%M:%S)" >> .deepen/PIPELINE_LOG.md +echo "" >> .deepen/PIPELINE_LOG.md +echo "## PIPELINE COMPLETE" >> .deepen/PIPELINE_LOG.md +echo "- End: $(date -u +%H:%M:%S)" >> .deepen/PIPELINE_LOG.md +``` -<critical_instruction> -NOW read `.deepen/ENHANCED_PLAN.md` — or rather, copy it to the original location and present the summary. -</critical_instruction> +### 9. Present Enhanced Plan -#### Step 9a: Copy Enhanced Plan to Final Location +#### Step 9a: Copy to Final Location ```bash -# Option A: Update in place (default) cp .deepen/ENHANCED_PLAN.md <original_plan_path> - -# Option B: Create separate file (if user prefers) -# cp .deepen/ENHANCED_PLAN.md <plan_path_with_-deepened_suffix> ``` -#### Step 9b: Read the Enhancement Summary +#### Step 9b: Read Enhancement Summary and Quality Review -Read ONLY the Enhancement Summary block from the top of the enhanced plan (first ~30 lines). Do NOT read the entire enhanced plan into parent context — the user can read the file directly. +Read ONLY the Enhancement Summary block from the top of the enhanced plan (first ~30 lines). Do NOT read the entire plan into parent context. + +Also read `.deepen/QUALITY_REVIEW.json` for the quality assessment. Present the quality findings alongside the enhancement summary. #### Step 9c: Present Summary @@ -795,141 +1114,52 @@ Read ONLY the Enhancement Summary block from the top of the enhanced plan (first - **Duplicates Removed:** [count] ### Key Improvements: -1. [Most impactful improvement] +1. [Most impactful] 2. [Second most impactful] 3. [Third most impactful] ### New Considerations Discovered: - [Finding 1] - [Finding 2] + +### Quality Review: +- **Overall:** [good/needs_revision/major_issues] +- **Self-contradictions found:** [count] — [brief description if any] +- **PR scope:** [single PR / recommend split into N PRs] + - [If split recommended: list suggested PRs] +- **Defensive stacking:** [count] issues — [brief description if any] +- **Deferred items needing bridge mitigation:** [count] ``` -#### Step 9d: Offer Next Steps +#### Step 9d: Present Pipeline Log -Ask the user: +Read and display the contents of `.deepen/PIPELINE_LOG.md` to the user so they can report diagnostics. + +#### Step 9e: Offer Next Steps **"Plan deepened. What would you like to do next?"** 1. **View diff** — `git diff <plan_path>` -2. **Run `/plan_review`** — Get review agents' feedback on enhanced plan -3. **Start `/workflows:work`** — Begin implementing the enhanced plan +2. **Run `/plan_review`** — Get review agents' feedback +3. **Start `/workflows:work`** — Begin implementing 4. 
**Deepen further** — Run another round on specific sections 5. **Revert** — `git checkout <plan_path>` -6. **Compound insights** — Run `/workflows:compound` to extract novel patterns into `docs/solutions/` - -If user selects option 6: -- Summarize the key novel discoveries from the deepening session (findings that aren't already in `docs/solutions/`) -- Tell the user to run `/workflows:compound` which will capture these insights using the proper `compound-docs` skill with validated YAML schema -- The user can run `/workflows:compound` once per novel finding, or describe multiple findings and let the command handle them -- This closes the compound engineering loop — future `/deepen-plan` runs will discover these learnings in Step 3b and apply them automatically - ---- +6. **Compound insights** — Run `/workflows:compound` to extract novel patterns ## Appendix: Token Budget Reference -**Parent context (what matters for avoiding overflow):** - | Component | Token Budget | Notes | |-----------|-------------|-------| -| Plan manifest analysis return | ~100 | One sentence confirmation | -| Discovery (directory listings) | ~1,000-2,000 | File lists, frontmatter scans | -| Matched resources list | ~500 | Names and paths only | -| Per-agent summary returned to parent | ~100-150 | One sentence + counts (10-20 agents) | -| Validation script | ~0 | Bash, no LLM tokens | -| Judge return | ~100 | One sentence + counts | -| Enhancement return | ~100 | One sentence confirmation | -| Enhancement summary (top of plan) | ~500 | Read only the summary block | -| Parent orchestrator overhead | ~5,000 | Instructions, synthesis, report | -| **Total parent context from agents** | **~8,000-12,000** | **vs unbounded in v1** | - -**Sub-agent spawns:** - -| Agent | Context Cost | Purpose | -|-------|-------------|---------| -| Plan analyzer | 1 window | Structured manifest for all agents | -| 3 always-run agents (security, arch, perf) | 3 windows | Cross-cutting analysis | -| 5-15 matched skill/learning/review agents | 5-15 windows | Domain-specific recommendations | -| 2-5 per-technology docs researchers | 2-5 windows | Deep framework/library research via Context7 + web | -| Judge | 1 window | Dedup, group by section, rank with source priority | -| Plan enhancer | 1 window | Writes the final enhanced plan | -| **Total** | **13-26 windows** | **Each isolated, parent stays lean** | - -The key insight: sub-agent context windows are independent and disposable. Only what they *return* to the parent matters for overflow. Every sub-agent returns ~100 tokens. The parent reads only the enhancement summary (~500 tokens). The full enhanced plan lives on disk at the original file path. 
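To make that contract concrete, here is a sketch of what every research agent effectively does (the function name is hypothetical; the JSON shape follows the OUTPUT RULES above):

```js
// Illustrative sketch: full findings go to disk, one sentence returns to the parent.
const fs = require('fs');

function finishAgent(agentName, recommendations, toolsUsed) {
  const payload = {
    agent_name: agentName,
    tools_used: toolsUsed, // an empty array gets flagged as hallucination risk in validation
    truncated_count: Math.max(0, recommendations.length - 8), // recs omitted beyond the cap
    recommendations: recommendations.slice(0, 8), // hard cap enforced by the validation script
  };
  fs.writeFileSync('.deepen/' + agentName + '.json', JSON.stringify(payload, null, 2));
  // The only thing that enters parent context (roughly 100 tokens):
  return agentName + ' complete. ' + payload.recommendations.length +
    ' recommendations written to .deepen/' + agentName + '.json';
}
```

The judge, enhancer, and quality reviewer follow the same shape: heavy reads happen inside disposable sub-agent windows, and the parent only ever sees one-line receipts.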
- ---- - -## Example Enhancements - -### Example 1: Code Block — Merge Mode - -**Before (from `/workflows:plan`):** -```markdown -## Query Configuration - -```typescript -const queryClient = new QueryClient({ - defaultOptions: { - queries: { - staleTime: 5 * 60 * 1000, - }, - }, -}); -``` -``` - -**After (from `/deepen-plan`):** -```markdown -## Query Configuration - -```typescript -const queryClient = new QueryClient({ - defaultOptions: { - queries: { - staleTime: 5 * 60 * 1000, - // ENHANCED: Add retry with exponential backoff — prevents cascade failures on transient network issues - retry: 3, - retryDelay: (attempt) => Math.min(1000 * 2 ** attempt, 30000), - // ENHANCED: Disable refetch on focus — reduces unnecessary requests for stable data - refetchOnWindowFocus: false, - }, - }, -}); - -// ENHANCED: Query key factory for consistent cache invalidation across components -const productKeys = { - all: ['products'] as const, - lists: () => [...productKeys.all, 'list'] as const, - detail: (id: string) => [...productKeys.all, 'detail', id] as const, -}; -``` -``` - -Note: The code block is replaced, not duplicated. `// ENHANCED:` comments mark what was added and why. An AI developer can implement this as-written. - -### Example 2: Prose Section — Append Mode - -**Before (from `/workflows:plan`):** -```markdown -## Technical Approach - -Use React Query for data fetching with optimistic updates. The cart state will be managed in Zustand with SSE providing real-time sync. -``` - -**After (from `/deepen-plan`):** -```markdown -## Technical Approach - -Use React Query for data fetching with optimistic updates. The cart state will be managed in Zustand with SSE providing real-time sync. - -### Research Insights - -**Edge Cases & Pitfalls:** -- Handle race conditions with `cancelQueries` on component unmount — stale SSE responses can overwrite fresh optimistic data -- Zustand store should validate SSE payloads before writing (untrusted data boundary) - -**References:** -- https://tanstack.com/query/latest/docs/react/guides/optimistic-updates -- https://tkdodo.eu/blog/practical-react-query -``` - -Note: Original prose is untouched. Research insights are appended after. +| Plan manifest return | ~100 | One sentence + version mismatch count | +| Discovery (listings) | ~1,000-2,000 | File lists, frontmatter | +| Matched resources list | ~500 | Names and paths | +| Per-agent summary (10-20) | ~100-150 each | One sentence + counts | +| Validation script | ~0 | Bash (now reports truncated_count totals) | +| Per-section judge returns (N) | ~100 each | One sentence per section | +| Merge judge return | ~100 | One sentence + cross-section count | +| Enhancement return | ~100 | One sentence | +| Quality review return | ~100 | One sentence | +| Quality review JSON (parent reads) | ~500 | PR scope + contradictions | +| Enhancement summary | ~500 | Top of plan | +| Parent overhead | ~5,000 | Instructions, synthesis | +| **Total parent from agents** | **~8,500-13,000** | **Slightly more returns but judge ~75% faster** | From 8e544d01740faf8a3b18e5ad7d80443f858ce71c Mon Sep 17 00:00:00 2001 From: Drew Miller <drew@drewx.design> Date: Sun, 15 Feb 2026 14:02:25 -0500 Subject: [PATCH 5/6] Fix merge judge timeout, bash escape, and preservation false positives MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three fixes from v3.4 test run: 1. Split merge judge into Data Prep Agent (haiku, mechanical I/O) + Merge Judge (reasoning only). 
Data prep reads 20+ files and compiles MERGE_INPUT.json. Merge judge reads one file, focuses on cross-section analysis. Fixes OOM/timeout failures where merge judge was spending half its budget on file reads. 2. Replace all ! operators in bash-embedded node -e scripts with === false and == null patterns. Bash history expansion escapes ! as \! which Node.js rejects as SyntaxError. 3. Add dash normalization to preservation check — em-dashes and en-dashes normalized before comparing section titles. Prevents false positives when enhancer normalizes typography. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> --- .../commands/deepen-plan.md | 135 ++++++++++-------- 1 file changed, 79 insertions(+), 56 deletions(-) diff --git a/plugins/compound-engineering/commands/deepen-plan.md b/plugins/compound-engineering/commands/deepen-plan.md index f0e3f24a..23f864bd 100644 --- a/plugins/compound-engineering/commands/deepen-plan.md +++ b/plugins/compound-engineering/commands/deepen-plan.md @@ -20,7 +20,7 @@ argument-hint: "[path to plan file]" 2. **Discover Phase** (parent) — Find available skills, learnings, agents using Glob/Read. Match against manifest. 3. **Research Phase** (batched parallel) — Matched agents write structured recommendations to `.deepen/`, return only a completion signal. Agents report `truncated_count` when capped. 4. **Validate** — Verify all expected agent files exist, conform to schema (including required `truncated_count`), flag zero-tool-use hallucination risk. -5. **Judge Phase** (parallel per-section + merge) — Per-section judges run in parallel (batched, max 4). Each deduplicates and ranks within its section. Merge judge then resolves cross-section conflicts/convergence and produces final consolidated output. +5. **Judge Phase** (parallel per-section + data prep + merge) — Per-section judges run in parallel (batched, max 4). Data prep agent (haiku) compiles all results into a single `MERGE_INPUT.json`. Merge judge reads one file and focuses on cross-section conflict/convergence reasoning. 6. **Judge Validation** — Verify judge output references real manifest sections. 7. **Enhance Phase** — Synthesis agent reads consolidated recommendations + original plan, writes enhanced version. **Verifies APIs exist in resolved versions before suggesting code.** Classifies items as implement/verify/fast_follow/defer. Two-part output: Decision Record + Implementation Spec. 8. **Quality Review** — CoVe-pattern agent checks enhanced plan for self-contradictions, PR scope, defensive stacking, deferred items needing bridge mitigations. @@ -230,27 +230,6 @@ Read `.deepen/PLAN_MANIFEST.json` and match discovered resources: **Learnings** — Match if tags, category, or module overlaps with plan technologies/domains. 
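A minimal sketch of that overlap test (assumes the manifest exposes `technologies` and `domains` arrays and that learning frontmatter carries `tags`, `category`, and `module`; adapt to the real field names):

```js
// Hypothetical helper: returns true when a learning file should get a research agent.
function learningMatches(frontmatter, manifest) {
  const planTerms = new Set(
    [...(manifest.technologies || []), ...(manifest.domains || [])].map(t => t.toLowerCase())
  );
  const learningTerms = [...(frontmatter.tags || []), frontmatter.category, frontmatter.module]
    .filter(Boolean)
    .map(t => t.toLowerCase());
  // When in doubt, spawn it: a false positive costs one disposable context window.
  return learningTerms.some(term => planTerms.has(term));
}
```

The expensive case is a false negative (a missed learning that would have prevented a production bug), which is why the match is deliberately loose.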
-#### Learnings Filtering Examples - -Given 12 learning files and a plan about "Rails API caching with Redis": - -**SPAWN (likely relevant):** -``` -docs/solutions/performance-issues/n-plus-one-queries.md # tags: [activerecord] — matches Rails -docs/solutions/performance-issues/redis-cache-stampede.md # tags: [caching, redis] — exact match -docs/solutions/configuration-fixes/redis-connection-pool.md # tags: [redis] — matches Redis -docs/solutions/integration-issues/api-versioning-gotcha.md # tags: [api, rails] — matches API -``` - -**SKIP (clearly not applicable):** -``` -docs/solutions/deployment-issues/heroku-memory-quota.md # plan has no deployment concerns -docs/solutions/frontend-issues/stimulus-race-condition.md # plan is API, not frontend -docs/solutions/authentication-issues/jwt-expiry.md # plan has no auth -``` - -When in doubt, spawn it. A learning agent that returns "Not applicable" wastes one context window. A missed learning that would have prevented a production bug wastes days. - **Agents** — Two tiers: **Always run (cross-cutting):** @@ -534,14 +513,14 @@ for (const file of files) { const fp = path.join('.deepen', file); try { const data = JSON.parse(fs.readFileSync(fp, 'utf8')); - if (!Array.isArray(data.recommendations)) throw new Error('recommendations not an array'); + if (Array.isArray(data.recommendations) === false) throw new Error('recommendations not an array'); if (data.recommendations.length > 8) throw new Error('too many recommendations: ' + data.recommendations.length); if (typeof data.truncated_count !== 'number') throw new Error('missing required field: truncated_count'); for (let i = 0; i < data.recommendations.length; i++) { const rec = data.recommendations[i]; if (rec.section_id == null) throw new Error('rec ' + i + ': missing section_id'); - if (!rec.type) throw new Error('rec ' + i + ': missing type'); - if (!rec.recommendation) throw new Error('rec ' + i + ': missing recommendation'); + if (rec.type == null || rec.type === '') throw new Error('rec ' + i + ': missing type'); + if (rec.recommendation == null || rec.recommendation === '') throw new Error('rec ' + i + ': missing recommendation'); } const tools = data.tools_used || []; const truncNote = data.truncated_count > 0 ? ' (truncated ' + data.truncated_count + ')' : ''; @@ -663,36 +642,79 @@ echo "- Completed: $(date -u +%H:%M:%S)" >> .deepen/PIPELINE_LOG.md echo "" >> .deepen/PIPELINE_LOG.md ``` -#### Step 6c: Merge Judge (sequential) +#### Step 6c: Data Prep Agent (mechanical — model: haiku) -After ALL section judges complete, launch a single merge judge to handle cross-section concerns. +<critical_instruction> +The merge judge previously failed due to OOM/timeout when reading 20+ files AND doing cross-section reasoning in one context. Split into two agents: a cheap data prep agent handles all I/O, then the merge judge focuses entirely on reasoning from a single pre-compiled input file. +</critical_instruction> + +``` +Task judge-data-prep(" +You are a Data Preparation Agent. Your job is purely mechanical — extract and compile data from multiple files into a single structured input for the merge judge. No judgment, no synthesis. + +## Instructions: +1. Read .deepen/PLAN_MANIFEST.json — extract plan_title, section count +2. Read ALL .deepen/JUDGED_SECTION_*.json files — extract each section's full recommendations array, raw_count, duplicates_removed, conflicts_resolved, section_concerns +3. 
Read ALL agent JSON files in .deepen/*.json (skip PLAN_MANIFEST.json, JUDGED_*.json) — extract ONLY agent_name and summary fields (ignore recommendations — those are already in section judges) + +4. Write to .deepen/MERGE_INPUT.json: + +{ + \"plan_title\": \"<from manifest>\", + \"section_count\": <N>, + \"sections\": [ + { + \"section_id\": <id>, + \"section_title\": \"<title>\", + \"raw_count\": <from section judge>, + \"duplicates_removed\": <from section judge>, + \"conflicts_resolved\": <from section judge>, + \"section_concerns\": [\"<from section judge>\"], + \"recommendations\": [<full array from section judge>] + } + ], + \"agent_summaries\": [ + {\"agent\": \"<name>\", \"summary\": \"<500 chars>\"} + ], + \"totals\": { + \"total_raw\": <sum of all raw_count>, + \"total_duplicates_removed\": <sum>, + \"total_conflicts_resolved\": <sum> + } +} + +5. Return to parent: 'Data prep complete. <N> sections, <M> agent summaries compiled to .deepen/MERGE_INPUT.json' +", model: haiku) +``` + +#### Step 6d: Merge Judge (reasoning — reads one file) + +After data prep completes, the merge judge reads a single pre-compiled input and focuses entirely on cross-section analysis. ``` Task judge-merge(" -You are the Merge Judge. Combine per-section judgments into the final consolidated report. +You are the Merge Judge. Your job is cross-section reasoning — conflict detection, convergence analysis, and final consolidation. All data has been pre-compiled for you in one file. ## Instructions: -1. Read .deepen/PLAN_MANIFEST.json for plan structure -2. Read ALL .deepen/JUDGED_SECTION_*.json files -3. Read ALL agent JSON files in .deepen/*.json (skip PLAN_MANIFEST.json, JUDGED_*.json) — ONLY to extract agent_summaries (agent_name + summary field) +1. Read .deepen/MERGE_INPUT.json — this contains ALL section judgments and agent summaries in one file. Do NOT read individual agent or section judge files. -## Cross-Section Analysis (the merge judge's unique job): -4. CROSS-SECTION CONFLICTS: Check if any recommendation in Section A contradicts one in Section C (e.g., same file referenced with conflicting guidance on where logic should live). Flag conflicts with both section IDs and a resolution recommendation. +## Cross-Section Analysis (your unique job): +2. CROSS-SECTION CONFLICTS: Check if any recommendation in Section A contradicts one in Section C (e.g., same file referenced with conflicting guidance on where logic should live). Flag conflicts with both section IDs and a resolution recommendation. -5. CROSS-SECTION CONVERGENCE: Check if different sections independently recommend the same pattern (e.g., Section 1 recommends typed filterContext AND Section 3 recommends deriving from typed context). This strengthens both signals — note the cross-section reinforcement. +3. CROSS-SECTION CONVERGENCE: Check if different sections independently recommend the same pattern (e.g., Section 1 recommends typed filterContext AND Section 3 recommends deriving from typed context). This strengthens both signals — note the cross-section reinforcement. -6. RENUMBER recommendation IDs sequentially across all sections (1, 2, 3... not per-section). +4. RENUMBER recommendation IDs sequentially across all sections (1, 2, 3... not per-section). -7. Write to .deepen/JUDGED_RECOMMENDATIONS.json: +5. 
Write to .deepen/JUDGED_RECOMMENDATIONS.json: { - \"plan_title\": \"<from manifest>\", - \"total_raw_recommendations\": <sum of raw_count from all section judges>, - \"duplicates_removed\": <sum of duplicates_removed>, - \"conflicts_resolved\": <sum of section conflicts + cross-section conflicts>, + \"plan_title\": \"<from MERGE_INPUT>\", + \"total_raw_recommendations\": <from MERGE_INPUT totals>, + \"duplicates_removed\": <from MERGE_INPUT totals>, + \"conflicts_resolved\": <MERGE_INPUT totals + any new cross-section conflicts>, \"low_evidence_downweighted\": <count>, \"sections\": [ - <each section's recommendations from JUDGED_SECTION_*.json, with renumbered IDs> + <each section's recommendations from MERGE_INPUT, with renumbered IDs> ], \"cross_cutting_concerns\": [ { @@ -701,16 +723,14 @@ You are the Merge Judge. Combine per-section judgments into the final consolidat \"affected_sections\": [1, 3, 5] } ], - \"agent_summaries\": [ - {\"agent\": \"name\", \"summary\": \"<500 chars>\"} - ] + \"agent_summaries\": <from MERGE_INPUT> } -8. Return to parent: 'Merge complete. <X> total recs across <Y> sections. <Z> cross-section concerns. Written to .deepen/JUDGED_RECOMMENDATIONS.json' +6. Return to parent: 'Merge complete. <X> total recs across <Y> sections. <Z> cross-section concerns. Written to .deepen/JUDGED_RECOMMENDATIONS.json' ") ``` -#### Step 6d: Validate Judge Output +#### Step 6e: Validate Judge Output ```bash node -e " @@ -720,12 +740,12 @@ try { const manifest = JSON.parse(fs.readFileSync('.deepen/PLAN_MANIFEST.json', 'utf8')); const manifestIds = new Set(manifest.sections.map(s => s.id)); - if (!Array.isArray(judged.sections)) throw new Error('sections not array'); + if (Array.isArray(judged.sections) === false) throw new Error('sections not array'); if (judged.sections.length === 0) throw new Error('sections empty'); let totalRecs = 0; for (const section of judged.sections) { - if (!manifestIds.has(section.section_id)) { + if (manifestIds.has(section.section_id) === false) { console.log('WARNING: Section ID ' + section.section_id + ' not in manifest'); } totalRecs += section.recommendations.length; @@ -1042,25 +1062,26 @@ echo "" >> .deepen/PIPELINE_LOG.md ```bash node -e " const fs = require('fs'); +const norm = s => s.replace(/\u2014/g, '--').replace(/\u2013/g, '-'); const manifest = JSON.parse(fs.readFileSync('.deepen/PLAN_MANIFEST.json', 'utf8')); -const enhanced = fs.readFileSync('.deepen/ENHANCED_PLAN.md', 'utf8'); +const enhanced = norm(fs.readFileSync('.deepen/ENHANCED_PLAN.md', 'utf8')); const enhancedLower = enhanced.toLowerCase(); let found = 0, missing = []; for (const section of manifest.sections) { - // Try exact match first, then case-insensitive substring - if (enhanced.includes(section.title)) { + const title = norm(section.title); + if (enhanced.includes(title)) { found++; - } else if (enhancedLower.includes(section.title.toLowerCase())) { + } else if (enhancedLower.includes(title.toLowerCase())) { found++; - console.log('FUZZY MATCH: \"' + section.title + '\" (case mismatch but present)'); + console.log('FUZZY MATCH: ' + JSON.stringify(section.title) + ' (case mismatch but present)'); } else { missing.push(section.title); } } if (missing.length > 0) { - console.log('PRESERVATION FAILURE — missing ' + missing.length + ' of ' + manifest.sections.length + ' sections:'); + console.log('PRESERVATION FAILURE -- missing ' + missing.length + ' of ' + manifest.sections.length + ' sections:'); missing.forEach(t => console.log(' - ' + t)); } else { console.log('ALL ' + 
manifest.sections.length + ' sections preserved (' + found + ' found).');
@@ -1073,9 +1094,10 @@ Log checkpoint (single entry — do NOT run preservation check twice):
 
 ```bash
 PRES_RESULT=$(node -e "
 const fs = require('fs');
+const norm = s => s.replace(/\u2014/g, '--').replace(/\u2013/g, '-');
 const m = JSON.parse(fs.readFileSync('.deepen/PLAN_MANIFEST.json', 'utf8'));
-const e = fs.readFileSync('.deepen/ENHANCED_PLAN.md', 'utf8').toLowerCase();
-const missing = m.sections.filter(s => !e.includes(s.title.toLowerCase()));
+const e = norm(fs.readFileSync('.deepen/ENHANCED_PLAN.md', 'utf8')).toLowerCase();
+const missing = m.sections.filter(s => e.includes(norm(s.title).toLowerCase()) === false);
 console.log(missing.length === 0 ? 'PASS' : 'PARTIAL');
 ")
 echo "## Phase 7: Preservation Check — $PRES_RESULT" >> .deepen/PIPELINE_LOG.md
@@ -1156,6 +1178,7 @@ Read and display the contents of `.deepen/PIPELINE_LOG.md` to the user so they c
 | Per-agent summary (10-20) | ~100-150 each | One sentence + counts |
 | Validation script | ~0 | Bash (now reports truncated_count totals) |
 | Per-section judge returns (N) | ~100 each | One sentence per section |
+| Data prep agent return | ~100 | One sentence (compiles MERGE_INPUT.json) |
 | Merge judge return | ~100 | One sentence + cross-section count |
 | Enhancement return | ~100 | One sentence |
 | Quality review return | ~100 | One sentence |

From 47258e693d40344a0f157cf2097b4674119b9244 Mon Sep 17 00:00:00 2001
From: Drew Miller <drew@drewx.design>
Date: Sun, 15 Feb 2026 14:37:35 -0500
Subject: [PATCH 6/6] Fix learnings-researcher section_id type mismatch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Log analysis revealed that learnings-researcher used string section_ids
("Phase-1-Types-Store") while the manifest uses numeric ids (1, 2, 3).
Section judges filter on numeric equality, so 3 of 5 learnings recs were
silently dropped -- including high-value documented-learning source recs
for debounce, frame budgeting, and checkpoint handling.

Fixes:
- OUTPUT RULES now explicitly say "must be a numeric id like 1, 2, 3.
  NOT a string like Phase-1"
- Instruction #5 warns string IDs will be silently dropped
- Validation script warns on non-numeric section_ids

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 plugins/compound-engineering/commands/deepen-plan.md | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/plugins/compound-engineering/commands/deepen-plan.md b/plugins/compound-engineering/commands/deepen-plan.md
index 23f864bd..c8d3a6ed 100644
--- a/plugins/compound-engineering/commands/deepen-plan.md
+++ b/plugins/compound-engineering/commands/deepen-plan.md
@@ -299,7 +299,7 @@ Read .deepen/original_plan.md for the full plan content.
     "tools_used": ["read_file:path", "web_search:query", ...],
     "recommendations": [
       {
-        "section_id": <from manifest>,
+        "section_id": <NUMBER from manifest — must be a numeric id like 1, 2, 3. NOT a string like "Phase-1">,
         "type": "best-practice|edge-case|anti-pattern|performance|security|code-example|architecture|ux|testing",
         "title": "<100 chars>",
         "recommendation": "<500 chars>",
@@ -313,7 +313,7 @@ Read .deepen/original_plan.md for the full plan content.
   }
3. Max 8 recommendations per agent. Prioritize by impact.
4. Only include recommendations with confidence >= 0.6.
-5. Every recommendation MUST reference a specific section_id from the plan manifest.
+5. 
Every recommendation MUST reference a NUMERIC section_id from the plan manifest (e.g., 1, 2, 3 — NOT "Phase-1" or "Phase-1-Types-Store"). String section IDs will be silently dropped by section judges.
6. Code examples are ENCOURAGED.
7. tools_used is MANDATORY. If empty, set confidence to 0.5 max.
8. **truncated_count is REQUIRED (default 0).** If you had more recommendations beyond the 8 cap, set this to the number you omitted. Example: you found 12 relevant issues but only wrote the top 8 → truncated_count: 4. The judge uses this to weight convergence signals.
@@ -519,6 +519,9 @@ for (const file of files) {
   for (let i = 0; i < data.recommendations.length; i++) {
     const rec = data.recommendations[i];
     if (rec.section_id == null) throw new Error('rec ' + i + ': missing section_id');
+    if (typeof rec.section_id !== 'number') {
+      console.log('WARNING: ' + file + ' rec ' + i + ': section_id is ' + JSON.stringify(rec.section_id) + ' (non-numeric) — must be numeric. Section judges may drop this rec.');
+    }
     if (rec.type == null || rec.type === '') throw new Error('rec ' + i + ': missing type');
     if (rec.recommendation == null || rec.recommendation === '') throw new Error('rec ' + i + ': missing recommendation');
   }
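
To see why the drop was silent, here is a minimal sketch of the failure mode. The manifest shape, the titles, and the judge's filter are assumptions reconstructed from the commit message above, not code taken from the patch:

```js
// Hypothetical repro (illustration only, not part of the patch). Assumes the
// section judge matches recommendations to manifest sections with strict
// numeric equality, as the commit message describes.
const manifest = { sections: [{ id: 1, title: 'Phase 1: Types & Store' }] };

const recs = [
  { section_id: 1, title: 'Debounce store writes' },               // numeric id: matches
  { section_id: 'Phase-1-Types-Store', title: 'Frame budgeting' }, // string id: never matches
];

// A section judge collecting recommendations for manifest section 1:
const kept = recs.filter(r => r.section_id === manifest.sections[0].id);
console.log(kept.length); // 1 -- the string-keyed rec vanishes, no error raised
```

The patch keeps the filter strict and instead steers agents toward numeric ids at the source, with the new validation warning catching any non-numeric ids that still slip through.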