diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 6ecc7ed1..2a8ab7bb 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -142,6 +142,7 @@ For complex investigation tasks, use these skills (read the skill file for detai | **test-planner** | `.github/skills/test-planner/SKILL.md` | "create test plan", "write test cases", "add tests to ADO", "export test plan", "E2E tests for" | | **threat-modeler** | `.github/skills/threat-modeler/SKILL.md` | "create a threat model", "threat model for", "threat model diagram", "STRIDE analysis for", "security diagram for" | | **copilot-review-analyst** | `.github/skills/copilot-review-analyst/SKILL.md` | "analyze Copilot reviews", "Copilot review effectiveness", "review analysis report", "how helpful are Copilot reviews" | +| **skill-evolver** | `.github/skills/skill-evolver/SKILL.md` | "improve/evolve/fix my skills", "run a skill retrospective", "what went wrong with X skill", "why didn't skill Y trigger", "this skill is outdated/wrong", "review skill friction", "you keep making the same mistake", "that didn't go well" | ## 13. Azure DevOps Integration diff --git a/.github/hooks/friction-capture.js b/.github/hooks/friction-capture.js new file mode 100644 index 00000000..ff5d2beb --- /dev/null +++ b/.github/hooks/friction-capture.js @@ -0,0 +1,142 @@ +#!/usr/bin/env node +/** + * friction-capture.js — OPTIONAL automatic friction capture for skill-evolver. + * + * ┌─────────────────────────────────────────────────────────────────────────┐ + * │ DORMANT ON GITHUB COPILOT CLI. │ + * │ This is a Claude Code-style lifecycle hook (PostToolUse / Stop). The │ + * │ GitHub Copilot CLI runtime has no hooks system, so it NEVER fires here │ + * │ and is intentionally NOT registered in orchestrator.json. 
│
+ * │ │
+ * │ On Copilot CLI, ACTIVE capture is the real mechanism: the agent records │
+ * │ friction itself via `journal-utils.js record` (see the skill-evolver │
+ * │ SKILL.md). Do not rely on this file to catch anything on Copilot CLI. │
+ * │ │
+ * │ Kept for teams that run this repo under Claude Code: register it in │
+ * │ `.claude/settings.json` (PostToolUse + Stop) and it will work there. │
+ * └─────────────────────────────────────────────────────────────────────────┘
+ *
+ * Behavior when it DOES fire (Claude Code): reads the hook payload from stdin and
+ *   - PostToolUse: if the tool reported a failure/error, appends a high-signal
+ *     `tool_error` friction event to the journal (attributed to the active skill).
+ *   - Stop / SubagentStop: clears the active-skill marker so attribution does not
+ *     leak across tasks.
+ *
+ * Design rules:
+ *   - Never block the tool flow. Always print {continue:true} and exit 0.
+ *   - Wrap everything in try/catch; capture is best-effort.
+ *   - Only record on detected failure to keep the journal high-signal.
+ */
+
+'use strict';
+
+var fs = require('fs');
+var path = require('path');
+
+function emitAndExit() { // the single exit path: print {continue:true}, then exit 0
+  console.log(JSON.stringify({ continue: true }));
+  process.exit(0);
+}
+
+// Read stdin (hook input)
+var hookInput = {};
+try {
+  hookInput = JSON.parse(fs.readFileSync(0, 'utf-8')) || {}; // null/false/0/"" are valid JSON; fall back so the property reads below cannot throw
+} catch (e) {
+  // no stdin / not JSON — nothing to capture
+  emitAndExit();
+}
+
+// Global off switch — any non-empty SKILL_EVOLUTION_DISABLE value (e.g. 1) silences all capture.
+if (process.env.SKILL_EVOLUTION_DISABLE) {
+  emitAndExit();
+}
+
+// Avoid re-entry loops
+if (hookInput.stop_hook_active) {
+  emitAndExit();
+}
+
+var journal;
+try {
+  journal = require('./journal-utils.js');
+} catch (e) {
+  // store unavailable — never block the tool flow
+  emitAndExit();
+}
+
+var eventName = hookInput.hook_event_name || hookInput.hookEventName || '';
+
+try {
+  // End-of-task events: clear attribution so the next task starts clean.
+  if (eventName === 'Stop' || eventName === 'SubagentStop') {
+    journal.clearActive();
+    emitAndExit();
+  }
+
+  // From here we treat the payload as a (Post)ToolUse event.
+  var toolName = hookInput.tool_name || hookInput.toolName || 'unknown-tool';
+  var resp = hookInput.tool_response || hookInput.toolResponse || hookInput.result || {};
+
+  var failure = detectFailure(resp);
+  if (failure.failed) {
+    journal.recordEvent({
+      tool: toolName,
+      eventType: 'tool_error',
+      severity: failure.severity,
+      expected: 'Tool call to complete successfully',
+      actual: failure.summary,
+      detail: failure.detail,
+      source: 'hook',
+      sessionId: hookInput.session_id || hookInput.sessionId || null
+    });
+  }
+} catch (e) {
+  // swallow — capture must never break the session
+}
+
+emitAndExit();
+
+/**
+ * Heuristically classify a tool response (object or plain string) as failed or not, and how bad.
+ * Returns { failed, severity, summary, detail }. Conservative on purpose: false positives create journal noise.
+ */
+function detectFailure(resp) {
+  var result = { failed: false, severity: 'medium', summary: '', detail: '' };
+  if (resp === null || resp === undefined) return result;
+
+  // Explicit structured failure signals
+  if (resp.success === false || resp.is_error === true || resp.isError === true || resp.error) {
+    result.failed = true;
+  }
+
+  // Non-zero exit codes (powershell / shell-style tools)
+  var exitCode = resp.exit_code !== undefined ? resp.exit_code
+    : (resp.exitCode !== undefined ? resp.exitCode : undefined);
+  if (typeof exitCode === 'number' && exitCode !== 0) {
+    result.failed = true;
+    result.severity = 'high';
+  }
+
+  // String/text payloads that smell like errors
+  var text = '';
+  if (typeof resp === 'string') text = resp;
+  else text = [resp.error, resp.stderr, resp.message, resp.output, resp.content]
+    .filter(function (x) { return typeof x === 'string'; }).join('\n');
+
+  if (!result.failed && text) { // keyword scan is only a fallback when no structured signal fired
+    if (/\b(error|exception|failed|fatal|cannot find|not found|denied|traceback)\b/i.test(text)) {
+      result.failed = true;
+    }
+  }
+
+  if (result.failed) {
+    var src = (typeof resp.error === 'string' && resp.error) ||
+      (typeof resp.stderr === 'string' && resp.stderr) ||
+      (typeof resp.message === 'string' && resp.message) || text || 'Tool reported a failure';
+    result.detail = String(src);
+    result.summary = result.detail.split('\n')[0].slice(0, 200); // first line only, capped at 200 chars
+    if (/\b(fatal|denied|traceback|exception)\b/i.test(result.detail)) result.severity = 'high';
+  }
+  return result;
+}
diff --git a/.github/hooks/journal-utils.js b/.github/hooks/journal-utils.js
new file mode 100644
index 00000000..d7149542
--- /dev/null
+++ b/.github/hooks/journal-utils.js
@@ -0,0 +1,332 @@
+#!/usr/bin/env node
+/**
+ * journal-utils.js — friction journal store for the skill-evolver system.
+ *
+ * Single source of truth for the append-only friction journal (JSONL) and the
+ * "active skill" attribution marker. Used by:
+ *   - the friction-capture.js hook (require()d as a module), and
+ *   - the skill-evolver skill / agent (invoked as a CLI).
+ *
+ * Mirrors the state-utils.js pattern. The live store lives outside the repo so
+ * it never pollutes git status. Override with SKILL_EVOLUTION_HOME.
+ *
+ * Store layout (default ~/.skill-evolution/):
+ *   journal.jsonl     — one friction event per line (see references/friction-schema.md)
+ *   active-skill.json — { "skill": "<name>", "ts": <epoch-ms> }
+ *
+ * CLI usage:
+ *   node journal-utils.js record '<json>'      → append a friction event
+ *   node journal-utils.js set-active <skill>   → mark the active skill for attribution
+ *   node journal-utils.js clear-active         → clear the active-skill marker
+ *   node journal-utils.js active               → print the active skill (or "unknown")
+ *   node journal-utils.js list [--skill X] [--type Y] [--since ISO] [--limit N]
+ *   node journal-utils.js stats [--md]         → aggregated digest (JSON by default); `skill-sizes [--md]` → SKILL.md size report
+ *   node journal-utils.js path                 → print store paths
+ *   node journal-utils.js clear --yes          → wipe the journal (keeps a .bak)
+ */
+
+'use strict';
+
+var fs = require('fs');
+var os = require('os');
+var path = require('path');
+
+var STORE_DIR = process.env.SKILL_EVOLUTION_HOME ||
+  path.join(os.homedir(), '.skill-evolution');
+var JOURNAL_FILE = path.join(STORE_DIR, 'journal.jsonl');
+var ACTIVE_FILE = path.join(STORE_DIR, 'active-skill.json');
+
+var MAX_FIELD = 1200; // truncate long text fields to keep the journal lean
+
+function ensureStore() { // create STORE_DIR (and missing parents) before any write
+  if (!fs.existsSync(STORE_DIR)) {
+    fs.mkdirSync(STORE_DIR, { recursive: true });
+  }
+}
+
+function truncate(val) { // strings longer than MAX_FIELD are cut and tagged; non-strings pass through
+  if (typeof val !== 'string') return val;
+  if (val.length <= MAX_FIELD) return val;
+  return val.slice(0, MAX_FIELD) + ' …[truncated]';
+}
+
+function genId() { // 'fr-' + base36 timestamp + '-' + short random suffix (not cryptographically unique)
+  return 'fr-' + Date.now().toString(36) + '-' +
+    Math.random().toString(36).slice(2, 7);
+}
+
+var VALID_EVENT_TYPES = [
+  'tool_error', 'retry', 'user_correction', 'dead_end', 'missing_context',
+  'ambiguity', 'trigger_miss', 'skill_step_mismatch', 'note'
+];
+var VALID_SEVERITY = ['low', 'medium', 'high'];
+
+/**
+ * Append a friction event. Fills in id/ts/iso/source/skill defaults, maps an unknown
+ * eventType/severity to 'note'/'medium', truncates text fields. Returns the stored event (null when disabled).
+ */
+function recordEvent(evt) {
+  // Global off switch — when disabled, capture is a silent no-op.
+  // Read paths (stats/list) still work so past data stays reviewable.
+  if (process.env.SKILL_EVOLUTION_DISABLE) {
+    return null;
+  }
+  ensureStore();
+  evt = evt || {};
+
+  var now = Date.now(), when = new Date(evt.ts || now); // iso must describe the same instant as ts
+  var stored = {
+    id: evt.id || genId(),
+    ts: evt.ts || now,
+    iso: evt.iso || (isNaN(when.getTime()) ? new Date(now) : when).toISOString(), // unparseable ts falls back to now instead of throwing
+    skill: evt.skill || getActive() || 'unknown',
+    tool: evt.tool || null,
+    eventType: VALID_EVENT_TYPES.indexOf(evt.eventType) !== -1 ? evt.eventType : 'note',
+    severity: VALID_SEVERITY.indexOf(evt.severity) !== -1 ? evt.severity : 'medium',
+    expected: truncate(evt.expected || ''),
+    actual: truncate(evt.actual || ''),
+    detail: truncate(evt.detail || ''),
+    turnsCost: typeof evt.turnsCost === 'number' ? evt.turnsCost : 0,
+    fixHint: truncate(evt.fixHint || ''),
+    source: evt.source || 'agent',
+    sessionId: evt.sessionId || null
+  };
+
+  fs.appendFileSync(JOURNAL_FILE, JSON.stringify(stored) + '\n', 'utf-8');
+  return stored;
+}
+
+function setActive(skill) { // overwrite the marker file with { skill, ts }
+  ensureStore();
+  fs.writeFileSync(ACTIVE_FILE, JSON.stringify({ skill: skill, ts: Date.now() }), 'utf-8');
+}
+
+function clearActive() { // best-effort delete of the marker; errors are ignored
+  try {
+    if (fs.existsSync(ACTIVE_FILE)) fs.unlinkSync(ACTIVE_FILE);
+  } catch (e) { /* ignore */ }
+}
+
+function getActive() { // returns the marked skill name, or null when missing/unreadable/empty
+  try {
+    if (!fs.existsSync(ACTIVE_FILE)) return null;
+    var obj = JSON.parse(fs.readFileSync(ACTIVE_FILE, 'utf-8'));
+    return obj && obj.skill ? obj.skill : null;
+  } catch (e) {
+    return null;
+  }
+}
+
+function readEvents() { // parse journal.jsonl into an array of event objects, oldest first
+  if (!fs.existsSync(JOURNAL_FILE)) return [];
+  var lines = fs.readFileSync(JOURNAL_FILE, 'utf-8').split('\n');
+  var out = [];
+  for (var i = 0; i < lines.length; i++) {
+    var line = lines[i].trim();
+    if (!line) continue;
+    try { var ev = JSON.parse(line); if (ev && typeof ev === 'object') out.push(ev); } catch (e) { /* skip corrupt or non-object line */ }
+  }
+  return out;
+}
+
+/**
+ * Aggregate the journal into a digest the agent can reason over:
+ * totals, per-skill / per-type / per-severity counts, and ranked recurring
+ * issues (grouped by skill + eventType + a normalized actual/detail signature).
+ */
+function computeStats() {
+  var events = readEvents();
+  var bySkill = {}, byType = {}, bySeverity = {}, groups = {};
+  var severityWeight = { low: 1, medium: 3, high: 8 };
+
+  for (var i = 0; i < events.length; i++) {
+    var e = events[i];
+    bySkill[e.skill] = (bySkill[e.skill] || 0) + 1;
+    byType[e.eventType] = (byType[e.eventType] || 0) + 1;
+    bySeverity[e.severity] = (bySeverity[e.severity] || 0) + 1;
+
+    var sig = String(e.actual || e.detail || '').toLowerCase() // String(): hand-edited entries may hold non-strings
+      .replace(/[0-9]+/g, '#')  // normalize ids/numbers
+      .replace(/[^a-z#]+/g, ' ')
+      .trim().split(' ').slice(0, 8).join(' ');
+    var key = e.skill + '::' + e.eventType + '::' + sig;
+    if (!groups[key]) {
+      groups[key] = { skill: e.skill, eventType: e.eventType, signature: sig, count: 0, score: 0, sample: e, lastIso: e.iso };
+    }
+    groups[key].count += 1;
+    groups[key].score += (severityWeight[e.severity] || 3); // unknown severities weigh the same as 'medium'
+    if (e.iso > groups[key].lastIso) groups[key].lastIso = e.iso;
+  }
+
+  var recurring = Object.keys(groups).map(function (k) { return groups[k]; })
+    .sort(function (a, b) { return b.score - a.score; });
+
+  return {
+    total: events.length,
+    bySkill: bySkill,
+    byEventType: byType,
+    bySeverity: bySeverity,
+    recurring: recurring.slice(0, 25),
+    recent: events.slice(-10)
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Skill
size budget (anti-bloat tripwire). Scans every SKILL.md and flags any
+// whose body or description is over budget, so the retrospective can propose
+// pruning / moving detail to references/ instead of letting skills grow forever.
+// ---------------------------------------------------------------------------
+var BODY_WARN = 400;   // SKILL.md body lines — start consolidating
+var BODY_OVER = 500;   // skill-creator's stated maximum
+var DESC_WARN = 900;   // description chars — getting close to the limit
+var DESC_MAX = 1024;   // hard frontmatter limit
+
+function computeSkillSizes() { // returns [{ skill, lines, descLen, flags }], largest file first
+  var skillsDir = path.join(__dirname, '..', 'skills');
+  var out = [];
+  if (!fs.existsSync(skillsDir)) return out;
+  var entries = fs.readdirSync(skillsDir);
+  for (var i = 0; i < entries.length; i++) {
+    var md = path.join(skillsDir, entries[i], 'SKILL.md');
+    if (!fs.existsSync(md)) continue;
+    var content = fs.readFileSync(md, 'utf-8');
+    var lineCount = content ? content.replace(/\r?\n$/, '').split('\n').length : 0; // whole file incl. frontmatter; a final newline is not an extra line
+    var descMatch = content.match(/^description:\s*(.*)$/m); // first `description:` line only
+    var descLen = descMatch ? descMatch[1].length : 0;
+    var flags = [];
+    if (lineCount > BODY_OVER) flags.push('BODY_OVER');
+    else if (lineCount > BODY_WARN) flags.push('BODY_WARN');
+    if (descLen > DESC_MAX) flags.push('DESC_OVER');
+    else if (descLen > DESC_WARN) flags.push('DESC_WARN');
+    out.push({ skill: entries[i], lines: lineCount, descLen: descLen, flags: flags });
+  }
+  return out.sort(function (a, b) { return b.lines - a.lines; });
+}
+
+function skillSizesToMarkdown(sizes) { // render computeSkillSizes() output as a markdown table plus a verdict line
+  var lines = ['# Skill Size Budget', '',
+    'Body budget: warn >' + BODY_WARN + ', over >' + BODY_OVER +
+      ' lines. Description: warn >' + DESC_WARN + ', max ' + DESC_MAX + ' chars.', '',
+    '| Skill | Lines | Desc chars | Flags |',
+    '|-------|-------|-----------|-------|'];
+  var flagged = 0;
+  sizes.forEach(function (s) {
+    if (s.flags.length) flagged++;
+    lines.push('| ' + s.skill + ' | ' + s.lines + ' | ' + s.descLen + ' | ' +
+      (s.flags.join(', ') || '—') + ' |');
+  });
+  lines.push('');
+  lines.push(flagged ? ('⚠️ ' + flagged + ' skill(s) over budget — consider pruning or moving detail to references/.')
+    : '✅ All skills within budget.');
+  return lines.join('\n');
+}
+
+function statsToMarkdown(s) { // render computeStats() output: ranked recurring issues, then counts per skill
+  var lines = [];
+  lines.push('# Friction Digest');
+  lines.push('');
+  lines.push('Total events: **' + s.total + '**');
+  lines.push('');
+  lines.push('## Top recurring issues (ranked by frequency × severity)');
+  lines.push('');
+  lines.push('| Rank | Skill | Type | Count | Score | Signature | Last seen |');
+  lines.push('|------|-------|------|-------|-------|-----------|-----------|');
+  s.recurring.forEach(function (g, i) {
+    lines.push('| ' + (i + 1) + ' | ' + g.skill + ' | ' + g.eventType + ' | ' +
+      g.count + ' | ' + g.score + ' | ' + g.signature + ' | ' + g.lastIso + ' |');
+  });
+  lines.push('');
+  lines.push('## Counts by skill');
+  Object.keys(s.bySkill).sort(function (a, b) { return s.bySkill[b] - s.bySkill[a]; })
+    .forEach(function (k) { lines.push('- ' + k + ': ' + s.bySkill[k]); });
+  return lines.join('\n');
+}
+
+// ---------------------------------------------------------------------------
+// CLI
+// ---------------------------------------------------------------------------
+function parseFlags(args) { // `--key value` → { key: 'value' }; a `--key` with no value → { key: true }
+  var flags = {};
+  for (var i = 0; i < args.length; i++) {
+    if (args[i].indexOf('--') === 0) {
+      var key = args[i].slice(2);
+      var val = (i + 1 < args.length && args[i + 1].indexOf('--') !== 0) ? args[++i] : true;
+      flags[key] = val;
+    }
+  }
+  return flags;
+}
+
+function runCli() { // dispatch process.argv[2] to a store operation; any thrown error exits 1
+  var argv = process.argv.slice(2);
+  var cmd = argv[0];
+  var rest = argv.slice(1);
+
+  try {
+    if (cmd === 'record') {
+      var json = rest[0];
+      var evt = json ? JSON.parse(json) : {};
+      evt.source = evt.source || 'cli';
+      var rec = recordEvent(evt);
+      console.log(rec ? JSON.stringify(rec) : 'capture disabled (SKILL_EVOLUTION_DISABLE set) — not recorded');
+    } else if (cmd === 'set-active') {
+      setActive(rest[0] || 'unknown');
+      console.log('active skill set to: ' + (rest[0] || 'unknown'));
+    } else if (cmd === 'clear-active') {
+      clearActive();
+      console.log('active skill cleared');
+    } else if (cmd === 'active') {
+      console.log(getActive() || 'unknown');
+    } else if (cmd === 'list') {
+      var f = parseFlags(rest);
+      var events = readEvents();
+      if (f.skill) events = events.filter(function (e) { return e.skill === f.skill; });
+      if (f.type) events = events.filter(function (e) { return e.eventType === f.type; });
+      if (f.since) events = events.filter(function (e) { return e.iso >= f.since; });
+      if (parseInt(f.limit, 10) >= 0) events = events.slice(Math.max(0, events.length - parseInt(f.limit, 10))); // keep the newest N; slice(-0) would have returned everything for --limit 0
+      console.log(JSON.stringify(events, null, 2));
+    } else if (cmd === 'stats') {
+      var s = computeStats();
+      var fl = parseFlags(rest);
+      console.log(fl.md ? statsToMarkdown(s) : JSON.stringify(s, null, 2));
+    } else if (cmd === 'skill-sizes') {
+      var sizes = computeSkillSizes();
+      var szf = parseFlags(rest);
+      console.log(szf.md ? skillSizesToMarkdown(sizes) : JSON.stringify(sizes, null, 2));
+    } else if (cmd === 'path') {
+      console.log(JSON.stringify({ storeDir: STORE_DIR, journal: JOURNAL_FILE, activeMarker: ACTIVE_FILE }, null, 2));
+    } else if (cmd === 'clear') {
+      var cf = parseFlags(rest);
+      if (!cf.yes) { console.error('Refusing to clear without --yes'); process.exit(1); }
+      if (fs.existsSync(JOURNAL_FILE)) fs.renameSync(JOURNAL_FILE, JOURNAL_FILE + '.bak'); // the journal becomes the .bak; a fresh one is created on the next record
+      console.log('journal cleared (backup at ' + JOURNAL_FILE + '.bak)');
+    } else {
+      console.error('Unknown command: ' + cmd);
+      console.error('Commands: record, set-active, clear-active, active, list, stats, skill-sizes, path, clear');
+      process.exit(1);
+    }
+  } catch (e) {
+    console.error('journal-utils error: ' + e.message);
+    process.exit(1);
+  }
+}
+
+module.exports = {
+  recordEvent: recordEvent,
+  setActive: setActive,
+  clearActive: clearActive,
+  getActive: getActive,
+  readEvents: readEvents,
+  computeStats: computeStats,
+  statsToMarkdown: statsToMarkdown,
+  computeSkillSizes: computeSkillSizes,
+  skillSizesToMarkdown: skillSizesToMarkdown,
+  STORE_DIR: STORE_DIR,
+  JOURNAL_FILE: JOURNAL_FILE,
+  ACTIVE_FILE: ACTIVE_FILE,
+  VALID_EVENT_TYPES: VALID_EVENT_TYPES
+};
+
+if (require.main === module) {
+  runCli();
+}
diff --git a/.github/skill-evolution/.gitignore b/.github/skill-evolution/.gitignore
new file mode 100644
index 00000000..0f4d7ea9
--- /dev/null
+++ b/.github/skill-evolution/.gitignore
@@ -0,0 +1,5 @@
+# The live friction journal is stored outside the repo (~/.skill-evolution/ by default).
+# If SKILL_EVOLUTION_HOME is pointed here, ignore the generated journal artifacts.
+journal.jsonl +journal.jsonl.bak +active-skill.json diff --git a/.github/skill-evolution/evolution-log.md b/.github/skill-evolution/evolution-log.md new file mode 100644 index 00000000..57abfd94 --- /dev/null +++ b/.github/skill-evolution/evolution-log.md @@ -0,0 +1,174 @@ +# Skill Evolution Log + +Auditable changelog of changes applied by the `skill-evolver` system. Each entry links a +captured-friction finding to the concrete edit that addressed it, with a rollback reference. + +Newest entries on top. See `.github/skills/skill-evolver/references/edit-safety-rules.md` for the +entry format. + + + +## 2026-06-16 — skill-creator ⇄ skill-evolver: bidirectional lifecycle handoff + "Needs a new skill" outcome + +- **Target:** skill-evolver → `SKILL.md` (§2 target decision), `references/classification-rubric.md`, + `references/bloat-control.md`; skill-creator → `SKILL.md` (Step 6 Iterate) +- **Root cause:** missing_context (design gap, user-raised). The build-time (creator) and run-time + (evolver) halves of the skill lifecycle had no documented handoff, and the evolver had **no branch + for "evolving an existing skill is insufficient — a new skill is needed."** The rubric's 5 + categories all resolved to edit/no-edit; "Novel task" only said "add a section." +- **Evidence:** user questions — (1) does a creator→evolver pointer skip Step 6 / does evolver cover + it; (2) does the evolver ever recommend a *new* skill. Verified against files: no new-skill path + existed, no cross-references existed. +- **Change:** + - **creator → evolver:** Step 6 now notes it covers immediate authoring tweaks, and points to + skill-evolver for continuous, evidence-based iteration after the skill is in use. + - **evolver → creator:** new **"Needs a new skill"** classification outcome (substantial + out-of-scope task, or splitting an over-budget skill doing two jobs) → recommend creating via + skill-creator. 
Added to the rubric table, §2 target-decision list, and bloat-control's prune + procedure (split vs. cram). +- **Decision:** keep the skills separate (different triggers, freedom levels, and the 1024-char + description ceiling) — integrate via lightweight cross-references, not a merge. +- **Validation:** both skills pass `quick_validate.py`; `skill-sizes` reports all within budget + (skill-creator 361, skill-evolver 112 lines). +- **Commit:** branch `skill-evolution/copilot-cli-active-capture` (rollback: `git revert `). + + +## 2026-06-16 — skill-evolver: trim description to clear self-flagged DESC_WARN + +- **Target:** skill-evolver → `.github/skills/skill-evolver/SKILL.md` (frontmatter `description`) +- **Root cause:** bloat (low). The `skill-sizes` tripwire added in the prior commit immediately + flagged skill-evolver's own description at 1019/1024 chars (DESC_WARN). +- **Evidence:** `skill-sizes --md` output — skill-evolver was the only flagged skill. +- **Change:** removed redundant trigger phrasings ("note that something went wrong, didn't work, or + was confusing" overlapped with "that didn't go well"; dropped one duplicate example) and tightened + the global-lessons clause. Strongest trigger phrases preserved. +- **Validation:** `quick_validate.py` passes; description now 887 chars (under the 900 warn); + `skill-sizes --md` reports "✅ All skills within budget." +- **Commit:** branch `skill-evolution/copilot-cli-active-capture` (rollback: `git revert `). +- **Result/trend:** the tripwire flagged its own author and the prune cleared it — the anti-bloat + loop works end to end. 
+ + +## 2026-06-16 — skill-evolver: add anti-bloat guardrails (#1 prune, #2 tripwire, #3 consolidate, #4 references) + +- **Target:** skill-evolver → `journal-utils.js`, `SKILL.md`, `references/edit-safety-rules.md`, + `references/bloat-control.md` (new) +- **Root cause:** design risk (user-raised) — the loop has an addition bias; every retrospective + tends to *add* a rule, so skills bloat over time and pay a per-trigger token tax. Nothing in the + loop pruned, consolidated, or measured skill weight. +- **Evidence:** in one session skill-evolver took 4 edits, all additions; largest skills already + 320–369 lines vs the 500-line guideline. +- **Change:** + - **#2 tripwire:** new `journal-utils.js skill-sizes` command scans every SKILL.md and flags + body >400/500 lines and description >900/1024 chars. + - **#1 prune:** SKILL.md §4 renamed "Measure & prune" — run `skill-sizes` each retro; every ~5th + retro (or when flagged) propose *removals*, not just additions. + - **#3 + #4:** new edit-safety rule 6 (consolidate over append; references over body; don't add + to an over-budget skill without pruning). + - New `references/bloat-control.md` holds the budgets + prune procedure (kept out of the + always-loaded body — practicing #4). +- **Validation:** `quick_validate.py` passes; `skill-sizes --md` runs and correctly flags + skill-evolver's own description (1019 chars, DESC_WARN). SKILL.md body grew only 104→111 lines + because detail went into the reference. +- **Commit:** branch `skill-evolution/copilot-cli-active-capture` (rollback: `git revert `). +- **Follow-up:** skill-evolver's description (1019/1024) should be trimmed at the next pass — the + tripwire is already flagging the tool's own author. + + +## 2026-06-16 — skill-evolver: require proposals to name target skill + file + +- **Target:** skill-evolver → `.github/skills/skill-evolver/SKILL.md` (§3 Propose, review, apply) +- **Root cause:** skill defect (medium). 
The Propose section required "concrete diffs" but did + not require each proposal to state *which skill and file* it targets. Since this skill evolves + many skills, reviewers couldn't tell at a glance what each fix changed. +- **Evidence:** `missing_context`, medium (user-reported) — "I didn't see which skill the fix was + for" during retro #1/#2 proposals. +- **Change:** §3 now mandates a per-proposal header + `Target: <skill> · <file> · <section>`, a summary table (# · Target skill · File · + Root cause · Severity) when proposing multiple fixes, and naming the target skill in per-fix + approval questions. +- **Validation:** `quick_validate.py` passes. +- **Commit:** branch `skill-evolution/copilot-cli-active-capture` (rollback: `git revert <sha>`). +- **Result/trend:** future retrospective proposals will be unambiguous about scope per skill. + + +## 2026-06-16 — skill-evolver: clarify git branch creation uses powershell tool (retro #2) + +Source: retrospective #2. 7 events in journal (4 carried from retro #1 — all confirmed fixed, +no recurrence). 3 new events captured. 1 skill defect actioned. + +### 4. skill-evolver: `git checkout -b` clarification in edit-safety-rules +- **Root cause:** skill defect (medium). edit-safety-rules said `git checkout -b` without + specifying *which* tool — I used `gitkraken-git_checkout` (doesn't support `-b`) when + the `powershell` tool works fine with native git. +- **Evidence:** `tool_error`, medium — had to use an unnecessary two-step workaround + (git_branch create + git_checkout), costing an extra turn. Verified: `git checkout -b` + works perfectly via the powershell tool. +- **Change:** one-line clarification in Workflow step 1 of + `.github/skills/skill-evolver/references/edit-safety-rules.md`: specify + "via the powershell tool (not gitkraken-git_checkout, which doesn't support -b)". +- **Not actioned:** event #6 (`ask_user` interruption — environmental, no fix) and + event #7 (dirty workspace file — environmental, user skipped the doc nudge). 
+ +- **Validation:** `quick_validate.py` passes. +- **Commit:** see branch `skill-evolution/copilot-cli-active-capture` (rollback: `git revert `). +- **Result/trend:** 4/4 carried-over defects still resolved; 1 new defect fixed; 2 environmental. + Velocity: retro #2 closed faster than retro #1 — journal patterns are getting cleaner. + + +## 2026-06-16 — skill-evolver: make active capture first-class, quarantine non-firing hook (Option A) + +Source: investigation of "why isn't PostToolUse/Stop firing". Root cause: the GitHub +Copilot CLI runtime has no hooks system, and `orchestrator.json` used the Claude Code hook +schema, so `friction-capture.js` never fired. Developer chose **Option A** (Copilot CLI only). + +- **Root cause:** environmental / `skill_step_mismatch` (high) — automatic capture was + presented as primary but cannot fire on this runtime. +- **Evidence:** empty journal despite real tool failures; CLI docs show no hooks feature; + no runtime config references `orchestrator.json`. +- **Change:** + - `orchestrator.json`: removed the `PostToolUse`/`Stop` and the second `SubagentStop` + `friction-capture.js` registrations (kept the orchestrator's own subagent hooks). + - `friction-capture.js`: marked DORMANT with a header banner — Claude Code-only, not + registered on Copilot CLI; documents how to enable via `.claude/settings.json`. + - `skill-evolver/SKILL.md`: reframed capture so **active capture is the primary mechanism** + (Architecture, Capture section table, attribution note, non-intrusiveness + off-switch + wording all updated to stop implying an automatic hook runs here). +- **Validation:** `quick_validate.py` passes; `orchestrator.json` parses and no longer + references friction-capture; CLI `record`/`stats` still work (active capture intact). +- **Commit:** see branch `skill-evolution/copilot-cli-active-capture` (rollback: `git revert `). 
+- **Result/trend:** capture now honestly reflects the runtime; no false reliance on a hook + that never fires. + + +## 2026-06-16 — skill-creator, skill-evolver: first retrospective (3 fixes) + +Source: retrospective run over `~/.skill-evolution/journal.jsonl` (3 active-captured events +from the build session). All fixes approved individually by the developer. + +### 1. skill-creator: document PyYAML prerequisite +- **Root cause:** skill defect (missing context). `quick_validate.py`/`package_skill.py` import + `yaml`, but the skill never states the dependency. +- **Evidence:** `missing_context`, medium — `ModuleNotFoundError: No module named 'yaml'` hit while + validating skill-evolver; required `pip install pyyaml` (1 extra turn). +- **Change:** added a "Prerequisite: requires PyYAML" note to Step 5 (Packaging) in + `.github/skills/skill-creator/SKILL.md`. + +### 2. skill-evolver: clarify automatic capture is best-effort +- **Root cause:** environmental (this runtime did not fire `PostToolUse`/`Stop`), not a code bug. + Doc-clarification only. +- **Evidence:** `trigger_miss`, medium — journal was empty despite real tool failures this session. +- **Change:** sharpened the Architecture bullet in `.github/skills/skill-evolver/SKILL.md` to mark + automatic capture best-effort and active capture the PRIMARY path. + +### 3. skill-evolver: make the 1024-char description limit explicit +- **Root cause:** skill defect (low). `edit-safety-rules.md` said "keep under the size limits" + without the number or a check command. +- **Evidence:** `retry`, low — description overshot 1024 (1184 → 1054) twice before fitting (2 turns). +- **Change:** added explicit ≤1024 limit + a PowerShell length-check command to rule 5 in + `.github/skills/skill-evolver/references/edit-safety-rules.md`. + +- **Validation:** `quick_validate.py` passes for both skill-evolver and skill-creator. +- **Commit:** see branch `skill-evolution/retro-2026-06-16` (rollback: `git revert `). 
+- **Result/trend:** to be measured on the next retrospective (expect these signatures not to recur). + diff --git a/.github/skills/skill-creator/SKILL.md b/.github/skills/skill-creator/SKILL.md index b7f86598..2d6e96f6 100644 --- a/.github/skills/skill-creator/SKILL.md +++ b/.github/skills/skill-creator/SKILL.md @@ -321,6 +321,8 @@ Write instructions for using the skill and its bundled resources. Once development of the skill is complete, it must be packaged into a distributable .skill file that gets shared with the user. The packaging process automatically validates the skill first to ensure it meets all requirements: +**Prerequisite:** the validation and packaging scripts require PyYAML. If you hit `ModuleNotFoundError: No module named 'yaml'`, run `pip install pyyaml` first. + ```bash scripts/package_skill.py ``` @@ -354,3 +356,5 @@ After testing the skill, users may request improvements. Often this happens righ 2. Notice struggles or inefficiencies 3. Identify how SKILL.md or bundled resources should be updated 4. Implement changes and test again + +This step covers immediate, in-the-moment tweaks while authoring. For **continuous, evidence-based iteration after the skill is in use** — capturing friction during real tasks, running retrospectives, and proposing reviewed edits — use the **skill-evolver** skill (`.github/skills/skill-evolver/SKILL.md`). diff --git a/.github/skills/skill-evolver/SKILL.md b/.github/skills/skill-evolver/SKILL.md new file mode 100644 index 00000000..313a3535 --- /dev/null +++ b/.github/skills/skill-evolver/SKILL.md @@ -0,0 +1,111 @@ +--- +name: skill-evolver +description: Closed-loop self-improvement for skills, prompts, and AI tools. 
Captures friction (tool errors, repeated retries, wrong or outdated instructions, missing context, missed or wrong skill triggers, user corrections) into a structured journal, then runs retrospectives that classify root causes and propose concrete, reviewable edits to the offending SKILL.md, references, scripts, or copilot-instructions.md. Use whenever the user wants to improve, evolve, tune, or fix a skill or its instructions; run a skill retrospective; review or analyze skill friction; or says things like "improve my skills", "what went wrong with X skill", "why didn't skill Y trigger", "this skill is outdated or wrong", "you keep making the same mistake", or "that didn't go well". Also use PROACTIVELY at the end of any task that hit notable friction (repeated tool failures or a user correction) to log a note. +--- + +# Skill Evolver + +Make skills and tools get better over time. The loop: **capture → analyze → propose → review → apply → validate → measure**. + +## Architecture (already wired in this repo) + +- **Store CLI**: `.github/hooks/journal-utils.js` — single writer for the JSONL friction journal (`~/.skill-evolution/journal.jsonl`) and the active-skill attribution marker. +- **Capture is ACTIVE on this runtime.** On the GitHub Copilot CLI there is **no hooks system**, so capture happens because **you (the agent) record friction yourself** via `journal-utils.js record`. This is the primary and only reliable mechanism here — treat logging friction as part of doing the task, not something a hook does for you. +- **Dormant auto-capture hook**: `.github/hooks/friction-capture.js` is a Claude Code-style `PostToolUse`/`Stop` hook. It does **not** fire on Copilot CLI and is intentionally **not** registered in `orchestrator.json`. It's kept only for teams running this repo under Claude Code (register it in `.claude/settings.json` there). Do not rely on it here. +- **Validation**: reuse `.github/skills/skill-creator/scripts/quick_validate.py` after every edit. 
+- **Changelog**: `.github/skill-evolution/evolution-log.md` records every applied change (for audit + rollback). + +## Non-intrusiveness & controls + +This system is designed to stay out of the way: + +- **Silent capture.** Recording a friction event only appends one line to the journal file. It never interrupts the user, never asks a question, and never changes your task flow. Log **only real friction** (failures, retries, wrong instructions, corrections) so the journal stays high-signal. +- **No mid-task edits.** Skills are never auto-edited. Analysis and proposals happen only when you invoke a retrospective, and every behavior-affecting edit is gated on your approval. +- **Proactive logging must not derail the user.** If you log a friction note proactively at the end of a friction-heavy task, do it in **one line, recorded silently** via the CLI — do NOT ask the user a question, pause their task, or expand scope to discuss it. They review the journal later. +- **Off switch.** Set the environment variable `SKILL_EVOLUTION_DISABLE=1` to silence capture (CLI `record` becomes a no-op; the dormant hook is already inert). Reviewing past data (`stats`, `list`) still works. Unset it to re-enable. + +## 1. Capture (active — this is the main job on Copilot CLI) + +**You are the capture mechanism.** There is no background hook on this runtime, so friction is only recorded if you record it. Make this a habit: whenever you hit friction, append one line to the journal before moving on. + +| Path | Who | How | +|------|-----|-----| +| **Active (primary)** | you (agent) | The moment you notice friction, record it via the CLI (below). | +| User-flagged | user | "that didn't go well" → record the last friction with their context. | +| Dormant hook | — | Not active on Copilot CLI; see Architecture. Ignore for capture here. | + +**Record a friction event** (see [references/friction-schema.md](references/friction-schema.md) for the schema and the `eventType` catalog). 
Use single quotes around the JSON on PowerShell: + +```powershell +node .github/hooks/journal-utils.js record '{"skill":"release-helper","tool":"powershell","eventType":"skill_step_mismatch","severity":"high","expected":"pipeline YAML under 1ES-Pipelines/","actual":"skill pointed to azure-pipelines/ which is deprecated","fixHint":"update path reference in SKILL.md step 3"}' +``` + +**Attribute events to a skill**: optionally mark the skill you're working under so events default to it (otherwise they record as `skill: "unknown"` and get triaged later): + +```powershell +node .github/hooks/journal-utils.js set-active <skill-name> +# ... work ... +node .github/hooks/journal-utils.js clear-active +``` + +**When to actively record** (don't log noise — log signal): +- A skill step referenced a wrong/outdated path, file, command, or API. +- The skill that *should* have triggered didn't (`trigger_miss`) — the description needs tuning. +- You needed context the skill should have provided and had to go discover it (`missing_context`). +- The user corrected your approach in a way a better instruction would have prevented (`user_correction`). +- A documented step failed or contradicted reality (`skill_step_mismatch`, `dead_end`). + +## 2. Retrospective (analyze) + +Run when asked to improve/evolve skills or review friction. + +1. **Pull the digest** (deterministic aggregation; ranks recurring issues by frequency × severity): + ```powershell + node .github/hooks/journal-utils.js stats --md + node .github/hooks/journal-utils.js skill-sizes --md + ``` + For raw events of one skill: `node .github/hooks/journal-utils.js list --skill <skill-name>`. + `skill-sizes` flags any skill over its body/description budget — those are pruning candidates (see section 4, "Measure & prune"). + +2. **Classify each recurring group** using [references/classification-rubric.md](references/classification-rubric.md).
The critical judgment: is this a **skill defect** (fixable by editing the skill), a **model mistake**, an **environment issue**, or a **genuinely novel task**? Only skill defects (and global-convention gaps) become edits. + +3. **Decide the target** of each fix: + - Single-skill defect → edit that skill's `SKILL.md` / `references/` / `scripts/`. + - Cross-cutting lesson that applies to many skills → edit `.github/copilot-instructions.md` instead. + - Trigger miss → tune the skill's `description` frontmatter (the activation mechanism). + - **Needs a new skill** → when a substantial novel task fits no existing skill, or an over-budget skill is really doing two jobs and should be **split**, don't force-fit it. Recommend creating a new skill via the **skill-creator** skill (`.github/skills/skill-creator/SKILL.md`) and hand off. + +## 3. Propose, review, apply + +Follow [references/edit-safety-rules.md](references/edit-safety-rules.md) strictly. Summary: + +1. **Lead every proposal with its target.** Before the diff, each proposed fix MUST state, on one line: + **`Target: ` · `` · ``** + (for global lessons use `Target: copilot-instructions.md (all skills)`). This skill evolves *many* + skills, so the reviewer must see at a glance which skill each fix changes — never bury it. + When proposing multiple fixes, also include a one-row-per-fix summary table with columns + **# · Target skill · File · Root cause · Severity** before the detailed diffs. +2. **Propose concrete diffs** — never vague advice. Show the exact before/after for each file. +3. **Gate on human review** — present proposals and use `ask_user` to get approval. When asking + per-fix, name the target skill in the question (e.g. "Apply fix #2 to **release-helper**?"). + Never silently change behavior-affecting instructions. +4. **Apply on a branch** (`skill-evolution/`), one logical change per commit. +5. 
**Validate** every edited skill: + ```powershell + python .github/skills/skill-creator/scripts/quick_validate.py .github/skills/ + ``` +6. **Log it** — append an entry to `.github/skill-evolution/evolution-log.md` (issue, evidence, change, target, rollback ref). +7. **Offer a PR** for the branch when the user wants it. + +## 4. Measure & prune + +After fixes land, re-run `stats` over time to confirm the friction rate for the edited skill is trending down. Note the trend in the evolution-log entry. If an edit didn't help, roll it back (see edit-safety-rules) and try a different fix. + +**Counter the addition bias.** The loop naturally *adds* rules; without pushback, skills bloat. So: +- Run `skill-sizes --md` each retrospective; any flagged skill is a **pruning candidate**. +- Every ~5th retrospective (or whenever a skill is flagged), propose **removals** — obsolete, redundant, one-off, or contradictory rules — not just additions. See [references/bloat-control.md](references/bloat-control.md) for the prune procedure and budgets. A prune goes through the same review gate as any edit. + +## Scope notes + +- This system also applies to non-skill assets: prompt templates, agent instruction files, and MCP-usage notes — the same capture/analyze/propose loop works for them. +- Do **not** mass-edit every skill to call `set-active`; attribution is opt-in. Unattributed events default to `skill: "unknown"` and are triaged during the retrospective. diff --git a/.github/skills/skill-evolver/references/bloat-control.md b/.github/skills/skill-evolver/references/bloat-control.md new file mode 100644 index 00000000..178041d7 --- /dev/null +++ b/.github/skills/skill-evolver/references/bloat-control.md @@ -0,0 +1,49 @@ +# Bloat Control + +The evolver has a built-in **addition bias**: every retrospective tends to *add* a rule. +Without counter-pressure, skills grow into unreadable caveat-soup and pay a per-trigger token +tax. These guardrails keep skills lean. 
Read this when running the Prune phase or when +`skill-sizes` flags a skill. + +## Size budget (tripwire) + +Run the automated check during every retrospective: + +```powershell +node .github/hooks/journal-utils.js skill-sizes --md +``` + +Thresholds (enforced by the tool): + +| Dimension | Warn | Over | +|-----------|------|------| +| SKILL.md body lines | > 400 | > 500 (skill-creator's stated max) | +| `description` chars | > 900 | > 1024 (hard limit) | + +Any skill flagged `BODY_OVER` / `BODY_WARN` / `DESC_*` is a candidate for pruning or relocation +**before** adding anything new to it. + +## Prune procedure + +When a skill is flagged (or every ~5th retrospective), look for and propose **removals**, not just +additions: + +1. **Obsolete** — rules for a path/API/tool that no longer exists. Delete. +2. **Redundant** — two bullets saying the same thing, or a rule the model would follow anyway. Merge or drop. +3. **One-off `low`-severity notes** — caveats added for a single incident that never recurred (check the journal: if the signature appears once and is old, expire it). +4. **Contradictions** — a newer rule that supersedes an older one. Keep one, remove the other. + +If a skill is over budget because it's covering **two distinct jobs**, pruning won't help — the right fix is to **split** it into a new skill (classify as "Needs a new skill" and hand off to `skill-creator`), not to keep cramming. + +Propose prunes through the same review gate as any edit (lead with `Target:`, get approval, log it). +A retrospective that removes a stale rule is as valuable as one that adds a needed rule. + +## Append discipline (stop new bloat at the source) + +- **Consolidate over append.** Prefer editing or tightening an existing instruction over adding a + new bullet. Two short rules that overlap should become one. 
+- **References over body.** Put detailed caveats, examples, and edge-case handling in `references/` + (progressive disclosure), not in the always-loaded SKILL.md body. The body stays a lean index; + the detail loads only when needed. This is skill-creator's core principle. +- **Earn the line.** Every line added to a SKILL.md body costs tokens on every trigger. Only add to + the body if the lesson is core and high-frequency; otherwise it goes in a reference or is dropped. diff --git a/.github/skills/skill-evolver/references/classification-rubric.md b/.github/skills/skill-evolver/references/classification-rubric.md new file mode 100644 index 00000000..46522ff6 --- /dev/null +++ b/.github/skills/skill-evolver/references/classification-rubric.md @@ -0,0 +1,33 @@ +# Classification Rubric + +Most friction is **not** a skill defect. Classify before editing, or you will pollute skills +with noise. For each recurring group from `journal-utils.js stats`, assign one root cause. + +## Root-cause categories + +| Category | Signals | Action | +|----------|---------|--------| +| **Skill defect** | Documented step is wrong/outdated; path/API/command no longer exists; missing a step the task always needs; description too narrow to trigger | **Edit the skill.** This is the only category that normally changes a skill. | +| **Global-convention gap** | The same lesson would apply to many skills/tasks (e.g. a repo-wide path move, a naming rule) | **Edit `copilot-instructions.md`**, not a single skill. | +| **Model mistake** | The skill was correct; the agent misread or skipped it; one-off reasoning slip | **No edit.** Optionally tighten wording only if the instruction was genuinely easy to misread. | +| **Environment issue** | Network/auth failure, missing local tool, transient flake, permissions | **No skill edit.** Note it; route to setup docs if recurring. 
| +| **Novel task** | Legitimately new scenario the skill never claimed to cover | **No edit** for a small case (add a section if now in-scope). For a substantial out-of-scope task → **Needs a new skill** (below). | +| **Needs a new skill** | No existing skill fits a substantial task; or an over-budget skill is doing two distinct jobs and should be **split** | **Don't force-fit.** Recommend creating a new skill via the `skill-creator` skill, then hand off. Editing an unrelated skill here just causes bloat and trigger confusion. | + +## Decision heuristics + +- **Frequency × severity first.** Use the ranked `recurring` list; start at the top. A single + low-severity event is rarely worth a change. +- **Reproducibility.** If the documented step demonstrably contradicts the current repo/codebase, + it's a skill defect — verify against the actual file/path/API before editing. +- **Was the instruction present and correct?** If yes and the agent still erred → model mistake, + not a skill defect. Don't bloat the skill to patch a one-off. +- **Trigger misses are description bugs.** If the right skill didn't fire, the fix is almost always + the `description` frontmatter (add the missing trigger phrasing/scenario), not the body. +- **One lesson, right home.** If a fix would need to be copied into 3+ skills, it belongs in + `copilot-instructions.md` instead. + +## Output of classification + +For each group produce: `{ skill, eventType, rootCause, evidence (event ids/quotes), target file, +proposed change, severity }`. Carry this into the propose/review step. diff --git a/.github/skills/skill-evolver/references/edit-safety-rules.md b/.github/skills/skill-evolver/references/edit-safety-rules.md new file mode 100644 index 00000000..9922e611 --- /dev/null +++ b/.github/skills/skill-evolver/references/edit-safety-rules.md @@ -0,0 +1,57 @@ +# Edit Safety Rules + +Skills change agent behavior. Treat every edit as a reviewed code change. + +## Hard rules + +1. 
**Propose, don't silently edit.** Always show concrete before/after diffs and get explicit + human approval (`ask_user`) before applying a behavior-affecting change. Trivial fixes (typos, + a dead path → correct path) may be batched, but must still be listed for review. +2. **Verify against reality first.** Before claiming a step is wrong, confirm the correct + path/API/command exists in the current codebase. Never "fix" based on the journal alone. +3. **Smallest change that resolves the issue.** Don't rewrite a skill to patch one defect. Prefer + editing the specific step/reference over restructuring. +4. **Right target.** Single-skill defect → that skill. Cross-cutting → `copilot-instructions.md`. + Trigger miss → the skill `description`. +5. **Preserve the skill contract.** Keep `SKILL.md` frontmatter to allowed keys only + (`name`, `description`, `license`, `allowed-tools`, `metadata`); no angle brackets in + `description`; `description` must be **≤1024 characters**. Check before saving: + `(Select-String -Path <path-to-SKILL.md> -Pattern '^description:').Line.Length` (subtract the + 13-character `description: ` prefix). Keep it under the size limits. +6. **Consolidate over append (anti-bloat).** Prefer editing or merging an existing instruction + over adding a new bullet. Put detailed caveats/examples in `references/`, not the always-loaded + SKILL.md body. Don't add to a skill already over budget (`skill-sizes`) without pruning first. + See [bloat-control.md](bloat-control.md). + +## Workflow + +1. Create a branch: run `git checkout -b skill-evolution/<topic>` via the **powershell tool** (not `gitkraken-git_checkout`, which doesn't support `-b`). +2. Make one logical change per commit; reference the journal event ids in the commit body. +3.
**Validate** each edited skill: + ```powershell + python .github/skills/skill-creator/scripts/quick_validate.py .github/skills/<skill-name> + ``` + For larger changes also run the packager validation: + `python .github/skills/skill-creator/scripts/package_skill.py .github/skills/<skill-name>`. +4. Append an evolution-log entry (format below). +5. Offer to open a PR. Do not auto-merge. + +## Rollback + +- Each evolution-log entry records the commit SHA. To revert: `git revert <sha>` (or restore the + pre-change version of the file from that commit) and add a follow-up log entry noting the revert + and why the fix didn't help. + +## Evolution-log entry format + +Append to `.github/skill-evolution/evolution-log.md`: + +```markdown +## <YYYY-MM-DD> — <skill>: <short title> +- **Root cause:** skill defect | global-convention gap | ... +- **Evidence:** event ids / quotes from the journal (frequency × severity) +- **Change:** what was edited (file + nature of change) +- **Target:** path to the edited file(s) +- **Commit:** <sha> (rollback: `git revert <sha>`) +- **Result/trend:** (fill in after measuring) friction for this skill before vs after +``` diff --git a/.github/skills/skill-evolver/references/friction-schema.md b/.github/skills/skill-evolver/references/friction-schema.md new file mode 100644 index 00000000..91bc92af --- /dev/null +++ b/.github/skills/skill-evolver/references/friction-schema.md @@ -0,0 +1,43 @@ +# Friction Event Schema + +One JSON object per line in `~/.skill-evolution/journal.jsonl`. Written only via +`journal-utils.js record` (single writer). Fields auto-filled by the store are marked *(auto)*. + +## Fields + +| Field | Type | Notes | +|-------|------|-------| +| `id` | string | *(auto)* `fr--` | +| `ts` | number | *(auto)* epoch ms | +| `iso` | string | *(auto)* ISO-8601 timestamp | +| `skill` | string | Owning skill; defaults to the active-skill marker, else `"unknown"` | +| `tool` | string or null | Tool involved (e.g.
`powershell`, `ado-wit_create_work_item`) | +| `eventType` | enum | See catalog below; invalid values coerced to `note` | +| `severity` | enum | `low`, `medium`, or `high` (default `medium`) | +| `expected` | string | What should have happened | +| `actual` | string | What actually happened | +| `detail` | string | Error text / context snippet (truncated ~1200 chars) | +| `turnsCost` | number | Approx. extra turns the friction cost (default 0) | +| `fixHint` | string | Optional concrete suggestion for the fix | +| `source` | enum | `hook`, `agent`, `cli`, or `user` | +| `sessionId` | string or null | Optional session correlation id | + +## eventType catalog + +| Type | Use when | Typical fix target | +|------|----------|--------------------| +| `tool_error` | A tool/command failed or returned an error | Skill step, script, or environment | +| `retry` | The same operation needed repeated attempts | Skill step clarity / determinism | +| `user_correction` | The user redirected the approach | Skill instructions / defaults | +| `dead_end` | An approach was pursued then abandoned | Skill decision guidance | +| `missing_context` | Needed info the skill should have supplied | Skill body / references | +| `ambiguity` | A clarifying question was required that a better instruction would prevent | Skill instructions | +| `trigger_miss` | The skill failed to activate (or the wrong skill fired) | Skill `description` frontmatter | +| `skill_step_mismatch` | A documented step contradicted reality (wrong path/API/command) | Skill step / references | +| `note` | Free-form observation that doesn't fit above | Triage during retrospective | + +## Severity guidance + +- `high` — blocked progress, caused a wrong result, or wasted many turns. +- `medium` — slowed things down, required a workaround. +- `low` — minor friction, cosmetic, or easily self-corrected.