From 7d158b40e36e1f8ed7bb07986c63f780d9f62536 Mon Sep 17 00:00:00 2001 From: wzhipan Date: Tue, 16 Jun 2026 13:03:04 -0700 Subject: [PATCH 01/10] Add skill-evolver self-improving skills system MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces a closed feedback loop (capture → analyze → propose → review → apply → validate → measure) for evolving skills and AI tools. - .github/hooks/journal-utils.js: single-writer JSONL friction journal store CLI + module (record, set-active, stats, list) under ~/.skill-evolution/ - .github/hooks/friction-capture.js: PostToolUse/Stop hook that auto-logs tool failures, attributes them to the active skill, clears attribution on Stop - .github/hooks/orchestrator.json: register PostToolUse + Stop capture hooks - .github/skills/skill-evolver/: SKILL.md + friction-schema, classification-rubric, edit-safety-rules references - .github/skill-evolution/: evolution-log changelog + .gitignore for local journal - .github/copilot-instructions.md: register skill-evolver in the skills table Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/copilot-instructions.md | 1 + .github/hooks/friction-capture.js | 124 ++++++++ .github/hooks/journal-utils.js | 270 ++++++++++++++++++ .github/hooks/orchestrator.json | 19 ++ .github/skill-evolution/.gitignore | 5 + .github/skill-evolution/evolution-log.md | 9 + .github/skills/skill-evolver/SKILL.md | 86 ++++++ .../references/classification-rubric.md | 32 +++ .../references/edit-safety-rules.md | 51 ++++ .../references/friction-schema.md | 43 +++ 10 files changed, 640 insertions(+) create mode 100644 .github/hooks/friction-capture.js create mode 100644 .github/hooks/journal-utils.js create mode 100644 .github/skill-evolution/.gitignore create mode 100644 .github/skill-evolution/evolution-log.md create mode 100644 .github/skills/skill-evolver/SKILL.md create mode 100644 .github/skills/skill-evolver/references/classification-rubric.md 
/**
 * friction-capture.js — automatic friction capture for the skill-evolver system.
 *
 * Registered as a PostToolUse and Stop hook (see orchestrator.json). It reads the
 * hook payload from stdin and:
 *   - PostToolUse: if the tool reported a failure/error, appends a high-signal
 *     `tool_error` friction event to the journal (attributed to the active skill).
 *   - Stop / SubagentStop: clears the active-skill marker so attribution does not
 *     leak across tasks.
 *
 * Design rules (match the existing hooks in this folder):
 *   - Never block the tool flow. Always print {continue:true} and exit 0.
 *   - Wrap everything in try/catch; capture is best-effort.
 *   - Only record on detected failure to keep the journal high-signal.
 */

'use strict';

var fs = require('fs');
var path = require('path');

/** Print the "keep going" verdict for the hook runner and exit with status 0. */
function emitAndExit() {
  console.log(JSON.stringify({ continue: true }));
  process.exit(0);
}

// Read stdin (hook input). Only a JSON object/array is accepted as the payload:
// JSON.parse happily returns null or a primitive, and the property reads below
// would then throw outside any try/catch and break the "never block" rule.
var hookInput = {};
try {
  var parsedInput = JSON.parse(fs.readFileSync(0, 'utf-8'));
  if (parsedInput !== null && typeof parsedInput === 'object') {
    hookInput = parsedInput;
  }
} catch (e) {
  // no stdin / not JSON — nothing to capture
  emitAndExit();
}

// Avoid re-entry loops
if (hookInput.stop_hook_active) {
  emitAndExit();
}

var journal;
try {
  journal = require('./journal-utils.js');
} catch (e) {
  // store unavailable — never block the tool flow
  emitAndExit();
}

// Both snake_case and camelCase spellings of the payload fields are accepted.
var eventName = hookInput.hook_event_name || hookInput.hookEventName || '';

try {
  // End-of-task events: clear attribution so the next task starts clean.
  if (eventName === 'Stop' || eventName === 'SubagentStop') {
    journal.clearActive();
    emitAndExit();
  }

  // From here we treat the payload as a (Post)ToolUse event.
  var toolName = hookInput.tool_name || hookInput.toolName || 'unknown-tool';
  var resp = hookInput.tool_response || hookInput.toolResponse || hookInput.result || {};

  var failure = detectFailure(resp);
  if (failure.failed) {
    journal.recordEvent({
      tool: toolName,
      eventType: 'tool_error',
      severity: failure.severity,
      expected: 'Tool call to complete successfully',
      actual: failure.summary,
      detail: failure.detail,
      source: 'hook',
      sessionId: hookInput.session_id || hookInput.sessionId || null
    });
  }
} catch (e) {
  // swallow — capture is deliberately best-effort and must never break the session
}

emitAndExit();
/**
 * Heuristically decide whether a tool response represents a failure, and how bad.
 * Conservative on purpose: false positives create journal noise.
 *
 * Signals, in order of trust:
 *   1. Explicit flags: `success === false`, `is_error`/`isError === true`, truthy `error`.
 *   2. A numeric non-zero `exit_code`/`exitCode` (always severity "high").
 *   3. Error-looking keywords in text. Diagnostic channels (`error`, `stderr`,
 *      `message`, or a bare string response) are always scanned. Payload channels
 *      (`output`, `content`) are scanned only when the response carries no explicit
 *      success signal, so a successful command whose output merely mentions
 *      "error" is not journaled.
 *
 * @param {*} resp Tool response: a string, an object, or null/undefined.
 * @returns {{failed: boolean, severity: string, summary: string, detail: string}}
 *   `summary` is the first line of `detail`, capped at 200 characters.
 */
function detectFailure(resp) {
  var result = { failed: false, severity: 'medium', summary: '', detail: '' };
  if (resp === null || resp === undefined) return result;

  function nonEmpty(x) { return typeof x === 'string' && x !== ''; }

  // `error` may be a plain string or an object carrying a `message`.
  function errorToText(err) {
    if (typeof err === 'string') return err;
    if (err && typeof err === 'object' && typeof err.message === 'string') return err.message;
    return '';
  }

  // `content` may be a string or an array of strings / { text } blocks.
  function contentToText(content) {
    if (typeof content === 'string') return content;
    if (!Array.isArray(content)) return '';
    return content.map(function (part) {
      if (typeof part === 'string') return part;
      return part && typeof part.text === 'string' ? part.text : '';
    }).filter(nonEmpty).join('\n');
  }

  var isText = typeof resp === 'string';
  var errorText = isText ? '' : errorToText(resp.error);

  // Explicit structured failure signals
  var explicitFailure = resp.success === false || resp.is_error === true ||
    resp.isError === true || Boolean(resp.error);
  if (explicitFailure) result.failed = true;

  // Non-zero exit codes (powershell / shell-style tools)
  var exitCode = resp.exit_code !== undefined ? resp.exit_code
    : (resp.exitCode !== undefined ? resp.exitCode : undefined);
  var nonZeroExit = typeof exitCode === 'number' && exitCode !== 0;
  if (nonZeroExit) {
    result.failed = true;
    result.severity = 'high';
  }

  var explicitSuccess = !explicitFailure && !nonZeroExit &&
    (resp.success === true || resp.is_error === false || resp.isError === false || exitCode === 0);

  // Empty strings are dropped so the joined text never starts with a blank line
  // (which would otherwise produce an empty summary).
  var diagnostics = isText ? [resp].filter(nonEmpty)
    : [errorText, resp.stderr, resp.message].filter(nonEmpty);
  var payload = isText ? []
    : [resp.output, contentToText(resp.content)].filter(nonEmpty);
  var allText = diagnostics.concat(payload).join('\n');
  var sniffText = explicitSuccess ? diagnostics.join('\n') : allText;

  // String/text payloads that smell like errors
  if (!result.failed && sniffText &&
      /\b(error|exception|failed|fatal|cannot find|not found|denied|traceback)\b/i.test(sniffText)) {
    result.failed = true;
  }

  if (result.failed) {
    var src = errorText ||
      (nonEmpty(resp.stderr) && resp.stderr) ||
      (nonEmpty(resp.message) && resp.message) ||
      allText || 'Tool reported a failure';
    result.detail = String(src);
    result.summary = result.detail.split('\n')[0].slice(0, 200);
    if (/\b(fatal|denied|traceback|exception)\b/i.test(result.detail)) result.severity = 'high';
  }
  return result;
}
/*
 * journal-utils.js — friction journal store (JSONL) and "active skill" marker.
 *
 * Used by:
 *   - the friction-capture.js hook (require()d as a module), and
 *   - the skill-evolver skill / agent (invoked as a CLI).
 *
 * The live store lives outside the repo so it never pollutes git status.
 * Override the location with SKILL_EVOLUTION_HOME.
 *
 * Store layout (default ~/.skill-evolution/):
 *   journal.jsonl      — one friction event per line (see references/friction-schema.md)
 *   active-skill.json  — { "skill": "<name>", "ts": <epoch-ms> }
 *
 * CLI usage:
 *   node journal-utils.js record '<json>'        → append a friction event
 *   node journal-utils.js set-active <skill>     → mark the active skill for attribution
 *   node journal-utils.js clear-active           → clear the active-skill marker
 *   node journal-utils.js active                 → print the active skill (or "unknown")
 *   node journal-utils.js list [--skill X] [--type Y] [--since ISO] [--limit N]
 *   node journal-utils.js stats [--md]           → aggregated digest (JSON by default)
 *   node journal-utils.js path                   → print store paths
 *   node journal-utils.js clear --yes            → wipe the journal (keeps a .bak)
 */

'use strict';

var fs = require('fs');
var os = require('os');
var path = require('path');

var STORE_DIR = process.env.SKILL_EVOLUTION_HOME ||
  path.join(os.homedir(), '.skill-evolution');
var JOURNAL_FILE = path.join(STORE_DIR, 'journal.jsonl');
var ACTIVE_FILE = path.join(STORE_DIR, 'active-skill.json');

var MAX_FIELD = 1200; // truncate long text fields to keep the journal lean

/**
 * Make sure the store directory exists.
 * A recursive mkdir is already a no-op for an existing directory, so no prior
 * existence check is needed (and none can race with another process).
 */
function ensureStore() {
  fs.mkdirSync(STORE_DIR, { recursive: true });
}

/**
 * Cap a string at MAX_FIELD characters, appending a marker when it was cut.
 * Non-string values are returned untouched.
 */
function truncate(val) {
  if (typeof val !== 'string') return val;
  if (val.length <= MAX_FIELD) return val;
  return val.slice(0, MAX_FIELD) + ' …[truncated]';
}

/**
 * Build an event id of the form `fr-<base36 epoch ms>-<5 base36 chars>`.
 * The random part is zero-padded because Math.random().toString(36) can yield
 * fewer than five digits after the radix point.
 */
function genId() {
  var suffix = (Math.random().toString(36).slice(2) + '00000').slice(0, 5);
  return 'fr-' + Date.now().toString(36) + '-' + suffix;
}

// Event types accepted by recordEvent(); anything else is stored as "note".
var VALID_EVENT_TYPES = [
  'tool_error', 'retry', 'user_correction', 'dead_end', 'missing_context',
  'ambiguity', 'trigger_miss', 'skill_step_mismatch', 'note'
];
// Severities accepted by recordEvent(); anything else is stored as "medium".
var VALID_SEVERITY = ['low', 'medium', 'high'];

/**
 * Append a friction event to the journal.
 *
 * Fills in id/ts/iso/source/skill defaults, coerces an unknown eventType to
 * "note" and an unknown severity to "medium", and truncates verbose text fields.
 *
 * @param {Object} [evt] Partial event; a missing or non-object value is treated as {}.
 * @returns {Object} The event exactly as written to the journal.
 */
function recordEvent(evt) {
  ensureStore();
  if (evt === null || typeof evt !== 'object') evt = {};

  // A caller-supplied timestamp is honoured only when it is a finite number.
  // `iso` is derived from that same timestamp so the two fields cannot disagree.
  var ts = (typeof evt.ts === 'number' && isFinite(evt.ts)) ? evt.ts : Date.now();
  var stored = {
    id: evt.id || genId(),
    ts: ts,
    iso: evt.iso || new Date(ts).toISOString(),
    skill: evt.skill || getActive() || 'unknown',
    tool: evt.tool || null,
    eventType: VALID_EVENT_TYPES.indexOf(evt.eventType) !== -1 ? evt.eventType : 'note',
    severity: VALID_SEVERITY.indexOf(evt.severity) !== -1 ? evt.severity : 'medium',
    expected: truncate(evt.expected || ''),
    actual: truncate(evt.actual || ''),
    detail: truncate(evt.detail || ''),
    turnsCost: typeof evt.turnsCost === 'number' ? evt.turnsCost : 0,
    fixHint: truncate(evt.fixHint || ''),
    source: evt.source || 'agent',
    sessionId: evt.sessionId || null
  };

  fs.appendFileSync(JOURNAL_FILE, JSON.stringify(stored) + '\n', 'utf-8');
  return stored;
}

/** Write the active-skill marker so later events are attributed to `skill`. */
function setActive(skill) {
  ensureStore();
  fs.writeFileSync(ACTIVE_FILE, JSON.stringify({ skill: skill, ts: Date.now() }), 'utf-8');
}

/** Remove the active-skill marker; failures are ignored (best-effort cleanup). */
function clearActive() {
  try {
    if (fs.existsSync(ACTIVE_FILE)) fs.unlinkSync(ACTIVE_FILE);
  } catch (e) { /* ignore */ }
}

/**
 * Read the active-skill marker.
 * @returns {?string} The marked skill, or null when the marker is absent,
 *   unreadable, or has no `skill` value.
 */
function getActive() {
  try {
    if (!fs.existsSync(ACTIVE_FILE)) return null;
    var marker = JSON.parse(fs.readFileSync(ACTIVE_FILE, 'utf-8'));
    return marker && marker.skill ? marker.skill : null;
  } catch (e) {
    return null;
  }
}
/**
 * Load every event from the journal.
 * Blank lines, lines that are not valid JSON, and JSON values that are not
 * plain objects (null, numbers, strings, arrays) are skipped so that consumers
 * can safely read event fields.
 * @returns {Object[]} Events in file order; [] when the journal does not exist.
 */
function readEvents() {
  if (!fs.existsSync(JOURNAL_FILE)) return [];
  var lines = fs.readFileSync(JOURNAL_FILE, 'utf-8').split('\n');
  var out = [];
  for (var i = 0; i < lines.length; i++) {
    var line = lines[i].trim();
    if (!line) continue;
    var parsed;
    try { parsed = JSON.parse(line); } catch (e) { continue; /* skip corrupt line */ }
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) out.push(parsed);
  }
  return out;
}

/**
 * Aggregate the journal into a digest the agent can reason over:
 * totals, per-skill / per-type / per-severity counts, and ranked recurring
 * issues (grouped by skill + eventType + a normalized actual/detail signature).
 *
 * @returns {{total: number, bySkill: Object, byEventType: Object, bySeverity: Object,
 *   recurring: Object[], recent: Object[]}} `recurring` holds at most 25 groups
 *   sorted by descending score; `recent` holds the last 10 events.
 */
function computeStats() {
  var events = readEvents();
  var bySkill = {}, byType = {}, bySeverity = {};
  var groups = Object.create(null); // keys come from journal data; no inherited names
  var severityWeight = { low: 1, medium: 3, high: 8 };
  var hasOwn = Object.prototype.hasOwnProperty;

  // Own-property check so a key such as "constructor" starts from 0, not from
  // the inherited Object.prototype member.
  function bump(counter, key) {
    counter[key] = (hasOwn.call(counter, key) ? counter[key] : 0) + 1;
  }

  for (var i = 0; i < events.length; i++) {
    var e = events[i];
    bump(bySkill, e.skill);
    bump(byType, e.eventType);
    bump(bySeverity, e.severity);

    // Signature: first 8 lowercase words of the message with digit runs
    // collapsed to '#', so "exit 1" and "exit 128" land in the same group.
    var sig = String(e.actual || e.detail || '').toLowerCase()
      .replace(/[0-9]+/g, '#')       // normalize ids/numbers
      .replace(/[^a-z#]+/g, ' ')
      .trim().split(' ').slice(0, 8).join(' ');
    var key = e.skill + '::' + e.eventType + '::' + sig;
    if (!groups[key]) {
      groups[key] = { skill: e.skill, eventType: e.eventType, signature: sig, count: 0, score: 0, sample: e, lastIso: e.iso };
    }
    groups[key].count += 1;
    // Unknown severities weigh the same as "medium".
    groups[key].score += hasOwn.call(severityWeight, e.severity) ? severityWeight[e.severity] : 3;
    if (e.iso > groups[key].lastIso) groups[key].lastIso = e.iso;
  }

  var recurring = Object.keys(groups).map(function (k) { return groups[k]; })
    .sort(function (a, b) { return b.score - a.score; });

  return {
    total: events.length,
    bySkill: bySkill,
    byEventType: byType,
    bySeverity: bySeverity,
    recurring: recurring.slice(0, 25),
    recent: events.slice(-10)
  };
}

/**
 * Render a computeStats() digest as Markdown: a ranked table of recurring
 * issues followed by per-skill counts (highest first).
 * @param {Object} s Digest as returned by computeStats().
 * @returns {string} Markdown text without a trailing newline.
 */
function statsToMarkdown(s) {
  // Keep a value inside one table cell: escape pipes and flatten newlines.
  function cell(value) {
    return String(value).replace(/\|/g, '\\|').replace(/\r?\n/g, ' ');
  }

  var lines = [];
  lines.push('# Friction Digest');
  lines.push('');
  lines.push('Total events: **' + s.total + '**');
  lines.push('');
  lines.push('## Top recurring issues (ranked by frequency × severity)');
  lines.push('');
  lines.push('| Rank | Skill | Type | Count | Score | Signature | Last seen |');
  lines.push('|------|-------|------|-------|-------|-----------|-----------|');
  s.recurring.forEach(function (g, i) {
    lines.push('| ' + (i + 1) + ' | ' + cell(g.skill) + ' | ' + cell(g.eventType) + ' | ' +
      cell(g.count) + ' | ' + cell(g.score) + ' | ' + cell(g.signature) + ' | ' + cell(g.lastIso) + ' |');
  });
  lines.push('');
  lines.push('## Counts by skill');
  Object.keys(s.bySkill).sort(function (a, b) { return s.bySkill[b] - s.bySkill[a]; })
    .forEach(function (k) { lines.push('- ' + k + ': ' + s.bySkill[k]); });
  return lines.join('\n');
}
// ---------------------------------------------------------------------------
// CLI
// ---------------------------------------------------------------------------

/**
 * Parse `--key value` / `--switch` style arguments.
 * A flag followed by another flag (or by nothing) becomes `true`; arguments
 * that do not start with `--` and are not consumed as a value are ignored.
 * @param {string[]} args
 * @returns {Object} Map of flag name (without `--`) to its string value or `true`.
 */
function parseFlags(args) {
  var flags = {};
  for (var i = 0; i < args.length; i++) {
    var arg = args[i];
    if (arg.indexOf('--') !== 0) continue;
    var hasValue = i + 1 < args.length && args[i + 1].indexOf('--') !== 0;
    flags[arg.slice(2)] = hasValue ? args[++i] : true;
  }
  return flags;
}

/**
 * Keep only the last `rawLimit` events.
 * `slice(-0)` and `slice(NaN)` both return the whole array, so the limit is
 * validated first and 0 is handled explicitly.
 * @param {Array} events
 * @param {string|boolean} rawLimit Value of the `--limit` flag.
 * @returns {Array}
 * @throws {Error} When the limit is not a non-negative integer.
 */
function takeLast(events, rawLimit) {
  var text = String(rawLimit);
  if (!/^\d+$/.test(text)) {
    throw new Error('--limit expects a non-negative integer, got: ' + text);
  }
  var n = parseInt(text, 10);
  return n === 0 ? [] : events.slice(-n);
}

/**
 * Command-line entry point; dispatches on the first argument.
 * Any error is reported on stderr as `journal-utils error: <message>` and the
 * process exits with status 1.
 */
function runCli() {
  var argv = process.argv.slice(2);
  var cmd = argv[0];
  var rest = argv.slice(1);

  try {
    if (cmd === 'record') {
      var json = rest[0];
      var evt = json ? JSON.parse(json) : {};
      // Valid JSON that is not an object (null, "text", [..]) cannot be an event.
      if (evt === null || typeof evt !== 'object' || Array.isArray(evt)) {
        throw new Error('record expects a JSON object');
      }
      evt.source = evt.source || 'cli';
      console.log(JSON.stringify(recordEvent(evt)));
    } else if (cmd === 'set-active') {
      setActive(rest[0] || 'unknown');
      console.log('active skill set to: ' + (rest[0] || 'unknown'));
    } else if (cmd === 'clear-active') {
      clearActive();
      console.log('active skill cleared');
    } else if (cmd === 'active') {
      console.log(getActive() || 'unknown');
    } else if (cmd === 'list') {
      var f = parseFlags(rest);
      var events = readEvents();
      if (f.skill) events = events.filter(function (e) { return e.skill === f.skill; });
      if (f.type) events = events.filter(function (e) { return e.eventType === f.type; });
      // ISO-8601 UTC strings in the same format order lexicographically.
      if (f.since) events = events.filter(function (e) { return e.iso >= f.since; });
      if (f.limit !== undefined) events = takeLast(events, f.limit);
      console.log(JSON.stringify(events, null, 2));
    } else if (cmd === 'stats') {
      var s = computeStats();
      var fl = parseFlags(rest);
      console.log(fl.md ? statsToMarkdown(s) : JSON.stringify(s, null, 2));
    } else if (cmd === 'path') {
      console.log(JSON.stringify({ storeDir: STORE_DIR, journal: JOURNAL_FILE, activeMarker: ACTIVE_FILE }, null, 2));
    } else if (cmd === 'clear') {
      var cf = parseFlags(rest);
      if (!cf.yes) { console.error('Refusing to clear without --yes'); process.exit(1); }
      // "Clearing" renames the journal, so the previous contents stay in the .bak file.
      if (fs.existsSync(JOURNAL_FILE)) fs.renameSync(JOURNAL_FILE, JOURNAL_FILE + '.bak');
      console.log('journal cleared (backup at ' + JOURNAL_FILE + '.bak)');
    } else {
      console.error('Unknown command: ' + cmd);
      console.error('Commands: record, set-active, clear-active, active, list, stats, path, clear');
      process.exit(1);
    }
  } catch (e) {
    console.error('journal-utils error: ' + e.message);
    process.exit(1);
  }
}
// Module API: the store functions plus the resolved store paths and the list
// of accepted event types.
module.exports = {
  recordEvent: recordEvent,
  setActive: setActive,
  clearActive: clearActive,
  getActive: getActive,
  readEvents: readEvents,
  computeStats: computeStats,
  statsToMarkdown: statsToMarkdown,
  STORE_DIR: STORE_DIR,
  JOURNAL_FILE: JOURNAL_FILE,
  ACTIVE_FILE: ACTIVE_FILE,
  VALID_EVENT_TYPES: VALID_EVENT_TYPES
};

// Run the CLI only when this file is executed directly, not when it is require()d.
if (require.main === module) {
  runCli();
}
+ + diff --git a/.github/skills/skill-evolver/SKILL.md b/.github/skills/skill-evolver/SKILL.md new file mode 100644 index 00000000..44a9b6af --- /dev/null +++ b/.github/skills/skill-evolver/SKILL.md @@ -0,0 +1,86 @@ +--- +name: skill-evolver +description: Closed-loop self-improvement for skills and AI tools. Captures friction (tool errors, retries, wrong/outdated instructions, missing context, trigger misses, user corrections) into a structured journal, then runs retrospectives that classify root causes and propose concrete, reviewable edits to the offending SKILL.md / references / scripts (or to copilot-instructions.md for global lessons). Use when the user wants to improve, evolve, or tune skills, run a skill retrospective, record that something went wrong, asks "what went wrong with X skill", "why didn't skill Y trigger", "review skill friction", or says "that didn't go well". Also use proactively at the end of a task that hit notable friction to log a note for later. +--- + +# Skill Evolver + +Make skills and tools get better over time. The loop: **capture → analyze → propose → review → apply → validate → measure**. + +## Architecture (already wired in this repo) + +- **Store CLI**: `.github/hooks/journal-utils.js` — single writer for the JSONL friction journal (`~/.skill-evolution/journal.jsonl`) and the active-skill attribution marker. +- **Automatic capture**: `.github/hooks/friction-capture.js` runs on `PostToolUse`/`Stop` (registered in `.github/hooks/orchestrator.json`) and logs tool failures. If the runtime doesn't fire those events, automatic capture is silently skipped — **active capture (below) is the reliable fallback.** +- **Validation**: reuse `.github/skills/skill-creator/scripts/quick_validate.py` after every edit. +- **Changelog**: `.github/skill-evolution/evolution-log.md` records every applied change (for audit + rollback). + +## 1. 
Capture + +Three capture paths feed the same journal: + +| Path | Who | How | +|------|-----|-----| +| Automatic | hook | Tool errors logged on `PostToolUse`. No action needed. | +| Active | you (agent) | When you notice friction mid-task, record it (see below). | +| User-flagged | user | "that didn't go well" → record the last friction with their context. | + +**Record a friction event** (see [references/friction-schema.md](references/friction-schema.md) for the schema and the `eventType` catalog). Use single quotes around the JSON on PowerShell: + +```powershell +node .github/hooks/journal-utils.js record '{"skill":"release-helper","tool":"powershell","eventType":"skill_step_mismatch","severity":"high","expected":"pipeline YAML under 1ES-Pipelines/","actual":"skill pointed to azure-pipelines/ which is deprecated","fixHint":"update path reference in SKILL.md step 3"}' +``` + +**Attribute events to a skill**: when you start working under a skill, optionally mark it active so hook-captured events get attributed: + +```powershell +node .github/hooks/journal-utils.js set-active <skill-name> +# ... work ... +node .github/hooks/journal-utils.js clear-active # (Stop hook also clears it) +``` + +**When to actively record** (don't log noise — log signal): +- A skill step referenced a wrong/outdated path, file, command, or API. +- The skill that *should* have triggered didn't (`trigger_miss`) — the description needs tuning. +- You needed context the skill should have provided and had to go discover it (`missing_context`). +- The user corrected your approach in a way a better instruction would have prevented (`user_correction`). +- A documented step failed or contradicted reality (`skill_step_mismatch`, `dead_end`). + +## 2. Retrospective (analyze) + +Run when asked to improve/evolve skills or review friction. + +1. 
**Pull the digest** (deterministic aggregation; ranks recurring issues by frequency × severity): + ```powershell + node .github/hooks/journal-utils.js stats --md + ``` + For raw events of one skill: `node .github/hooks/journal-utils.js list --skill <skill-name>`. + +2. **Classify each recurring group** using [references/classification-rubric.md](references/classification-rubric.md). The critical judgment: is this a **skill defect** (fixable by editing the skill), a **model mistake**, an **environment issue**, or a **genuinely novel task**? Only skill defects (and global-convention gaps) become edits. + +3. **Decide the target** of each fix: + - Single-skill defect → edit that skill's `SKILL.md` / `references/` / `scripts/`. + - Cross-cutting lesson that applies to many skills → edit `.github/copilot-instructions.md` instead. + - Trigger miss → tune the skill's `description` frontmatter (the activation mechanism). + +## 3. Propose, review, apply + +Follow [references/edit-safety-rules.md](references/edit-safety-rules.md) strictly. Summary: + +1. **Propose concrete diffs** — never vague advice. Show the exact before/after for each file. +2. **Gate on human review** — present proposals and use `ask_user` to get approval. Never silently change behavior-affecting instructions. +3. **Apply on a branch** (`skill-evolution/<topic>`), one logical change per commit. +4. **Validate** every edited skill: + ```powershell + python .github/skills/skill-creator/scripts/quick_validate.py .github/skills/<skill-name> + ``` +5. **Log it** — append an entry to `.github/skill-evolution/evolution-log.md` (issue, evidence, change, target, rollback ref). +6. **Offer a PR** for the branch when the user wants it. + +## 4. Measure + +After fixes land, re-run `stats` over time to confirm the friction rate for the edited skill is trending down. Note the trend in the evolution-log entry. If an edit didn't help, roll it back (see edit-safety-rules) and try a different fix. 
+ +## Scope notes + +- This system also applies to non-skill assets: prompt templates, agent instruction files, and MCP-usage notes — the same capture/analyze/propose loop works for them. +- Do **not** mass-edit every skill to call `set-active`; attribution is opt-in. Unattributed events default to `skill: "unknown"` and are triaged during the retrospective. diff --git a/.github/skills/skill-evolver/references/classification-rubric.md b/.github/skills/skill-evolver/references/classification-rubric.md new file mode 100644 index 00000000..240e0ba2 --- /dev/null +++ b/.github/skills/skill-evolver/references/classification-rubric.md @@ -0,0 +1,32 @@ +# Classification Rubric + +Most friction is **not** a skill defect. Classify before editing, or you will pollute skills +with noise. For each recurring group from `journal-utils.js stats`, assign one root cause. + +## Root-cause categories + +| Category | Signals | Action | +|----------|---------|--------| +| **Skill defect** | Documented step is wrong/outdated; path/API/command no longer exists; missing a step the task always needs; description too narrow to trigger | **Edit the skill.** This is the only category that normally changes a skill. | +| **Global-convention gap** | The same lesson would apply to many skills/tasks (e.g. a repo-wide path move, a naming rule) | **Edit `copilot-instructions.md`**, not a single skill. | +| **Model mistake** | The skill was correct; the agent misread or skipped it; one-off reasoning slip | **No edit.** Optionally tighten wording only if the instruction was genuinely easy to misread. | +| **Environment issue** | Network/auth failure, missing local tool, transient flake, permissions | **No skill edit.** Note it; route to setup docs if recurring. | +| **Novel task** | Legitimately new scenario the skill never claimed to cover | **No edit** unless this scenario is now in-scope; then add a new section. 
| + +## Decision heuristics + +- **Frequency × severity first.** Use the ranked `recurring` list; start at the top. A single + low-severity event is rarely worth a change. +- **Reproducibility.** If the documented step demonstrably contradicts the current repo/codebase, + it's a skill defect — verify against the actual file/path/API before editing. +- **Was the instruction present and correct?** If yes and the agent still erred → model mistake, + not a skill defect. Don't bloat the skill to patch a one-off. +- **Trigger misses are description bugs.** If the right skill didn't fire, the fix is almost always + the `description` frontmatter (add the missing trigger phrasing/scenario), not the body. +- **One lesson, right home.** If a fix would need to be copied into 3+ skills, it belongs in + `copilot-instructions.md` instead. + +## Output of classification + +For each group produce: `{ skill, eventType, rootCause, evidence (event ids/quotes), target file, +proposed change, severity }`. Carry this into the propose/review step. diff --git a/.github/skills/skill-evolver/references/edit-safety-rules.md b/.github/skills/skill-evolver/references/edit-safety-rules.md new file mode 100644 index 00000000..1b80b3d9 --- /dev/null +++ b/.github/skills/skill-evolver/references/edit-safety-rules.md @@ -0,0 +1,51 @@ +# Edit Safety Rules + +Skills change agent behavior. Treat every edit as a reviewed code change. + +## Hard rules + +1. **Propose, don't silently edit.** Always show concrete before/after diffs and get explicit + human approval (`ask_user`) before applying a behavior-affecting change. Trivial fixes (typos, + a dead path → correct path) may be batched, but still listed for review. +2. **Verify against reality first.** Before claiming a step is wrong, confirm the correct + path/API/command exists in the current codebase. Never "fix" based on the journal alone. +3. **Smallest change that resolves the issue.** Don't rewrite a skill to patch one defect. 
Prefer + editing the specific step/reference over restructuring. +4. **Right target.** Single-skill defect → that skill. Cross-cutting → `copilot-instructions.md`. + Trigger miss → the skill `description`. +5. **Preserve the skill contract.** Keep `SKILL.md` frontmatter to allowed keys only + (`name`, `description`, `license`, `allowed-tools`, `metadata`); no angle brackets in + `description`; keep it under the size limits. + +## Workflow + +1. Create a branch: `git checkout -b skill-evolution/<topic>`. +2. Make one logical change per commit; reference the journal event ids in the commit body. +3. **Validate** each edited skill: + ```powershell + python .github/skills/skill-creator/scripts/quick_validate.py .github/skills/<skill-name> + ``` + For larger changes also run the packager validation: + `python .github/skills/skill-creator/scripts/package_skill.py .github/skills/<skill-name>`. +4. Append an evolution-log entry (format below). +5. Offer to open a PR. Do not auto-merge. + +## Rollback + +- Each evolution-log entry records the commit SHA. To revert: `git revert <sha>` (or restore the + pre-change version of the file from that commit) and add a follow-up log entry noting the revert + and why the fix didn't help. + +## Evolution-log entry format + +Append to `.github/skill-evolution/evolution-log.md`: + +```markdown +## <YYYY-MM-DD>: <short title> +- **Root cause:** skill defect | global-convention gap | ... 
+- **Evidence:** event ids / quotes from the journal (frequency × severity) +- **Change:** what was edited (file + nature of change) +- **Target:** path to the edited file(s) +- **Commit:** <sha> (rollback: `git revert <sha>`) +- **Result/trend:** (fill in after measuring) friction for this skill before vs after +``` diff --git a/.github/skills/skill-evolver/references/friction-schema.md b/.github/skills/skill-evolver/references/friction-schema.md new file mode 100644 index 00000000..91bc92af --- /dev/null +++ b/.github/skills/skill-evolver/references/friction-schema.md @@ -0,0 +1,43 @@ +# Friction Event Schema + +One JSON object per line in `~/.skill-evolution/journal.jsonl`. Written only via +`journal-utils.js record` (single writer). Fields auto-filled by the store are marked *(auto)*. + +## Fields + +| Field | Type | Notes | +|-------|------|-------| +| `id` | string | *(auto)* `fr-<base36 timestamp>-<base36 random suffix>` | +| `ts` | number | *(auto)* epoch ms | +| `iso` | string | *(auto)* ISO-8601 timestamp | +| `skill` | string | Owning skill; defaults to the active-skill marker, else `"unknown"` | +| `tool` | string or null | Tool involved (e.g. `powershell`, `ado-wit_create_work_item`) | +| `eventType` | enum | See catalog below; invalid values coerced to `note` | +| `severity` | enum | `low`, `medium`, or `high` (default `medium`) | +| `expected` | string | What should have happened | +| `actual` | string | What actually happened | +| `detail` | string | Error text / context snippet (truncated ~1200 chars) | +| `turnsCost` | number | Approx. 
extra turns the friction cost (default 0) | +| `fixHint` | string | Optional concrete suggestion for the fix | +| `source` | enum | `hook`, `agent`, `cli`, or `user` | +| `sessionId` | string or null | Optional session correlation id | + +## eventType catalog + +| Type | Use when | Typical fix target | +|------|----------|--------------------| +| `tool_error` | A tool/command failed or returned an error | Skill step, script, or environment | +| `retry` | The same operation needed repeated attempts | Skill step clarity / determinism | +| `user_correction` | The user redirected the approach | Skill instructions / defaults | +| `dead_end` | An approach was pursued then abandoned | Skill decision guidance | +| `missing_context` | Needed info the skill should have supplied | Skill body / references | +| `ambiguity` | A clarifying question was required that a better instruction would prevent | Skill instructions | +| `trigger_miss` | The skill failed to activate (or the wrong skill fired) | Skill `description` frontmatter | +| `skill_step_mismatch` | A documented step contradicted reality (wrong path/API/command) | Skill step / references | +| `note` | Free-form observation that doesn't fit above | Triage during retrospective | + +## Severity guidance + +- `high` — blocked progress, caused a wrong result, or wasted many turns. +- `medium` — slowed things down, required a workaround. +- `low` — minor friction, cosmetic, or easily self-corrected. From aa208eea9a33a4b5d86163fe5cff982f95962013 Mon Sep 17 00:00:00 2001 From: wzhipan Date: Tue, 16 Jun 2026 13:09:33 -0700 Subject: [PATCH 02/10] Strengthen skill-evolver trigger description Expand the description frontmatter (the skill's activation mechanism) with more natural-language trigger phrases and a clearer proactive cue, so the skill self-activates without the user naming it explicitly. Also broadens scope wording to skills, prompts, and AI tools, and syncs the skills-table row in copilot-instructions.md. 
Validated at 1019/1024 chars. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/copilot-instructions.md | 2 +- .github/skills/skill-evolver/SKILL.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 013014ac..2a8ab7bb 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -142,7 +142,7 @@ For complex investigation tasks, use these skills (read the skill file for detai | **test-planner** | `.github/skills/test-planner/SKILL.md` | "create test plan", "write test cases", "add tests to ADO", "export test plan", "E2E tests for" | | **threat-modeler** | `.github/skills/threat-modeler/SKILL.md` | "create a threat model", "threat model for", "threat model diagram", "STRIDE analysis for", "security diagram for" | | **copilot-review-analyst** | `.github/skills/copilot-review-analyst/SKILL.md` | "analyze Copilot reviews", "Copilot review effectiveness", "review analysis report", "how helpful are Copilot reviews" | -| **skill-evolver** | `.github/skills/skill-evolver/SKILL.md` | "improve/evolve skills", "run a skill retrospective", "what went wrong with X skill", "why didn't skill Y trigger", "review skill friction", "that didn't go well" | +| **skill-evolver** | `.github/skills/skill-evolver/SKILL.md` | "improve/evolve/fix my skills", "run a skill retrospective", "what went wrong with X skill", "why didn't skill Y trigger", "this skill is outdated/wrong", "review skill friction", "you keep making the same mistake", "that didn't go well" | ## 13. Azure DevOps Integration diff --git a/.github/skills/skill-evolver/SKILL.md b/.github/skills/skill-evolver/SKILL.md index 44a9b6af..c416a061 100644 --- a/.github/skills/skill-evolver/SKILL.md +++ b/.github/skills/skill-evolver/SKILL.md @@ -1,6 +1,6 @@ --- name: skill-evolver -description: Closed-loop self-improvement for skills and AI tools. 
Captures friction (tool errors, retries, wrong/outdated instructions, missing context, trigger misses, user corrections) into a structured journal, then runs retrospectives that classify root causes and propose concrete, reviewable edits to the offending SKILL.md / references / scripts (or to copilot-instructions.md for global lessons). Use when the user wants to improve, evolve, or tune skills, run a skill retrospective, record that something went wrong, asks "what went wrong with X skill", "why didn't skill Y trigger", "review skill friction", or says "that didn't go well". Also use proactively at the end of a task that hit notable friction to log a note for later. +description: Closed-loop self-improvement for skills, prompts, and AI tools. Captures friction (tool errors, repeated retries, wrong or outdated instructions, missing context, missed or wrong skill triggers, user corrections) into a structured journal, then runs retrospectives that classify root causes and propose concrete, reviewable edits to the offending SKILL.md, references, or scripts (or copilot-instructions.md for global lessons). Use whenever the user wants to improve, evolve, tune, or fix a skill or its instructions; run a skill retrospective; review or analyze skill friction; note that something went wrong, didn't work, or was confusing; or says things like "improve my skills", "what went wrong with X skill", "why didn't skill Y trigger", "this skill is outdated or wrong", "fix the skill so this doesn't happen again", "you keep making the same mistake", or "that didn't go well". Also use PROACTIVELY at the end of any task that hit notable friction (repeated tool failures or a user correction) to log a note. 
--- # Skill Evolver From a3ead980d31cdfcc33eef037879c418c81ee5d4b Mon Sep 17 00:00:00 2001 From: wzhipan Date: Tue, 16 Jun 2026 13:27:36 -0700 Subject: [PATCH 03/10] Add off switch and non-intrusiveness controls to skill-evolver Address the concern that always-on capture could feel intrusive: - SKILL_EVOLUTION_DISABLE env var silences all capture (hook + journal recordEvent become no-ops); read paths (stats/list) still work so past data stays reviewable. - friction-capture.js exits early when disabled, still returning {continue:true} so the tool flow is never blocked. - journal-utils.js recordEvent no-ops when disabled; CLI `record` reports it cleanly instead of printing null. - SKILL.md: add a "Non-intrusiveness & controls" section documenting the silent/non-blocking capture, the no-mid-task-edits guarantee, the off switch, and an explicit rule that proactive logging must be one-line and must never interrupt or question the user mid-task. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/hooks/friction-capture.js | 5 +++++ .github/hooks/journal-utils.js | 8 +++++++- .github/skills/skill-evolver/SKILL.md | 9 +++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/.github/hooks/friction-capture.js b/.github/hooks/friction-capture.js index 0ec6d502..b77327b8 100644 --- a/.github/hooks/friction-capture.js +++ b/.github/hooks/friction-capture.js @@ -34,6 +34,11 @@ try { emitAndExit(); } +// Global off switch — set SKILL_EVOLUTION_DISABLE=1 to silence all capture. +if (process.env.SKILL_EVOLUTION_DISABLE) { + emitAndExit(); +} + // Avoid re-entry loops if (hookInput.stop_hook_active) { emitAndExit(); diff --git a/.github/hooks/journal-utils.js b/.github/hooks/journal-utils.js index a89e38a5..2957c1cc 100644 --- a/.github/hooks/journal-utils.js +++ b/.github/hooks/journal-utils.js @@ -66,6 +66,11 @@ var VALID_SEVERITY = ['low', 'medium', 'high']; * truncates verbose fields. Returns the stored event. 
*/ function recordEvent(evt) { + // Global off switch — when disabled, capture is a silent no-op. + // Read paths (stats/list) still work so past data stays reviewable. + if (process.env.SKILL_EVOLUTION_DISABLE) { + return null; + } ensureStore(); evt = evt || {}; @@ -212,7 +217,8 @@ function runCli() { var json = rest[0]; var evt = json ? JSON.parse(json) : {}; evt.source = evt.source || 'cli'; - console.log(JSON.stringify(recordEvent(evt))); + var rec = recordEvent(evt); + console.log(rec ? JSON.stringify(rec) : 'capture disabled (SKILL_EVOLUTION_DISABLE set) — not recorded'); } else if (cmd === 'set-active') { setActive(rest[0] || 'unknown'); console.log('active skill set to: ' + (rest[0] || 'unknown')); diff --git a/.github/skills/skill-evolver/SKILL.md b/.github/skills/skill-evolver/SKILL.md index c416a061..8769468d 100644 --- a/.github/skills/skill-evolver/SKILL.md +++ b/.github/skills/skill-evolver/SKILL.md @@ -14,6 +14,15 @@ Make skills and tools get better over time. The loop: **capture → analyze → - **Validation**: reuse `.github/skills/skill-creator/scripts/quick_validate.py` after every edit. - **Changelog**: `.github/skill-evolution/evolution-log.md` records every applied change (for audit + rollback). +## Non-intrusiveness & controls + +This system is designed to stay out of the way: + +- **Silent capture.** The hook only writes to the journal file. It never interrupts, never asks questions, and always lets the tool flow continue (returns `{continue:true}`). It records **only on detected failure**, so successful work produces no noise. +- **No mid-task edits.** Skills are never auto-edited. Analysis and proposals happen only when you invoke a retrospective, and every behavior-affecting edit is gated on your approval. 
+- **Proactive logging must not derail the user.** If you log a friction note proactively at the end of a friction-heavy task, do it in **one line, recorded silently** via the CLI — do NOT ask the user a question, pause their task, or expand scope to discuss it. They review the journal later. +- **Off switch.** Set the environment variable `SKILL_EVOLUTION_DISABLE=1` to silence all capture (hook + CLI `record` become no-ops). Reviewing past data (`stats`, `list`) still works. Unset it to re-enable. + ## 1. Capture Three capture paths feed the same journal: From 892af334b394fe4e5cd265cce977e24f12a1c1e6 Mon Sep 17 00:00:00 2001 From: wzhipan Date: Tue, 16 Jun 2026 13:52:56 -0700 Subject: [PATCH 04/10] skill-evolver retrospective: apply 3 approved fixes Retrospective over the friction journal (3 active-captured events from the build session). Each fix individually approved by the developer. 1. skill-creator: document the PyYAML prerequisite for the validation/ packaging scripts (fixes ModuleNotFoundError: No module named 'yaml'). 2. skill-evolver: clarify that automatic hook capture is best-effort and active capture is the PRIMARY path (this runtime didn't fire PostToolUse). 3. skill-evolver: state the 1024-char description limit explicitly and add a length-check command in edit-safety-rules (cost 2 retries this session). Logged all three in evolution-log.md. Both edited skills pass quick_validate. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/skill-evolution/evolution-log.md | 32 +++++++++++++++++++ .github/skills/skill-creator/SKILL.md | 2 ++ .github/skills/skill-evolver/SKILL.md | 2 +- .../references/edit-safety-rules.md | 4 ++- 4 files changed, 38 insertions(+), 2 deletions(-) diff --git a/.github/skill-evolution/evolution-log.md b/.github/skill-evolution/evolution-log.md index d435c15d..7e9fdef7 100644 --- a/.github/skill-evolution/evolution-log.md +++ b/.github/skill-evolution/evolution-log.md @@ -7,3 +7,35 @@ Newest entries on top. See `.github/skills/skill-evolver/references/edit-safety- entry format. + +## 2026-06-16 — skill-creator, skill-evolver: first retrospective (3 fixes) + +Source: retrospective run over `~/.skill-evolution/journal.jsonl` (3 active-captured events +from the build session). All fixes approved individually by the developer. + +### 1. skill-creator: document PyYAML prerequisite +- **Root cause:** skill defect (missing context). `quick_validate.py`/`package_skill.py` import + `yaml`, but the skill never states the dependency. +- **Evidence:** `missing_context`, medium — `ModuleNotFoundError: No module named 'yaml'` hit while + validating skill-evolver; required `pip install pyyaml` (1 extra turn). +- **Change:** added a "Prerequisite: requires PyYAML" note to Step 5 (Packaging) in + `.github/skills/skill-creator/SKILL.md`. + +### 2. skill-evolver: clarify automatic capture is best-effort +- **Root cause:** environmental (this runtime did not fire `PostToolUse`/`Stop`), not a code bug. + Doc-clarification only. +- **Evidence:** `trigger_miss`, medium — journal was empty despite real tool failures this session. +- **Change:** sharpened the Architecture bullet in `.github/skills/skill-evolver/SKILL.md` to mark + automatic capture best-effort and active capture the PRIMARY path. + +### 3. skill-evolver: make the 1024-char description limit explicit +- **Root cause:** skill defect (low). 
`edit-safety-rules.md` said "keep under the size limits" + without the number or a check command. +- **Evidence:** `retry`, low — description overshot 1024 (1184 → 1054) twice before fitting (2 turns). +- **Change:** added explicit ≤1024 limit + a PowerShell length-check command to rule 5 in + `.github/skills/skill-evolver/references/edit-safety-rules.md`. + +- **Validation:** `quick_validate.py` passes for both skill-evolver and skill-creator. +- **Commit:** see branch `skill-evolution/retro-2026-06-16` (rollback: `git revert `). +- **Result/trend:** to be measured on the next retrospective (expect these signatures not to recur). + diff --git a/.github/skills/skill-creator/SKILL.md b/.github/skills/skill-creator/SKILL.md index b7f86598..421b205b 100644 --- a/.github/skills/skill-creator/SKILL.md +++ b/.github/skills/skill-creator/SKILL.md @@ -321,6 +321,8 @@ Write instructions for using the skill and its bundled resources. Once development of the skill is complete, it must be packaged into a distributable .skill file that gets shared with the user. The packaging process automatically validates the skill first to ensure it meets all requirements: +**Prerequisite:** the validation and packaging scripts require PyYAML. If you hit `ModuleNotFoundError: No module named 'yaml'`, run `pip install pyyaml` first. + ```bash scripts/package_skill.py ``` diff --git a/.github/skills/skill-evolver/SKILL.md b/.github/skills/skill-evolver/SKILL.md index 8769468d..e03b7a84 100644 --- a/.github/skills/skill-evolver/SKILL.md +++ b/.github/skills/skill-evolver/SKILL.md @@ -10,7 +10,7 @@ Make skills and tools get better over time. The loop: **capture → analyze → ## Architecture (already wired in this repo) - **Store CLI**: `.github/hooks/journal-utils.js` — single writer for the JSONL friction journal (`~/.skill-evolution/journal.jsonl`) and the active-skill attribution marker. 
-- **Automatic capture**: `.github/hooks/friction-capture.js` runs on `PostToolUse`/`Stop` (registered in `.github/hooks/orchestrator.json`) and logs tool failures. If the runtime doesn't fire those events, automatic capture is silently skipped — **active capture (below) is the reliable fallback.** +- **Automatic capture**: `.github/hooks/friction-capture.js` runs on `PostToolUse`/`Stop` (registered in `.github/hooks/orchestrator.json`) and logs tool failures. **Automatic capture is best-effort: in some runtimes `PostToolUse`/`Stop` do not fire, so active capture (below) is the PRIMARY path — record friction yourself, don't assume the hook caught it.** - **Validation**: reuse `.github/skills/skill-creator/scripts/quick_validate.py` after every edit. - **Changelog**: `.github/skill-evolution/evolution-log.md` records every applied change (for audit + rollback). diff --git a/.github/skills/skill-evolver/references/edit-safety-rules.md b/.github/skills/skill-evolver/references/edit-safety-rules.md index 1b80b3d9..f324d2c4 100644 --- a/.github/skills/skill-evolver/references/edit-safety-rules.md +++ b/.github/skills/skill-evolver/references/edit-safety-rules.md @@ -15,7 +15,9 @@ Skills change agent behavior. Treat every edit as a reviewed code change. Trigger miss → the skill `description`. 5. **Preserve the skill contract.** Keep `SKILL.md` frontmatter to allowed keys only (`name`, `description`, `license`, `allowed-tools`, `metadata`); no angle brackets in - `description`; keep it under the size limits. + `description`; `description` must be **≤1024 characters**. Check before saving: + `(Select-String -Path <path-to-SKILL.md> -Pattern '^description:').Line.Length` (subtract the + `description: ` prefix). Keep it under the size limits. 
## Workflow From 754d6925b545a6076790f7dc323fa9a3343017aa Mon Sep 17 00:00:00 2001 From: wzhipan Date: Tue, 16 Jun 2026 14:26:52 -0700 Subject: [PATCH 05/10] skill-evolver: make active capture first-class, quarantine non-firing hook The GitHub Copilot CLI runtime has no hooks system, so the Claude Code-style PostToolUse/Stop registrations in orchestrator.json never fired. Per the developer's choice (Option A: Copilot CLI only), stop pretending capture is automatic and make active (agent-driven) capture the primary mechanism. - orchestrator.json: remove the inert friction-capture.js registrations (PostToolUse, Stop, and the duplicate SubagentStop entry); keep the orchestrator's own subagent hooks. - friction-capture.js: mark DORMANT with a header banner explaining it is Claude Code-only and how to enable it via .claude/settings.json. - skill-evolver/SKILL.md: reframe Architecture + Capture so active capture is the primary/only reliable path on this runtime; fix non-intrusiveness and off-switch wording that implied a background hook runs here. - evolution-log.md: record the change with rollback ref. Validated: quick_validate passes; orchestrator.json no longer references friction-capture; CLI record/stats still work (active capture intact). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/hooks/friction-capture.js | 21 ++++++++++++++++---- .github/hooks/orchestrator.json | 19 ------------------ .github/skill-evolution/evolution-log.md | 25 ++++++++++++++++++++++++ .github/skills/skill-evolver/SKILL.md | 19 +++++++++--------- 4 files changed, 52 insertions(+), 32 deletions(-) diff --git a/.github/hooks/friction-capture.js b/.github/hooks/friction-capture.js index b77327b8..ff5d2beb 100644 --- a/.github/hooks/friction-capture.js +++ b/.github/hooks/friction-capture.js @@ -1,15 +1,28 @@ #!/usr/bin/env node /** - * friction-capture.js — automatic friction capture for the skill-evolver system. 
+ * friction-capture.js — OPTIONAL automatic friction capture for skill-evolver. * - * Registered as a PostToolUse and Stop hook (see orchestrator.json). It reads the - * hook payload from stdin and: + * ┌─────────────────────────────────────────────────────────────────────────┐ + * │ DORMANT ON GITHUB COPILOT CLI. │ + * │ This is a Claude Code-style lifecycle hook (PostToolUse / Stop). The │ + * │ GitHub Copilot CLI runtime has no hooks system, so it NEVER fires here │ + * │ and is intentionally NOT registered in orchestrator.json. │ + * │ │ + * │ On Copilot CLI, ACTIVE capture is the real mechanism: the agent records │ + * │ friction itself via `journal-utils.js record` (see the skill-evolver │ + * │ SKILL.md). Do not rely on this file to catch anything on Copilot CLI. │ + * │ │ + * │ Kept for teams that run this repo under Claude Code: register it in │ + * │ `.claude/settings.json` (PostToolUse + Stop) and it will work there. │ + * └─────────────────────────────────────────────────────────────────────────┘ + * + * Behavior when it DOES fire (Claude Code): reads the hook payload from stdin and * - PostToolUse: if the tool reported a failure/error, appends a high-signal * `tool_error` friction event to the journal (attributed to the active skill). * - Stop / SubagentStop: clears the active-skill marker so attribution does not * leak across tasks. * - * Design rules (match the existing hooks in this folder): + * Design rules: * - Never block the tool flow. Always print {continue:true} and exit 0. * - Wrap everything in try/catch; capture is best-effort. * - Only record on detected failure to keep the journal high-signal. 
diff --git a/.github/hooks/orchestrator.json b/.github/hooks/orchestrator.json index c0fbe77e..adec1e5c 100644 --- a/.github/hooks/orchestrator.json +++ b/.github/hooks/orchestrator.json @@ -12,25 +12,6 @@ "type": "command", "command": "node .github/hooks/subagent-stop.js", "timeout": 10 - }, - { - "type": "command", - "command": "node .github/hooks/friction-capture.js", - "timeout": 10 - } - ], - "PostToolUse": [ - { - "type": "command", - "command": "node .github/hooks/friction-capture.js", - "timeout": 10 - } - ], - "Stop": [ - { - "type": "command", - "command": "node .github/hooks/friction-capture.js", - "timeout": 10 } ] } diff --git a/.github/skill-evolution/evolution-log.md b/.github/skill-evolution/evolution-log.md index 7e9fdef7..3ae720bc 100644 --- a/.github/skill-evolution/evolution-log.md +++ b/.github/skill-evolution/evolution-log.md @@ -8,6 +8,31 @@ entry format. +## 2026-06-16 — skill-evolver: make active capture first-class, quarantine non-firing hook (Option A) + +Source: investigation of "why isn't PostToolUse/Stop firing". Root cause: the GitHub +Copilot CLI runtime has no hooks system, and `orchestrator.json` used the Claude Code hook +schema, so `friction-capture.js` never fired. Developer chose **Option A** (Copilot CLI only). + +- **Root cause:** environmental / `skill_step_mismatch` (high) — automatic capture was + presented as primary but cannot fire on this runtime. +- **Evidence:** empty journal despite real tool failures; CLI docs show no hooks feature; + no runtime config references `orchestrator.json`. +- **Change:** + - `orchestrator.json`: removed the `PostToolUse`/`Stop` and the second `SubagentStop` + `friction-capture.js` registrations (kept the orchestrator's own subagent hooks). + - `friction-capture.js`: marked DORMANT with a header banner — Claude Code-only, not + registered on Copilot CLI; documents how to enable via `.claude/settings.json`. 
+ - `skill-evolver/SKILL.md`: reframed capture so **active capture is the primary mechanism** + (Architecture, Capture section table, attribution note, non-intrusiveness + off-switch + wording all updated to stop implying an automatic hook runs here). +- **Validation:** `quick_validate.py` passes; `orchestrator.json` parses and no longer + references friction-capture; CLI `record`/`stats` still work (active capture intact). +- **Commit:** see branch `skill-evolution/copilot-cli-active-capture` (rollback: `git revert `). +- **Result/trend:** capture now honestly reflects the runtime; no false reliance on a hook + that never fires. + + ## 2026-06-16 — skill-creator, skill-evolver: first retrospective (3 fixes) Source: retrospective run over `~/.skill-evolution/journal.jsonl` (3 active-captured events diff --git a/.github/skills/skill-evolver/SKILL.md b/.github/skills/skill-evolver/SKILL.md index e03b7a84..2c91ddcc 100644 --- a/.github/skills/skill-evolver/SKILL.md +++ b/.github/skills/skill-evolver/SKILL.md @@ -10,7 +10,8 @@ Make skills and tools get better over time. The loop: **capture → analyze → ## Architecture (already wired in this repo) - **Store CLI**: `.github/hooks/journal-utils.js` — single writer for the JSONL friction journal (`~/.skill-evolution/journal.jsonl`) and the active-skill attribution marker. -- **Automatic capture**: `.github/hooks/friction-capture.js` runs on `PostToolUse`/`Stop` (registered in `.github/hooks/orchestrator.json`) and logs tool failures. **Automatic capture is best-effort: in some runtimes `PostToolUse`/`Stop` do not fire, so active capture (below) is the PRIMARY path — record friction yourself, don't assume the hook caught it.** +- **Capture is ACTIVE on this runtime.** On the GitHub Copilot CLI there is **no hooks system**, so capture happens because **you (the agent) record friction yourself** via `journal-utils.js record`. 
This is the primary and only reliable mechanism here — treat logging friction as part of doing the task, not something a hook does for you. +- **Dormant auto-capture hook**: `.github/hooks/friction-capture.js` is a Claude Code-style `PostToolUse`/`Stop` hook. It does **not** fire on Copilot CLI and is intentionally **not** registered in `orchestrator.json`. It's kept only for teams running this repo under Claude Code (register it in `.claude/settings.json` there). Do not rely on it here. - **Validation**: reuse `.github/skills/skill-creator/scripts/quick_validate.py` after every edit. - **Changelog**: `.github/skill-evolution/evolution-log.md` records every applied change (for audit + rollback). @@ -18,20 +19,20 @@ Make skills and tools get better over time. The loop: **capture → analyze → This system is designed to stay out of the way: -- **Silent capture.** The hook only writes to the journal file. It never interrupts, never asks questions, and always lets the tool flow continue (returns `{continue:true}`). It records **only on detected failure**, so successful work produces no noise. +- **Silent capture.** Recording a friction event only appends one line to the journal file. It never interrupts the user, never asks a question, and never changes your task flow. Log **only real friction** (failures, retries, wrong instructions, corrections) so the journal stays high-signal. - **No mid-task edits.** Skills are never auto-edited. Analysis and proposals happen only when you invoke a retrospective, and every behavior-affecting edit is gated on your approval. - **Proactive logging must not derail the user.** If you log a friction note proactively at the end of a friction-heavy task, do it in **one line, recorded silently** via the CLI — do NOT ask the user a question, pause their task, or expand scope to discuss it. They review the journal later. 
+- **Off switch.** Set the environment variable `SKILL_EVOLUTION_DISABLE=1` to silence capture (CLI `record` becomes a no-op; the dormant hook is already inert). Reviewing past data (`stats`, `list`) still works. Unset it to re-enable (any non-empty value, including `0`, counts as set and disables capture). -## 1. Capture +## 1. Capture (active — this is the main job on Copilot CLI) -Three capture paths feed the same journal: +**You are the capture mechanism.** There is no background hook on this runtime, so friction is only recorded if you record it. Make this a habit: whenever you hit friction, append one line to the journal before moving on. | Path | Who | How | |------|-----|-----| -| Automatic | hook | Tool errors logged on `PostToolUse`. No action needed. | -| Active | you (agent) | When you notice friction mid-task, record it (see below). | +| **Active (primary)** | you (agent) | The moment you notice friction, record it via the CLI (below). | | User-flagged | user | "that didn't go well" → record the last friction with their context. | +| Dormant hook | — | Not active on Copilot CLI; see Architecture. Ignore for capture here. | **Record a friction event** (see [references/friction-schema.md](references/friction-schema.md) for the schema and the `eventType` catalog). 
Use single quotes around the JSON on PowerShell: @@ -39,12 +40,12 @@ Three capture paths feed the same journal: node .github/hooks/journal-utils.js record '{"skill":"release-helper","tool":"powershell","eventType":"skill_step_mismatch","severity":"high","expected":"pipeline YAML under 1ES-Pipelines/","actual":"skill pointed to azure-pipelines/ which is deprecated","fixHint":"update path reference in SKILL.md step 3"}' ``` -**Attribute events to a skill**: when you start working under a skill, optionally mark it active so hook-captured events get attributed: +**Attribute events to a skill**: optionally mark the skill you're working under so events default to it (otherwise they record as `skill: "unknown"` and get triaged later): ```powershell node .github/hooks/journal-utils.js set-active <skill-name> # ... work ... -node .github/hooks/journal-utils.js clear-active # (Stop hook also clears it) +node .github/hooks/journal-utils.js clear-active ``` **When to actively record** (don't log noise — log signal): From 47183b4b99620244babeca0946578f51bc991301 Mon Sep 17 00:00:00 2001 From: wzhipan Date: Tue, 16 Jun 2026 14:45:01 -0700 Subject: [PATCH 06/10] skill-evolver retro #2: clarify git checkout -b uses powershell tool edit-safety-rules.md Workflow step 1 was ambiguous about which tool to use for branch creation. I used gitkraken-git_checkout which doesn't support -b, requiring an unnecessary two-step workaround. Verified: git checkout -b works correctly via the powershell tool with native git 2.52.0. Clarified in one line: 'via the powershell tool (not gitkraken-git_checkout, which does not support -b)'. Retro #2 summary: 7 journal events (4 carried/confirmed-fixed, 3 new). 1 skill defect fixed; 2 environmental (no action). All 4 prior fixes hold. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/skill-evolution/evolution-log.md | 24 +++++++++++++++++++ .../references/edit-safety-rules.md | 2 +- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/.github/skill-evolution/evolution-log.md b/.github/skill-evolution/evolution-log.md index 3ae720bc..4cebd802 100644 --- a/.github/skill-evolution/evolution-log.md +++ b/.github/skill-evolution/evolution-log.md @@ -8,6 +8,30 @@ entry format. +## 2026-06-16 — skill-evolver: clarify git branch creation uses powershell tool (retro #2) + +Source: retrospective #2. 7 events in journal (4 carried from retro #1 — all confirmed fixed, +no recurrence). 3 new events captured. 1 skill defect actioned. + +### 4. skill-evolver: `git checkout -b` clarification in edit-safety-rules +- **Root cause:** skill defect (medium). edit-safety-rules said `git checkout -b` without + specifying *which* tool — I used `gitkraken-git_checkout` (doesn't support `-b`) when + the `powershell` tool works fine with native git. +- **Evidence:** `tool_error`, medium — had to use an unnecessary two-step workaround + (git_branch create + git_checkout), costing an extra turn. Verified: `git checkout -b` + works perfectly via the powershell tool. +- **Change:** one-line clarification in Workflow step 1 of + `.github/skills/skill-evolver/references/edit-safety-rules.md`: specify + "via the powershell tool (not gitkraken-git_checkout, which doesn't support -b)". +- **Not actioned:** event #6 (`ask_user` interruption — environmental, no fix) and + event #7 (dirty workspace file — environmental, user skipped the doc nudge). + +- **Validation:** `quick_validate.py` passes. +- **Commit:** see branch `skill-evolution/copilot-cli-active-capture` (rollback: `git revert `). +- **Result/trend:** 4/4 carried-over defects still resolved; 1 new defect fixed; 2 environmental. + Velocity: retro #2 closed faster than retro #1 — journal patterns are getting cleaner. 
+ + ## 2026-06-16 — skill-evolver: make active capture first-class, quarantine non-firing hook (Option A) Source: investigation of "why isn't PostToolUse/Stop firing". Root cause: the GitHub diff --git a/.github/skills/skill-evolver/references/edit-safety-rules.md b/.github/skills/skill-evolver/references/edit-safety-rules.md index f324d2c4..c3c52da6 100644 --- a/.github/skills/skill-evolver/references/edit-safety-rules.md +++ b/.github/skills/skill-evolver/references/edit-safety-rules.md @@ -21,7 +21,7 @@ Skills change agent behavior. Treat every edit as a reviewed code change. ## Workflow -1. Create a branch: `git checkout -b skill-evolution/`. +1. Create a branch: run `git checkout -b skill-evolution/` via the **powershell tool** (not `gitkraken-git_checkout`, which doesn't support `-b`). 2. Make one logical change per commit; reference the journal event ids in the commit body. 3. **Validate** each edited skill: ```powershell From db1d82642c20a414ffeea8564dfcf406c0ed7dde Mon Sep 17 00:00:00 2001 From: wzhipan Date: Tue, 16 Jun 2026 14:49:13 -0700 Subject: [PATCH 07/10] skill-evolver: require proposals to name target skill + file User feedback: retrospective proposals showed diffs but didn't make clear which skill each fix targeted. Since skill-evolver evolves many skills, that ambiguity makes per-skill review decisions hard. SKILL.md section 3 now mandates: - a per-proposal header: 'Target: -> . . ' - a summary table (# . Target skill . File . Root cause . Severity) when proposing multiple fixes - naming the target skill in per-fix approval questions quick_validate passes; logged in evolution-log.md. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/skill-evolution/evolution-log.md | 17 +++++++++++++++++ .github/skills/skill-evolver/SKILL.md | 20 ++++++++++++++------ 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/.github/skill-evolution/evolution-log.md b/.github/skill-evolution/evolution-log.md index 4cebd802..272d9458 100644 --- a/.github/skill-evolution/evolution-log.md +++ b/.github/skill-evolution/evolution-log.md @@ -8,6 +8,23 @@ entry format. +## 2026-06-16 — skill-evolver: require proposals to name target skill + file + +- **Target:** skill-evolver → `.github/skills/skill-evolver/SKILL.md` (§3 Propose, review, apply) +- **Root cause:** skill defect (medium). The Propose section required "concrete diffs" but did + not require each proposal to state *which skill and file* it targets. Since this skill evolves + many skills, reviewers couldn't tell at a glance what each fix changed. +- **Evidence:** `missing_context`, medium (user-reported) — "I didn't see which skill the fix was + for" during retro #1/#2 proposals. +- **Change:** §3 now mandates a per-proposal header + `Target: · · `, a summary table (# · Target skill · File · + Root cause · Severity) when proposing multiple fixes, and naming the target skill in per-fix + approval questions. +- **Validation:** `quick_validate.py` passes. +- **Commit:** branch `skill-evolution/copilot-cli-active-capture` (rollback: `git revert `). +- **Result/trend:** future retrospective proposals will be unambiguous about scope per skill. + + ## 2026-06-16 — skill-evolver: clarify git branch creation uses powershell tool (retro #2) Source: retrospective #2. 
7 events in journal (4 carried from retro #1 — all confirmed fixed, diff --git a/.github/skills/skill-evolver/SKILL.md b/.github/skills/skill-evolver/SKILL.md index 2c91ddcc..4128f0d2 100644 --- a/.github/skills/skill-evolver/SKILL.md +++ b/.github/skills/skill-evolver/SKILL.md @@ -76,15 +76,23 @@ Run when asked to improve/evolve skills or review friction. Follow [references/edit-safety-rules.md](references/edit-safety-rules.md) strictly. Summary: -1. **Propose concrete diffs** — never vague advice. Show the exact before/after for each file. -2. **Gate on human review** — present proposals and use `ask_user` to get approval. Never silently change behavior-affecting instructions. -3. **Apply on a branch** (`skill-evolution/`), one logical change per commit. -4. **Validate** every edited skill: +1. **Lead every proposal with its target.** Before the diff, each proposed fix MUST state, on one line: + **`Target: ` · `` · ``** + (for global lessons use `Target: copilot-instructions.md (all skills)`). This skill evolves *many* + skills, so the reviewer must see at a glance which skill each fix changes — never bury it. + When proposing multiple fixes, also include a one-row-per-fix summary table with columns + **# · Target skill · File · Root cause · Severity** before the detailed diffs. +2. **Propose concrete diffs** — never vague advice. Show the exact before/after for each file. +3. **Gate on human review** — present proposals and use `ask_user` to get approval. When asking + per-fix, name the target skill in the question (e.g. "Apply fix #2 to **release-helper**?"). + Never silently change behavior-affecting instructions. +4. **Apply on a branch** (`skill-evolution/`), one logical change per commit. +5. **Validate** every edited skill: ```powershell python .github/skills/skill-creator/scripts/quick_validate.py .github/skills/ ``` -5. **Log it** — append an entry to `.github/skill-evolution/evolution-log.md` (issue, evidence, change, target, rollback ref). -6. 
**Offer a PR** for the branch when the user wants it. +6. **Log it** — append an entry to `.github/skill-evolution/evolution-log.md` (issue, evidence, change, target, rollback ref). +7. **Offer a PR** for the branch when the user wants it. ## 4. Measure From 5db977c07c310f6ba64ea8d9bd634e95dc3a905a Mon Sep 17 00:00:00 2001 From: wzhipan Date: Tue, 16 Jun 2026 15:10:10 -0700 Subject: [PATCH 08/10] skill-evolver: add anti-bloat guardrails (prune + tripwire + consolidate + references) Counters the loop's addition bias so skills don't grow into caveat-soup: - #2 tripwire: new 'journal-utils.js skill-sizes' command scans every SKILL.md and flags body >400/500 lines and description >900/1024 chars. - #1 prune: SKILL.md section 4 is now 'Measure & prune' - run skill-sizes each retro; every ~5th retro (or when flagged) propose removals, not just additions. - #3 + #4: new edit-safety rule 6 (consolidate over append; references over body; don't add to an over-budget skill without pruning). - New references/bloat-control.md holds budgets + prune procedure, kept out of the always-loaded body (practicing #4). Validated: quick_validate passes; skill-sizes runs and already flags skill-evolver's own description (1019/1024, DESC_WARN). Body grew only 104->111 lines because detail went into the reference. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/hooks/journal-utils.js | 58 ++++++++++++++++++- .github/skill-evolution/evolution-log.md | 26 +++++++++ .github/skills/skill-evolver/SKILL.md | 8 ++- .../skill-evolver/references/bloat-control.md | 47 +++++++++++++++ .../references/edit-safety-rules.md | 4 ++ 5 files changed, 141 insertions(+), 2 deletions(-) create mode 100644 .github/skills/skill-evolver/references/bloat-control.md diff --git a/.github/hooks/journal-utils.js b/.github/hooks/journal-utils.js index 2957c1cc..d7149542 100644 --- a/.github/hooks/journal-utils.js +++ b/.github/hooks/journal-utils.js @@ -171,6 +171,56 @@ function computeStats() { }; } +// --------------------------------------------------------------------------- +// Skill size budget (anti-bloat tripwire). Scans every SKILL.md and flags any +// whose body or description is over budget, so the retrospective can propose +// pruning / moving detail to references/ instead of letting skills grow forever. +// --------------------------------------------------------------------------- +var BODY_WARN = 400; // SKILL.md body lines — start consolidating +var BODY_OVER = 500; // skill-creator's stated maximum +var DESC_WARN = 900; // description chars — getting close to the limit +var DESC_MAX = 1024; // hard frontmatter limit + +function computeSkillSizes() { + var skillsDir = path.join(__dirname, '..', 'skills'); + var out = []; + if (!fs.existsSync(skillsDir)) return out; + var entries = fs.readdirSync(skillsDir); + for (var i = 0; i < entries.length; i++) { + var md = path.join(skillsDir, entries[i], 'SKILL.md'); + if (!fs.existsSync(md)) continue; + var content = fs.readFileSync(md, 'utf-8'); + var lineCount = content.split('\n').length; + var descMatch = content.match(/^description:\s*(.*)$/m); + var descLen = descMatch ? 
descMatch[1].length : 0; + var flags = []; + if (lineCount > BODY_OVER) flags.push('BODY_OVER'); + else if (lineCount > BODY_WARN) flags.push('BODY_WARN'); + if (descLen > DESC_MAX) flags.push('DESC_OVER'); + else if (descLen > DESC_WARN) flags.push('DESC_WARN'); + out.push({ skill: entries[i], lines: lineCount, descLen: descLen, flags: flags }); + } + return out.sort(function (a, b) { return b.lines - a.lines; }); +} + +function skillSizesToMarkdown(sizes) { + var lines = ['# Skill Size Budget', '', + 'Body budget: warn >' + BODY_WARN + ', over >' + BODY_OVER + + ' lines. Description: warn >' + DESC_WARN + ', max ' + DESC_MAX + ' chars.', '', + '| Skill | Lines | Desc chars | Flags |', + '|-------|-------|-----------|-------|']; + var flagged = 0; + sizes.forEach(function (s) { + if (s.flags.length) flagged++; + lines.push('| ' + s.skill + ' | ' + s.lines + ' | ' + s.descLen + ' | ' + + (s.flags.join(', ') || '—') + ' |'); + }); + lines.push(''); + lines.push(flagged ? ('⚠️ ' + flagged + ' skill(s) over budget — consider pruning or moving detail to references/.') + : '✅ All skills within budget.'); + return lines.join('\n'); +} + function statsToMarkdown(s) { var lines = []; lines.push('# Friction Digest'); @@ -239,6 +289,10 @@ function runCli() { var s = computeStats(); var fl = parseFlags(rest); console.log(fl.md ? statsToMarkdown(s) : JSON.stringify(s, null, 2)); + } else if (cmd === 'skill-sizes') { + var sizes = computeSkillSizes(); + var szf = parseFlags(rest); + console.log(szf.md ? 
skillSizesToMarkdown(sizes) : JSON.stringify(sizes, null, 2)); } else if (cmd === 'path') { console.log(JSON.stringify({ storeDir: STORE_DIR, journal: JOURNAL_FILE, activeMarker: ACTIVE_FILE }, null, 2)); } else if (cmd === 'clear') { @@ -248,7 +302,7 @@ function runCli() { console.log('journal cleared (backup at ' + JOURNAL_FILE + '.bak)'); } else { console.error('Unknown command: ' + cmd); - console.error('Commands: record, set-active, clear-active, active, list, stats, path, clear'); + console.error('Commands: record, set-active, clear-active, active, list, stats, skill-sizes, path, clear'); process.exit(1); } } catch (e) { @@ -265,6 +319,8 @@ module.exports = { readEvents: readEvents, computeStats: computeStats, statsToMarkdown: statsToMarkdown, + computeSkillSizes: computeSkillSizes, + skillSizesToMarkdown: skillSizesToMarkdown, STORE_DIR: STORE_DIR, JOURNAL_FILE: JOURNAL_FILE, ACTIVE_FILE: ACTIVE_FILE, diff --git a/.github/skill-evolution/evolution-log.md b/.github/skill-evolution/evolution-log.md index 272d9458..083d1317 100644 --- a/.github/skill-evolution/evolution-log.md +++ b/.github/skill-evolution/evolution-log.md @@ -8,6 +8,32 @@ entry format. +## 2026-06-16 — skill-evolver: add anti-bloat guardrails (#1 prune, #2 tripwire, #3 consolidate, #4 references) + +- **Target:** skill-evolver → `journal-utils.js`, `SKILL.md`, `references/edit-safety-rules.md`, + `references/bloat-control.md` (new) +- **Root cause:** design risk (user-raised) — the loop has an addition bias; every retrospective + tends to *add* a rule, so skills bloat over time and pay a per-trigger token tax. Nothing in the + loop pruned, consolidated, or measured skill weight. +- **Evidence:** in one session skill-evolver took 4 edits, all additions; largest skills already + 320–369 lines vs the 500-line guideline. +- **Change:** + - **#2 tripwire:** new `journal-utils.js skill-sizes` command scans every SKILL.md and flags + body >400/500 lines and description >900/1024 chars. 
+ - **#1 prune:** SKILL.md §4 renamed "Measure & prune" — run `skill-sizes` each retro; every ~5th + retro (or when flagged) propose *removals*, not just additions. + - **#3 + #4:** new edit-safety rule 6 (consolidate over append; references over body; don't add + to an over-budget skill without pruning). + - New `references/bloat-control.md` holds the budgets + prune procedure (kept out of the + always-loaded body — practicing #4). +- **Validation:** `quick_validate.py` passes; `skill-sizes --md` runs and correctly flags + skill-evolver's own description (1019 chars, DESC_WARN). SKILL.md body grew only 104→111 lines + because detail went into the reference. +- **Commit:** branch `skill-evolution/copilot-cli-active-capture` (rollback: `git revert <sha>`). +- **Follow-up:** skill-evolver's description (1019/1024) should be trimmed at the next pass — the + tripwire is already flagging the tool's own author. + + ## 2026-06-16 — skill-evolver: require proposals to name target skill + file - **Target:** skill-evolver → `.github/skills/skill-evolver/SKILL.md` (§3 Propose, review, apply) diff --git a/.github/skills/skill-evolver/SKILL.md b/.github/skills/skill-evolver/SKILL.md index 4128f0d2..17f42243 100644 --- a/.github/skills/skill-evolver/SKILL.md +++ b/.github/skills/skill-evolver/SKILL.md @@ -62,8 +62,10 @@ Run when asked to improve/evolve skills or review friction. 1. **Pull the digest** (deterministic aggregation; ranks recurring issues by frequency × severity): ```powershell node .github/hooks/journal-utils.js stats --md + node .github/hooks/journal-utils.js skill-sizes --md ``` For raw events of one skill: `node .github/hooks/journal-utils.js list --skill `. + `skill-sizes` flags any skill over its body/description budget — those are pruning candidates (see section 4, "Measure & prune"). 2. **Classify each recurring group** using [references/classification-rubric.md](references/classification-rubric.md).
The critical judgment: is this a **skill defect** (fixable by editing the skill), a **model mistake**, an **environment issue**, or a **genuinely novel task**? Only skill defects (and global-convention gaps) become edits. @@ -94,10 +96,14 @@ Follow [references/edit-safety-rules.md](references/edit-safety-rules.md) strict 6. **Log it** — append an entry to `.github/skill-evolution/evolution-log.md` (issue, evidence, change, target, rollback ref). 7. **Offer a PR** for the branch when the user wants it. -## 4. Measure +## 4. Measure & prune After fixes land, re-run `stats` over time to confirm the friction rate for the edited skill is trending down. Note the trend in the evolution-log entry. If an edit didn't help, roll it back (see edit-safety-rules) and try a different fix. +**Counter the addition bias.** The loop naturally *adds* rules; without pushback, skills bloat. So: +- Run `skill-sizes --md` each retrospective; any flagged skill is a **pruning candidate**. +- Every ~5th retrospective (or whenever a skill is flagged), propose **removals** — obsolete, redundant, one-off, or contradictory rules — not just additions. See [references/bloat-control.md](references/bloat-control.md) for the prune procedure and budgets. A prune goes through the same review gate as any edit. + ## Scope notes - This system also applies to non-skill assets: prompt templates, agent instruction files, and MCP-usage notes — the same capture/analyze/propose loop works for them. diff --git a/.github/skills/skill-evolver/references/bloat-control.md b/.github/skills/skill-evolver/references/bloat-control.md new file mode 100644 index 00000000..a03379a3 --- /dev/null +++ b/.github/skills/skill-evolver/references/bloat-control.md @@ -0,0 +1,47 @@ +# Bloat Control + +The evolver has a built-in **addition bias**: every retrospective tends to *add* a rule. +Without counter-pressure, skills grow into unreadable caveat-soup and pay a per-trigger token +tax. These guardrails keep skills lean. 
Read this when running the Prune phase or when +`skill-sizes` flags a skill. + +## Size budget (tripwire) + +Run the automated check during every retrospective: + +```powershell +node .github/hooks/journal-utils.js skill-sizes --md +``` + +Thresholds (enforced by the tool): + +| Dimension | Warn | Over | +|-----------|------|------| +| SKILL.md body lines | > 400 | > 500 (skill-creator's stated max) | +| `description` chars | > 900 | > 1024 (hard limit) | + +Any skill flagged `BODY_OVER` / `BODY_WARN` / `DESC_*` is a candidate for pruning or relocation +**before** adding anything new to it. + +## Prune procedure + +When a skill is flagged (or every ~5th retrospective), look for and propose **removals**, not just +additions: + +1. **Obsolete** — rules for a path/API/tool that no longer exists. Delete. +2. **Redundant** — two bullets saying the same thing, or a rule the model would follow anyway. Merge or drop. +3. **One-off `low`-severity notes** — caveats added for a single incident that never recurred (check the journal: if the signature appears once and is old, expire it). +4. **Contradictions** — a newer rule that supersedes an older one. Keep one, remove the other. + +Propose prunes through the same review gate as any edit (lead with `Target:`, get approval, log it). +A retrospective that removes a stale rule is as valuable as one that adds a needed rule. + +## Append discipline (stop new bloat at the source) + +- **Consolidate over append.** Prefer editing or tightening an existing instruction over adding a + new bullet. Two short rules that overlap should become one. +- **References over body.** Put detailed caveats, examples, and edge-case handling in `references/` + (progressive disclosure), not in the always-loaded SKILL.md body. The body stays a lean index; + the detail loads only when needed. This is skill-creator's core principle. +- **Earn the line.** Every line added to a SKILL.md body costs tokens on every trigger. 
Only add to + the body if the lesson is core and high-frequency; otherwise it goes in a reference or is dropped. diff --git a/.github/skills/skill-evolver/references/edit-safety-rules.md b/.github/skills/skill-evolver/references/edit-safety-rules.md index c3c52da6..9922e611 100644 --- a/.github/skills/skill-evolver/references/edit-safety-rules.md +++ b/.github/skills/skill-evolver/references/edit-safety-rules.md @@ -18,6 +18,10 @@ Skills change agent behavior. Treat every edit as a reviewed code change. `description`; `description` must be **≤1024 characters**. Check before saving: `(Select-String -Path -Pattern '^description:').Line.Length` (subtract the `description: ` prefix). Keep it under the size limits. +6. **Consolidate over append (anti-bloat).** Prefer editing or merging an existing instruction + over adding a new bullet. Put detailed caveats/examples in `references/`, not the always-loaded + SKILL.md body. Don't add to a skill already over budget (`skill-sizes`) without pruning first. + See [bloat-control.md](bloat-control.md). ## Workflow From de46ab2bb2e75b0e2d84a1cd3c93148a33730d18 Mon Sep 17 00:00:00 2001 From: wzhipan Date: Tue, 16 Jun 2026 15:12:59 -0700 Subject: [PATCH 09/10] skill-evolver: trim description to clear self-flagged DESC_WARN The skill-sizes tripwire flagged skill-evolver's own description at 1019/1024 chars. Removed redundant trigger phrasings (overlapping 'didn't work' wording, a duplicate example) and tightened the global-lessons clause; strongest triggers preserved. Now 887 chars (under the 900 warn). quick_validate passes and skill-sizes reports all skills within budget. Demonstrates the anti-bloat loop end to end: tripwire flagged, prune cleared. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/skill-evolution/evolution-log.md | 16 ++++++++++++++++ .github/skills/skill-evolver/SKILL.md | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/.github/skill-evolution/evolution-log.md b/.github/skill-evolution/evolution-log.md index 083d1317..d2deae5e 100644 --- a/.github/skill-evolution/evolution-log.md +++ b/.github/skill-evolution/evolution-log.md @@ -8,6 +8,22 @@ entry format. +## 2026-06-16 — skill-evolver: trim description to clear self-flagged DESC_WARN + +- **Target:** skill-evolver → `.github/skills/skill-evolver/SKILL.md` (frontmatter `description`) +- **Root cause:** bloat (low). The `skill-sizes` tripwire added in the prior commit immediately + flagged skill-evolver's own description at 1019/1024 chars (DESC_WARN). +- **Evidence:** `skill-sizes --md` output — skill-evolver was the only flagged skill. +- **Change:** removed redundant trigger phrasings ("note that something went wrong, didn't work, or + was confusing" overlapped with "that didn't go well"; dropped one duplicate example) and tightened + the global-lessons clause. Strongest trigger phrases preserved. +- **Validation:** `quick_validate.py` passes; description now 887 chars (under the 900 warn); + `skill-sizes --md` reports "✅ All skills within budget." +- **Commit:** branch `skill-evolution/copilot-cli-active-capture` (rollback: `git revert <sha>`). +- **Result/trend:** the tripwire flagged its own author and the prune cleared it — the anti-bloat + loop works end to end.
+ + ## 2026-06-16 — skill-evolver: add anti-bloat guardrails (#1 prune, #2 tripwire, #3 consolidate, #4 references) - **Target:** skill-evolver → `journal-utils.js`, `SKILL.md`, `references/edit-safety-rules.md`, diff --git a/.github/skills/skill-evolver/SKILL.md b/.github/skills/skill-evolver/SKILL.md index 17f42243..b4d8bdfa 100644 --- a/.github/skills/skill-evolver/SKILL.md +++ b/.github/skills/skill-evolver/SKILL.md @@ -1,6 +1,6 @@ --- name: skill-evolver -description: Closed-loop self-improvement for skills, prompts, and AI tools. Captures friction (tool errors, repeated retries, wrong or outdated instructions, missing context, missed or wrong skill triggers, user corrections) into a structured journal, then runs retrospectives that classify root causes and propose concrete, reviewable edits to the offending SKILL.md, references, or scripts (or copilot-instructions.md for global lessons). Use whenever the user wants to improve, evolve, tune, or fix a skill or its instructions; run a skill retrospective; review or analyze skill friction; note that something went wrong, didn't work, or was confusing; or says things like "improve my skills", "what went wrong with X skill", "why didn't skill Y trigger", "this skill is outdated or wrong", "fix the skill so this doesn't happen again", "you keep making the same mistake", or "that didn't go well". Also use PROACTIVELY at the end of any task that hit notable friction (repeated tool failures or a user correction) to log a note. +description: Closed-loop self-improvement for skills, prompts, and AI tools. Captures friction (tool errors, repeated retries, wrong or outdated instructions, missing context, missed or wrong skill triggers, user corrections) into a structured journal, then runs retrospectives that classify root causes and propose concrete, reviewable edits to the offending SKILL.md, references, scripts, or copilot-instructions.md. 
Use whenever the user wants to improve, evolve, tune, or fix a skill or its instructions; run a skill retrospective; review or analyze skill friction; or says things like "improve my skills", "what went wrong with X skill", "why didn't skill Y trigger", "this skill is outdated or wrong", "you keep making the same mistake", or "that didn't go well". Also use PROACTIVELY at the end of any task that hit notable friction (repeated tool failures or a user correction) to log a note. --- # Skill Evolver From 31437ccc753e1eea3f2e5b2e1fe23ae556b6d5c1 Mon Sep 17 00:00:00 2001 From: wzhipan Date: Tue, 16 Jun 2026 15:30:07 -0700 Subject: [PATCH 10/10] skills: bidirectional creator<->evolver handoff + 'Needs a new skill' outcome Integrate the build-time (skill-creator) and run-time (skill-evolver) halves of the skill lifecycle via lightweight cross-references (not a merge), and close a real gap: the evolver had no path to recommend creating a NEW skill. - creator -> evolver: Step 6 'Iterate' now points to skill-evolver for continuous, evidence-based iteration after a skill is in use (Step 6 still covers immediate in-authoring tweaks). - evolver -> creator: new 'Needs a new skill' classification outcome for a substantial out-of-scope task, or splitting an over-budget skill that's doing two jobs -> hand off to skill-creator. Added to the rubric table, SKILL.md target-decision list, and bloat-control prune procedure. Kept separate by design (distinct triggers, freedom levels, 1024-char description ceiling). Both skills pass quick_validate; skill-sizes reports all within budget. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/skill-evolution/evolution-log.md | 25 +++++++++++++++++++ .github/skills/skill-creator/SKILL.md | 2 ++ .github/skills/skill-evolver/SKILL.md | 1 + .../skill-evolver/references/bloat-control.md | 2 ++ .../references/classification-rubric.md | 3 ++- 5 files changed, 32 insertions(+), 1 deletion(-) diff --git a/.github/skill-evolution/evolution-log.md b/.github/skill-evolution/evolution-log.md index d2deae5e..57abfd94 100644 --- a/.github/skill-evolution/evolution-log.md +++ b/.github/skill-evolution/evolution-log.md @@ -8,6 +8,31 @@ entry format. +## 2026-06-16 — skill-creator ⇄ skill-evolver: bidirectional lifecycle handoff + "Needs a new skill" outcome + +- **Target:** skill-evolver → `SKILL.md` (§2 target decision), `references/classification-rubric.md`, + `references/bloat-control.md`; skill-creator → `SKILL.md` (Step 6 Iterate) +- **Root cause:** missing_context (design gap, user-raised). The build-time (creator) and run-time + (evolver) halves of the skill lifecycle had no documented handoff, and the evolver had **no branch + for "evolving an existing skill is insufficient — a new skill is needed."** The rubric's 5 + categories all resolved to edit/no-edit; "Novel task" only said "add a section." +- **Evidence:** user questions — (1) does a creator→evolver pointer skip Step 6 / does evolver cover + it; (2) does the evolver ever recommend a *new* skill. Verified against files: no new-skill path + existed, no cross-references existed. +- **Change:** + - **creator → evolver:** Step 6 now notes it covers immediate authoring tweaks, and points to + skill-evolver for continuous, evidence-based iteration after the skill is in use. + - **evolver → creator:** new **"Needs a new skill"** classification outcome (substantial + out-of-scope task, or splitting an over-budget skill doing two jobs) → recommend creating via + skill-creator. 
Added to the rubric table, §2 target-decision list, and bloat-control's prune + procedure (split vs. cram). +- **Decision:** keep the skills separate (different triggers, freedom levels, and the 1024-char + description ceiling) — integrate via lightweight cross-references, not a merge. +- **Validation:** both skills pass `quick_validate.py`; `skill-sizes` reports all within budget + (skill-creator 361, skill-evolver 112 lines). +- **Commit:** branch `skill-evolution/copilot-cli-active-capture` (rollback: `git revert <sha>`). + + ## 2026-06-16 — skill-evolver: trim description to clear self-flagged DESC_WARN - **Target:** skill-evolver → `.github/skills/skill-evolver/SKILL.md` (frontmatter `description`) diff --git a/.github/skills/skill-creator/SKILL.md b/.github/skills/skill-creator/SKILL.md index 421b205b..2d6e96f6 100644 --- a/.github/skills/skill-creator/SKILL.md +++ b/.github/skills/skill-creator/SKILL.md @@ -356,3 +356,5 @@ After testing the skill, users may request improvements. Often this happens righ 2. Notice struggles or inefficiencies 3. Identify how SKILL.md or bundled resources should be updated 4. Implement changes and test again + +This step covers immediate, in-the-moment tweaks while authoring. For **continuous, evidence-based iteration after the skill is in use** — capturing friction during real tasks, running retrospectives, and proposing reviewed edits — use the **skill-evolver** skill (`.github/skills/skill-evolver/SKILL.md`). diff --git a/.github/skills/skill-evolver/SKILL.md b/.github/skills/skill-evolver/SKILL.md index b4d8bdfa..313a3535 100644 --- a/.github/skills/skill-evolver/SKILL.md +++ b/.github/skills/skill-evolver/SKILL.md @@ -73,6 +73,7 @@ Run when asked to improve/evolve skills or review friction. - Single-skill defect → edit that skill's `SKILL.md` / `references/` / `scripts/`. - Cross-cutting lesson that applies to many skills → edit `.github/copilot-instructions.md` instead.
- Trigger miss → tune the skill's `description` frontmatter (the activation mechanism). + - **Needs a new skill** → when a substantial novel task fits no existing skill, or an over-budget skill is really doing two jobs and should be **split**, don't force-fit it. Recommend creating a new skill via the **skill-creator** skill (`.github/skills/skill-creator/SKILL.md`) and hand off. ## 3. Propose, review, apply diff --git a/.github/skills/skill-evolver/references/bloat-control.md b/.github/skills/skill-evolver/references/bloat-control.md index a03379a3..178041d7 100644 --- a/.github/skills/skill-evolver/references/bloat-control.md +++ b/.github/skills/skill-evolver/references/bloat-control.md @@ -33,6 +33,8 @@ additions: 3. **One-off `low`-severity notes** — caveats added for a single incident that never recurred (check the journal: if the signature appears once and is old, expire it). 4. **Contradictions** — a newer rule that supersedes an older one. Keep one, remove the other. +If a skill is over budget because it's covering **two distinct jobs**, pruning won't help — the right fix is to **split** it into a new skill (classify as "Needs a new skill" and hand off to `skill-creator`), not to keep cramming. + Propose prunes through the same review gate as any edit (lead with `Target:`, get approval, log it). A retrospective that removes a stale rule is as valuable as one that adds a needed rule. diff --git a/.github/skills/skill-evolver/references/classification-rubric.md b/.github/skills/skill-evolver/references/classification-rubric.md index 240e0ba2..46522ff6 100644 --- a/.github/skills/skill-evolver/references/classification-rubric.md +++ b/.github/skills/skill-evolver/references/classification-rubric.md @@ -11,7 +11,8 @@ with noise. For each recurring group from `journal-utils.js stats`, assign one r | **Global-convention gap** | The same lesson would apply to many skills/tasks (e.g. 
a repo-wide path move, a naming rule) | **Edit `copilot-instructions.md`**, not a single skill. | | **Model mistake** | The skill was correct; the agent misread or skipped it; one-off reasoning slip | **No edit.** Optionally tighten wording only if the instruction was genuinely easy to misread. | | **Environment issue** | Network/auth failure, missing local tool, transient flake, permissions | **No skill edit.** Note it; route to setup docs if recurring. | -| **Novel task** | Legitimately new scenario the skill never claimed to cover | **No edit** unless this scenario is now in-scope; then add a new section. | +| **Novel task** | Legitimately new scenario the skill never claimed to cover | **No edit** for a small one-off case; if the scenario is now in-scope, add a section. For a substantial out-of-scope task → **Needs a new skill** (below). | +| **Needs a new skill** | No existing skill fits a substantial task; or an over-budget skill is doing two distinct jobs and should be **split** | **Don't force-fit.** Recommend creating a new skill via the `skill-creator` skill, then hand off. Editing an unrelated skill here just causes bloat and trigger confusion. | ## Decision heuristics