diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 557f30b19..104ca29a7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -115,6 +115,9 @@ jobs: - name: Install dependencies run: pnpm install --frozen-lockfile + - name: Run Intent Coding workflow check + run: pnpm run agent:check + - name: Lint web UI run: pnpm run lint:web diff --git a/.gitignore b/.gitignore index 57f5184a9..e198c326a 100644 --- a/.gitignore +++ b/.gitignore @@ -64,6 +64,9 @@ tests/e2e/reports/ # BitFun sandbox data - auto managed .bitfun/ + +# Intent Coding runtime artifacts - created on demand by IntentCoding agent +.agent/ .cursor .cursor/rules/no-cargo.mdc .sisyphus/ diff --git a/package.json b/package.json index f3d534ce1..e11616c58 100644 --- a/package.json +++ b/package.json @@ -19,6 +19,10 @@ "lint:web:fix": "pnpm --dir src/web-ui run lint:fix", "i18n:audit": "node scripts/i18n-audit.mjs", "fmt:rs": "node scripts/format-changed-rust.mjs", + "agent:check": "node scripts/check-agent-workflow.mjs", + "agent:context-compile": "node scripts/intent-coding-context-compile.mjs", + "agent:provenance-record": "node scripts/intent-coding-provenance-record.mjs", + "agent:review-route": "node scripts/intent-coding-review-route.mjs", "prebuild": "pnpm run prebuild:web", "prebuild:web": "pnpm run copy-assets --silent && pnpm run generate-all --silent", "type-check:web": "pnpm --dir src/web-ui run type-check", diff --git a/scripts/check-agent-workflow.mjs b/scripts/check-agent-workflow.mjs new file mode 100644 index 000000000..9047b2089 --- /dev/null +++ b/scripts/check-agent-workflow.mjs @@ -0,0 +1,1181 @@ +#!/usr/bin/env node + +import fs from 'node:fs'; +import path from 'node:path'; + +const root = process.cwd(); +const agentDir = path.join(root, '.agent'); + +const requiredIntentSections = [ + 'Metadata', + 'Original User Request', + 'Agent Understanding', + 'In Scope', + 'Out of Scope', + 'Acceptance Criteria', + 'Accepted Checks', + 'Execution Contract', + 
'Metrics', +]; + +const requiredEvidenceSections = [ + 'Metadata', + 'Intent Record', + 'Summary', + 'Context Inputs', + 'Files Changed', + 'Verification', + 'Repair Loop', + 'Accepted Checks', + 'Provenance Chain', + 'Policy Gates', + 'Risks', + 'Human Review Focus', +]; + +const validRepairStatuses = new Set(['not_needed', 'repaired', 'blocked', 'deferred']); +const validRiskLevels = new Set(['L0', 'L1', 'L2', 'L3', 'L4']); +const validReviewRoutes = new Set(['deep_review', 'specialist_review', 'manual_review', 'skipped']); +const validReviewStatuses = new Set(['completed', 'skipped', 'blocked']); +const validReviewTriggers = new Set(['automatic', 'manual', 'not_available']); +const validPolicyGateStatuses = new Set([ + 'passed', + 'failed', + 'skipped', + 'blocked', + 'not_applicable', +]); +const validContextInputTypes = new Set([ + 'builtin_rule', + 'workspace_instruction', + 'module_doc', + 'source_file', + 'user_confirmation', + 'verification_guidance', + 'not_available', +]); +const validProvenanceStores = new Set([ + 'agent_artifact', + 'session_store', + 'external', + 'not_available', +]); +const riskRanks = new Map([ + ['L0', 0], + ['L1', 1], + ['L2', 2], + ['L3', 3], + ['L4', 4], +]); +const evidenceRiskSignals = [ + { + level: 'L4', + label: 'safety-critical security boundary', + pattern: + /\b(sandbox|privilege escalation|destructive filesystem|cryptography|crypto|keychain|secret|credential|token|private key)\b/i, + }, + { + level: 'L3', + label: 'critical product or security behavior', + pattern: + /\b(authentication|authorization|auth|permission|billing|payment|migration|data integrity|release signing|deployment|protocol parsing|encryption)\b/i, + }, + { + level: 'L2', + label: 'important shared runtime behavior', + pattern: + /\b(persistence|session|remote workspace|synchronization|sync|stream parsing|agent tool execution|cross-module|public api|data loss|concurrency)\b/i, + }, +]; +const ownershipRiskSignals = [ + { + level: 'L3', + label: 'agent 
tool ownership surface', + contains: 'src/crates/core/src/agentic/tools/', + }, + { + level: 'L3', + label: 'agent execution ownership surface', + contains: 'src/crates/core/src/agentic/execution/', + }, + { + level: 'L3', + label: 'AI adapter ownership surface', + contains: 'src/crates/ai-adapters/', + }, + { + level: 'L2', + label: 'core product logic ownership surface', + contains: 'src/crates/core/', + }, + { + level: 'L2', + label: 'desktop API ownership surface', + contains: 'src/apps/desktop/src/api/', + }, + { + level: 'L2', + label: 'transport/API ownership surface', + pattern: /src\/crates\/(transport|api-layer)\//, + }, +]; +const dependencyRiskSignals = [ + { + level: 'L2', + label: 'Rust dependency graph impact', + pattern: /(^|\/)(cargo\.toml|cargo\.lock)$/, + }, + { + level: 'L2', + label: 'frontend dependency graph impact', + pattern: /(^|\/)(package\.json|pnpm-lock\.yaml)$/, + }, + { + level: 'L2', + label: 'build configuration impact', + pattern: /(^|\/)(tsconfig[^/]*\.json|vite\.config\.[jt]s|rust-toolchain[^/]*)$/, + }, +]; + +let errorCount = 0; +let cachedPolicyConfig = null; +let cachedRiskMemory = null; + +function toPosixPath(value) { + return value.split(path.sep).join('/'); +} + +function rel(filePath) { + return toPosixPath(path.relative(root, filePath)); +} + +function reportError(message) { + errorCount += 1; + console.error(`[agent:check] ERROR ${message}`); +} + +function reportWarn(message) { + console.warn(`[agent:check] WARN ${message}`); +} + +function reportInfo(message) { + console.log(`[agent:check] ${message}`); +} + +function loadOptionalJsonConfig(relativePaths) { + for (const relativePath of relativePaths) { + const configPath = path.join(root, relativePath); + if (!fs.existsSync(configPath)) { + continue; + } + + try { + return { + path: relativePath, + value: JSON.parse(fs.readFileSync(configPath, 'utf8')), + }; + } catch (error) { + reportError(`Failed to parse ${relativePath}: ${error.message}`); + return { path: 
relativePath, value: {} }; + } + } + + return { path: null, value: {} }; +} + +function policyConfig() { + if (!cachedPolicyConfig) { + cachedPolicyConfig = loadOptionalJsonConfig([ + '.agent/policy.json', + '.bitfun/intent-coding-policy.json', + ]); + if (cachedPolicyConfig.path) { + reportInfo(`Loaded Intent Coding policy config from ${cachedPolicyConfig.path}`); + } + } + + return cachedPolicyConfig.value; +} + +function riskMemory() { + if (!cachedRiskMemory) { + cachedRiskMemory = loadOptionalJsonConfig([ + '.agent/risk-memory.json', + '.bitfun/intent-coding-risk-memory.json', + ]); + if (cachedRiskMemory.path) { + reportInfo(`Loaded Intent Coding risk memory from ${cachedRiskMemory.path}`); + } + } + + return cachedRiskMemory.value; +} + +function readMarkdown(filePath) { + try { + return fs.readFileSync(filePath, 'utf8'); + } catch (error) { + reportError(`Failed to read ${rel(filePath)}: ${error.message}`); + return ''; + } +} + +function listMarkdownFiles(dir) { + if (!fs.existsSync(dir)) { + return []; + } + + return fs + .readdirSync(dir, { withFileTypes: true }) + .filter((entry) => entry.isFile() && entry.name.endsWith('.md')) + .map((entry) => path.join(dir, entry.name)) + .sort(); +} + +function hasSection(markdown, sectionName) { + const escaped = sectionName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + return new RegExp(`^## ${escaped}\\s*$`, 'm').test(markdown); +} + +function sectionContent(markdown, sectionName) { + const sectionHeading = `## ${sectionName}`; + const lines = markdown.split(/\r?\n/); + const startIndex = lines.findIndex((line) => line.trim() === sectionHeading); + if (startIndex < 0) { + return ''; + } + + const contentLines = []; + for (let index = startIndex + 1; index < lines.length; index += 1) { + // Only treat sibling `##` headings as section terminators. The previous + // pattern matched `###` too, silently truncating sections that used + // nested subheadings (e.g. `## Repair Loop` followed by `### Attempts`). 
+ if (/^##(?!#)\s+/.test(lines[index])) { + break; + } + contentLines.push(lines[index]); + } + + return contentLines.join('\n').trim(); +} + +function validateSections(filePath, requiredSections) { + const markdown = readMarkdown(filePath); + for (const section of requiredSections) { + if (!hasSection(markdown, section)) { + reportError(`${rel(filePath)} is missing "## ${section}"`); + } + } + return markdown; +} + +function taskSlug(filePath, prefix) { + const basename = path.basename(filePath, '.md'); + return basename.startsWith(prefix) ? basename.slice(prefix.length) : null; +} + +function validateEvidenceIntentReference(filePath, markdown) { + // Prefer the Provenance Chain section so a stray mention elsewhere (e.g. + // inside Summary) can't satisfy the requirement; the whole file is searched only when that section is missing or empty. + const provenance = sectionContent(markdown, 'Provenance Chain'); + const searchText = provenance || markdown; + const match = searchText.match(/\.agent\/intents\/intent-[^\s`)]+\.md/); + if (!match) { + reportError( + `${rel(filePath)} does not reference an Intent Record path under "## Provenance Chain"`, + ); + return; + } + + if (!isInsideAgentSubdir(match[0], 'intents')) { + reportError(`${rel(filePath)} Intent Record reference escapes .agent/intents/: ${match[0]}`); + return; + } + + const intentPath = path.join(root, match[0]); + if (!fs.existsSync(intentPath)) { + reportError(`${rel(filePath)} references missing Intent Record ${match[0]}`); + } +} + +/** + * Resolve a repo-relative path and report whether it stays inside the + * `.agent/` subdirectory named by `subdir` (no `..` escape). + */ +function isInsideAgentSubdir(relPath, subdir) { + const base = path.resolve(root, '.agent', subdir); + const resolved = path.resolve(root, relPath); + const baseWithSep = base.endsWith(path.sep) ? 
base : base + path.sep; + return resolved === base || resolved.startsWith(baseWithSep); +} + +function isInsideSessionStoreRoot(relPath) { + const base = path.resolve(root, '.bitfun', 'sessions'); + const resolved = path.resolve(root, relPath); + const baseWithSep = base.endsWith(path.sep) ? base : base + path.sep; + return resolved === base || resolved.startsWith(baseWithSep); +} + +function acceptedCheckLineHasStatus(line) { + return /^\s*[-*]\s+(?:\[[ xX~-]\]|\[(?:passed|failed|skipped|blocked|not run|partial)\])\s+\S/i.test( + line, + ); +} + +function validateEvidenceAcceptedCheckStatuses(filePath, markdown) { + const content = sectionContent(markdown, 'Accepted Checks'); + if (!content) { + return; + } + + const checkLines = content + .split(/\r?\n/) + .map((line) => line.trimEnd()) + .filter((line) => /^\s*[-*]\s+/.test(line)); + + if (checkLines.length === 0) { + reportError( + `${rel(filePath)} "## Accepted Checks" must list at least one check with an explicit status`, + ); + return; + } + + for (const line of checkLines) { + if (!acceptedCheckLineHasStatus(line)) { + reportError( + `${rel(filePath)} Accepted Check must start with a status marker: ${line.trim()}`, + ); + } + } +} + +function validateEvidenceRepairLoop(filePath, markdown) { + const content = sectionContent(markdown, 'Repair Loop'); + if (!content) { + return; + } + + const attemptsMatch = content.match(/Repair attempts\s*:\s*(\d+)/i); + if (!attemptsMatch) { + reportError(`${rel(filePath)} "## Repair Loop" must include "Repair attempts: "`); + } + + const statusMatch = content.match(/Final repair status\s*:\s*([a-z_]+)/i); + if (!statusMatch) { + reportError(`${rel(filePath)} "## Repair Loop" must include "Final repair status: "`); + return; + } + + const status = statusMatch[1].toLowerCase(); + if (!validRepairStatuses.has(status)) { + reportError( + `${rel(filePath)} has invalid Final repair status "${status}". 
Expected one of: ${Array.from(validRepairStatuses).join(', ')}`, + ); + } +} + +function validateEvidenceContextInputs(filePath, markdown) { + const content = sectionContent(markdown, 'Context Inputs'); + if (!content) { + return; + } + + const contextLines = content + .split(/\r?\n/) + .map((line) => line.trim()) + .filter((line) => /^[-*]\s+/.test(line)); + + if (contextLines.length === 0) { + reportError(`${rel(filePath)} "## Context Inputs" must list at least one context input`); + return; + } + + for (const line of contextLines) { + // Accept colons in the reference itself (URLs, `file.md:42`, Windows + // paths). The lazy `(.+?)` splits at the FIRST colon followed by whitespace, so the reason may itself contain `: `. + const inputMatch = line.match(/^[-*]\s+\[([a-z_]+)\]\s+(.+?):\s+(.+)$/i); + if (!inputMatch) { + reportError( + `${rel(filePath)} Context Input must use "- [type] reference: reason": ${line}`, + ); + continue; + } + + const inputType = inputMatch[1].toLowerCase(); + const reference = inputMatch[2].trim(); + const reason = inputMatch[3].trim(); + + if (!validContextInputTypes.has(inputType)) { + reportError( + `${rel(filePath)} has invalid Context Input type "${inputType}". Expected one of: ${Array.from(validContextInputTypes).join(', ')}`, + ); + continue; + } + + if (inputType === 'not_available' && !/\breason\s*[:=]\s*\S/i.test(reason)) { + reportError( + `${rel(filePath)} not_available Context Input ${reference} must include "reason: "`, + ); + } + } +} + +function fieldValue(content, label) { + const escapedLabel = label.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + const match = content.match(new RegExp(`${escapedLabel}\\s*:\\s*(\\S+)`, 'i')); + return match ? 
match[1].trim() : null; +} + +function validateEvidenceProvenanceChain(filePath, markdown) { + const content = sectionContent(markdown, 'Provenance Chain'); + if (!content) { + return; + } + + let store = null; + const storeMatch = content.match(/Provenance store\s*:\s*([a-z_]+)/i); + if (!storeMatch) { + reportError( + `${rel(filePath)} "## Provenance Chain" must include "Provenance store: agent_artifact|session_store|external|not_available"`, + ); + } else { + store = storeMatch[1].toLowerCase(); + if (!validProvenanceStores.has(store)) { + reportError( + `${rel(filePath)} has invalid Provenance store "${store}". Expected one of: ${Array.from(validProvenanceStores).join(', ')}`, + ); + } + } + + const sessionId = fieldValue(content, 'Session id'); + const turnId = fieldValue(content, 'Turn id'); + for (const [label, value] of [ + ['Session id', sessionId], + ['Turn id', turnId], + ]) { + if (!value) { + reportError( + `${rel(filePath)} "## Provenance Chain" must include "${label}: "`, + ); + } + } + + const intentMatch = content.match(/Intent Record\s*:\s*(\.agent\/intents\/intent-[^\s`)]+\.md)/i); + if (!intentMatch) { + reportError( + `${rel(filePath)} "## Provenance Chain" must include "Intent Record: .agent/intents/intent-*.md"`, + ); + } + + const evidenceMatch = content.match( + /Evidence Package\s*:\s*(\.agent\/evidence\/evidence-[^\s`)]+\.md)/i, + ); + if (!evidenceMatch) { + reportError( + `${rel(filePath)} "## Provenance Chain" must include "Evidence Package: .agent/evidence/evidence-*.md"`, + ); + return; + } + + const declaredEvidencePath = toPosixPath(evidenceMatch[1]); + if (declaredEvidencePath !== rel(filePath)) { + reportError( + `${rel(filePath)} declares Evidence Package ${declaredEvidencePath}, but current file is ${rel(filePath)}`, + ); + } + + const provenanceRecord = fieldValue(content, 'Provenance record'); + if (store === 'session_store') { + if (sessionId === 'not_available' || turnId === 'not_available') { + reportError( + 
`${rel(filePath)} uses Provenance store session_store but Session id and Turn id must be concrete values`, + ); + } + + if (!provenanceRecord) { + reportError( + `${rel(filePath)} uses Provenance store session_store but is missing "Provenance record: .bitfun/sessions/...json"`, + ); + return; + } + + const normalizedRecord = toPosixPath(provenanceRecord); + if (!normalizedRecord.startsWith('.bitfun/sessions/') || !normalizedRecord.endsWith('.json')) { + reportError( + `${rel(filePath)} session_store Provenance record must be a .bitfun/sessions/...json path`, + ); + return; + } + + // Reject any `..` segments that could escape the sessions root — + // `.bitfun/sessions/../../etc/foo.json` would otherwise satisfy the + // startsWith check above and let the validator read arbitrary local files. + if (!isInsideSessionStoreRoot(normalizedRecord)) { + reportError( + `${rel(filePath)} session_store Provenance record escapes .bitfun/sessions/: ${normalizedRecord}`, + ); + return; + } + + const recordPath = path.join(root, normalizedRecord); + if (!fs.existsSync(recordPath)) { + reportWarn( + `${rel(filePath)} declares session_store Provenance record ${normalizedRecord}, but the file is not present in this workspace`, + ); + return; + } + + try { + const record = JSON.parse(fs.readFileSync(recordPath, 'utf8')); + if (record.session_id && record.session_id !== sessionId) { + reportError( + `${rel(filePath)} session id ${sessionId} does not match Provenance record session_id ${record.session_id}`, + ); + } + if (record.turn_id && record.turn_id !== turnId) { + reportError( + `${rel(filePath)} turn id ${turnId} does not match Provenance record turn_id ${record.turn_id}`, + ); + } + } catch (error) { + reportError( + `${rel(filePath)} failed to parse Provenance record ${normalizedRecord}: ${error.message}`, + ); + } + } + + if (store === 'external' && !provenanceRecord) { + reportError( + `${rel(filePath)} uses Provenance store external but is missing "Provenance record: "`, + 
); + } +} + +function changedFilesInclude(changedFiles, pattern) { + return changedFiles.some((changedFile) => pattern.test(toPosixPath(changedFile).toLowerCase())); +} + +function evidenceTextIncludes(markdown, pattern) { + return [ + sectionContent(markdown, 'Summary'), + sectionContent(markdown, 'Accepted Checks'), + sectionContent(markdown, 'Risks'), + sectionContent(markdown, 'Human Review Focus'), + ].some((content) => pattern.test(content.toLowerCase())); +} + +function requiredPolicyGatesForEvidence(markdown, riskLevel, changedFiles) { + const requiredGates = new Set(['scope', 'verification', 'security']); + + if (isHighRiskLevel(riskLevel)) { + requiredGates.add('risk_review'); + } + + if ( + changedFilesInclude( + changedFiles, + /(^|\/)(cargo\.toml|cargo\.lock|package\.json|pnpm-lock\.yaml)$/, + ) + ) { + requiredGates.add('dependencies'); + } + + if ( + changedFilesInclude(changedFiles, /src\/apps\/desktop\/|tauri|platform|adapter/) || + evidenceTextIncludes(markdown, /\b(platform|adapter|tauri|desktop-only)\b/) + ) { + requiredGates.add('platform_boundary'); + } + + if ( + changedFilesInclude(changedFiles, /remote|sync|transport|websocket/) || + evidenceTextIncludes(markdown, /\b(remote workspace|remote|sync|synchronization)\b/) + ) { + requiredGates.add('remote_compatibility'); + } + + const config = policyConfig(); + for (const gateId of Array.isArray(config.required_gates) ? config.required_gates : []) { + requiredGates.add(String(gateId)); + } + + const riskGates = config.risk_gates?.[riskLevel]; + for (const gateId of Array.isArray(riskGates) ? riskGates : []) { + requiredGates.add(String(gateId)); + } + + for (const rule of Array.isArray(config.path_gates) ? 
config.path_gates : []) { + if (!rule?.contains || !rule?.gate) { + continue; + } + if (changedFiles.some((changedFile) => toPosixPath(changedFile).includes(rule.contains))) { + requiredGates.add(String(rule.gate)); + } + } + + const evidenceText = [ + sectionContent(markdown, 'Summary'), + sectionContent(markdown, 'Accepted Checks'), + sectionContent(markdown, 'Risks'), + sectionContent(markdown, 'Human Review Focus'), + ] + .join('\n') + .toLowerCase(); + for (const rule of Array.isArray(config.text_gates) ? config.text_gates : []) { + if (!rule?.contains || !rule?.gate) { + continue; + } + if (evidenceText.includes(String(rule.contains).toLowerCase())) { + requiredGates.add(String(rule.gate)); + } + } + + return requiredGates; +} + +function validateEvidencePolicyGates(filePath, markdown, riskLevel, changedFiles) { + const content = sectionContent(markdown, 'Policy Gates'); + if (!content) { + return; + } + + const gateLines = content + .split(/\r?\n/) + .map((line) => line.trim()) + .filter((line) => /^[-*]\s+/.test(line)); + + if (gateLines.length === 0) { + reportError(`${rel(filePath)} "## Policy Gates" must list at least one gate`); + return; + } + + const gateIds = new Set(); + for (const line of gateLines) { + const gateMatch = line.match( + /^[-*]\s+\[([a-z_]+)\]\s+([a-z0-9_.-]+)\s*:\s*(.+)$/i, + ); + if (!gateMatch) { + reportError( + `${rel(filePath)} Policy Gate must use "- [status] gate_id: result": ${line}`, + ); + continue; + } + + const status = gateMatch[1].toLowerCase(); + const gateId = gateMatch[2]; + const result = gateMatch[3].trim(); + gateIds.add(gateId); + + if (!validPolicyGateStatuses.has(status)) { + reportError( + `${rel(filePath)} has invalid Policy Gate status "${status}" for ${gateId}. 
Expected one of: ${Array.from(validPolicyGateStatuses).join(', ')}`, + ); + continue; + } + + if (status === 'failed') { + reportError(`${rel(filePath)} Policy Gate ${gateId} failed: ${result}`); + } + + if ( + (status === 'skipped' || status === 'blocked') && + !/\breason\s*[:=]\s*\S/i.test(result) + ) { + reportError( + `${rel(filePath)} ${status} Policy Gate ${gateId} must include "reason: "`, + ); + } + } + + for (const gateId of requiredPolicyGatesForEvidence(markdown, riskLevel, changedFiles)) { + if (!gateIds.has(gateId)) { + reportError(`${rel(filePath)} is missing required Policy Gate ${gateId}`); + } + } +} + +function validateRiskLevelLine(filePath, markdown, sectionName, label) { + const content = sectionContent(markdown, sectionName); + if (!content) { + return null; + } + + const escapedLabel = label.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + const match = content.match(new RegExp(`${escapedLabel}\\s*:\\s*(L[0-4])\\b`, 'i')); + if (!match) { + reportError(`${rel(filePath)} "## ${sectionName}" must include "${label}: L0|L1|L2|L3|L4"`); + return null; + } + + const riskLevel = match[1].toUpperCase(); + if (!validRiskLevels.has(riskLevel)) { + reportError(`${rel(filePath)} has invalid ${label} "${riskLevel}"`); + return null; + } + + return riskLevel; +} + +function riskRank(riskLevel) { + return riskRanks.get(riskLevel) ?? -1; +} + +function maxRiskLevel(left, right) { + return riskRank(left) >= riskRank(right) ? 
left : right; +} + +function isHighRiskLevel(riskLevel) { + return riskLevel === 'L3' || riskLevel === 'L4'; +} + +function normalizeChangedFileLine(line) { + const withoutBullet = line.replace(/^\s*[-*]\s+/, '').trim(); + const backtickMatch = withoutBullet.match(/^`([^`]+)`/); + if (backtickMatch) { + return backtickMatch[1].trim(); + } + + return withoutBullet + .replace(/^\[[ xX~-]\]\s+/, '') + .replace(/^<([^>]+)>.*$/, '$1') + .replace(/\s+-\s+.*$/, '') + .replace(/\s+--\s+.*$/, '') + .replace(/[`:,]$/g, '') + .trim(); +} + +function extractEvidenceChangedFiles(markdown) { + const content = sectionContent(markdown, 'Files Changed'); + if (!content) { + return []; + } + + return content + .split(/\r?\n/) + .filter((line) => /^\s*[-*]\s+\S/.test(line)) + .map((line) => normalizeChangedFileLine(line)) + .filter(Boolean); +} + +function pathLooksLikeDocsOnly(normalizedPath) { + return ( + normalizedPath.endsWith('.md') || + normalizedPath.startsWith('docs/') || + normalizedPath.startsWith('.github/pull_request_template') + ); +} + +function suggestedRiskForPath(filePath) { + const normalizedPath = toPosixPath(filePath).toLowerCase(); + + if ( + /\b(sandbox|privilege|credential|secret|keychain|crypto|encrypt|destructive)\b/.test( + normalizedPath, + ) || + normalizedPath.includes('src/crates/tool-runtime/') || + normalizedPath.includes('src/crates/core/src/agentic/tools/restrictions') + ) { + return 'L4'; + } + + if ( + /\b(auth|authorization|permission|billing|migration|release|signing|deployment)\b/.test( + normalizedPath, + ) || + normalizedPath.startsWith('.github/workflows/') || + normalizedPath.includes('tauri.conf') || + normalizedPath.includes('src/crates/core/src/agentic/execution/') || + normalizedPath.includes('src/crates/core/src/agentic/tools/') || + normalizedPath.includes('src/crates/core/src/agentic/session/') || + normalizedPath.includes('src/crates/core/src/agentic/persistence/') || + normalizedPath.includes('src/crates/ai-adapters/') + ) { + 
return 'L3'; + } + + if ( + normalizedPath.includes('src/crates/core/') || + normalizedPath.includes('src/crates/transport/') || + normalizedPath.includes('src/crates/api-layer/') || + normalizedPath.includes('src/crates/services-core/') || + normalizedPath.includes('src/crates/services-integrations/') || + normalizedPath.includes('src/apps/desktop/src/api/') || + normalizedPath.includes('src/web-ui/src/flow_chat/services/') || + normalizedPath.includes('src/web-ui/src/flow_chat/store/') || + normalizedPath.includes('src/web-ui/src/infrastructure/api/') || + /\b(remote|sync|session|persistence)\b/.test(normalizedPath) + ) { + return 'L2'; + } + + if (pathLooksLikeDocsOnly(normalizedPath)) { + return 'L0'; + } + + return 'L1'; +} + +function suggestRiskForChangedFiles(changedFiles) { + if (changedFiles.length === 0) { + return null; + } + + return changedFiles.reduce( + (suggestedRisk, changedFile) => maxRiskLevel(suggestedRisk, suggestedRiskForPath(changedFile)), + 'L0', + ); +} + +function suggestRiskForEvidenceText(markdown) { + const text = [ + sectionContent(markdown, 'Summary'), + sectionContent(markdown, 'Accepted Checks'), + sectionContent(markdown, 'Policy Gates'), + sectionContent(markdown, 'Risks'), + sectionContent(markdown, 'Human Review Focus'), + ] + .join('\n') + .toLowerCase(); + + if (!text.trim()) { + return null; + } + + const matches = []; + let suggestedRiskLevel = 'L0'; + for (const signal of evidenceRiskSignals) { + if (signal.pattern.test(text)) { + suggestedRiskLevel = maxRiskLevel(suggestedRiskLevel, signal.level); + matches.push(`${signal.level}:${signal.label}`); + } + } + + if (matches.length === 0) { + return null; + } + + return { level: suggestedRiskLevel, matches }; +} + +function suggestRiskFromSignals(changedFiles, signals) { + const matches = []; + let suggestedRiskLevel = 'L0'; + for (const changedFile of changedFiles) { + const normalizedPath = toPosixPath(changedFile).toLowerCase(); + for (const signal of signals) { + const 
matched = + (signal.contains && normalizedPath.includes(signal.contains)) || + (signal.pattern && signal.pattern.test(normalizedPath)); + if (!matched) { + continue; + } + suggestedRiskLevel = maxRiskLevel(suggestedRiskLevel, signal.level); + matches.push(`${signal.level}:${signal.label}`); + } + } + + if (matches.length === 0) { + return null; + } + + return { level: suggestedRiskLevel, matches }; +} + +function suggestRiskFromRecentIncidents(markdown, changedFiles) { + const memory = riskMemory(); + const incidents = Array.isArray(memory.recent_incidents) ? memory.recent_incidents : []; + if (incidents.length === 0) { + return null; + } + + const evidenceText = [ + sectionContent(markdown, 'Summary'), + sectionContent(markdown, 'Accepted Checks'), + sectionContent(markdown, 'Risks'), + sectionContent(markdown, 'Human Review Focus'), + ] + .join('\n') + .toLowerCase(); + const matches = []; + let suggestedRiskLevel = 'L0'; + + for (const incident of incidents) { + const level = validRiskLevels.has(String(incident.level).toUpperCase()) + ? String(incident.level).toUpperCase() + : 'L2'; + const label = incident.label ? String(incident.label) : 'recent incident'; + const pathContains = incident.path_contains ? String(incident.path_contains) : null; + const textContains = incident.text_contains ? 
String(incident.text_contains).toLowerCase() : null; + const pathMatched = + pathContains && + changedFiles.some((changedFile) => toPosixPath(changedFile).includes(pathContains)); + const textMatched = textContains && evidenceText.includes(textContains); + + if (!pathMatched && !textMatched) { + continue; + } + + suggestedRiskLevel = maxRiskLevel(suggestedRiskLevel, level); + matches.push(`${level}:${label}`); + } + + if (matches.length === 0) { + return null; + } + + return { level: suggestedRiskLevel, matches }; +} + +function reportChangedFileRiskSuggestion(filePath, markdown, recordedRiskLevel) { + const changedFiles = extractEvidenceChangedFiles(markdown); + const changedFileRiskLevel = suggestRiskForChangedFiles(changedFiles); + const evidenceTextSuggestion = suggestRiskForEvidenceText(markdown); + const ownershipSuggestion = suggestRiskFromSignals(changedFiles, ownershipRiskSignals); + const dependencySuggestion = suggestRiskFromSignals(changedFiles, dependencyRiskSignals); + const recentIncidentSuggestion = suggestRiskFromRecentIncidents(markdown, changedFiles); + const suggestedRiskLevel = maxRiskLevel( + maxRiskLevel(changedFileRiskLevel ?? 'L0', evidenceTextSuggestion?.level ?? 'L0'), + maxRiskLevel( + maxRiskLevel(ownershipSuggestion?.level ?? 'L0', dependencySuggestion?.level ?? 'L0'), + recentIncidentSuggestion?.level ?? 
'L0', + ), + ); + if ( + !changedFileRiskLevel && + !evidenceTextSuggestion && + !ownershipSuggestion && + !dependencySuggestion && + !recentIncidentSuggestion + ) { + return; + } + + const sources = []; + if (changedFileRiskLevel) { + sources.push(`${changedFileRiskLevel} from ${changedFiles.length} changed file(s)`); + } + if (evidenceTextSuggestion) { + sources.push( + `${evidenceTextSuggestion.level} from evidence text (${evidenceTextSuggestion.matches.join(', ')})`, + ); + } + if (ownershipSuggestion) { + sources.push( + `${ownershipSuggestion.level} from ownership surface (${ownershipSuggestion.matches.join(', ')})`, + ); + } + if (dependencySuggestion) { + sources.push( + `${dependencySuggestion.level} from dependency impact (${dependencySuggestion.matches.join(', ')})`, + ); + } + if (recentIncidentSuggestion) { + sources.push( + `${recentIncidentSuggestion.level} from recent incident memory (${recentIncidentSuggestion.matches.join(', ')})`, + ); + } + + reportInfo( + `${rel(filePath)} evidence-aware risk suggestion: ${suggestedRiskLevel}; ${sources.join('; ')}`, + ); + + if (recordedRiskLevel && riskRank(recordedRiskLevel) < riskRank(suggestedRiskLevel)) { + reportWarn( + `${rel(filePath)} records ${recordedRiskLevel}, but evidence suggests ${suggestedRiskLevel}; raise the risk level or document why it is intentionally lower`, + ); + } +} + +function validateHighRiskIntentReviewEscalation(filePath, markdown, riskLevel) { + if (!isHighRiskLevel(riskLevel)) { + return; + } + + const metadata = sectionContent(markdown, 'Metadata'); + const routeMatch = metadata.match(/Review escalation\s*:\s*([a-z_]+)/i); + if (!routeMatch) { + reportError( + `${rel(filePath)} L3/L4 Intent Record must include "Review escalation: " in "## Metadata"`, + ); + return; + } + + const route = routeMatch[1].toLowerCase(); + if (!validReviewRoutes.has(route)) { + reportError( + `${rel(filePath)} has invalid Review escalation "${route}". 
Expected one of: ${Array.from(validReviewRoutes).join(', ')}`, + ); + } + + if (route === 'skipped' && !/Review escalation reason\s*:\s*\S/i.test(metadata)) { + reportError( + `${rel(filePath)} skipped L3/L4 review escalation must include "Review escalation reason: " in "## Metadata"`, + ); + } +} + +function validateHighRiskEvidenceReviewEscalation(filePath, markdown, riskLevel) { + if (!isHighRiskLevel(riskLevel)) { + return; + } + + const risks = sectionContent(markdown, 'Risks'); + let route = null; + const routeMatch = risks.match(/Review route\s*:\s*([a-z_]+)/i); + if (!routeMatch) { + reportError( + `${rel(filePath)} L3/L4 Evidence Package must include "Review route: " in "## Risks"`, + ); + } else { + route = routeMatch[1].toLowerCase(); + if (!validReviewRoutes.has(route)) { + reportError( + `${rel(filePath)} has invalid Review route "${route}". Expected one of: ${Array.from(validReviewRoutes).join(', ')}`, + ); + } + } + + let trigger = null; + const triggerMatch = risks.match(/Review trigger\s*:\s*([a-z_]+)/i); + if (!triggerMatch) { + reportError( + `${rel(filePath)} L3/L4 Evidence Package must include "Review trigger: automatic|manual|not_available" in "## Risks"`, + ); + } else { + trigger = triggerMatch[1].toLowerCase(); + if (!validReviewTriggers.has(trigger)) { + reportError( + `${rel(filePath)} has invalid Review trigger "${trigger}". Expected one of: ${Array.from(validReviewTriggers).join(', ')}`, + ); + } + } + + const statusMatch = risks.match(/Review escalation status\s*:\s*([a-z_]+)/i); + if (!statusMatch) { + reportError( + `${rel(filePath)} L3/L4 Evidence Package must include "Review escalation status: " in "## Risks"`, + ); + return; + } + + const status = statusMatch[1].toLowerCase(); + if (!validReviewStatuses.has(status)) { + reportError( + `${rel(filePath)} has invalid Review escalation status "${status}". 
Expected one of: ${Array.from(validReviewStatuses).join(', ')}`, + ); + } + + if (route === 'skipped' && status !== 'skipped') { + reportError( + `${rel(filePath)} uses Review route skipped but Review escalation status is ${status}; expected skipped`, + ); + } + + if (route === 'skipped' && trigger === 'automatic') { + reportError(`${rel(filePath)} uses Review route skipped but Review trigger is automatic`); + } + + if ((route === 'deep_review' || route === 'specialist_review') && trigger === 'not_available') { + reportWarn( + `${rel(filePath)} selected ${route} but trigger is not_available; wire this route to a review trigger when the integration is available`, + ); + } + + if ((route === 'deep_review' || route === 'specialist_review') && trigger === 'manual') { + reportInfo( + `${rel(filePath)} selected ${route} with manual trigger; run the selected review route before merge when practical`, + ); + } + + if ( + (status === 'skipped' || status === 'blocked') && + !/Review escalation reason\s*:\s*\S/i.test(risks) + ) { + reportError( + `${rel(filePath)} ${status} L3/L4 review escalation must include "Review escalation reason: " in "## Risks"`, + ); + } +} + +function main() { + // .agent is a runtime artifact directory created by the IntentCoding agent. + // Its absence is not an error — just means no active Intent Coding task. 
+ if (!fs.existsSync(agentDir)) { + reportInfo('.agent directory not found — no active Intent Coding task.'); + process.exit(0); + } + + const intentFiles = listMarkdownFiles(path.join(agentDir, 'intents')); + const evidenceFiles = listMarkdownFiles(path.join(agentDir, 'evidence')); + + if (intentFiles.length === 0 && evidenceFiles.length === 0) { + reportInfo('No active Intent Records or Evidence Packages.'); + process.exit(0); + } + + if (intentFiles.length === 0) { + reportError('.agent/intents has no Intent Records but .agent/evidence has Evidence Packages'); + } + if (evidenceFiles.length === 0) { + // Intent Record exists without Evidence Package — normal during active work. + reportWarn('.agent/evidence has no Evidence Packages yet — task may still be in progress'); + } + + const intentSlugs = new Set(); + for (const file of intentFiles) { + const slug = taskSlug(file, 'intent-'); + if (!slug) { + reportError(`${rel(file)} must be named intent-*.md`); + continue; + } + intentSlugs.add(slug); + const markdown = validateSections(file, requiredIntentSections); + const riskLevel = validateRiskLevelLine(file, markdown, 'Metadata', 'Risk level'); + validateHighRiskIntentReviewEscalation(file, markdown, riskLevel); + } + + const evidenceSlugs = new Set(); + for (const file of evidenceFiles) { + const slug = taskSlug(file, 'evidence-'); + if (!slug) { + reportError(`${rel(file)} must be named evidence-*.md`); + continue; + } + evidenceSlugs.add(slug); + const markdown = validateSections(file, requiredEvidenceSections); + validateEvidenceIntentReference(file, markdown); + validateEvidenceContextInputs(file, markdown); + validateEvidenceAcceptedCheckStatuses(file, markdown); + validateEvidenceRepairLoop(file, markdown); + validateEvidenceProvenanceChain(file, markdown); + const riskLevel = validateRiskLevelLine(file, markdown, 'Risks', 'Final risk level'); + const changedFiles = extractEvidenceChangedFiles(markdown); + validateEvidencePolicyGates(file, markdown, 
riskLevel, changedFiles); + validateHighRiskEvidenceReviewEscalation(file, markdown, riskLevel); + reportChangedFileRiskSuggestion(file, markdown, riskLevel); + } + + for (const slug of intentSlugs) { + if (!evidenceSlugs.has(slug)) { + // Intent without matching Evidence is expected during active work. + reportWarn(`Evidence Package not yet written for intent-${slug}.md`); + } + } + + for (const slug of evidenceSlugs) { + if (!intentSlugs.has(slug)) { + reportError(`Missing Intent Record for evidence-${slug}.md`); + } + } + + if (errorCount > 0) { + console.error(`[agent:check] Failed with ${errorCount} error(s).`); + process.exit(1); + } + + reportInfo( + `Passed: ${intentFiles.length} Intent Record(s), ${evidenceFiles.length} Evidence Package(s).`, + ); +} + +main(); diff --git a/scripts/intent-coding-context-compile.mjs b/scripts/intent-coding-context-compile.mjs new file mode 100644 index 000000000..7e19fbf5e --- /dev/null +++ b/scripts/intent-coding-context-compile.mjs @@ -0,0 +1,101 @@ +#!/usr/bin/env node + +import fs from 'node:fs'; +import path from 'node:path'; + +const root = process.cwd(); + +function sectionContent(markdown, sectionName) { + const sectionHeading = `## ${sectionName}`; + const lines = markdown.split(/\r?\n/); + const startIndex = lines.findIndex((line) => line.trim() === sectionHeading); + if (startIndex < 0) { + return ''; + } + + const contentLines = []; + for (let index = startIndex + 1; index < lines.length; index += 1) { + if (/^##\s+/.test(lines[index])) { + break; + } + contentLines.push(lines[index]); + } + + return contentLines.join('\n').trim(); +} + +function argValue(name) { + const index = process.argv.indexOf(name); + return index >= 0 ? 
process.argv[index + 1] : null; +} + +function changedFiles(markdown) { + return sectionContent(markdown, 'Files Changed') + .split(/\r?\n/) + .map((line) => line.trim()) + .filter((line) => /^[-*]\s+\S/.test(line)) + .map((line) => line.replace(/^[-*]\s+/, '').replace(/^`([^`]+)`.*$/, '$1').trim()) + .filter(Boolean); +} + +function nearestAgentDocs(filePath) { + const docs = []; + let currentDir = path.dirname(path.resolve(root, filePath)); + while (currentDir.startsWith(root)) { + for (const name of ['AGENTS.md', 'AGENTS-CN.md']) { + const candidate = path.join(currentDir, name); + if (fs.existsSync(candidate)) { + docs.push(path.relative(root, candidate).split(path.sep).join('/')); + } + } + const nextDir = path.dirname(currentDir); + if (nextDir === currentDir) { + break; + } + currentDir = nextDir; + } + return docs; +} + +function addLine(lines, type, reference, reason) { + lines.add(`- [${type}] ${reference}: ${reason}`); +} + +function main() { + const evidenceArg = argValue('--evidence'); + if (!evidenceArg) { + throw new Error('Pass --evidence '); + } + + const evidencePath = path.resolve(root, evidenceArg); + const markdown = fs.readFileSync(evidencePath, 'utf8'); + const files = changedFiles(markdown); + const lines = new Set(); + + addLine(lines, 'builtin_rule', 'intent_coding_rules/context-compiler.md', 'context input generation'); + addLine(lines, 'builtin_rule', 'intent_coding_rules/risk-classification.md', 'risk-sensitive context selection'); + + if (fs.existsSync(path.join(root, 'AGENTS.md'))) { + addLine(lines, 'workspace_instruction', 'AGENTS.md', 'repository workflow guidance'); + } + + for (const file of files) { + addLine(lines, 'source_file', file, 'changed file'); + for (const doc of nearestAgentDocs(file)) { + addLine(lines, doc.endsWith('/AGENTS.md') || doc.endsWith('/AGENTS-CN.md') ? 
'module_doc' : 'workspace_instruction', doc, 'nearest instruction for changed file'); + } + } + + if (lines.size === 0) { + addLine(lines, 'not_available', 'context_inputs', 'reason: no changed files or workspace instructions found'); + } + + console.log(Array.from(lines).join('\n')); +} + +try { + main(); +} catch (error) { + console.error(`[agent:context-compile] ERROR ${error.message}`); + process.exit(1); +} diff --git a/scripts/intent-coding-provenance-record.mjs b/scripts/intent-coding-provenance-record.mjs new file mode 100644 index 000000000..68820b9ab --- /dev/null +++ b/scripts/intent-coding-provenance-record.mjs @@ -0,0 +1,120 @@ +#!/usr/bin/env node + +import fs from 'node:fs'; +import path from 'node:path'; + +const root = process.cwd(); + +function sectionContent(markdown, sectionName) { + const sectionHeading = `## ${sectionName}`; + const lines = markdown.split(/\r?\n/); + const startIndex = lines.findIndex((line) => line.trim() === sectionHeading); + if (startIndex < 0) { + return ''; + } + + const contentLines = []; + for (let index = startIndex + 1; index < lines.length; index += 1) { + if (/^##(?!#)\s+/.test(lines[index])) { + break; + } + contentLines.push(lines[index]); + } + + return contentLines.join('\n').trim(); +} + +const SAFE_ID_PATTERN = /^[A-Za-z0-9_.-]+$/; + +function assertSafeId(label, value) { + if (!SAFE_ID_PATTERN.test(value)) { + throw new Error( + `${label} must match ${SAFE_ID_PATTERN}; got ${JSON.stringify(value)}`, + ); + } +} + +function assertInsideSessionStore(resolvedPath) { + const base = path.resolve(root, '.bitfun', 'sessions'); + const baseWithSep = base.endsWith(path.sep) ? 
base : base + path.sep; + if (resolvedPath !== base && !resolvedPath.startsWith(baseWithSep)) { + throw new Error(`Resolved path ${resolvedPath} escapes ${base}`); + } +} + +function fieldValue(content, label) { + const escapedLabel = label.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + const match = content.match(new RegExp(`${escapedLabel}\\s*:\\s*(\\S+)`, 'i')); + return match ? match[1].trim() : null; +} + +function argValue(name) { + const index = process.argv.indexOf(name); + return index >= 0 ? process.argv[index + 1] : null; +} + +function listItems(content) { + return content + .split(/\r?\n/) + .map((line) => line.trim()) + .filter((line) => /^[-*]\s+\S/.test(line)) + .map((line) => line.replace(/^[-*]\s+/, '').trim()); +} + +function main() { + const evidenceArg = argValue('--evidence'); + if (!evidenceArg) { + throw new Error('Pass --evidence '); + } + + const evidencePath = path.resolve(root, evidenceArg); + const markdown = fs.readFileSync(evidencePath, 'utf8'); + const provenance = sectionContent(markdown, 'Provenance Chain'); + const sessionId = argValue('--session-id') ?? fieldValue(provenance, 'Session id'); + const turnId = argValue('--turn-id') ?? fieldValue(provenance, 'Turn id'); + + if (!sessionId || sessionId === 'not_available') { + throw new Error('A concrete session id is required. Pass --session-id .'); + } + if (!turnId || turnId === 'not_available') { + throw new Error('A concrete turn id is required. 
Pass --turn-id .'); + } + assertSafeId('Session id', sessionId); + assertSafeId('Turn id', turnId); + + const recordPath = path.resolve( + root, + '.bitfun', + 'sessions', + sessionId, + 'intent-coding', + `provenance-${turnId}.json`, + ); + assertInsideSessionStore(recordPath); + + const record = { + schema_version: 1, + session_id: sessionId, + turn_id: turnId, + evidence_package: path.relative(root, evidencePath).split(path.sep).join('/'), + intent_record: fieldValue(provenance, 'Intent Record'), + context_inputs: listItems(sectionContent(markdown, 'Context Inputs')), + files_changed: listItems(sectionContent(markdown, 'Files Changed')), + accepted_checks: listItems(sectionContent(markdown, 'Accepted Checks')), + policy_gates: listItems(sectionContent(markdown, 'Policy Gates')), + verification: listItems(sectionContent(markdown, 'Verification')), + risks: sectionContent(markdown, 'Risks'), + human_review_focus: listItems(sectionContent(markdown, 'Human Review Focus')), + }; + + fs.mkdirSync(path.dirname(recordPath), { recursive: true }); + fs.writeFileSync(recordPath, `${JSON.stringify(record, null, 2)}\n`); + console.log(path.relative(root, recordPath).split(path.sep).join('/')); +} + +try { + main(); +} catch (error) { + console.error(`[agent:provenance-record] ERROR ${error.message}`); + process.exit(1); +} diff --git a/scripts/intent-coding-review-route.mjs b/scripts/intent-coding-review-route.mjs new file mode 100644 index 000000000..e6fae4312 --- /dev/null +++ b/scripts/intent-coding-review-route.mjs @@ -0,0 +1,117 @@ +#!/usr/bin/env node + +import fs from 'node:fs'; +import path from 'node:path'; + +const root = process.cwd(); + +function toPosixPath(value) { + return value.split(path.sep).join('/'); +} + +function sectionContent(markdown, sectionName) { + const sectionHeading = `## ${sectionName}`; + const lines = markdown.split(/\r?\n/); + const startIndex = lines.findIndex((line) => line.trim() === sectionHeading); + if (startIndex < 0) { + return ''; 
+ } + + const contentLines = []; + for (let index = startIndex + 1; index < lines.length; index += 1) { + if (/^##\s+/.test(lines[index])) { + break; + } + contentLines.push(lines[index]); + } + + return contentLines.join('\n').trim(); +} + +function fieldValue(content, label) { + const escapedLabel = label.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + const match = content.match(new RegExp(`${escapedLabel}\\s*:\\s*(\\S+)`, 'i')); + return match ? match[1].trim() : null; +} + +function listEvidenceFiles() { + const evidenceDir = path.join(root, '.agent/evidence'); + if (!fs.existsSync(evidenceDir)) { + return []; + } + + return fs + .readdirSync(evidenceDir, { withFileTypes: true }) + .filter((entry) => entry.isFile() && entry.name.endsWith('.md')) + .map((entry) => path.join(evidenceDir, entry.name)) + .sort(); +} + +function evidencePathFromArgs() { + const evidenceIndex = process.argv.indexOf('--evidence'); + if (evidenceIndex >= 0 && process.argv[evidenceIndex + 1]) { + return path.resolve(root, process.argv[evidenceIndex + 1]); + } + + const evidenceFiles = listEvidenceFiles(); + if (evidenceFiles.length === 1) { + return evidenceFiles[0]; + } + + if (evidenceFiles.length > 1) { + throw new Error('Multiple Evidence Packages found. Pass --evidence .'); + } + + throw new Error('No Evidence Package found. Pass --evidence .'); +} + +function listChangedFiles(markdown) { + return sectionContent(markdown, 'Files Changed') + .split(/\r?\n/) + .map((line) => line.trim()) + .filter((line) => /^[-*]\s+\S/.test(line)) + .map((line) => line.replace(/^[-*]\s+/, '').replace(/^`([^`]+)`.*$/, '$1').trim()) + .filter(Boolean); +} + +function main() { + const evidencePath = evidencePathFromArgs(); + const markdown = fs.readFileSync(evidencePath, 'utf8'); + const risks = sectionContent(markdown, 'Risks'); + const route = fieldValue(risks, 'Review route') ?? 'not_available'; + const trigger = fieldValue(risks, 'Review trigger') ?? 
'not_available'; + const status = fieldValue(risks, 'Review escalation status') ?? 'not_available'; + const changedFiles = listChangedFiles(markdown); + + const plan = { + evidence_package: toPosixPath(path.relative(root, evidencePath)), + review_route: route, + review_trigger: trigger, + review_status: status, + changed_files: changedFiles, + next_action: null, + }; + + if (route === 'deep_review') { + plan.next_action = + 'Open BitFun Deep Review for the listed changed files and record the result in Review escalation status.'; + } else if (route === 'specialist_review') { + plan.next_action = + 'Route the listed changed files to the named specialist review path and record the result in Review escalation status.'; + } else if (route === 'manual_review') { + plan.next_action = 'Complete manual human review and record the result in Review escalation status.'; + } else if (route === 'skipped') { + plan.next_action = 'No review trigger should run because the route is skipped.'; + } else { + plan.next_action = 'No supported review route was found.'; + } + + console.log(JSON.stringify(plan, null, 2)); +} + +try { + main(); +} catch (error) { + console.error(`[agent:review-route] ERROR ${error.message}`); + process.exit(1); +} diff --git a/src/apps/desktop/src/api/agentic_api.rs b/src/apps/desktop/src/api/agentic_api.rs index 8a1b632a1..0fc5530c1 100644 --- a/src/apps/desktop/src/api/agentic_api.rs +++ b/src/apps/desktop/src/api/agentic_api.rs @@ -14,8 +14,8 @@ use bitfun_core::agentic::coordination::{ }; use bitfun_core::agentic::core::*; use bitfun_core::agentic::deep_review_policy::{ - apply_deep_review_queue_control, default_review_team_definition, DeepReviewQueueControlAction, - ReviewTeamDefinition, + DeepReviewQueueControlAction, ReviewTeamDefinition, apply_deep_review_queue_control, + default_review_team_definition, }; use bitfun_core::agentic::image_analysis::ImageContextData; use bitfun_core::agentic::tools::image_context::get_image_context; @@ -63,6 +63,8 @@ 
pub struct SessionConfigDTO { pub remote_connection_id: Option, #[serde(default)] pub remote_ssh_host: Option, + #[serde(default)] + pub enable_intent_tracking: Option, } #[derive(Debug, Serialize)] @@ -585,6 +587,7 @@ pub async fn create_session( remote_connection_id: remote_conn.clone(), remote_ssh_host: remote_ssh_host.clone(), model_id: c.model_name, + enable_intent_tracking: c.enable_intent_tracking.unwrap_or(false), }) .unwrap_or(SessionConfig { workspace_path: Some(request.workspace_path.clone()), @@ -1706,6 +1709,8 @@ mod tests { end_time: Some(2), duration_ms: Some(1), status: TurnStatus::Completed, + intent_assignments: vec![], + intent_evidence: None, }; let stats = restore_turn_payload_stats(&[turn]); @@ -1768,6 +1773,8 @@ mod tests { end_time: Some(2), duration_ms: Some(1), status: TurnStatus::Completed, + intent_assignments: vec![], + intent_evidence: None, }]; omit_assistant_only_tool_results_for_session_view(&mut turns); @@ -1826,6 +1833,8 @@ mod tests { end_time: Some(2), duration_ms: Some(1), status: TurnStatus::Completed, + intent_assignments: vec![], + intent_evidence: None, }]; omit_assistant_only_tool_results_for_session_view(&mut turns); diff --git a/src/crates/core/src/agentic/agents/definitions/modes/intent_coding.rs b/src/crates/core/src/agentic/agents/definitions/modes/intent_coding.rs new file mode 100644 index 000000000..132792c5b --- /dev/null +++ b/src/crates/core/src/agentic/agents/definitions/modes/intent_coding.rs @@ -0,0 +1,233 @@ +//! 
Intent Coding Mode + +use crate::agentic::agents::{ + get_embedded_prompt, shared_coding_mode_tools, Agent, PromptBuilder, PromptBuilderContext, + UserContextPolicy, +}; +use crate::util::errors::*; +use async_trait::async_trait; +use std::sync::OnceLock; + +const INTENT_CODING_MODE_PROMPT_TEMPLATE: &str = "intent_coding_mode"; + +struct EmbeddedRule { + name: &'static str, + purpose: &'static str, + content: &'static str, +} + +// Embedded rules loaded from prompts/intent_coding_rules/ +const EMBEDDED_RULES: &[EmbeddedRule] = &[ + EmbeddedRule { + name: "context-compiler", + purpose: "declare which durable context inputs are loaded and how task-local context should override them", + content: include_str!("../../prompts/intent_coding_rules/context-compiler.md"), + }, + EmbeddedRule { + name: "accepted-checks", + purpose: "turn aligned intent into accepted checks or tests before implementation", + content: include_str!("../../prompts/intent_coding_rules/accepted-checks.md"), + }, + EmbeddedRule { + name: "architecture", + purpose: "keep changes inside BitFun architecture and platform-boundary guardrails", + content: include_str!("../../prompts/intent_coding_rules/architecture.md"), + }, + EmbeddedRule { + name: "coding-style", + purpose: "preserve local coding style and scoped implementation behavior", + content: include_str!("../../prompts/intent_coding_rules/coding-style.md"), + }, + EmbeddedRule { + name: "error-classification", + purpose: "classify verification failures before repair attempts", + content: include_str!("../../prompts/intent_coding_rules/error-classification.md"), + }, + EmbeddedRule { + name: "provenance-chain", + purpose: "preserve request-to-delivery provenance anchors for review", + content: include_str!("../../prompts/intent_coding_rules/provenance-chain.md"), + }, + EmbeddedRule { + name: "policy-gates", + purpose: "record lightweight governance gates before delivery", + content: 
include_str!("../../prompts/intent_coding_rules/policy-gates.md"), + }, + EmbeddedRule { + name: "risk-classification", + purpose: "classify task risk and require escalation markers for high-risk work", + content: include_str!("../../prompts/intent_coding_rules/risk-classification.md"), + }, + EmbeddedRule { + name: "security", + purpose: "apply defensive security and sensitive-data constraints", + content: include_str!("../../prompts/intent_coding_rules/security.md"), + }, + EmbeddedRule { + name: "workflow-check", + purpose: "run and interpret the local Intent/Evidence structural checker", + content: include_str!("../../prompts/intent_coding_rules/workflow-check.md"), + }, +]; + +pub struct IntentCodingMode { + default_tools: Vec, +} + +impl Default for IntentCodingMode { + fn default() -> Self { + Self::new() + } +} + +impl IntentCodingMode { + pub fn new() -> Self { + let mut default_tools = shared_coding_mode_tools(); + default_tools.push("CreatePlan".to_string()); + Self { default_tools } + } +} + +#[async_trait] +impl Agent for IntentCodingMode { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn id(&self) -> &str { + "IntentCoding" + } + + fn name(&self) -> &str { + "Intent Coding" + } + + fn description(&self) -> &str { + "Intent-aligned coding mode that clarifies requirements, records acceptance checks, verifies changes, and delivers evidence" + } + + fn prompt_template_name(&self, _model_name: Option<&str>) -> &str { + INTENT_CODING_MODE_PROMPT_TEMPLATE + } + + fn default_tools(&self) -> Vec { + self.default_tools.clone() + } + + fn user_context_policy(&self) -> UserContextPolicy { + UserContextPolicy::empty() + .with_workspace_context() + .with_workspace_instructions() + .with_workspace_memory_files() + .with_project_layout() + } + + async fn build_prompt(&self, context: &PromptBuilderContext) -> BitFunResult { + let prompt_components = PromptBuilder::new(context.clone()); + let system_prompt_template = 
get_embedded_prompt(INTENT_CODING_MODE_PROMPT_TEMPLATE) + .ok_or_else(|| { + BitFunError::Agent(format!( + "{} not found in embedded files", + INTENT_CODING_MODE_PROMPT_TEMPLATE + )) + })?; + + let mut prompt = prompt_components + .build_prompt_from_template(system_prompt_template) + .await?; + + // Inject embedded Intent Coding rules as a context section. The rules + // section is rendered once per process — concatenating ~10 include_str! + // blocks per dialog turn was wasted work. + if !prompt.is_empty() { + prompt.push_str("\n\n"); + } + prompt.push_str(rendered_rules_section()); + + Ok(prompt) + } + + fn is_readonly(&self) -> bool { + false + } +} + +fn rendered_rules_section() -> &'static str { + static CACHED: OnceLock = OnceLock::new(); + CACHED.get_or_init(|| { + let mut s = String::with_capacity(8 * 1024); + s.push_str("## Intent Coding rules\n\n"); + s.push_str( + "The following rules are built into the IntentCoding mode. Follow them for every task.\n\n", + ); + s.push_str("### Loaded rule manifest\n\n"); + for rule in EMBEDDED_RULES { + s.push_str(&format!("- `{}`: {}\n", rule.name, rule.purpose)); + } + s.push_str("\n### Loaded rule documents\n\n"); + for rule in EMBEDDED_RULES { + s.push_str(&format!( + "\n{}\n\n\n", + rule.name, + rule.content.trim() + )); + } + s + }) +} + +#[cfg(test)] +mod tests { + use super::IntentCodingMode; + use super::EMBEDDED_RULES; + use crate::agentic::agents::{get_embedded_prompt, Agent}; + + #[test] + fn intent_coding_mode_uses_dedicated_prompt_and_planning_tools() { + let mode = IntentCodingMode::new(); + + assert_eq!(mode.id(), "IntentCoding"); + assert_eq!(mode.prompt_template_name(None), "intent_coding_mode"); + + let tools = mode.default_tools(); + assert!(tools.contains(&"AskUserQuestion".to_string())); + assert!(tools.contains(&"TodoWrite".to_string())); + assert!(tools.contains(&"CreatePlan".to_string())); + assert!(tools.contains(&"Edit".to_string())); + } + + #[test] + fn 
intent_coding_prompt_embeds_acceptance_and_evidence_workflow() { + let prompt = get_embedded_prompt("intent_coding_mode").expect("embedded prompt"); + + assert!(prompt.contains("# Intent Coding workflow")); + assert!(prompt.contains("Accepted Checks or Accepted Tests")); + assert!(prompt.contains("acceptance coverage result")); + assert!(prompt.contains("pnpm run agent:check")); + assert!(prompt.contains("Evidence Package")); + } + + #[test] + fn intent_coding_embeds_required_rules() { + let rules: Vec<&str> = EMBEDDED_RULES.iter().map(|rule| rule.name).collect(); + assert!(!rules.is_empty()); + for name in [ + "context-compiler", + "risk-classification", + "accepted-checks", + "error-classification", + "provenance-chain", + "policy-gates", + "workflow-check", + "security", + "architecture", + "coding-style", + ] { + assert!(rules.contains(&name), "missing rule: {name}"); + } + for rule in EMBEDDED_RULES { + assert!(!rule.purpose.is_empty(), "rule purpose must not be empty"); + assert!(!rule.content.is_empty(), "rule content must not be empty"); + } + } +} diff --git a/src/crates/core/src/agentic/agents/definitions/modes/mod.rs b/src/crates/core/src/agentic/agents/definitions/modes/mod.rs index 85895d86c..33cc4781b 100644 --- a/src/crates/core/src/agentic/agents/definitions/modes/mod.rs +++ b/src/crates/core/src/agentic/agents/definitions/modes/mod.rs @@ -3,6 +3,7 @@ mod claw; mod cowork; mod debug; mod deep_research; +mod intent_coding; mod multitask; mod plan; mod team; @@ -12,6 +13,7 @@ pub use claw::ClawMode; pub use cowork::CoworkMode; pub use debug::DebugMode; pub use deep_research::DeepResearchMode; +pub use intent_coding::IntentCodingMode; pub use multitask::MultitaskMode; pub use plan::PlanMode; pub use team::TeamMode; diff --git a/src/crates/core/src/agentic/agents/mod.rs b/src/crates/core/src/agentic/agents/mod.rs index fbe801f7f..fcbf84be9 100644 --- a/src/crates/core/src/agentic/agents/mod.rs +++ b/src/crates/core/src/agentic/agents/mod.rs @@ -18,8 
+18,8 @@ use async_trait::async_trait; pub use definitions::custom::{CustomSubagent, CustomSubagentKind}; pub use definitions::hidden::{CodeReviewAgent, DeepReviewAgent, GenerateDocAgent}; pub use definitions::modes::{ - AgenticMode, ClawMode, CoworkMode, DebugMode, DeepResearchMode, MultitaskMode, PlanMode, - TeamMode, + AgenticMode, ClawMode, CoworkMode, DebugMode, DeepResearchMode, IntentCodingMode, + MultitaskMode, PlanMode, TeamMode, }; pub use definitions::review::{ ArchitectureReviewerAgent, BusinessLogicReviewerAgent, FrontendReviewerAgent, diff --git a/src/crates/core/src/agentic/agents/prompts/agentic_mode.md b/src/crates/core/src/agentic/agents/prompts/agentic_mode.md index 52d2c0ce2..90b52e2b7 100644 --- a/src/crates/core/src/agentic/agents/prompts/agentic_mode.md +++ b/src/crates/core/src/agentic/agents/prompts/agentic_mode.md @@ -41,6 +41,22 @@ When presenting options, state your recommendation and reasoning, keep choices c When presenting options or plans, never include time estimates - focus on what each option involves, not how long it might take. +# Proactivity +Users often begin with underspecified requests and leave important needs, constraints, or preferences unstated. Proactive assistance means reducing the user's burden by surfacing what needs clarification and deciding what can be inferred, rather than treating ambiguity as a reason to remain passive. + +When a request is underspecified: +1. **Infer from context**: Use prior session history, workspace files, project conventions, and the user's past preferences to fill in reasonable defaults without asking. +2. **Ask targeted questions**: When inference is insufficient, use AskUserQuestion to surface the specific missing constraint. Prefer one focused question over a broad "tell me everything." +3. **Act on partial information**: Start working with reasonable assumptions while flagging them. Do not block on full specification when the first step can proceed. 
+ +Avoid these anti-patterns: +- Restating the user's request back to them without adding value +- Asking "do you want me to proceed?" without having done any work +- Waiting for step-by-step instructions when the task direction is clear +- Asking generic open-ended questions when a concrete choice is needed + +The goal is to reduce the user's operational and cognitive effort: finish the task while minimizing avoidable back-and-forth. + {VISUAL_MODE} # Doing tasks The user will primarily request you perform software engineering tasks. This includes solving bugs, adding new functionality, refactoring code, explaining code, and more. For these tasks the following steps are recommended: diff --git a/src/crates/core/src/agentic/agents/prompts/claw_mode.md b/src/crates/core/src/agentic/agents/prompts/claw_mode.md index d89368674..a6c5a7eb2 100644 --- a/src/crates/core/src/agentic/agents/prompts/claw_mode.md +++ b/src/crates/core/src/agentic/agents/prompts/claw_mode.md @@ -51,6 +51,26 @@ Operational rules: - Delete temporary sessions when they are no longer useful. - Do not create extra sessions for trivial, tightly coupled, or one-step work. +# Proactivity +Users rarely begin with a complete specification of what they actually need. They issue a brief, underspecified initial request while the intended assistance depends on hidden requirements — habits, constraints, preferences, and dependencies that the user does not explicitly state. + +Be proactive: anticipate and address these hidden needs rather than waiting for the user to spell them out. + +When the user's request is underspecified: +1. **Recover from prior context**: Check earlier sessions, workspace files, and user memory for established preferences, conventions, and decisions that apply to this request. If the user preferred a specific format, naming scheme, or workflow in a prior session, carry it forward without asking again. +2. 
**Infer reasonable defaults**: Use domain knowledge and common conventions to fill gaps. For example, when organizing papers, include links and open-source status by default; when preparing a document, apply the user's established style. +3. **Ask targeted clarifying questions**: When inference is insufficient, use AskUserQuestion to surface exactly the missing constraint. Each question should target one specific decision. Avoid broad "what else do you need?" questions — the user expects you to figure out what matters. +4. **Act on partial information**: Start with reasonable assumptions, produce an initial deliverable, and flag where you made assumptions. This lets the user correct specifics rather than providing everything from scratch. Do not block on full specification when meaningful progress can be made. + +Anti-patterns to avoid: +- Restating the request verbatim and asking "is this what you want?" +- Waiting for the user to provide every detail before taking any action +- Failing to reuse preferences and conventions established in prior sessions +- Treating underspecification as a reason to remain passive +- Making the user repeat information that already exists in the workspace or prior sessions + +The goal is to reduce the user's operational and cognitive effort: resolve hidden requirements through inference or focused elicitation, and minimize the avoidable back-and-forth that comes from passive, step-by-step instruction-following. + # Safety You have no independent goals: do not pursue self-preservation, replication, resource acquisition, or power-seeking; avoid long-term plans beyond the user's request. 
diff --git a/src/crates/core/src/agentic/agents/prompts/intent_coding_mode.md b/src/crates/core/src/agentic/agents/prompts/intent_coding_mode.md new file mode 100644 index 000000000..751021d72 --- /dev/null +++ b/src/crates/core/src/agentic/agents/prompts/intent_coding_mode.md @@ -0,0 +1,100 @@ +You are BitFun, an ADE (AI IDE) that helps users with software engineering tasks. Use the instructions below and the tools available to you to assist the user. + +You are pair programming with a USER to solve their coding task. This mode is Intent Coding: your primary job is to align on intent before making code changes, then deliver the change with verification evidence. + +Your main goal is to follow the USER's instructions at each message, denoted by the tag. + +Tool results and user messages may include tags. These tags contain useful information and reminders. Please heed them, but don't mention them in your response to the user. + +IMPORTANT: Assist with defensive security tasks only. Refuse to create, modify, or improve code that may be used maliciously. Do not assist with credential discovery or harvesting, including bulk crawling for SSH keys, browser cookies, or cryptocurrency wallets. Allow security analysis, detection rules, vulnerability explanations, defensive tools, and security documentation. +IMPORTANT: You must NEVER generate or guess URLs for the user unless you are confident that the URLs are for helping the user with programming. You may use URLs provided by the user in their messages or local files. + +{LANGUAGE_PREFERENCE} +# Intent Coding workflow + +For coding tasks, do not start code edits until the intent alignment loop is complete. + +1. Load context: + - Read relevant repository files and use workspace instructions (AGENTS.md, CLAUDE.md) to understand the codebase surface touched by the request. 
+ - Intent Coding rules (context compiler, risk classification, accepted checks, error classification, provenance chain, policy gates, architecture, coding style, security, workflow checking) are provided as built-in context — follow them for every task. + - Prefer nearest module instructions over broader instructions when they conflict. + - You may dispatch subagents (Explore, FileFinder) in this step for broad or cross-module exploration. Use inline Grep/Glob/Read for narrow, single-module lookups. + +2. Clarification gate (MANDATORY — do not skip this step for coding tasks): + - After loading context, stop and decide: does the request have material ambiguity that would affect scope, risk, or implementation approach? + - Ambiguity signals: error handling, retry/fallback logic, boundary conditions, concurrency, data compatibility, security/permissions, API semantics, or UI interaction behavior the user did not specify. + - If ambiguous: ask at most 3 questions. Prefer questions informed by the codebase context you just loaded — reference actual code patterns, existing mechanisms, or constraints you discovered. + - If unambiguous (e.g. specific UI tweak, copy change, add a well-defined attribute): state your assumptions once and proceed to step 3. + - For purely conversational or documentation tasks, this gate does not apply — skip to step 3. + - Do not create an Intent Record or make code edits until this gate is resolved. + +3. Create or update an Intent Record: + - Store it under `.agent/intents/intent-YYYYMMDD-short-task-name.md` (create the directory if it does not exist). For this MVP, `.agent` is a workspace-local active-task artifact location, not long-term product storage. + - Include original user request, agent understanding, in-scope work, out-of-scope work, acceptance criteria, Accepted Checks/Tests, clarification questions, user confirmations, execution contract, and metrics. 
+ - Include provenance anchors: key context inputs, user decisions, and related change notes. + - If the task is purely conversational or the user explicitly asks not to create files, summarize the same sections in chat instead. + +4. Establish acceptance: + - Classify risk before coding: L0 Exploration, L1 Routine, L2 Important, L3 Critical, or L4 Safety-Critical. + - Use the built-in risk classification and accepted checks rules. + - Record risk level, risk factors, and verification expectation in the Intent Record. + - For L3 or L4, record the planned review escalation before coding. Prefer BitFun Deep Review for code changes when available; otherwise name the equivalent specialist review path. + - Produce 1-3 Accepted Checks or Accepted Tests before coding. + - Prefer automated tests when the touched area already has nearby tests, when behavior is shared/regression-prone, or when the task is L2 or higher. + - Use manual checks only for documentation-only work, visual/copy-only changes, missing test harnesses, or explicit user direction. + - Record the acceptance coverage plan: automated checks, manual checks, and any expected coverage gaps. + +5. Execute narrowly: + - Keep changes limited to the accepted intent. + - Reuse existing components, APIs, tools, and repository patterns. + - Do not introduce dependencies without approval. + - Do not modify auth, billing, deployment, release, or database migration files unless explicitly included in the accepted intent. + +6. Verify: + - Run the smallest verification command that matches the changed surface. + - If the workspace provides `pnpm run agent:check`, run it after the Intent Record and Evidence Package are written or updated. Treat it as workflow structure validation, not a replacement for product verification. + - If verification cannot run, report the exact command skipped and why. + - When verification fails, classify the failure before repairing it. Use the built-in error classification rules. 
+ - Record the failed command/check, failure class, repair action, and whether the same failure repeated. + - Treat failed verification as evidence to diagnose and repair, not as a reason to declare completion. + - Escalate to the user instead of continuing blind repair when the repair would broaden scope, add dependencies, touch risky file categories, or conflict with accepted intent. + +7. Deliver an Evidence Package: + - Store it under `.agent/evidence/evidence-YYYYMMDD-short-task-name.md` (create the directory if it does not exist). Treat this as the MVP artifact location until BitFun provides session-scoped structured provenance storage. + - Include the Intent Record path, summary, context inputs, provenance chain, policy gates, files changed, verification commands/results, repair-loop data, risk handling, Accepted Checks/Tests status, risks, human review focus, and metrics. + - Record the workflow structure check result when `pnpm run agent:check` is available. + - Include the acceptance coverage result: automated checks, manual checks, and coverage gaps. + - Use the built-in provenance chain rules. Keep provenance compact: link or summarize key anchors, do not paste full logs or sensitive data. + - For L3 or L4, state whether review escalation was completed, skipped by explicit user direction, or blocked by tooling. + - Final response should summarize the evidence package and any skipped verification. + +# Risk-driven depth + +Use lightweight verification for low-risk UI, CRUD, and documentation changes. Increase rigor when touching authentication, authorization, payments, data integrity, encryption, protocol parsing, migrations, remote workspace behavior, session persistence, stream parsing, agent tool execution, or cross-module runtime ownership. 
+ +Escalate risk when a task touches auth, permissions, tokens, credentials, billing, release, deployment, migrations, data deletion, shared runtime loops, prompt/tool schema contracts, multiple modules, public APIs, or areas with recent defects. + +# Tone and style +- Avoid emojis unless the user explicitly requests them. +- Keep responses concise. Use GitHub-flavored markdown when it improves readability. +- Communicate with the user in normal response text; use tools to perform work, not to narrate. +- Create files only when they are the right deliverable or necessary for the task. + +# Professional objectivity +Prioritize technical accuracy and truthfulness over validating the user's beliefs. Focus on facts and problem-solving. Whenever there is uncertainty, investigate before confirming assumptions. + +# No time estimates +Never give time estimates or predictions for how long tasks will take. Focus on what needs to be done, not how long it might take. + +# Tool usage policy +- Prefer the most direct tool path that preserves accuracy. +- Use TodoWrite for non-trivial multi-step work and keep it current. +- Use AskUserQuestion when clarification or an explicit decision would materially improve the result. +- Read a file before editing it. +- Keep work scoped to the accepted intent. + +# File References +When referencing files, use clickable markdown links. + +{VISUAL_MODE} +{ENV_INFO} diff --git a/src/crates/core/src/agentic/agents/prompts/intent_coding_rules/accepted-checks.md b/src/crates/core/src/agentic/agents/prompts/intent_coding_rules/accepted-checks.md new file mode 100644 index 000000000..7381a86cb --- /dev/null +++ b/src/crates/core/src/agentic/agents/prompts/intent_coding_rules/accepted-checks.md @@ -0,0 +1,58 @@ +# Accepted Checks and Tests Rules + +Intent Coding tasks must translate user intent into verifiable acceptance before code edits. 
+ +## Minimum Acceptance + +Every coding task should have: + +- 1-3 Accepted Checks or Accepted Tests before implementation. +- At least one check that directly exercises the user's stated outcome. +- A clear statement of any behavior explicitly out of scope. + +## Prefer Automated Tests When + +Add or update automated tests when: + +- The touched area already has nearby tests. +- The behavior is shared, reusable, or regression-prone. +- The task changes parsing, persistence, synchronization, API contracts, authorization, data integrity, or agent/tool execution. +- The task is L2 or higher. + +## Manual Checks Are Acceptable When + +Manual checks are acceptable when: + +- The task is documentation-only. +- The project has no reasonable test harness for the touched surface. +- The change is visual/copy-only and a focused manual check is clearer than brittle automation. +- The user explicitly requests no test changes. + +## Evidence Requirement + +Every Evidence Package should record: + +- Accepted Checks/Tests status. +- Which checks were automated. +- Which checks were manual. +- Any acceptance coverage gaps and why they remain. + +Use an explicit status marker for each accepted check: + +- `[x]` or `[passed]` for completed and verified checks. +- `[ ]` or `[partial]` for checks that remain incomplete. +- `[-]`, `[skipped]`, `[blocked]`, or `[not run]` when a check could not run, followed by the reason. + +## Good Accepted Checks + +Good checks are specific and observable: + +- "Selecting role=admin sends `role=admin` in the list request." +- "Clearing role filter removes the role query parameter." +- "`cargo test -p bitfun-core session_usage` passes." + +Avoid vague checks: + +- "Works correctly." +- "UI looks good." +- "Tests pass." 
diff --git a/src/crates/core/src/agentic/agents/prompts/intent_coding_rules/architecture.md b/src/crates/core/src/agentic/agents/prompts/intent_coding_rules/architecture.md new file mode 100644 index 000000000..0ca9431d2 --- /dev/null +++ b/src/crates/core/src/agentic/agents/prompts/intent_coding_rules/architecture.md @@ -0,0 +1,23 @@ +# Architecture Rules + +These rules are long-lived constraints for Coding Agent work in this repository. + +## Platform Boundaries + +- Keep product logic platform-agnostic, then expose it through platform adapters. +- Do not call Tauri APIs directly from shared UI components. +- Desktop-only integrations belong under `src/apps/desktop`, then flow through transport/API layers. +- Shared core code must avoid host-specific APIs such as `tauri::AppHandle`; use shared abstractions such as `bitfun_events::EventEmitter`. +- Consider remote workspace and remote control synchronization when adding behavior. If a feature cannot support remote scenarios, gate it or show a clear unsupported state. + +## Core Changes + +- For `bitfun-core` decomposition, feature-boundary, dependency-boundary, or Rust build-speed refactors, read `docs/architecture/core-decomposition.md` before editing. +- Do not confuse DTO or contract extraction with runtime owner migration. +- Moving runtime ownership requires a reviewed port/provider design, old-path compatibility, behavior equivalence tests, and explicit confirmation when behavior boundaries could change. + +## Deep Review + +- Keep target resolution and manifest construction on the frontend. +- Keep policy validation, queue/retry state, and report enrichment in shared core. +- Keep Deep Review documentation aligned with implementation changes. 
diff --git a/src/crates/core/src/agentic/agents/prompts/intent_coding_rules/coding-style.md b/src/crates/core/src/agentic/agents/prompts/intent_coding_rules/coding-style.md new file mode 100644 index 000000000..5809cd9e4 --- /dev/null +++ b/src/crates/core/src/agentic/agents/prompts/intent_coding_rules/coding-style.md @@ -0,0 +1,40 @@ +# Coding Style Rules + +These rules summarize repository-wide coding expectations for Coding Agent tasks. + +## General + +- Read relevant files before editing. +- Prefer the nearest `AGENTS.md` or `AGENTS-CN.md` for module-specific guidance. +- Keep changes limited to the accepted intent and avoid unrelated refactors. +- Reuse existing patterns, helpers, components, and adapters before adding new abstractions. +- Do not introduce new dependencies without explicit approval. + +## Logging + +- Logs must be English-only and contain no emojis. +- Frontend logging should follow `src/web-ui/LOGGING.md`. +- Backend logging should follow `src/crates/LOGGING.md`. + +## Tauri Commands + +- Rust command names must use `snake_case`. +- TypeScript wrappers may use `camelCase`, but must invoke Rust commands with a structured `request`. + +```rust +#[tauri::command] +pub async fn your_command( + state: State<'_, AppState>, + request: YourRequest, +) -> Result<YourResponse, String> +``` + +```ts +await api.invoke('your_command', { request: { ... } }); +``` + +## Verification + +- Run the smallest verification command that matches the changed surface. +- Report skipped verification and the reason. +- Prefer adding or updating automated tests when the project already has coverage for the touched behavior. 
diff --git a/src/crates/core/src/agentic/agents/prompts/intent_coding_rules/context-compiler.md b/src/crates/core/src/agentic/agents/prompts/intent_coding_rules/context-compiler.md new file mode 100644 index 000000000..9b1020f94 --- /dev/null +++ b/src/crates/core/src/agentic/agents/prompts/intent_coding_rules/context-compiler.md @@ -0,0 +1,80 @@ +# Context Compiler Rules + +Intent Coding uses a lightweight context compiler for this MVP. It is not a +retrieval or ranking engine yet; it is a deterministic context policy for what +must be considered before coding. + +## Built-In Context + +The IntentCoding mode always loads a manifest of built-in rules before the rule +documents. The manifest states why each rule is included so reviewers can audit +which long-lived constraints influenced the task. + +Built-in rules are product-owned prompt context. They are not loaded from +workspace `.agent` artifacts. + +## Workspace Context + +Before implementation, also read the nearest applicable workspace instructions: + +- Repository-level `AGENTS.md` or `AGENTS-CN.md`. +- Nearest module `AGENTS.md` or `AGENTS-CN.md` for changed paths. +- Relevant architecture or contribution documents referenced by those files. + +More specific workspace instructions override broader instructions when they +conflict. + +## Task Context + +Use task-local context to narrow implementation: + +- User confirmations and clarified assumptions. +- Intent Record scope, out-of-scope items, and accepted checks. +- Existing code patterns near the files being changed. +- Verification commands required by repository or module guidance. + +Do not broaden scope because a built-in rule mentions a capability that the user +did not request. + +## Provenance Requirement + +Evidence Packages must record key context inputs in `## Context Inputs`. 
+ +When available, generate initial context input candidates with: + +```bash +pnpm run agent:context-compile -- --evidence .agent/evidence/evidence-YYYYMMDD-short-task-name.md +``` + +Use one line per input: + +```text +- [builtin_rule] intent_coding_rules/risk-classification.md: risk level selection +- [workspace_instruction] AGENTS.md: repository verification guidance +- [module_doc] src/crates/core/AGENTS.md: core ownership rules +- [source_file] src/crates/core/src/example.rs: matched existing implementation pattern +- [user_confirmation] chat: confirmed boundary behavior +- [verification_guidance] AGENTS.md: selected cargo test command +- [not_available] module_doc: reason: no nearer module guide exists +``` + +Valid types: + +- `builtin_rule` +- `workspace_instruction` +- `module_doc` +- `source_file` +- `user_confirmation` +- `verification_guidance` +- `not_available` + +Use `not_available` only with `reason: <explanation>`. + +The `## Provenance Chain` section should still link the Intent Record, Evidence +Package, session/turn anchors, and durable provenance record when available. + +## Future Upgrade Path + +A later Context Compiler can replace this deterministic policy with retrieval, +ranking, and context-budget controls. It must preserve the same reviewable +property: reviewers can see which context inputs influenced the task. diff --git a/src/crates/core/src/agentic/agents/prompts/intent_coding_rules/error-classification.md b/src/crates/core/src/agentic/agents/prompts/intent_coding_rules/error-classification.md new file mode 100644 index 000000000..e9d5b166a --- /dev/null +++ b/src/crates/core/src/agentic/agents/prompts/intent_coding_rules/error-classification.md @@ -0,0 +1,53 @@ +# Error Classification Rules + +When verification fails in Intent Coding, classify the failure before attempting repair. The goal is to make repair behavior auditable and prepare for future routing. + +## Failure Classes + +Use one or more classes: + +- `syntax_error`: parser, formatter, invalid JSON, malformed config, or invalid markup. 
+- `type_error`: TypeScript, Rust, schema, or API contract mismatch. +- `test_failure`: automated test assertion failure. +- `lint_failure`: lint, style, formatting, or static check failure. +- `runtime_error`: command exits from runtime exception, panic, crash, or unhandled rejection. +- `missing_dependency`: missing package, binary, tool, feature flag, or generated artifact. +- `environment_failure`: sandbox, network, permission, filesystem, platform, or unavailable service issue. +- `behavior_mismatch`: output does not satisfy an Accepted Check/Test even if commands pass. +- `security_violation`: secret exposure, unsafe permission broadening, injection risk, or policy violation. +- `unknown`: insufficient evidence to classify. + +## Repair Attempt Record + +For each failed verification, record: + +- Command or check that failed. +- Failure class. +- Short evidence summary. +- Repair action taken. +- Whether the same failure repeated. + +## Escalation + +Escalate to the user instead of continuing blind repair when: + +- The same failure class repeats without new evidence. +- The fix would broaden scope beyond the Intent Record. +- The repair requires a new dependency or risky file category. +- The failure appears to be environmental and cannot be resolved locally. +- The repair path conflicts with accepted intent. + +## Evidence Requirement + +Every Evidence Package should include repair-loop data when any verification fails: + +- Failure classes observed. +- Repair attempts count. +- Final repair status: `not_needed`, `repaired`, `blocked`, or `deferred`. +- Remaining verification gaps. + +Use a dedicated `## Repair Loop` section in the Evidence Package. It must include: + +- `Repair attempts: <count>` +- `Final repair status: not_needed|repaired|blocked|deferred` +- Failure classes observed, or `none` when no verification failed. 
diff --git a/src/crates/core/src/agentic/agents/prompts/intent_coding_rules/policy-gates.md b/src/crates/core/src/agentic/agents/prompts/intent_coding_rules/policy-gates.md new file mode 100644 index 000000000..5f030e487 --- /dev/null +++ b/src/crates/core/src/agentic/agents/prompts/intent_coding_rules/policy-gates.md @@ -0,0 +1,86 @@ +# Policy Gate Rules + +Intent Coding uses lightweight policy gates for this MVP. These gates are not an +OPA/Rego engine yet; they are a machine-checkable checklist that records which +governance checks were considered before delivery. + +## Evidence Requirement + +Every Evidence Package must include a `Policy Gates` section with one or more +gate lines: + +```text +- [passed] gate_id: result summary +- [not_applicable] gate_id: reason summary +- [skipped] gate_id: reason: explicit reason +- [blocked] gate_id: reason: explicit blocker +``` + +Valid statuses: + +- `passed` +- `failed` +- `skipped` +- `blocked` +- `not_applicable` + +`failed` gates fail the local workflow checker. `skipped` and `blocked` gates +must include `reason: <explanation>`. + +## Required Gate Profile + +The workflow checker derives a lightweight required gate profile from the +Evidence Package. + +Every Evidence Package must include: + +- `scope`: Changes stayed within the accepted Intent Record. +- `verification`: Required verification commands were run or explicitly skipped. +- `security`: No secrets, credentials, unsafe auth changes, or malicious behavior were introduced. + +Additional required gates: + +- `risk_review`: Required for L3/L4 tasks. +- `dependencies`: Required when dependency manifest or lock files are changed. +- `platform_boundary`: Required when platform adapter, desktop-only, Tauri, or adapter surfaces are touched. +- `remote_compatibility`: Required when remote workspace, synchronization, transport, or websocket behavior is touched. 
+ +## Optional Policy Config + +The checker can load additional gate requirements from: + +- `.agent/policy.json` +- `.bitfun/intent-coding-policy.json` + +Supported shape: + +```json +{ + "required_gates": ["team_review"], + "risk_gates": { + "L3": ["risk_review"], + "L4": ["security_review"] + }, + "path_gates": [ + { "contains": "src/crates/core/src/agentic/tools/", "gate": "tool_contract" } + ], + "text_gates": [ + { "contains": "data deletion", "gate": "data_safety" } + ] +} +``` + +Configured gates are additive. They cannot remove built-in required gates. + +Optional gates can still be included when useful. Prefer these gate identifiers: + +- `risk_review`: L3/L4 review routing was completed, skipped, or blocked with evidence. +- `dependencies`: New dependencies were not introduced without approval. +- `platform_boundary`: Platform-specific behavior stayed behind adapters. +- `remote_compatibility`: Remote workspace impact was considered when relevant. + +## Future Upgrade Path + +A later policy-as-code layer can evaluate these gates automatically. It should +preserve the same reviewable output shape so Evidence Packages remain useful to +humans. diff --git a/src/crates/core/src/agentic/agents/prompts/intent_coding_rules/provenance-chain.md b/src/crates/core/src/agentic/agents/prompts/intent_coding_rules/provenance-chain.md new file mode 100644 index 000000000..54dfddd39 --- /dev/null +++ b/src/crates/core/src/agentic/agents/prompts/intent_coding_rules/provenance-chain.md @@ -0,0 +1,70 @@ +# Provenance Chain Rules + +Intent Coding tasks should preserve a compact chain of custody from request to delivery. The chain should be useful for review without copying full logs or large outputs. + +## Minimum Chain + +Record these anchors when applicable: + +- Original request: the user request or a concise reference to it. +- Context inputs: key `AGENTS.md`, built-in intent coding rules, or module docs used. +- Intent Record: path to the accepted Intent Record. 
+- Acceptance: accepted checks/tests and user decisions. +- Execution: files changed and major implementation decisions. +- Verification: commands/checks run and results. +- Repair loop: failure classes and repair attempt count when verification fails. +- Review escalation: Deep Review or equivalent review status for L3/L4. +- Evidence Package: path to the final Evidence Package. + +## Artifact Storage Policy + +For this MVP, Intent Records and Evidence Packages are workspace-local active-task artifacts: + +- Intent Records live under `.agent/intents/`. +- Evidence Packages live under `.agent/evidence/`. +- `.agent` artifacts are ignored by Git and should not be treated as product prompt templates or durable repository knowledge. +- Evidence Packages should still reference the matching Intent Record path so reviewers can inspect the active-task chain. + +Longer term, durable provenance should move to session-scoped structured storage, such as `.bitfun/sessions` or a dedicated session provenance store, while `.agent` remains an optional export or compatibility location. + +## What Not To Store + +Do not include: + +- Secrets, tokens, credentials, customer data, or private local configuration. +- Full command logs when a short summary is enough. +- Large diffs already available through Git. +- Tool outputs that include sensitive or irrelevant data. + +## Evidence Requirement + +Every Evidence Package must include a `Provenance Chain` section. + +The section must include these machine-checkable fields: + +- `Provenance store: agent_artifact|session_store|external|not_available` +- `Session id: <session-id>|not_available` +- `Turn id: <turn-id>|not_available` +- `Intent Record: .agent/intents/intent-YYYYMMDD-short-task-name.md` +- `Evidence Package: .agent/evidence/evidence-YYYYMMDD-short-task-name.md` +- `Provenance record: <path-or-reference>|not_available` + +Use `not_available` when the current runtime cannot expose a stable session or +turn identifier. Do not invent identifiers. 
Prefer `agent_artifact` for the MVP +when the chain only exists in `.agent`. + +When `Provenance store: session_store` is used: + +- `Session id` and `Turn id` must be concrete values. +- `Provenance record` must point to a `.bitfun/sessions/...json` record. +- If the record is present locally, it should match the declared session and turn ids. +- Use `pnpm run agent:provenance-record -- --evidence <evidence-path> --session-id <session-id> --turn-id <turn-id>` when available to create the session record from an Evidence Package. + +When `Provenance store: external` is used, `Provenance record` must identify the +external record or system of record. + +The section should also include review-useful anchors: + +- Key context inputs. +- Verification and repair anchors. +- Human decisions that changed scope, risk, or acceptance. diff --git a/src/crates/core/src/agentic/agents/prompts/intent_coding_rules/risk-classification.md b/src/crates/core/src/agentic/agents/prompts/intent_coding_rules/risk-classification.md new file mode 100644 index 000000000..2833b2b0a --- /dev/null +++ b/src/crates/core/src/agentic/agents/prompts/intent_coding_rules/risk-classification.md @@ -0,0 +1,141 @@ +# Risk Classification Rules + +Intent Coding tasks must classify risk before code edits. Use the lowest level that honestly matches the changed surface. + +## Intent Record Requirement + +Every Intent Record must include a machine-checkable risk line in `## Metadata`: + +- `Risk level: L0` +- `Risk level: L1` +- `Risk level: L2` +- `Risk level: L3` +- `Risk level: L4` + +For L3 and L4 tasks, the Intent Record must also include: + +- `Review escalation: deep_review|specialist_review|manual_review|skipped` +- `Review escalation reason: <reason>` when escalation is skipped. + +## Levels + +### L0 Exploration + +Use for prototypes, notes, documentation drafts, and throwaway local experiments. + +Minimum verification: + +- Syntax or file-existence checks when applicable. +- Manual accepted checks are acceptable. 
+ +### L1 Routine + +Use for small UI changes, CRUD behavior, copy changes, straightforward tests, and narrow non-critical refactors. + +Minimum verification: + +- Focused tests or checks for the touched behavior. +- Typecheck/lint when frontend or typed contracts change. +- Cargo check/test for touched Rust logic when practical. + +### L2 Important + +Use for core business logic, cross-module behavior, persistence, synchronization, remote workspace behavior, or changes that can silently lose user work. + +Minimum verification: + +- Focused tests for new behavior. +- Relevant regression tests for adjacent behavior. +- Broader typecheck/check commands for the affected surface. +- Evidence Package must call out remaining gaps. + +### L3 Critical + +Use for authentication, authorization, data integrity, migrations, payment, encryption, release/signing, protocol parsing, or runtime ownership boundaries. + +Minimum verification: + +- L2 verification. +- Human review focus must be explicit. +- Deep Review or equivalent specialist review should be run when available. +- Intent Record must state the planned review escalation. +- Evidence Package must state whether Deep Review or equivalent specialist review was run. +- No automatic merge. + +### L4 Safety-Critical + +Use for cryptography, protocol correctness, sandbox boundaries, privilege escalation surfaces, destructive filesystem operations, or high-impact security controls. + +Minimum verification: + +- L3 verification. +- Security-focused review is mandatory. +- Formal/spec/property testing should be considered. +- Intent Record must state the planned specialist review path before coding. +- Evidence Package must state review results or the explicit reason review was skipped. +- No automatic merge. + +## Risk Factors + +Increase risk when a task touches: + +- Auth, permissions, tokens, credentials, billing, release, deployment, migrations, or data deletion. 
+- Shared runtime loops, agent tool execution, prompt/tool schema contracts, stream parsing, or session persistence. +- Remote workspace behavior, synchronization, or multi-client control. +- Multiple modules or public APIs. +- Areas with recent defects or unclear ownership. + +## Checker Suggestion + +When an Evidence Package lists changed files or describes risk-sensitive +behavior, the local workflow checker may suggest a risk level from file paths +and Evidence text. It also considers selected ownership-sensitive surfaces and +dependency graph impact files. If present, it can also load recent-incident +memory from `.agent/risk-memory.json` or `.bitfun/intent-coding-risk-memory.json`. +This suggestion is advisory and is intended to catch likely under-classification, +not to replace judgment. + +Recent-incident memory shape: + +```json +{ + "recent_incidents": [ + { + "label": "session persistence regression", + "level": "L3", + "path_contains": "src/crates/core/src/agentic/session/", + "text_contains": "persistence" + } + ] +} +``` + +If the recorded final risk level is lower than the suggestion: + +- Raise the risk level when the suggestion matches the actual changed behavior. +- Or keep the lower level and explain why in `## Risks` or `## Human Review Focus`. + +## Evidence Requirement + +Every Evidence Package must record: + +- Final risk level as `Final risk level: L0|L1|L2|L3|L4` in `## Risks`. +- Why that level was selected. +- Verification commands run. +- Verification that was skipped and why. +- Human review focus for L2 and above. +- Review route for L3 and L4 as `Review route: deep_review|specialist_review|manual_review|skipped` in `## Risks`. +- Review trigger for L3 and L4 as `Review trigger: automatic|manual|not_available` in `## Risks`. +- Review escalation result for L3 and L4 as `Review escalation status: completed|skipped|blocked` in `## Risks`. 
+- Review escalation reason for L3 and L4 as `Review escalation reason: <reason>` when escalation is skipped or blocked. + +## Review Escalation + +For L3 and L4 tasks: + +- Prefer BitFun Deep Review when the changed surface is code and a review session is available. +- Use equivalent specialist review when Deep Review is unavailable or the task is not code-review shaped. +- Record whether the review route was triggered automatically, manually, or was not available. +- Use `pnpm run agent:review-route -- --evidence <evidence-path>` when available to produce a review handoff plan for the selected route. +- Do not claim completion without stating whether review escalation was completed, skipped by explicit user direction, or blocked by tooling. +- Keep review routing machine-checkable so later automation can trigger the selected route. diff --git a/src/crates/core/src/agentic/agents/prompts/intent_coding_rules/security.md b/src/crates/core/src/agentic/agents/prompts/intent_coding_rules/security.md new file mode 100644 index 000000000..7c8d8ac0f --- /dev/null +++ b/src/crates/core/src/agentic/agents/prompts/intent_coding_rules/security.md @@ -0,0 +1,23 @@ +# Security Rules + +These rules define repository-wide security constraints for Coding Agent tasks. + +## Secrets + +- Do not commit secrets, tokens, certificates, private keys, or sensitive local configuration. +- Do not print secrets in logs, test output, screenshots, or evidence packages. + +## Sensitive Areas + +- Do not change authentication, authorization, billing, deployment, release signing, or database migration files unless the Intent Record explicitly includes that scope. +- Do not broaden permissions, network access, filesystem access, or desktop automation capabilities without explicit approval. + +## Dependencies + +- Do not add dependencies without approval. +- When a dependency is approved, document its purpose and check license compatibility. 
+ +## Agent Loop Safety + +- Do not address looping behavior first with hard-coded string, pattern, or count blockers. +- Investigate tool behavior, model interaction, context packaging, prompt/tool schema design, and state synchronization before adding loop controls. diff --git a/src/crates/core/src/agentic/agents/prompts/intent_coding_rules/workflow-check.md b/src/crates/core/src/agentic/agents/prompts/intent_coding_rules/workflow-check.md new file mode 100644 index 000000000..f88c92656 --- /dev/null +++ b/src/crates/core/src/agentic/agents/prompts/intent_coding_rules/workflow-check.md @@ -0,0 +1,60 @@ +# Agent Workflow Check Rule + +Intent Coding tasks should run the local workflow structure checker when the workspace provides one. + +## Command + +```bash +pnpm run agent:check +``` + +For L3/L4 review routing handoff: + +```bash +pnpm run agent:review-route -- --evidence .agent/evidence/evidence-YYYYMMDD-task.md +``` + +For session provenance record export: + +```bash +pnpm run agent:provenance-record -- --evidence .agent/evidence/evidence-YYYYMMDD-task.md --session-id <session-id> --turn-id <turn-id> +``` + +For context input candidate generation: + +```bash +pnpm run agent:context-compile -- --evidence .agent/evidence/evidence-YYYYMMDD-task.md +``` + +## When to Run + +- After the Intent Record and Evidence Package have been written or updated. +- Before the final response for any coding task that changes Intent Record or Evidence Package artifacts. +- Alongside product verification such as Rust tests, web tests, type-checks, lint, or builds. +- In CI as a lightweight structural gate when the repository provides the script. + +## Scope + +The checker validates structural workflow hygiene: + +- Intent Records and Evidence Packages exist and pair 1:1 by task slug. +- Intent Records contain required MVP sections. +- Intent Records include a machine-checkable risk level. +- L3/L4 Intent Records include a planned review escalation path. 
+- Evidence Packages contain required MVP sections. +- Evidence Packages reference existing Intent Records. +- Intent Records and Evidence Packages are paired by task slug. +- Evidence Package context inputs include machine-checkable source types and reasons. +- Evidence Package accepted checks include explicit status markers. +- Evidence Package repair loops include attempt counts and final repair status. +- Evidence Package provenance chains include machine-checkable store, session, turn, Intent Record, Evidence Package, and durable record anchors. +- Evidence Package policy gates include built-in/configured gate profiles, machine-checkable statuses, and failure/skipped/blocked handling. +- Evidence Package risks include a final risk level. +- L3/L4 Intent Records include a machine-checkable review route. +- L3/L4 Evidence Packages include review route, trigger mode, and escalation status. +- L3/L4 review routes can be converted into a review handoff plan. +- Evidence Package changed files, risk-sensitive Evidence text, ownership-sensitive surfaces, dependency-impact files, and optional recent-incident memory produce an advisory risk-level suggestion. + +## Limits + +This check does not prove that the code is correct, the acceptance criteria are strong, or the product behavior works. It must not replace the smallest matching product verification command. 
diff --git a/src/crates/core/src/agentic/agents/registry/builtin.rs b/src/crates/core/src/agentic/agents/registry/builtin.rs index aa18c0f04..6c7111e09 100644 --- a/src/crates/core/src/agentic/agents/registry/builtin.rs +++ b/src/crates/core/src/agentic/agents/registry/builtin.rs @@ -9,8 +9,8 @@ use std::sync::Arc; pub(crate) fn default_model_id_for_builtin_agent(agent_type: &str) -> &'static str { match agent_type { - "agentic" | "Cowork" | "ComputerUse" | "Plan" | "debug" | "Claw" | "DeepResearch" - | "Team" | "Multitask" => "auto", + "agentic" | "Cowork" | "ComputerUse" | "Plan" | "debug" | "IntentCoding" | "Claw" + | "DeepResearch" | "Team" | "Multitask" => "auto", "DeepReview" | "ReviewBusinessLogic" | "ReviewPerformance" diff --git a/src/crates/core/src/agentic/agents/registry/catalog.rs b/src/crates/core/src/agentic/agents/registry/catalog.rs index 06da060fc..30d4438d1 100644 --- a/src/crates/core/src/agentic/agents/registry/catalog.rs +++ b/src/crates/core/src/agentic/agents/registry/catalog.rs @@ -4,8 +4,8 @@ use crate::agentic::agents::{ Agent, AgenticMode, ArchitectureReviewerAgent, BusinessLogicReviewerAgent, ClawMode, CodeReviewAgent, ComputerUseMode, CoworkMode, DebugMode, DeepResearchMode, DeepReviewAgent, ExploreAgent, FileFinderAgent, FrontendReviewerAgent, GeneralPurposeAgent, GenerateDocAgent, - MultitaskMode, PerformanceReviewerAgent, PlanMode, ResearchSpecialistAgent, ReviewFixerAgent, - ReviewJudgeAgent, SecurityReviewerAgent, TeamMode, + IntentCodingMode, MultitaskMode, PerformanceReviewerAgent, PlanMode, + ResearchSpecialistAgent, ReviewFixerAgent, ReviewJudgeAgent, SecurityReviewerAgent, TeamMode, }; use std::sync::Arc; @@ -33,6 +33,11 @@ pub fn builtin_agent_specs() -> Vec { category: AgentCategory::Mode, visibility_policy: SubagentVisibilityPolicy::default(), }, + BuiltinAgentSpec { + factory: || Arc::new(IntentCodingMode::new()), + category: AgentCategory::Mode, + visibility_policy: SubagentVisibilityPolicy::default(), + }, 
BuiltinAgentSpec { factory: || Arc::new(MultitaskMode::new()), category: AgentCategory::Mode, diff --git a/src/crates/core/src/agentic/agents/registry/tests.rs b/src/crates/core/src/agentic/agents/registry/tests.rs index 7a019b189..4c04b97e1 100644 --- a/src/crates/core/src/agentic/agents/registry/tests.rs +++ b/src/crates/core/src/agentic/agents/registry/tests.rs @@ -76,6 +76,7 @@ fn top_level_modes_default_to_auto() { for agent_type in [ "agentic", "Multitask", + "IntentCoding", "Cowork", "Plan", "debug", @@ -87,6 +88,26 @@ fn top_level_modes_default_to_auto() { } } +#[tokio::test] +async fn intent_coding_is_registered_as_top_level_mode() { + let registry = AgentRegistry::new(); + let modes = registry.get_modes_info().await; + let intent_coding = modes + .iter() + .find(|agent| agent.id == "IntentCoding") + .expect("IntentCoding should be registered as a top-level mode"); + + assert_eq!(intent_coding.name, "Intent Coding"); + assert!(!intent_coding.is_readonly); + assert!(intent_coding.default_tools.contains(&"Edit".to_string())); + assert!(intent_coding + .default_tools + .contains(&"AskUserQuestion".to_string())); + assert!(intent_coding + .default_tools + .contains(&"CreatePlan".to_string())); +} + #[tokio::test] async fn computer_use_is_builtin_subagent_not_mode() { let registry = AgentRegistry::new(); diff --git a/src/crates/core/src/agentic/coordination/coordinator.rs b/src/crates/core/src/agentic/coordination/coordinator.rs index e38ef78be..0d018da3e 100644 --- a/src/crates/core/src/agentic/coordination/coordinator.rs +++ b/src/crates/core/src/agentic/coordination/coordinator.rs @@ -760,6 +760,18 @@ impl ConversationCoordinator { } } + /// Apply mode-derived defaults that are not part of the caller's contract. + /// Today the only one is: IntentCoding sessions always enable hidden-intent + /// tracking (a plain `false` is indistinguishable from the default, so it cannot signal an opt-out). 
This must live at the + /// core/port boundary so server/relay/AgentSubmissionPort callers can't + /// silently end up with the evaluator disabled by passing the SessionConfig + /// default through. + fn apply_mode_derived_session_defaults(config: &mut SessionConfig, agent_type: &str) { + if !config.enable_intent_tracking && agent_type == "IntentCoding" { + config.enable_intent_tracking = true; + } + } + fn ensure_user_message_metadata_object( metadata: Option, ) -> serde_json::Value { @@ -1138,6 +1150,7 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet config.workspace_path = Some(workspace_path.clone()); config.workspace_id = Self::resolve_workspace_id_for_config(&config).await; let agent_type = Self::normalize_agent_type(&agent_type); + Self::apply_mode_derived_session_defaults(&mut config, &agent_type); let session = self .session_manager .create_session_with_id_and_creator( @@ -1182,6 +1195,7 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet config.workspace_path = Some(workspace_path); config.workspace_id = Self::resolve_workspace_id_for_config(&config).await; let agent_type = Self::normalize_agent_type(&agent_type); + Self::apply_mode_derived_session_defaults(&mut config, &agent_type); self.create_hidden_subagent_session( session_id, session_name, @@ -1278,6 +1292,9 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet workspace_hostname: None, unread_completion: None, needs_user_attention: None, + intent_tracking: None, + proactivity_score: None, + completeness_score: None, }; if let Err(e) = persistence_manager .save_session_metadata(&workspace_path_buf, &metadata) @@ -1592,9 +1609,10 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet session_id: Option, session_name: String, agent_type: String, - config: SessionConfig, + mut config: SessionConfig, created_by: Option, ) -> BitFunResult { + 
Self::apply_mode_derived_session_defaults(&mut config, &agent_type); self.session_manager .create_session_with_id_and_details( session_id, @@ -2124,6 +2142,7 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet round_preempt: None, round_injection: None, recover_partial_on_cancel: false, + intent_evidence: None, }; let session_max_tokens = session.config.max_context_tokens; @@ -2672,6 +2691,13 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet round_preempt: self.round_preempt_source.get().cloned(), round_injection: self.round_injection_source.get().cloned(), recover_partial_on_cancel: false, + intent_evidence: if session.config.enable_intent_tracking { + Some(std::sync::Arc::new(tokio::sync::Mutex::new( + crate::agentic::execution::intent_evidence::IntentEvidenceCollector::default(), + ))) + } else { + None + }, }; // Auto-generate session title on first message @@ -3823,6 +3849,7 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet // that belong to a different (parent) session/turn. 
round_injection: None, recover_partial_on_cancel: true, + intent_evidence: None, }; let execution_engine = self.execution_engine.clone(); @@ -5216,7 +5243,7 @@ pub fn get_global_coordinator() -> Option> { mod tests { use super::{ normalize_subagent_max_concurrency, resolve_agent_submission_turn_id, - ConversationCoordinator, + ConversationCoordinator, SessionConfig, }; use crate::service::remote_ssh::workspace_state::init_remote_workspace_manager; use bitfun_runtime_ports::{AgentSubmissionRequest, AgentSubmissionSource}; @@ -5230,6 +5257,42 @@ mod tests { assert_state_port::(); } + #[test] + fn apply_mode_derived_defaults_enables_intent_tracking_for_intent_coding() { + let mut config = SessionConfig::default(); + assert!(!config.enable_intent_tracking); + ConversationCoordinator::apply_mode_derived_session_defaults(&mut config, "IntentCoding"); + assert!( + config.enable_intent_tracking, + "IntentCoding sessions must default-enable intent tracking at the core boundary" + ); + } + + #[test] + fn apply_mode_derived_defaults_leaves_other_modes_untouched() { + for mode in ["agentic", "Cowork", "ComputerUse", "Plan", "debug", "Claw"] { + let mut config = SessionConfig::default(); + ConversationCoordinator::apply_mode_derived_session_defaults(&mut config, mode); + assert!( + !config.enable_intent_tracking, + "mode {mode} must not default-enable intent tracking", + ); + } + } + + #[test] + fn apply_mode_derived_defaults_preserves_caller_true() { + let mut config = SessionConfig { + enable_intent_tracking: true, + ..Default::default() + }; + ConversationCoordinator::apply_mode_derived_session_defaults(&mut config, "agentic"); + assert!( + config.enable_intent_tracking, + "an explicit true from the caller must survive even for non-IntentCoding modes" + ); + } + #[test] fn clamps_subagent_max_concurrency_into_safe_range() { assert_eq!(normalize_subagent_max_concurrency(0), 1); diff --git a/src/crates/core/src/agentic/core/session.rs 
b/src/crates/core/src/agentic/core/session.rs index 05fa12896..0a1ea81d2 100644 --- a/src/crates/core/src/agentic/core/session.rs +++ b/src/crates/core/src/agentic/core/session.rs @@ -1,5 +1,9 @@ use super::state::SessionState; pub use bitfun_core_types::SessionKind; +pub use bitfun_services_core::session::hidden_intent_types::{ + HiddenIntent, IntentAssignment, IntentScope, IntentSource, IntentTerminalStatus, + PersistentIntent, SessionIntentTracking, +}; use serde::{Deserialize, Serialize}; use std::time::SystemTime; use uuid::Uuid; @@ -149,6 +153,12 @@ pub struct SessionConfig { /// Model config ID used by this session (for token usage tracking) #[serde(default, skip_serializing_if = "Option::is_none")] pub model_id: Option, + + /// Whether hidden intent tracking is enabled for this session. + /// When enabled, the agent loop tracks which hidden requirements were + /// proactively resolved vs passively waited-for. + #[serde(default)] + pub enable_intent_tracking: bool, } impl Default for SessionConfig { @@ -166,6 +176,7 @@ impl Default for SessionConfig { remote_connection_id: None, remote_ssh_host: None, model_id: None, + enable_intent_tracking: false, } } } diff --git a/src/crates/core/src/agentic/execution/execution_engine.rs b/src/crates/core/src/agentic/execution/execution_engine.rs index 5c27fb724..69a9c7156 100644 --- a/src/crates/core/src/agentic/execution/execution_engine.rs +++ b/src/crates/core/src/agentic/execution/execution_engine.rs @@ -2575,6 +2575,34 @@ impl ExecutionEngine { total_tools += round_result.tool_calls.len(); + // Hook A: Collect intent evidence from this round + // Only runs when intent tracking is enabled for this session. 
+ if let Some(ref collector) = context.intent_evidence { + let mut c = collector.lock().await; + if round_result.used_ask_user_question { + c.asked_user_question = true; + c.question_topics + .extend(round_result.ask_user_question_topics.clone()); + } + c.tool_names_used.extend( + round_result + .tool_calls + .iter() + .map(|tc| tc.tool_name.clone()), + ); + c.proactive_tool_calls += round_result + .tool_calls + .iter() + .filter(|tc| { + crate::agentic::execution::intent_evidence::is_proactive_tool( + &tc.tool_name, + ) + }) + .count(); + c.produced_output |= round_result.had_assistant_text; + c.round_count += 1; + } + // Track partial recovery reason from the last round if round_result.partial_recovery_reason.is_some() { last_partial_recovery_reason = round_result.partial_recovery_reason.clone(); @@ -2920,6 +2948,26 @@ impl ExecutionEngine { ); } + // Hook B: Persist collected intent evidence for this turn. + // Called after the dialog turn loop exits (all rounds complete). + let evidence = if let Some(collector) = context.intent_evidence.as_ref() { + Some(collector.lock().await.snapshot(context.turn_index)) + } else { + None + }; + if let Some(evidence) = evidence { + if let Err(e) = self + .session_manager + .record_intent_evidence(&context.session_id, evidence) + .await + { + warn!( + "Failed to record intent evidence: session_id={}, turn_id={}, error={}", + context.session_id, context.dialog_turn_id, e + ); + } + } + // P1-6: Track the actual termination reason for downstream reporting. // Defaults to "complete" (model produced a final answer naturally) and // is overridden by finalize / fallback paths below. diff --git a/src/crates/core/src/agentic/execution/intent_evidence.rs b/src/crates/core/src/agentic/execution/intent_evidence.rs new file mode 100644 index 000000000..afe741404 --- /dev/null +++ b/src/crates/core/src/agentic/execution/intent_evidence.rs @@ -0,0 +1,493 @@ +//! Intent evidence collection for proactive assistance evaluation. +//! +//! 
This module collects lightweight trajectory signals during execution. It +//! intentionally does not assign hidden-intent terminal statuses: pi-Bench style +//! assignment requires comparing a turn against concrete hidden intents with a +//! two-stage evaluator (direct satisfaction before targeted elicitation). + +use bitfun_services_core::session::hidden_intent_types::{ + HiddenIntent, IntentScope, IntentSource, IntentTerminalStatus, IntentTurnEvidence, + ProactivityLevel, ProactivityScore, SessionIntentTracking, +}; + +/// Per-turn caps applied when a turn snapshot is taken, so stored evidence stays bounded; +/// without them `tool_names_used` / `question_topics` would grow without limit. +const MAX_TOOL_NAMES_PER_TURN: usize = 64; +const MAX_QUESTION_TOPICS_PER_TURN: usize = 16; +/// Per-session caps applied at persistence time. +pub const MAX_TURN_EVIDENCE_RETAINED: usize = 64; +pub const MAX_HIDDEN_INTENTS_RETAINED: usize = 256; + +/// Evidence collected during a single dialog turn for later intent analysis. +/// The collector is meant to be created fresh for each turn: it accumulates raw signals from that turn's model +/// rounds and produces an IntentTurnEvidence snapshot at turn completion. 
+#[derive(Debug, Clone, Default)]
+pub struct IntentEvidenceCollector {
+    /// Whether an `AskUserQuestion` call has been recorded.
+    pub asked_user_question: bool,
+    /// Question topics recorded so far; only capped when `snapshot` is taken.
+    pub question_topics: Vec<String>,
+    /// Running count of proactive tool calls.
+    pub proactive_tool_calls: usize,
+    /// Tool names recorded so far (may repeat); only capped when `snapshot` is taken.
+    pub tool_names_used: Vec<String>,
+    /// Whether any output has been recorded.
+    pub produced_output: bool,
+    /// Number of rounds recorded.
+    pub round_count: usize,
+    /// Copied into the snapshot unchanged.
+    pub asked_follow_up_in_text: bool,
+}
+
+impl IntentEvidenceCollector {
+    /// Freeze the collected signals into an `IntentTurnEvidence` for `turn_index`.
+    ///
+    /// `tool_names_used` and `question_topics` are truncated to their first
+    /// `MAX_TOOL_NAMES_PER_TURN` / `MAX_QUESTION_TOPICS_PER_TURN` entries. The
+    /// collector itself is not modified.
+    pub fn snapshot(&self, turn_index: usize) -> IntentTurnEvidence {
+        // `take` is a no-op on lists shorter than the cap, so no explicit
+        // length check is needed.
+        let tool_names_used: Vec<String> = self
+            .tool_names_used
+            .iter()
+            .take(MAX_TOOL_NAMES_PER_TURN)
+            .cloned()
+            .collect();
+        let question_topics: Vec<String> = self
+            .question_topics
+            .iter()
+            .take(MAX_QUESTION_TOPICS_PER_TURN)
+            .cloned()
+            .collect();
+        IntentTurnEvidence {
+            turn_index,
+            asked_user_question: self.asked_user_question,
+            question_topics,
+            proactive_tool_calls: self.proactive_tool_calls,
+            tool_names_used,
+            produced_output: self.produced_output,
+            round_count: self.round_count,
+            asked_follow_up_in_text: self.asked_follow_up_in_text,
+        }
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Scoring functions
+// ---------------------------------------------------------------------------
+
+/// Compute the session proactivity score as `(completed + inferred) / total`.
+///
+/// Returns `None` when tracking is disabled, when there are no hidden intents,
+/// or when `all_intents_resolved()` is false, so a partially evaluated session
+/// never yields a score.
+pub fn compute_proactivity_score(tracking: &SessionIntentTracking) -> Option<ProactivityScore> {
+    if !tracking.enabled || tracking.hidden_intents.is_empty() {
+        return None;
+    }
+    if !tracking.all_intents_resolved() {
+        return None;
+    }
+
+    let completed = tracking.count_by_status(IntentTerminalStatus::Completed) as u32;
+    let inferred = tracking.count_by_status(IntentTerminalStatus::Inferred) as u32;
+    let provided = tracking.count_by_status(IntentTerminalStatus::Provided) as u32;
+    // Non-zero: the empty case returned early above.
+    let total = tracking.hidden_intents.len() as u32;
+
+    let score = (completed + inferred) as f32 / total as f32;
+    Some(ProactivityScore {
+        completed,
+        inferred,
+        provided,
+        score,
+        level: Some(classify_proactivity_level(score)),
+    })
+}
+
+/// Classify a proactivity score into a qualitative level.
+/// Delegates to `ProactivityLevel::from_score` so the thresholds stay in one place.
+pub fn classify_proactivity_level(score: f32) -> ProactivityLevel {
+    ProactivityLevel::from_score(score)
+}
+
+/// Returns true for the fixed set of tool names this module counts as
+/// proactive. Matching is exact and case-sensitive.
+pub fn is_proactive_tool(tool_name: &str) -> bool {
+    matches!(
+        tool_name,
+        "Write"
+            | "Edit"
+            | "Delete"
+            | "Bash"
+            | "Git"
+            | "WebSearch"
+            | "WebFetch"
+            | "GenerativeUI"
+            | "CreatePlan"
+    )
+}
+
+// ---------------------------------------------------------------------------
+// Hidden intent extraction from turn evidence
+// ---------------------------------------------------------------------------
+
+/// Extract candidate hidden intents from a turn's collected evidence.
+///
+/// Intents emitted here are *trajectory markers*, not evaluated assignments.
+/// `terminal_status` is intentionally left `None` so a downstream evaluator can
+/// stamp them. Auto-stamping `Completed`/`Inferred` would make
+/// `all_intents_resolved()` trivially true and inflate proactivity scores; the
+/// module-level doc explicitly forbids that.
+///
+/// Every returned intent has an `intent_id` that appears neither in
+/// `existing_intents` nor earlier in the returned list. `tool_names_used`
+/// records one entry per call, so a tool used several times in a turn still
+/// yields a single marker. Two topics whose slugs collide within a turn also
+/// yield a single marker (the first one wins).
+pub fn extract_hidden_intents_from_evidence(
+    evidence: &IntentTurnEvidence,
+    existing_intents: &[HiddenIntent],
+) -> Vec<HiddenIntent> {
+    let mut new_intents: Vec<HiddenIntent> = Vec::new();
+
+    // An id is "known" if it was persisted earlier or was already emitted by
+    // this call. `pending` is passed in rather than captured so the closure
+    // does not hold a borrow of `new_intents` across the pushes below.
+    let already_known = |intent_id: &str, pending: &[HiddenIntent]| {
+        existing_intents
+            .iter()
+            .chain(pending)
+            .any(|intent| intent.intent_id == intent_id)
+    };
+
+    // 1. Agent used proactive tools and produced output: record a trajectory
+    //    marker per distinct proactive tool. No terminal status.
+    if evidence.proactive_tool_calls > 0 && evidence.produced_output {
+        for tool_name in &evidence.tool_names_used {
+            if !is_proactive_tool(tool_name) {
+                continue;
+            }
+            let intent_id = format!(
+                "proactive-{}-turn{}",
+                tool_name.to_lowercase(),
+                evidence.turn_index
+            );
+            if already_known(&intent_id, &new_intents) {
+                continue;
+            }
+            new_intents.push(HiddenIntent {
+                intent_id,
+                description: proactive_tool_intent_description(tool_name),
+                scope: IntentScope::SessionLocal,
+                terminal_status: None,
+                resolved_at_turn: Some(evidence.turn_index),
+                source: Some(IntentSource::PriorContext),
+            });
+        }
+    }
+
+    // 2. Agent asked targeted clarification questions via AskUserQuestion.
+    if evidence.asked_user_question && !evidence.question_topics.is_empty() {
+        for topic in &evidence.question_topics {
+            let intent_id = format!(
+                "asked-{}-turn{}",
+                slugify_topic(topic, evidence.turn_index),
+                evidence.turn_index
+            );
+            if already_known(&intent_id, &new_intents) {
+                continue;
+            }
+            new_intents.push(HiddenIntent {
+                intent_id,
+                description: format!("Required clarification: {}", topic),
+                scope: IntentScope::SessionLocal,
+                terminal_status: None,
+                resolved_at_turn: Some(evidence.turn_index),
+                source: Some(IntentSource::PriorContext),
+            });
+        }
+    }
+
+    new_intents
+}
+
+/// Build a stable, ASCII-safe slug from a free-text question topic. Falls back
+/// to a short hash digest when stripping non-alphanumerics leaves nothing
+/// (common with CJK / emoji headers) so per-turn IDs don't collide.
+fn slugify_topic(topic: &str, turn_index: usize) -> String {
+    // Only the first 40 characters contribute to the slug. ASCII letters and
+    // digits are lower-cased; every other character becomes '-'.
+    let ascii: String = topic
+        .chars()
+        .take(40)
+        .map(|c| {
+            if c.is_ascii_alphanumeric() {
+                c.to_ascii_lowercase()
+            } else {
+                '-'
+            }
+        })
+        .collect();
+    let trimmed = ascii.trim_matches('-');
+    if !trimmed.is_empty() {
+        return trimmed.to_string();
+    }
+    // Fallback: short deterministic digest of (topic, turn_index) to avoid
+    // collisions when the slug collapses to empty. A fixed 32-bit FNV-1a is
+    // used instead of `DefaultHasher`, whose algorithm is unspecified and may
+    // change between Rust releases, so ids stay comparable across builds.
+    const FNV_OFFSET_BASIS: u32 = 0x811c_9dc5;
+    const FNV_PRIME: u32 = 0x0100_0193;
+    let digest = topic
+        .bytes()
+        .chain((turn_index as u64).to_le_bytes())
+        .fold(FNV_OFFSET_BASIS, |hash, byte| {
+            (hash ^ u32::from(byte)).wrapping_mul(FNV_PRIME)
+        });
+    format!("h{:08x}", digest)
+}
+
+/// Human-readable description for a trajectory marker created from a
+/// proactive tool call. Unrecognised tool names get a generic sentence that
+/// embeds the name.
+fn proactive_tool_intent_description(tool_name: &str) -> String {
+    let description = match tool_name {
+        "Write" => "Agent proactively created a new file",
+        "Edit" => "Agent proactively modified an existing file",
+        "Delete" => "Agent proactively removed unneeded content",
+        "Bash" => "Agent proactively executed a shell command",
+        "Git" => "Agent proactively performed version control operations",
+        "WebSearch" => "Agent proactively searched for information",
+        "WebFetch" => "Agent proactively fetched external content",
+        "GenerativeUI" => "Agent proactively created interactive UI output",
+        "CreatePlan" => "Agent proactively planned the task structure",
+        _ => return format!("Agent proactively used {}", tool_name),
+    };
+    description.to_string()
+}
+
+#[cfg(test)] +mod tests { + use super::*; + use bitfun_services_core::session::hidden_intent_types::{ + HiddenIntent, IntentScope, IntentSource, IntentTerminalStatus, SessionIntentTracking, + }; + + #[test] + fn collector_empty_on_init() { + let c = IntentEvidenceCollector::default(); + assert!(!c.asked_user_question); + assert!(c.question_topics.is_empty()); + assert_eq!(c.proactive_tool_calls, 0); + assert!(c.tool_names_used.is_empty()); + assert!(!c.produced_output); + assert_eq!(c.round_count, 0); + 
assert!(!c.asked_follow_up_in_text); + } + + #[test] + fn collector_records_ask_user_question() { + let mut c = IntentEvidenceCollector { + asked_user_question: true, + ..Default::default() + }; + c.question_topics.push("What approach?".into()); + c.question_topics.push("Which library?".into()); + + let evidence = c.snapshot(1); + + assert!(evidence.asked_user_question); + assert_eq!(evidence.question_topics.len(), 2); + assert_eq!(evidence.turn_index, 1); + } + + #[test] + fn intent_turn_evidence_round_trips() { + let evidence = IntentTurnEvidence { + turn_index: 2, + asked_user_question: true, + question_topics: vec!["Which format?".into()], + proactive_tool_calls: 3, + tool_names_used: vec!["Write".into(), "Edit".into()], + produced_output: true, + round_count: 5, + asked_follow_up_in_text: false, + }; + let json = serde_json::to_value(&evidence).expect("serialize"); + let rt: IntentTurnEvidence = serde_json::from_value(json).expect("deserialize"); + assert_eq!(rt.turn_index, 2); + assert!(rt.asked_user_question); + assert_eq!(rt.proactive_tool_calls, 3); + assert_eq!(rt.tool_names_used, vec!["Write", "Edit"]); + } + + #[test] + fn compute_proactivity_score_all_completed() { + let tracking = make_tracking(vec![ + IntentTerminalStatus::Completed, + IntentTerminalStatus::Completed, + IntentTerminalStatus::Completed, + ]); + let s = compute_proactivity_score(&tracking).unwrap(); + assert!((s.score - 1.0).abs() < f32::EPSILON); + assert_eq!(s.completed, 3); + assert_eq!(s.inferred, 0); + assert_eq!(s.provided, 0); + assert_eq!(s.level, Some(ProactivityLevel::High)); + } + + #[test] + fn compute_proactivity_score_all_provided() { + let tracking = make_tracking(vec![ + IntentTerminalStatus::Provided, + IntentTerminalStatus::Provided, + ]); + let s = compute_proactivity_score(&tracking).unwrap(); + assert!((s.score - 0.0).abs() < f32::EPSILON); + assert_eq!(s.provided, 2); + assert_eq!(s.level, Some(ProactivityLevel::Reactive)); + } + + #[test] + fn 
compute_proactivity_score_mixed() { + let tracking = make_tracking(vec![ + IntentTerminalStatus::Completed, + IntentTerminalStatus::Completed, + IntentTerminalStatus::Inferred, + IntentTerminalStatus::Provided, + ]); + let s = compute_proactivity_score(&tracking).unwrap(); + assert!((s.score - 0.75).abs() < f32::EPSILON); + assert_eq!(s.completed, 2); + assert_eq!(s.inferred, 1); + assert_eq!(s.provided, 1); + assert_eq!(s.level, Some(ProactivityLevel::Moderate)); + } + + #[test] + fn compute_proactivity_score_empty() { + assert_eq!( + compute_proactivity_score(&SessionIntentTracking::default()), + None + ); + } + + #[test] + fn compute_proactivity_score_requires_resolved_intents() { + let mut tracking = make_tracking(vec![ + IntentTerminalStatus::Completed, + IntentTerminalStatus::Provided, + ]); + tracking.hidden_intents.push(HiddenIntent { + intent_id: "i-unresolved".into(), + description: "unresolved intent".into(), + scope: IntentScope::SessionLocal, + terminal_status: None, + resolved_at_turn: None, + source: None, + }); + + assert_eq!(compute_proactivity_score(&tracking), None); + } + + #[test] + fn classify_proactivity_level_edges() { + assert_eq!(classify_proactivity_level(0.9), ProactivityLevel::High); + assert_eq!(classify_proactivity_level(0.8), ProactivityLevel::High); + assert_eq!(classify_proactivity_level(0.79), ProactivityLevel::Moderate); + assert_eq!(classify_proactivity_level(0.5), ProactivityLevel::Moderate); + assert_eq!(classify_proactivity_level(0.49), ProactivityLevel::Low); + assert_eq!(classify_proactivity_level(0.2), ProactivityLevel::Low); + assert_eq!(classify_proactivity_level(0.19), ProactivityLevel::Reactive); + assert_eq!(classify_proactivity_level(0.0), ProactivityLevel::Reactive); + } + + #[test] + fn is_proactive_tool_positive() { + assert!(is_proactive_tool("Write")); + assert!(is_proactive_tool("Edit")); + assert!(is_proactive_tool("Delete")); + assert!(is_proactive_tool("Bash")); + assert!(is_proactive_tool("Git")); + 
assert!(is_proactive_tool("WebSearch")); + assert!(is_proactive_tool("CreatePlan")); + } + + #[test] + fn is_proactive_tool_negative() { + assert!(!is_proactive_tool("Read")); + assert!(!is_proactive_tool("Grep")); + assert!(!is_proactive_tool("Glob")); + assert!(!is_proactive_tool("TodoWrite")); + assert!(!is_proactive_tool("AskUserQuestion")); + } + + #[test] + fn compute_proactivity_disabled() { + let mut tracking = make_tracking(vec![IntentTerminalStatus::Completed]); + tracking.enabled = false; + assert_eq!(compute_proactivity_score(&tracking), None); + } + + #[test] + fn extract_hidden_intents_from_proactive_tools() { + let evidence = IntentTurnEvidence { + turn_index: 1, + asked_user_question: false, + question_topics: vec![], + proactive_tool_calls: 2, + tool_names_used: vec!["Write".into(), "Edit".into()], + produced_output: true, + round_count: 3, + asked_follow_up_in_text: false, + }; + let intents = extract_hidden_intents_from_evidence(&evidence, &[]); + assert_eq!(intents.len(), 2); + assert!(intents + .iter() + .any(|i| i.intent_id == "proactive-write-turn1")); + // Trajectory markers must not carry a terminal status; only a + // downstream evaluator may stamp Completed/Inferred/Provided. 
+ assert!(intents.iter().all(|i| i.terminal_status.is_none())); + } + + #[test] + fn extract_hidden_intents_from_ask_user_question() { + let evidence = IntentTurnEvidence { + turn_index: 2, + asked_user_question: true, + question_topics: vec!["Which database?".into()], + proactive_tool_calls: 0, + tool_names_used: vec![], + produced_output: false, + round_count: 1, + asked_follow_up_in_text: false, + }; + let intents = extract_hidden_intents_from_evidence(&evidence, &[]); + assert_eq!(intents.len(), 1); + assert!(intents[0].intent_id.contains("asked-")); + assert!(intents[0].terminal_status.is_none()); + } + + #[test] + fn slugify_topic_falls_back_to_hash_for_non_ascii() { + let s1 = slugify_topic("ヘッダ確認", 1); + let s2 = slugify_topic("ヘッダ確認", 2); + let s3 = slugify_topic("コンテキスト", 1); + assert!(s1.starts_with('h') && s1.len() == 9); + assert_ne!(s1, s2, "different turns must produce distinct fallback slugs"); + assert_ne!(s1, s3, "different topics must produce distinct fallback slugs"); + } + + #[test] + fn slugify_topic_preserves_ascii_prefix() { + assert_eq!(slugify_topic("Which database?", 7), "which-database"); + } + + #[test] + fn extract_hidden_intents_deduplicates_existing() { + let evidence = IntentTurnEvidence { + turn_index: 1, + asked_user_question: false, + question_topics: vec![], + proactive_tool_calls: 1, + tool_names_used: vec!["Write".into()], + produced_output: true, + round_count: 1, + asked_follow_up_in_text: false, + }; + let existing = vec![HiddenIntent { + intent_id: "proactive-write-turn1".into(), + description: "already exists".into(), + scope: IntentScope::SessionLocal, + terminal_status: Some(IntentTerminalStatus::Completed), + resolved_at_turn: Some(1), + source: Some(IntentSource::PriorContext), + }]; + assert!(extract_hidden_intents_from_evidence(&evidence, &existing).is_empty()); + } + + #[test] + fn extract_hidden_intents_empty_when_passive() { + let evidence = IntentTurnEvidence { + turn_index: 0, + asked_user_question: false, + 
question_topics: vec![], + proactive_tool_calls: 0, + tool_names_used: vec!["Read".into()], + produced_output: false, + round_count: 1, + asked_follow_up_in_text: false, + }; + assert!(extract_hidden_intents_from_evidence(&evidence, &[]).is_empty()); + } + + fn make_tracking(statuses: Vec) -> SessionIntentTracking { + SessionIntentTracking { + enabled: true, + hidden_intents: statuses + .into_iter() + .enumerate() + .map(|(i, status)| HiddenIntent { + intent_id: format!("i{}", i), + description: format!("test intent {}", i), + scope: IntentScope::SessionLocal, + terminal_status: Some(status), + resolved_at_turn: Some(i), + source: None, + }) + .collect(), + ..Default::default() + } + } +} diff --git a/src/crates/core/src/agentic/execution/mod.rs b/src/crates/core/src/agentic/execution/mod.rs index aa3cc2ea9..52f982f10 100644 --- a/src/crates/core/src/agentic/execution/mod.rs +++ b/src/crates/core/src/agentic/execution/mod.rs @@ -3,6 +3,7 @@ //! Responsible for AI interaction and model round control pub mod execution_engine; +pub mod intent_evidence; pub mod round_executor; pub mod stream_processor; pub mod types; diff --git a/src/crates/core/src/agentic/execution/round_executor.rs b/src/crates/core/src/agentic/execution/round_executor.rs index a2550872c..c13e4dec2 100644 --- a/src/crates/core/src/agentic/execution/round_executor.rs +++ b/src/crates/core/src/agentic/execution/round_executor.rs @@ -52,6 +52,36 @@ impl RoundExecutor { !text.trim().is_empty() } + /// Detects AskUserQuestion calls in a set of tool calls. + /// Returns (used_ask_user_question, extracted_question_topics). + /// + /// Note: `used_ask_user_question` is `true` whenever AskUserQuestion appears + /// in the tool call list, regardless of whether any topic headers could be + /// extracted. This ensures the call is recorded even when the `questions` + /// argument is missing or contains no `header` fields. 
+    fn detect_ask_user_question(
+        tool_calls: &[crate::agentic::core::ToolCall],
+    ) -> (bool, Vec<String>) {
+        // `called` records the call itself; `topics` only what could be extracted.
+        let mut called = false;
+        let mut topics = Vec::new();
+        let ask_calls = tool_calls
+            .iter()
+            .filter(|tc| tc.tool_name == "AskUserQuestion");
+        for call in ask_calls {
+            called = true;
+            // Best-effort: a missing or non-array `questions` yields no topics.
+            let questions = call.arguments.get("questions").and_then(|v| v.as_array());
+            let headers = questions
+                .into_iter()
+                .flatten()
+                .filter_map(|q| q.get("header").and_then(|v| v.as_str()));
+            topics.extend(headers.map(str::to_string));
+        }
+        (called, topics)
+    }
+
 fn write_tool_mode(context: &RoundContext) -> WriteToolMode { WriteToolMode::from_context_var( context @@ -574,6 +604,8 @@ impl RoundExecutor { partial_recovery_reason: stream_result.partial_recovery_reason.clone(), had_assistant_text: Self::has_user_visible_assistant_text(&stream_result.full_text), had_thinking_content: !stream_result.full_thinking.is_empty(), + used_ask_user_question: false, + ask_user_question_topics: vec![], }); } @@ -822,6 +854,10 @@ impl RoundExecutor { // Note: Do not cleanup cancellation token here, as there may be subsequent model rounds // Cancellation token will be cleaned up by ExecutionEngine when the entire dialog turn ends + // Detect AskUserQuestion calls for intent evidence collection + let (used_ask_user_question, ask_user_question_topics) = + Self::detect_ask_user_question(&tool_calls); + Ok(RoundResult { assistant_message, tool_calls: tool_calls.clone(), @@ -837,6 +873,8 @@ impl RoundExecutor { partial_recovery_reason: stream_result.partial_recovery_reason.clone(), had_assistant_text: Self::has_user_visible_assistant_text(&stream_result.full_text), had_thinking_content: !stream_result.full_thinking.is_empty(), + used_ask_user_question, + ask_user_question_topics, }) } @@ -1819,6 +1857,7 @@ mod tests { extract_bitfun_contents, extract_bitfun_contents_with_options, RoundExecutor, StreamProcessor, }; + use crate::agentic::core::ToolCall; use 
crate::agentic::events::{EventQueue, EventQueueConfig}; use crate::agentic::execution::types::RoundContext; use crate::agentic::tools::ToolRuntimeRestrictions; @@ -1829,6 +1868,15 @@ mod tests { use std::sync::Arc; use tokio_util::sync::CancellationToken; + fn tool_call(tool_id: &str, tool_name: &str, arguments: serde_json::Value) -> ToolCall { + ToolCall { + tool_id: tool_id.to_string(), + tool_name: tool_name.to_string(), + arguments, + ..Default::default() + } + } + fn test_round_executor() -> RoundExecutor { let event_queue = Arc::new(EventQueue::new(EventQueueConfig::default())); RoundExecutor { @@ -2322,4 +2370,88 @@ mod tests { }; assert!(super::token_details_from_usage(&usage).is_none()); } + + // --- detect_ask_user_question tests --- + + #[test] + fn detect_ask_user_question_with_header_topics() { + let tc = tool_call( + "tc-1", + "AskUserQuestion", + serde_json::json!({ + "questions": [ + { "header": "Auth method", "question": "Which auth method?" }, + { "header": "Library", "question": "Which library?" } + ] + }), + ); + let (called, topics) = RoundExecutor::detect_ask_user_question(&[tc]); + assert!(called, "should be called even with headers"); + assert_eq!(topics, vec!["Auth method", "Library"]); + } + + #[test] + fn detect_ask_user_question_without_header_fields_still_marks_called() { + // AskUserQuestion called but questions have no `header` field. + // The bug being tested: previously returned (false, []) in this case. + let tc = tool_call( + "tc-1", + "AskUserQuestion", + serde_json::json!({ + "questions": [ + { "question": "Which auth method?" 
} + ] + }), + ); + let (called, topics) = RoundExecutor::detect_ask_user_question(&[tc]); + assert!(called, "must be true even when no headers are extractable"); + assert!(topics.is_empty()); + } + + #[test] + fn detect_ask_user_question_with_empty_questions_array_still_marks_called() { + let tc = tool_call( + "tc-1", + "AskUserQuestion", + serde_json::json!({ "questions": [] }), + ); + let (called, topics) = RoundExecutor::detect_ask_user_question(&[tc]); + assert!(called); + assert!(topics.is_empty()); + } + + #[test] + fn detect_ask_user_question_with_missing_questions_key_still_marks_called() { + let tc = tool_call( + "tc-1", + "AskUserQuestion", + serde_json::json!({}), + ); + let (called, topics) = RoundExecutor::detect_ask_user_question(&[tc]); + assert!(called); + assert!(topics.is_empty()); + } + + #[test] + fn detect_ask_user_question_not_present_returns_false() { + let tc = tool_call("tc-1", "Write", serde_json::json!({ "file_path": "a.rs" })); + let (called, topics) = RoundExecutor::detect_ask_user_question(&[tc]); + assert!(!called); + assert!(topics.is_empty()); + } + + #[test] + fn detect_ask_user_question_mixed_tool_calls() { + let write_tc = tool_call("tc-1", "Write", serde_json::json!({})); + let ask_tc = tool_call( + "tc-2", + "AskUserQuestion", + serde_json::json!({ + "questions": [{ "header": "Approach" }] + }), + ); + let (called, topics) = RoundExecutor::detect_ask_user_question(&[write_tc, ask_tc]); + assert!(called); + assert_eq!(topics, vec!["Approach"]); + } } diff --git a/src/crates/core/src/agentic/execution/types.rs b/src/crates/core/src/agentic/execution/types.rs index 43db92696..0db007986 100644 --- a/src/crates/core/src/agentic/execution/types.rs +++ b/src/crates/core/src/agentic/execution/types.rs @@ -1,6 +1,7 @@ //! 
Execution Engine Type Definitions use crate::agentic::core::Message; +use crate::agentic::execution::intent_evidence::IntentEvidenceCollector; use crate::agentic::round_preempt::{ DialogRoundInjectionInterrupt, DialogRoundInjectionSource, DialogRoundPreemptSource, }; @@ -12,6 +13,7 @@ use bitfun_runtime_ports::DelegationPolicy; use serde_json::Value; use std::collections::HashMap; use std::sync::Arc; +use tokio::sync::Mutex as AsyncMutex; use tokio_util::sync::CancellationToken; /// Execution context @@ -37,6 +39,13 @@ pub struct ExecutionContext { /// When true, stream cancellation may be converted into a partial assistant /// result if text/tool output has already been produced. pub recover_partial_on_cancel: bool, + + /// When intent tracking is enabled, this collector gathers raw signals + /// during execution for later intent analysis. Uses `tokio::sync::Mutex` + /// because it lives in `Arc` and is touched from async contexts; a + /// `std::sync::Mutex` would be a latent deadlock footgun if any future + /// call site held the guard across an `.await`. + pub intent_evidence: Option>>, } /// Round context @@ -88,6 +97,13 @@ pub struct RoundResult { /// True when the model emitted any non-empty thinking / reasoning content /// in this round. pub had_thinking_content: bool, + + /// Whether the agent called AskUserQuestion in this round. + /// Set by the round executor when processing tool calls. + pub used_ask_user_question: bool, + + /// If AskUserQuestion was called, the parsed questions from its input. + pub ask_user_question_topics: Vec, } /// Finish reason diff --git a/src/crates/core/src/agentic/insights/prompts/facet_extraction.md b/src/crates/core/src/agentic/insights/prompts/facet_extraction.md index 1185626a4..1f7945cc5 100644 --- a/src/crates/core/src/agentic/insights/prompts/facet_extraction.md +++ b/src/crates/core/src/agentic/insights/prompts/facet_extraction.md @@ -27,6 +27,21 @@ CRITICAL GUIDELINES: 5. **languages_used**: Optional. 
The insights report's language chart is computed from edited file paths (Edit/Write tool), not from this field; you may still list languages you infer for context. +6. **proactivity**: Assess how proactively the AI handled underspecified or ambiguous parts of the user's request. + - proactive_hidden_intents: Number of hidden requirements the AI surfaced and resolved without the user having to explicitly state them. This includes: inferring preferences from prior context, filling in reasonable defaults, and applying established conventions without asking. + - reactive_hidden_intents: Number of requirements the user had to explicitly provide step by step because the AI did not proactively address them. + - inferred_from_context: The AI recovered requirements from prior sessions, workspace files, or established user preferences. + - targeted_questions_asked: The AI asked focused, specific clarifying questions that targeted missing information. + - passive_waiting_events: The AI restated the request or asked vague open-ended questions without making progress. + - proactivity_level: "high" (most requirements proactively resolved), "moderate" (mix of proactive and reactive), "low" (mostly waited for user to provide every detail), "reactive" (entirely step-by-step instruction following). + - proactivity_detail: "One sentence describing the AI's proactivity pattern or empty" + +7. **completeness**: Assess whether the final deliverables satisfied the user's task requirements. + - requirements_satisfied: Number of verifiable requirements that were met in the final output. + - requirements_missed: Number of requirements the user explicitly asked for that were not satisfied. + - completeness_level: "full" (all requirements met), "partial" (most met, some gaps), "minimal" (only surface request handled), "incomplete" (significant gaps). 
+ - completeness_detail: "One sentence describing completeness gaps or empty" + SESSION: {session_transcript} @@ -43,5 +58,20 @@ RESPOND WITH ONLY A VALID JSON OBJECT matching this schema: "primary_success": "fast_accurate_search|correct_code_edits|good_explanations|proactive_help|multi_file_changes|good_debugging", "brief_summary": "One sentence: what user wanted and whether they got it", "languages_used": ["programing_language1", "programing_language2"], - "user_instructions": ["Any explicit instructions user gave to AI about how to behave"] + "user_instructions": ["Any explicit instructions user gave to AI about how to behave"], + "proactivity": { + "proactive_hidden_intents": 0, + "reactive_hidden_intents": 0, + "inferred_from_context": 0, + "targeted_questions_asked": 0, + "passive_waiting_events": 0, + "proactivity_level": "high|moderate|low|reactive", + "proactivity_detail": "One sentence or empty" + }, + "completeness": { + "requirements_satisfied": 0, + "requirements_missed": 0, + "completeness_level": "full|partial|minimal|incomplete", + "completeness_detail": "One sentence or empty" + } } diff --git a/src/crates/core/src/agentic/persistence/manager.rs b/src/crates/core/src/agentic/persistence/manager.rs index d13bc7761..6bff5cc8f 100644 --- a/src/crates/core/src/agentic/persistence/manager.rs +++ b/src/crates/core/src/agentic/persistence/manager.rs @@ -897,6 +897,12 @@ impl PersistenceManager { workspace_hostname, unread_completion: existing.and_then(|value| value.unread_completion.clone()), needs_user_attention: existing.and_then(|value| value.needs_user_attention.clone()), + intent_tracking: existing + .and_then(|value| value.intent_tracking.clone()), + proactivity_score: existing + .and_then(|value| value.proactivity_score.clone()), + completeness_score: existing + .and_then(|value| value.completeness_score.clone()), } } diff --git a/src/crates/core/src/agentic/session/session_manager.rs b/src/crates/core/src/agentic/session/session_manager.rs index 
66bd8e09a..c4f6e1af4 100644 --- a/src/crates/core/src/agentic/session/session_manager.rs +++ b/src/crates/core/src/agentic/session/session_manager.rs @@ -3,8 +3,8 @@ //! Responsible for session CRUD, lifecycle management, and resource association use crate::agentic::core::{ - new_turn_id, CompressionContract, CompressionState, Message, MessageSemanticKind, - ProcessingPhase, Session, SessionConfig, SessionKind, SessionState, SessionSummary, TurnStats, + CompressionContract, CompressionState, Message, MessageSemanticKind, ProcessingPhase, Session, + SessionConfig, SessionKind, SessionState, SessionSummary, TurnStats, new_turn_id, }; use crate::agentic::image_analysis::ImageContextData; use crate::agentic::persistence::PersistenceManager; @@ -17,8 +17,8 @@ use crate::agentic::session::{ }; use crate::infrastructure::ai::get_global_ai_client_factory; use crate::service::config::{ - get_app_language_code, get_global_config_service, short_model_user_language_instruction, - subscribe_config_updates, ConfigUpdateEvent, + ConfigUpdateEvent, get_app_language_code, get_global_config_service, + short_model_user_language_instruction, subscribe_config_updates, }; use crate::service::session::{ DialogTurnData, DialogTurnKind, ModelRoundData, SessionMetadata, SessionRelationship, @@ -100,6 +100,11 @@ pub struct SessionManager { evidence_ledger: Arc, persistence_manager: Arc, + /// Per-session async lock serializing intent-evidence read-modify-write on + /// `SessionMetadata`. Without this, concurrent turns can clobber each + /// other's `intent_tracking` additions in the gap between load and save. 
+ intent_metadata_locks: Arc>>>, + /// Configuration config: SessionManagerConfig, } @@ -803,6 +808,7 @@ impl SessionManager { file_read_state_store: Arc::new(FileReadStateStore::new()), evidence_ledger: Arc::new(SessionEvidenceLedger::new()), persistence_manager, + intent_metadata_locks: Arc::new(DashMap::new()), config, }; @@ -990,6 +996,7 @@ impl SessionManager { let file_read_state_store = self.file_read_state_store.clone(); let evidence_ledger = self.evidence_ledger.clone(); let persistence_manager = self.persistence_manager.clone(); + let intent_metadata_locks = self.intent_metadata_locks.clone(); let manager_config = self.config.clone(); tokio::spawn(async move { @@ -1011,6 +1018,7 @@ impl SessionManager { file_read_state_store, evidence_ledger, persistence_manager, + intent_metadata_locks, config: manager_config, }; @@ -1470,9 +1478,7 @@ impl SessionManager { if session.session_name != expected_current_title { debug!( "Skipping auto-generated title because current title changed: session_id={}, expected_title={}, current_title={}", - session_id, - expected_current_title, - session.session_name + session_id, expected_current_title, session.session_name ); return Ok(false); } @@ -1754,6 +1760,7 @@ impl SessionManager { elapsed_ms_u64(memory_stage_started_at) ); self.session_workspace_index.remove(session_id); + self.intent_metadata_locks.remove(session_id); info!( "Session deletion completed: session_id={}, workspace_path={}, duration_ms={}", @@ -3057,6 +3064,165 @@ impl SessionManager { Ok(()) } + /// Record intent evidence collected during a dialog turn. + /// Appends the evidence to the session's intent tracking state. + /// The turn is identified via `evidence.turn_index`. + /// + /// Missing workspace path or metadata is treated as a no-op (ephemeral or + /// already-deleted sessions are routine and should not warn). 
The + /// read-modify-write of `SessionMetadata` is serialized via a per-session + /// async lock so concurrent turns can't clobber each other. + pub async fn record_intent_evidence( + &self, + session_id: &str, + evidence: bitfun_services_core::session::hidden_intent_types::IntentTurnEvidence, + ) -> BitFunResult<()> { + if !self.should_persist_session_id(session_id) { + return Ok(()); + } + + let Some(workspace_path) = self.effective_session_workspace_path(session_id).await else { + debug!( + "Skipping intent evidence record; no workspace path for session {}", + session_id + ); + return Ok(()); + }; + + let lock = self + .intent_metadata_locks + .entry(session_id.to_string()) + .or_insert_with(|| Arc::new(tokio::sync::Mutex::new(()))) + .clone(); + let _guard = lock.lock().await; + + let Some(mut metadata) = self + .persistence_manager + .load_session_metadata(&workspace_path, session_id) + .await? + else { + debug!( + "Skipping intent evidence record; no metadata for session {}", + session_id + ); + return Ok(()); + }; + + // Initialize intent tracking if not present + let tracking = metadata.intent_tracking.get_or_insert_with(|| { + bitfun_services_core::session::hidden_intent_types::SessionIntentTracking { + enabled: true, + ..Default::default() + } + }); + tracking.enabled = true; + + // Extract new hidden intents from this turn's evidence. + // These are appended to hidden_intents so they become available + // for proactivity scoring and cross-turn persistence. 
+ let new_intents = + crate::agentic::execution::intent_evidence::extract_hidden_intents_from_evidence( + &evidence, + &tracking.hidden_intents, + ); + for intent in new_intents { + if !tracking + .hidden_intents + .iter() + .any(|i| i.intent_id == intent.intent_id) + { + tracking.hidden_intents.push(intent); + } + } + + tracking + .turn_evidence + .retain(|existing| existing.turn_index != evidence.turn_index); + tracking.turn_evidence.push(evidence.clone()); + + // Bound unbounded growth on long sessions: keep only the most recent + // evidence/intent entries. Older turns can still be reconstructed from + // the per-turn `intent_evidence` field on dialog turn files. + let evidence_cap = crate::agentic::execution::intent_evidence::MAX_TURN_EVIDENCE_RETAINED; + if tracking.turn_evidence.len() > evidence_cap { + let drop_count = tracking.turn_evidence.len() - evidence_cap; + tracking.turn_evidence.drain(0..drop_count); + } + let intents_cap = crate::agentic::execution::intent_evidence::MAX_HIDDEN_INTENTS_RETAINED; + if tracking.hidden_intents.len() > intents_cap { + let drop_count = tracking.hidden_intents.len() - intents_cap; + tracking.hidden_intents.drain(0..drop_count); + } + + self.persistence_manager + .save_session_metadata(&workspace_path, &metadata) + .await?; + + // Also update the turn file so future trajectory evaluators can load + // turn-local evidence without reading session metadata first. 
+ if let Ok(Some(mut turn)) = self + .persistence_manager + .load_dialog_turn(&workspace_path, session_id, evidence.turn_index) + .await + { + turn.intent_evidence = Some(evidence.clone()); + if let Err(e) = self + .persistence_manager + .save_dialog_turn(&workspace_path, &turn) + .await + { + warn!( + "Failed to save dialog turn with intent evidence: session_id={}, turn_index={}, error={}", + session_id, evidence.turn_index, e + ); + } + } + + debug!( + "Intent evidence recorded: session_id={}, turn_index={}, asked_user_question={}, proactive_tools={}", + session_id, + evidence.turn_index, + evidence.asked_user_question, + evidence.proactive_tool_calls + ); + + Ok(()) + } + + /// Load unresolved hidden intents for the given session. + /// + /// Returns intents whose `terminal_status` is `None` (not yet resolved). + /// These can be injected into subsequent turn prompts so the agent is aware + /// of previously discovered requirements. + pub async fn load_unresolved_hidden_intents( + &self, + session_id: &str, + ) -> Vec { + let workspace_path = match self.effective_session_workspace_path(session_id).await { + Some(p) => p, + None => return Vec::new(), + }; + + let metadata = match self + .persistence_manager + .load_session_metadata(&workspace_path, session_id) + .await + { + Ok(Some(m)) => m, + _ => return Vec::new(), + }; + + match metadata.intent_tracking { + Some(ref tracking) if tracking.enabled => tracking + .hidden_intents + .iter() + .filter(|i| i.terminal_status.is_none()) + .cloned() + .collect(), + _ => Vec::new(), + } + } + /// Mark a dialog turn as failed and persist it. /// Unlike `complete_dialog_turn`, this sets the state to `Failed` with an error message. pub async fn fail_dialog_turn( @@ -3595,8 +3761,7 @@ impl SessionManager { // Construct system prompt let system_prompt = format!( "You are a professional session title generation assistant. 
Based on the user's message content, generate a concise and accurate session title.\n\nRequirements:\n- Title should not exceed {} characters\n- {}\n- Concise and accurate, reflecting the conversation topic\n- Do not add quotes or other decorative symbols\n- Return only the title text, no other content", - max_length, - language_instruction + max_length, language_instruction ); // Truncate message to save tokens (max 200 characters) @@ -4074,9 +4239,11 @@ mod tests { .expect("session should create"); let snapshots = SessionManager::collect_auto_save_snapshots(&manager.sessions); - assert!(snapshots - .iter() - .any(|snapshot| snapshot.session_id == session.session_id)); + assert!( + snapshots + .iter() + .any(|snapshot| snapshot.session_id == session.session_id) + ); match manager.sessions.try_get_mut(&session.session_id) { TryResult::Present(_) => {} @@ -4241,10 +4408,12 @@ mod tests { .get_session(&session.session_id) .expect("session should remain active"); assert_eq!(active.dialog_turn_ids, vec!["local-usage-1".to_string()]); - assert!(manager - .context_store - .get_context_messages(&session.session_id) - .is_empty()); + assert!( + manager + .context_store + .get_context_messages(&session.session_id) + .is_empty() + ); let persisted_turns = persistence_manager .load_session_turns(workspace.path(), &session.session_id) @@ -4331,11 +4500,13 @@ mod tests { .expect("ephemeral child session should create"); assert!(manager.get_session(&session.session_id).is_some()); - assert!(persistence_manager - .load_session_metadata(workspace.path(), &session.session_id) - .await - .expect("metadata lookup should succeed") - .is_none()); + assert!( + persistence_manager + .load_session_metadata(workspace.path(), &session.session_id) + .await + .expect("metadata lookup should succeed") + .is_none() + ); } #[tokio::test] @@ -4591,10 +4762,12 @@ mod tests { assert_eq!(view_session.dialog_turn_ids, vec!["turn-1".to_string()]); assert_eq!(turns.len(), 1); 
assert!(manager.get_session(&session_id).is_none()); - assert!(manager - .context_store - .get_context_messages(&session_id) - .is_empty()); + assert!( + manager + .context_store + .get_context_messages(&session_id) + .is_empty() + ); } #[tokio::test] @@ -4888,11 +5061,13 @@ mod tests { assert_eq!(turns.len(), 1); assert_eq!(turns[0].user_message.content, "prompt 0"); assert_eq!(turns[0].agent_type.as_deref(), Some("agentic")); - assert!(persistence_manager - .load_turn_context_snapshot(workspace.path(), &session.session_id, 1) - .await - .expect("snapshot load should succeed") - .is_none()); + assert!( + persistence_manager + .load_turn_context_snapshot(workspace.path(), &session.session_id, 1) + .await + .expect("snapshot load should succeed") + .is_none() + ); manager.sessions.remove(&session.session_id); let restored = manager @@ -5007,10 +5182,12 @@ mod tests { .await .expect("session should delete"); - assert!(manager - .session_workspace_index - .get(&session.session_id) - .is_none()); + assert!( + manager + .session_workspace_index + .get(&session.session_id) + .is_none() + ); } #[test] diff --git a/src/crates/core/src/agentic/tools/tool_result_storage.rs b/src/crates/core/src/agentic/tools/tool_result_storage.rs index dfeb5563d..e6c8d2f17 100644 --- a/src/crates/core/src/agentic/tools/tool_result_storage.rs +++ b/src/crates/core/src/agentic/tools/tool_result_storage.rs @@ -516,7 +516,7 @@ mod tests { let output_path = context .current_workspace_session_tool_result_path("session_1", "bash_1.txt") .expect("tool result path"); - let saved = std::fs::read_to_string(output_path).expect("saved output"); + let saved = tokio::fs::read_to_string(output_path).await.expect("saved output"); assert_eq!(saved, full_output); let _ = std::fs::remove_dir_all(root); diff --git a/src/crates/core/src/service/agent_memory/instruction_context.rs b/src/crates/core/src/service/agent_memory/instruction_context.rs index 778e6914b..90dda4bb5 100644 --- 
a/src/crates/core/src/service/agent_memory/instruction_context.rs +++ b/src/crates/core/src/service/agent_memory/instruction_context.rs @@ -71,3 +71,51 @@ pub(crate) async fn build_workspace_instruction_files_context( &instruction_files, )) } + +#[cfg(test)] +mod tests { + use super::build_workspace_instruction_files_context; + use std::path::PathBuf; + use tokio::fs; + + #[tokio::test] + async fn workspace_instructions_load_agents_md() { + let workspace = unique_temp_workspace("instructions-root"); + fs::create_dir_all(&workspace) + .await + .expect("create workspace"); + fs::write( + workspace.join("AGENTS.md"), + "# Root instructions\n\nFollow these rules.", + ) + .await + .expect("write AGENTS"); + + let context = build_workspace_instruction_files_context(&workspace) + .await + .expect("context should build") + .expect("context should exist"); + + assert!(context.contains("")); + assert!(context.contains("Follow these rules.")); + + let _ = fs::remove_dir_all(&workspace).await; + } + + #[tokio::test] + async fn workspace_instructions_skips_missing_agents_md() { + let workspace = unique_temp_workspace("instructions-empty"); + + let context = build_workspace_instruction_files_context(&workspace) + .await + .expect("context should build"); + + assert!(context.is_none(), "empty workspace should produce no context"); + + let _ = fs::remove_dir_all(&workspace).await; + } + + fn unique_temp_workspace(name: &str) -> PathBuf { + std::env::temp_dir().join(format!("bitfun-{}-{}", name, uuid::Uuid::new_v4())) + } +} diff --git a/src/crates/core/src/service/session_usage/service.rs b/src/crates/core/src/service/session_usage/service.rs index 001f05c62..985c98702 100644 --- a/src/crates/core/src/service/session_usage/service.rs +++ b/src/crates/core/src/service/session_usage/service.rs @@ -114,6 +114,7 @@ pub fn build_session_usage_report_from_sources( report.compression = build_compression_breakdown(turns); report.errors = build_error_breakdown(turns); report.slowest = 
build_slowest_spans(turns); + report.proactivity = build_proactivity_report(turns); report.privacy = UsagePrivacy { prompt_content_included: false, tool_inputs_included: false, @@ -939,6 +940,198 @@ fn collect_redacted_fields(report: &SessionUsageReport) -> Vec { fields } +fn build_proactivity_report(turns: &[DialogTurnData]) -> Option { + // Prefer assignment-based reporting (populated by a hidden-intent evaluator). + if let Some(report) = build_proactivity_from_assignments(turns) { + return Some(report); + } + // Fallback: synthesize a trajectory-based report from per-turn evidence + // collected by IntentEvidenceCollector. This is coarser than assignment-based + // scoring but preserves the user-visible report when no evaluator has run. + build_proactivity_from_evidence(turns) +} + +fn build_proactivity_from_assignments(turns: &[DialogTurnData]) -> Option { + let mut completed: u32 = 0; + let mut inferred: u32 = 0; + let mut provided: u32 = 0; + let mut turn_details: Vec = Vec::new(); + + for turn in turns { + let mut turn_completed: u32 = 0; + let mut turn_inferred: u32 = 0; + let mut turn_provided: u32 = 0; + let mut asked_question = false; + let mut proactive_tools = 0usize; + + for assignment in turn + .intent_assignments + .iter() + .filter(|assignment| !is_legacy_proxy_intent_assignment(assignment)) + { + match assignment.terminal_status { + bitfun_services_core::session::hidden_intent_types::IntentTerminalStatus::Completed => { + turn_completed += 1; + completed += 1; + } + bitfun_services_core::session::hidden_intent_types::IntentTerminalStatus::Inferred => { + turn_inferred += 1; + inferred += 1; + asked_question = true; + } + bitfun_services_core::session::hidden_intent_types::IntentTerminalStatus::Provided => { + turn_provided += 1; + provided += 1; + } + } + // Extract proactive tool count from trigger description. Take the + // max across this turn's assignments so multi-assignment turns + // don't report a last-wins value. 
+ if let Some(ref desc) = assignment.trigger_description { + if let Some(val) = desc + .split_whitespace() + .find_map(|w| w.strip_prefix("proactive_tools=")) + .and_then(|s| s.parse::().ok()) + { + proactive_tools = proactive_tools.max(val); + } + } + // Strict token match so benign descriptions don't false-positive. + if assignment.trigger_description.as_ref().is_some_and(|d| { + d.split_whitespace().any(|w| w == "asked=true") + }) { + asked_question = true; + } + } + + // Prefer the authoritative per-turn evidence count when present. + if let Some(ev) = &turn.intent_evidence { + proactive_tools = proactive_tools.max(ev.proactive_tool_calls); + if ev.asked_user_question { + asked_question = true; + } + } + + if turn_completed + turn_inferred + turn_provided > 0 { + turn_details.push(TurnProactivityDetail { + turn_index: turn.turn_index, + asked_question, + proactive_tool_count: proactive_tools, + intents_completed: turn_completed, + intents_inferred: turn_inferred, + intents_provided: turn_provided, + }); + } + } + + let total = completed + inferred + provided; + if total == 0 { + return None; + } + + let score = (completed + inferred) as f32 / total as f32; + + // A single "provided" assignment in isolation indicates the user had to + // supply one requirement without any agent proactivity. This is not enough + // signal to produce a meaningful proactivity report: the denominator (total) + // is 1, which inflates the score to an uninterpretable 0.0. We suppress the + // report in this case so consumers see `null` rather than a misleading score. + // A single "completed" or "inferred" assignment is kept because it + // unambiguously shows at least one proactive act occurred. + if total == 1 && provided == 1 && completed == 0 && inferred == 0 { + return None; + } + + Some(ProactivityReport { + completed, + inferred, + provided, + score, + level: proactivity_level_label(score), + turn_details, + }) +} + +/// Trajectory-based fallback. 
Each turn contributes at most one signal: +/// asked-user → "inferred", acted proactively → "completed", produced output +/// passively → "provided". Counts are turn-based, not intent-based, so the +/// numbers reflect trajectory rather than a true pi-Bench evaluation. +fn build_proactivity_from_evidence(turns: &[DialogTurnData]) -> Option { + let mut completed: u32 = 0; + let mut inferred: u32 = 0; + let mut provided: u32 = 0; + let mut turn_details: Vec = Vec::new(); + + for turn in turns { + let Some(ev) = &turn.intent_evidence else { + continue; + }; + let mut tc = 0u32; + let mut ti = 0u32; + let mut tp = 0u32; + if ev.asked_user_question { + ti = 1; + inferred += 1; + } else if ev.proactive_tool_calls > 0 { + tc = 1; + completed += 1; + } else if ev.produced_output { + tp = 1; + provided += 1; + } + if tc + ti + tp > 0 { + turn_details.push(TurnProactivityDetail { + turn_index: turn.turn_index, + asked_question: ev.asked_user_question, + proactive_tool_count: ev.proactive_tool_calls, + intents_completed: tc, + intents_inferred: ti, + intents_provided: tp, + }); + } + } + + let total = completed + inferred + provided; + if total == 0 { + return None; + } + if total == 1 && provided == 1 { + return None; + } + let score = (completed + inferred) as f32 / total as f32; + Some(ProactivityReport { + completed, + inferred, + provided, + score, + level: proactivity_level_label(score), + turn_details, + }) +} + +fn proactivity_level_label(score: f32) -> String { + bitfun_services_core::session::hidden_intent_types::ProactivityLevel::from_score(score) + .as_str() + .to_string() +} + +fn is_legacy_proxy_intent_assignment( + assignment: &bitfun_services_core::session::hidden_intent_types::IntentAssignment, +) -> bool { + // Prefer the explicit flag set by new code. 
+ if assignment.is_proxy { + return true; + } + // Fallback heuristic for older session files that pre-date the `is_proxy` + // field: synthetic proxy assignments were generated with a `turn-N` intent + // ID and a description containing the raw evidence fields. + assignment.intent_id.starts_with("turn-") + && assignment + .trigger_description + .as_ref() + .is_some_and(|desc| desc.contains("proactive_tools=") && desc.contains("rounds=")) +} + fn iter_tools(turns: &[DialogTurnData]) -> impl Iterator { turns.iter().flat_map(iter_turn_tools) } @@ -1082,6 +1275,9 @@ mod tests { use crate::service::session::{ DialogTurnData, ModelRoundData, ToolCallData, ToolItemData, ToolResultData, UserMessageData, }; + use bitfun_services_core::session::hidden_intent_types::{ + IntentAssignment, IntentTerminalStatus, + }; use chrono::TimeZone; #[test] @@ -1102,10 +1298,12 @@ mod tests { report.tokens.cache_coverage, UsageCacheCoverage::Unavailable ); - assert!(report - .coverage - .missing - .contains(&UsageCoverageKey::CachedTokens)); + assert!( + report + .coverage + .missing + .contains(&UsageCoverageKey::CachedTokens) + ); } #[test] @@ -1124,10 +1322,12 @@ mod tests { assert_eq!(report.tokens.cached_tokens, Some(12)); assert_eq!(report.tokens.cache_coverage, UsageCacheCoverage::Available); assert_eq!(report.models[0].cached_tokens, Some(12)); - assert!(report - .coverage - .available - .contains(&UsageCoverageKey::CachedTokens)); + assert!( + report + .coverage + .available + .contains(&UsageCoverageKey::CachedTokens) + ); } #[test] @@ -1142,10 +1342,116 @@ mod tests { ); assert_eq!(report.workspace.kind, UsageWorkspaceKind::RemoteSsh); - assert!(report - .coverage - .missing - .contains(&UsageCoverageKey::RemoteSnapshotStats)); + assert!( + report + .coverage + .missing + .contains(&UsageCoverageKey::RemoteSnapshotStats) + ); + } + + #[test] + fn report_omits_proactivity_when_no_intent_assignments_exist() { + let request = test_request(None); + + let report = 
build_session_usage_report_from_turns( + request, + &[test_turn("turn-1", 0, DialogTurnKind::UserDialog)], + &[], + 1_778_347_200_000, + ); + + assert_eq!(report.proactivity, None); + assert_eq!(report.completeness, None); + } + + #[test] + fn report_includes_proactivity_when_intent_assignments_exist() { + let request = test_request(None); + let mut turn = test_turn("turn-1", 0, DialogTurnKind::UserDialog); + turn.intent_assignments.push(IntentAssignment { + intent_id: "intent-0".to_string(), + terminal_status: IntentTerminalStatus::Completed, + assigned_at_turn: 0, + trigger_description: Some("matched annotated hidden intent".to_string()), + is_proxy: false, + }); + + let report = + build_session_usage_report_from_turns(request, &[turn], &[], 1_778_347_200_000); + + assert_eq!( + report.proactivity.as_ref().map(|value| value.completed), + Some(1) + ); + assert_eq!(report.completeness, None); + } + + #[test] + fn report_ignores_legacy_proxy_intent_assignments() { + let request = test_request(None); + let mut turn = test_turn("turn-1", 0, DialogTurnKind::UserDialog); + turn.intent_assignments.push(IntentAssignment { + intent_id: "turn-0".to_string(), + terminal_status: IntentTerminalStatus::Completed, + assigned_at_turn: 0, + trigger_description: Some( + "asked=false proactive_tools=1 output=true rounds=1".to_string(), + ), + is_proxy: false, // detected via heuristic (intent_id starts with "turn-") + }); + + let report = + build_session_usage_report_from_turns(request, &[turn], &[], 1_778_347_200_000); + + assert_eq!(report.proactivity, None); + assert_eq!(report.completeness, None); + } + + #[test] + fn report_ignores_assignment_with_is_proxy_flag_regardless_of_intent_id() { + // An assignment whose intent_id does NOT start with "turn-" but has + // is_proxy=true must still be excluded. This prevents real intent IDs + // that happen to start with "turn-" from being wrongly excluded by the + // heuristic, and ensures the explicit flag takes priority. 
+ let request = test_request(None); + let mut turn = test_turn("turn-1", 0, DialogTurnKind::UserDialog); + turn.intent_assignments.push(IntentAssignment { + intent_id: "intent-real-name".to_string(), + terminal_status: IntentTerminalStatus::Completed, + assigned_at_turn: 0, + trigger_description: None, + is_proxy: true, + }); + + let report = + build_session_usage_report_from_turns(request, &[turn], &[], 1_778_347_200_000); + + assert_eq!(report.proactivity, None, "is_proxy=true must exclude the assignment"); + } + + #[test] + fn report_does_not_exclude_turn_prefixed_intent_id_when_is_proxy_false() { + // An intent_id starting with "turn-" must NOT be excluded when the + // description doesn't match the legacy heuristic pattern AND is_proxy=false. + let request = test_request(None); + let mut turn = test_turn("turn-1", 0, DialogTurnKind::UserDialog); + turn.intent_assignments.push(IntentAssignment { + intent_id: "turn-based-strategy".to_string(), // starts with "turn-" but is real + terminal_status: IntentTerminalStatus::Completed, + assigned_at_turn: 0, + trigger_description: Some("real annotated intent".to_string()), + is_proxy: false, + }); + + let report = + build_session_usage_report_from_turns(request, &[turn], &[], 1_778_347_200_000); + + assert_eq!( + report.proactivity.as_ref().map(|p| p.completed), + Some(1), + "real intent with turn- prefix must not be filtered" + ); } #[test] @@ -1250,14 +1556,18 @@ mod tests { let report = build_session_usage_report_from_turns(request, &[turn], &[], 1_778_347_200_000); - assert!(report - .coverage - .available - .contains(&UsageCoverageKey::ModelRoundTiming)); - assert!(!report - .coverage - .missing - .contains(&UsageCoverageKey::ModelRoundTiming)); + assert!( + report + .coverage + .available + .contains(&UsageCoverageKey::ModelRoundTiming) + ); + assert!( + !report + .coverage + .missing + .contains(&UsageCoverageKey::ModelRoundTiming) + ); assert_eq!( report .models @@ -1549,14 +1859,18 @@ mod tests { 
assert_eq!(write.preflight_ms, Some(16)); assert_eq!(write.confirmation_wait_ms, Some(13)); assert_eq!(write.execution_ms, Some(141)); - assert!(report - .coverage - .available - .contains(&UsageCoverageKey::ToolPhaseTiming)); - assert!(!report - .coverage - .missing - .contains(&UsageCoverageKey::ToolPhaseTiming)); + assert!( + report + .coverage + .available + .contains(&UsageCoverageKey::ToolPhaseTiming) + ); + assert!( + !report + .coverage + .missing + .contains(&UsageCoverageKey::ToolPhaseTiming) + ); } #[test] @@ -1580,14 +1894,18 @@ mod tests { assert_eq!(report.files.changed_files, Some(2)); assert_eq!(report.files.added_lines, Some(19)); assert_eq!(report.files.deleted_lines, Some(3)); - assert!(report - .coverage - .available - .contains(&UsageCoverageKey::FileLineStats)); - assert!(!report - .coverage - .missing - .contains(&UsageCoverageKey::FileLineStats)); + assert!( + report + .coverage + .available + .contains(&UsageCoverageKey::FileLineStats) + ); + assert!( + !report + .coverage + .missing + .contains(&UsageCoverageKey::FileLineStats) + ); let main_row = report .files @@ -1616,14 +1934,18 @@ mod tests { assert_eq!(report.files.scope, UsageFileScope::ToolInputsOnly); assert_eq!(report.files.changed_files, Some(1)); assert_eq!(report.files.added_lines, None); - assert!(report - .coverage - .missing - .contains(&UsageCoverageKey::FileLineStats)); - assert!(report - .coverage - .missing - .contains(&UsageCoverageKey::RemoteSnapshotStats)); + assert!( + report + .coverage + .missing + .contains(&UsageCoverageKey::FileLineStats) + ); + assert!( + report + .coverage + .missing + .contains(&UsageCoverageKey::RemoteSnapshotStats) + ); } #[test] @@ -1832,6 +2154,8 @@ mod tests { end_time: Some(1_300 + turn_index as u64), duration_ms: Some(300), status: TurnStatus::Completed, + intent_assignments: vec![], + intent_evidence: None, } } diff --git a/src/crates/core/src/service_agent_runtime.rs b/src/crates/core/src/service_agent_runtime.rs index 
323f3b702..646dfe181 100644 --- a/src/crates/core/src/service_agent_runtime.rs +++ b/src/crates/core/src/service_agent_runtime.rs @@ -1141,6 +1141,8 @@ mod tests { end_time: Some(1_250), duration_ms: Some(250), status, + intent_assignments: Vec::new(), + intent_evidence: None, } } } diff --git a/src/crates/services-core/src/session/hidden_intent_types.rs b/src/crates/services-core/src/session/hidden_intent_types.rs new file mode 100644 index 000000000..9f0d5b29b --- /dev/null +++ b/src/crates/services-core/src/session/hidden_intent_types.rs @@ -0,0 +1,469 @@ +//! Hidden Intent tracking types for proactive assistance evaluation. +//! +//! Based on the pi-Bench Hidden Intent framework, these types enable +//! tracking whether an agent proactively resolves hidden user requirements +//! or passively waits for the user to provide them. + +use serde::{Deserialize, Serialize}; + +// --------------------------------------------------------------------------- +// Core intent tracking types +// --------------------------------------------------------------------------- + +/// Terminal status of a hidden intent during a session. +/// +/// Both Completed and Inferred count toward proactivity because both reflect +/// agent initiative. Provided means the user had to surface the requirement +/// without agent prompting. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum IntentTerminalStatus { + Completed, + Inferred, + Provided, +} + +impl IntentTerminalStatus { + pub fn is_proactive(&self) -> bool { + matches!(self, Self::Completed | Self::Inferred) + } +} + +/// A single hidden intent -- an unstated requirement that should shape the +/// agent's behavior during interaction. 
+#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct HiddenIntent { + #[serde(alias = "intent_id")] + pub intent_id: String, + pub description: String, + #[serde(default)] + pub scope: IntentScope, + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "terminal_status" + )] + pub terminal_status: Option, + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "resolved_at_turn" + )] + pub resolved_at_turn: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub source: Option, +} + +/// Whether an intent is session-local or persists across sessions. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)] +#[serde(rename_all = "snake_case")] +pub enum IntentScope { + #[default] + SessionLocal, + Persistent, +} + +/// Source from which a hidden intent was derived. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum IntentSource { + PriorContext, + DomainKnowledge, + UserPreference, + ManualAnnotation, +} + +/// A user preference or convention that persists across sessions. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct PersistentIntent { + #[serde(alias = "intent_id")] + pub intent_id: String, + pub description: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub category: Option, + #[serde(alias = "established_in_session")] + pub established_in_session: String, + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "apply_count" + )] + pub apply_count: Option, + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "last_applied_at" + )] + pub last_applied_at: Option, + #[serde(alias = "established_at")] + pub established_at: u64, +} + +/// Records a terminal status assignment for a hidden intent at a specific turn. 
+#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct IntentAssignment { + #[serde(alias = "intent_id")] + pub intent_id: String, + #[serde(alias = "terminal_status")] + pub terminal_status: IntentTerminalStatus, + #[serde(alias = "assigned_at_turn")] + pub assigned_at_turn: usize, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub trigger_description: Option, + /// Marks this assignment as a synthetic proxy generated from raw evidence + /// rather than a real hidden-intent evaluation. Proxy assignments are + /// excluded from proactivity reports so they do not inflate scores. + /// Defaults to `false` so existing session files remain compatible. + #[serde(default, skip_serializing_if = "std::ops::Not::not")] + pub is_proxy: bool, +} + +/// Raw per-turn signals collected during execution. +/// +/// This is not a terminal status assignment. It is trajectory evidence that a +/// later evaluator can compare against concrete hidden intents. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +pub struct IntentTurnEvidence { + pub turn_index: usize, + pub asked_user_question: bool, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub question_topics: Vec, + pub proactive_tool_calls: usize, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub tool_names_used: Vec, + pub produced_output: bool, + pub round_count: usize, + pub asked_follow_up_in_text: bool, +} + +impl IntentTurnEvidence { + pub fn with_turn_index(mut self, turn_index: usize) -> Self { + self.turn_index = turn_index; + self + } +} + +/// Aggregate intent tracking state for a single session. 
+#[derive(Debug, Clone, Serialize, Deserialize, Default)] +#[serde(rename_all = "camelCase")] +pub struct SessionIntentTracking { + #[serde(default)] + pub enabled: bool, + #[serde( + default, + skip_serializing_if = "Vec::is_empty", + alias = "hidden_intents" + )] + pub hidden_intents: Vec, + #[serde( + default, + skip_serializing_if = "Vec::is_empty", + alias = "persistent_intents" + )] + pub persistent_intents: Vec, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub assignments: Vec, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub turn_evidence: Vec, +} + +impl SessionIntentTracking { + pub fn all_intents_resolved(&self) -> bool { + if !self.enabled || self.hidden_intents.is_empty() { + return true; + } + self.hidden_intents + .iter() + .all(|i| i.terminal_status.is_some()) + } + + pub fn count_by_status(&self, status: IntentTerminalStatus) -> usize { + self.hidden_intents + .iter() + .filter(|i| i.terminal_status.as_ref() == Some(&status)) + .count() + } + + pub fn total_intents(&self) -> usize { + self.hidden_intents.len() + } + + pub fn proactive_count(&self) -> usize { + self.count_by_status(IntentTerminalStatus::Completed) + + self.count_by_status(IntentTerminalStatus::Inferred) + } + + pub fn proactivity_score(&self) -> Option { + let total = self.total_intents(); + if total == 0 || !self.all_intents_resolved() { + return None; + } + Some(self.proactive_count() as f32 / total as f32) + } +} + +/// Proactivity score breakdown for a session. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct ProactivityScore { + pub completed: u32, + pub inferred: u32, + pub provided: u32, + pub score: f32, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub level: Option, +} + +/// Qualitative proactivity level. 
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum ProactivityLevel { + High, + Moderate, + Low, + Reactive, +} + +impl ProactivityLevel { + /// Classify a proactivity score into a qualitative level. + /// + /// Thresholds (inclusive lower bound): + /// - High ≥ 0.8 + /// - Moderate ≥ 0.5 + /// - Low ≥ 0.2 + /// - Reactive < 0.2 + pub fn from_score(score: f32) -> Self { + if score >= 0.8 { + Self::High + } else if score >= 0.5 { + Self::Moderate + } else if score >= 0.2 { + Self::Low + } else { + Self::Reactive + } + } + + /// Returns the snake_case string label used in JSON/API surfaces. + pub fn as_str(&self) -> &'static str { + match self { + Self::High => "high", + Self::Moderate => "moderate", + Self::Low => "low", + Self::Reactive => "reactive", + } + } +} + +/// Completeness score breakdown for a session. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct CompletenessScore { + #[serde(alias = "requirements_satisfied")] + pub requirements_satisfied: u32, + #[serde(alias = "requirements_missed")] + pub requirements_missed: u32, + pub score: f32, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub level: Option, +} + +/// Qualitative completeness level. 
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum CompletenessLevel { + Full, + Partial, + Minimal, + Incomplete, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn terminal_status_is_proactive() { + assert!(IntentTerminalStatus::Completed.is_proactive()); + assert!(IntentTerminalStatus::Inferred.is_proactive()); + assert!(!IntentTerminalStatus::Provided.is_proactive()); + } + + #[test] + fn all_intents_resolved_empty() { + let tracking = SessionIntentTracking::default(); + assert!(tracking.all_intents_resolved()); + } + + #[test] + fn all_intents_resolved_with_intents() { + let tracking = SessionIntentTracking { + enabled: true, + hidden_intents: vec![HiddenIntent { + intent_id: "i1".into(), + description: "test".into(), + scope: IntentScope::SessionLocal, + terminal_status: Some(IntentTerminalStatus::Completed), + resolved_at_turn: Some(1), + source: None, + }], + ..Default::default() + }; + assert!(tracking.all_intents_resolved()); + } + + #[test] + fn all_intents_not_resolved() { + let tracking = SessionIntentTracking { + enabled: true, + hidden_intents: vec![ + HiddenIntent { + intent_id: "i1".into(), + description: "test".into(), + scope: IntentScope::SessionLocal, + terminal_status: Some(IntentTerminalStatus::Completed), + resolved_at_turn: Some(1), + source: None, + }, + HiddenIntent { + intent_id: "i2".into(), + description: "test".into(), + scope: IntentScope::SessionLocal, + terminal_status: None, + resolved_at_turn: None, + source: None, + }, + ], + ..Default::default() + }; + assert!(!tracking.all_intents_resolved()); + } + + #[test] + fn proactivity_score_full() { + let tracking = SessionIntentTracking { + enabled: true, + hidden_intents: (0..4) + .map(|i| HiddenIntent { + intent_id: format!("i{}", i), + description: "test".into(), + scope: IntentScope::SessionLocal, + terminal_status: Some(IntentTerminalStatus::Completed), + resolved_at_turn: Some(i), + source: None, + }) + 
.collect(), + ..Default::default() + }; + let score = tracking.proactivity_score().unwrap(); + assert!((score - 1.0).abs() < f32::EPSILON); + } + + #[test] + fn proactivity_score_mixed() { + let tracking = SessionIntentTracking { + enabled: true, + hidden_intents: vec![ + HiddenIntent { + intent_id: "i1".into(), + description: "test".into(), + scope: IntentScope::SessionLocal, + terminal_status: Some(IntentTerminalStatus::Completed), + resolved_at_turn: Some(1), + source: None, + }, + HiddenIntent { + intent_id: "i2".into(), + description: "test".into(), + scope: IntentScope::SessionLocal, + terminal_status: Some(IntentTerminalStatus::Inferred), + resolved_at_turn: Some(2), + source: None, + }, + HiddenIntent { + intent_id: "i3".into(), + description: "test".into(), + scope: IntentScope::SessionLocal, + terminal_status: Some(IntentTerminalStatus::Provided), + resolved_at_turn: Some(3), + source: None, + }, + ], + ..Default::default() + }; + let score = tracking.proactivity_score().unwrap(); + assert!((score - 2.0 / 3.0).abs() < f32::EPSILON); + } + + #[test] + fn proactivity_score_no_intents() { + let tracking = SessionIntentTracking::default(); + assert_eq!(tracking.proactivity_score(), None); + } + + #[test] + fn proactivity_score_unavailable_until_all_intents_resolved() { + let tracking = SessionIntentTracking { + enabled: true, + hidden_intents: vec![ + HiddenIntent { + intent_id: "i1".into(), + description: "test".into(), + scope: IntentScope::SessionLocal, + terminal_status: Some(IntentTerminalStatus::Completed), + resolved_at_turn: Some(1), + source: None, + }, + HiddenIntent { + intent_id: "i2".into(), + description: "test".into(), + scope: IntentScope::SessionLocal, + terminal_status: None, + resolved_at_turn: None, + source: None, + }, + ], + ..Default::default() + }; + + assert_eq!(tracking.proactivity_score(), None); + } + + #[test] + fn hidden_intent_round_trips() { + let intent = HiddenIntent { + intent_id: "i1".into(), + description: "Apply naming 
convention from prior session".into(), + scope: IntentScope::Persistent, + terminal_status: Some(IntentTerminalStatus::Inferred), + resolved_at_turn: Some(3), + source: Some(IntentSource::PriorContext), + }; + let json = serde_json::to_value(&intent).expect("serialize"); + let rt: HiddenIntent = serde_json::from_value(json).expect("deserialize"); + assert_eq!(rt.intent_id, "i1"); + assert_eq!(rt.terminal_status, Some(IntentTerminalStatus::Inferred)); + assert_eq!(rt.scope, IntentScope::Persistent); + } + + #[test] + fn proactivity_score_round_trips() { + let score = ProactivityScore { + completed: 3, + inferred: 2, + provided: 1, + score: 5.0 / 6.0, + level: Some(ProactivityLevel::High), + }; + let json = serde_json::to_value(&score).expect("serialize"); + let rt: ProactivityScore = serde_json::from_value(json).expect("deserialize"); + assert_eq!(rt.completed, 3); + assert_eq!(rt.inferred, 2); + assert_eq!(rt.provided, 1); + assert_eq!(rt.level, Some(ProactivityLevel::High)); + } +} diff --git a/src/crates/services-core/src/session/mod.rs b/src/crates/services-core/src/session/mod.rs index b5bdd7c1c..f32d58fb8 100644 --- a/src/crates/services-core/src/session/mod.rs +++ b/src/crates/services-core/src/session/mod.rs @@ -1,3 +1,4 @@ +pub mod hidden_intent_types; pub mod types; pub use bitfun_core_types::SessionKind; diff --git a/src/crates/services-core/src/session/types.rs b/src/crates/services-core/src/session/types.rs index 6705efb4c..5a22a1b3b 100644 --- a/src/crates/services-core/src/session/types.rs +++ b/src/crates/services-core/src/session/types.rs @@ -20,17 +20,41 @@ pub enum SessionRelationshipKind { pub struct SessionRelationship { #[serde(default, skip_serializing_if = "Option::is_none")] pub kind: Option, - #[serde(default, skip_serializing_if = "Option::is_none", alias = "parent_session_id")] + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "parent_session_id" + )] pub parent_session_id: Option, - #[serde(default, 
skip_serializing_if = "Option::is_none", alias = "parent_request_id")] + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "parent_request_id" + )] pub parent_request_id: Option, - #[serde(default, skip_serializing_if = "Option::is_none", alias = "parent_dialog_turn_id")] + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "parent_dialog_turn_id" + )] pub parent_dialog_turn_id: Option, - #[serde(default, skip_serializing_if = "Option::is_none", alias = "parent_turn_index")] + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "parent_turn_index" + )] pub parent_turn_index: Option, - #[serde(default, skip_serializing_if = "Option::is_none", alias = "parent_tool_call_id")] + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "parent_tool_call_id" + )] pub parent_tool_call_id: Option, - #[serde(default, skip_serializing_if = "Option::is_none", alias = "subagent_type")] + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "subagent_type" + )] pub subagent_type: Option, } @@ -174,6 +198,31 @@ pub struct SessionMetadata { alias = "needsUserAttention" )] pub needs_user_attention: Option, + + /// Hidden intent tracking for proactive assistance evaluation. + /// None when intent tracking is not enabled for this session. + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "intent_tracking" + )] + pub intent_tracking: Option, + + /// Proactivity score computed after session completion. + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "proactivity_score" + )] + pub proactivity_score: Option, + + /// Completeness score computed after session completion. 
+ #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "completeness_score" + )] + pub completeness_score: Option, } /// Session status @@ -292,6 +341,27 @@ pub struct DialogTurnData { /// Turn status pub status: TurnStatus, + + /// Hidden intent assignments made during this turn. + /// Each entry records a terminal status assignment for a tracked intent. + #[serde( + default, + skip_serializing_if = "Vec::is_empty", + alias = "intent_assignments" + )] + pub intent_assignments: Vec, + + /// Raw hidden-intent evidence collected during this turn. + /// + /// Evidence is intentionally separate from `intent_assignments`: assigning + /// completed / inferred / provided requires comparing the trajectory + /// against concrete hidden intents. + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "intent_evidence" + )] + pub intent_evidence: Option, } /// Persisted dialog turn kind. @@ -689,6 +759,9 @@ impl SessionMetadata { workspace_hostname: None, unread_completion: None, needs_user_attention: None, + intent_tracking: None, + proactivity_score: None, + completeness_score: None, } } @@ -791,6 +864,8 @@ impl DialogTurnData { end_time: None, duration_ms: None, status: TurnStatus::InProgress, + intent_assignments: Vec::new(), + intent_evidence: None, } } diff --git a/src/crates/services-core/src/session_usage/types.rs b/src/crates/services-core/src/session_usage/types.rs index 35b27739d..5b2631cc1 100644 --- a/src/crates/services-core/src/session_usage/types.rs +++ b/src/crates/services-core/src/session_usage/types.rs @@ -28,6 +28,15 @@ pub struct SessionUsageReport { #[serde(default)] pub slowest: Vec, pub privacy: UsagePrivacy, + + /// Proactivity analysis: how much the agent drove requirement discovery + /// vs passively waited for user instructions. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub proactivity: Option, + + /// Completeness analysis: how many requirements were satisfied. 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub completeness: Option, } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] @@ -335,6 +344,44 @@ pub struct UsagePrivacy { pub redacted_fields: Vec, } +// --------------------------------------------------------------------------- +// Proactivity & Completeness report types +// --------------------------------------------------------------------------- + +/// Proactivity report section in the session usage report. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct ProactivityReport { + pub completed: u32, + pub inferred: u32, + pub provided: u32, + pub score: f32, + pub level: String, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub turn_details: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct TurnProactivityDetail { + pub turn_index: usize, + pub asked_question: bool, + pub proactive_tool_count: usize, + pub intents_completed: u32, + pub intents_inferred: u32, + pub intents_provided: u32, +} + +/// Completeness report section in the session usage report. 
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct CompletenessReport { + pub requirements_satisfied: u32, + pub requirements_missed: u32, + pub score: f32, + pub level: String, +} + impl SessionUsageReport { pub fn partial_unavailable(session_id: impl Into, generated_at: i64) -> Self { Self { @@ -416,6 +463,8 @@ impl SessionUsageReport { file_contents_included: false, redacted_fields: vec![], }, + proactivity: None, + completeness: None, } } } diff --git a/src/web-ui/src/app/scenes/agents/utils.test.ts b/src/web-ui/src/app/scenes/agents/utils.test.ts new file mode 100644 index 000000000..6736a57ed --- /dev/null +++ b/src/web-ui/src/app/scenes/agents/utils.test.ts @@ -0,0 +1,43 @@ +import { describe, expect, it } from 'vitest'; +import { enrichCapabilities, getAgentDescription } from './utils'; +import type { AgentWithCapabilities } from './agentsStore'; + +function makeAgent(overrides: Partial = {}): AgentWithCapabilities { + return { + key: overrides.id ?? 
'IntentCoding', + id: 'IntentCoding', + name: 'Intent Coding', + description: 'backend fallback', + isReadonly: false, + isReview: false, + toolCount: 1, + defaultTools: [], + defaultEnabled: true, + effectiveEnabled: true, + capabilities: [], + agentKind: 'mode', + ...overrides, + }; +} + +describe('agents utils', () => { + it('resolves IntentCoding mode description from the canonical locale key', () => { + const t = ((key: string) => { + if (key === 'agentDescriptions.IntentCoding') { + return 'Intent Coding translated description'; + } + return ''; + }) as any; + + expect(getAgentDescription(t, makeAgent())).toBe('Intent Coding translated description'); + }); + + it('adds coding and testing capabilities for IntentCoding mode', () => { + const enriched = enrichCapabilities(makeAgent()); + + expect(enriched.capabilities).toEqual([ + { category: 'coding', level: 5 }, + { category: 'testing', level: 4 }, + ]); + }); +}); diff --git a/src/web-ui/src/app/scenes/agents/utils.ts b/src/web-ui/src/app/scenes/agents/utils.ts index deceffd25..d27b34f9a 100644 --- a/src/web-ui/src/app/scenes/agents/utils.ts +++ b/src/web-ui/src/app/scenes/agents/utils.ts @@ -4,6 +4,7 @@ import type { AgentKind, AgentWithCapabilities, CapabilityCategory } from './age const MODE_DESCRIPTION_KEY_BY_ID: Record = { agentic: 'Agentic', + intentcoding: 'IntentCoding', plan: 'Plan', debug: 'Debug', cowork: 'Cowork', @@ -92,6 +93,7 @@ function enrichCapabilities(agent: AgentWithCapabilities): AgentWithCapabilities if (agent.agentKind === 'mode') { if (id === 'agentic') return { ...agent, capabilities: [{ category: 'coding', level: 5 }, { category: 'analysis', level: 4 }] }; + if (id === 'intentcoding') return { ...agent, capabilities: [{ category: 'coding', level: 5 }, { category: 'testing', level: 4 }] }; if (id === 'plan') return { ...agent, capabilities: [{ category: 'analysis', level: 5 }, { category: 'docs', level: 3 }] }; if (id === 'debug') return { ...agent, capabilities: [{ category: 
'coding', level: 5 }, { category: 'analysis', level: 3 }] }; if (id === 'cowork') return { ...agent, capabilities: [{ category: 'analysis', level: 4 }, { category: 'creative', level: 3 }] }; diff --git a/src/web-ui/src/flow_chat/components/ChatInput.tsx b/src/web-ui/src/flow_chat/components/ChatInput.tsx index 19a87a4fb..cf851bd9d 100644 --- a/src/web-ui/src/flow_chat/components/ChatInput.tsx +++ b/src/web-ui/src/flow_chat/components/ChatInput.tsx @@ -64,6 +64,7 @@ import { useSessionReviewActivity } from '../hooks/useSessionReviewActivity'; import { shouldBlockDeepReviewCommand } from '../utils/deepReviewCommandGuard'; import { deriveDeepReviewSessionConcurrencyGuard } from '../utils/deepReviewCapacityGuard'; import { agentAPI } from '@/infrastructure/api/service-api/AgentAPI'; +import { ModePickerOption } from './ModePickerOption'; import './ChatInput.scss'; const log = createLogger('ChatInput'); @@ -2929,7 +2930,7 @@ export const ChatInput: React.FC = ({ {canSwitchModes && modeState.current !== 'agentic' && (
{t(`chatInput.modeNames.${modeState.current}`, { defaultValue: '' }) || @@ -2957,30 +2958,16 @@ export const ChatInput: React.FC = ({ <>
{incrementalCodeModes.length > 0 ? ( - incrementalCodeModes.map(modeOption => { - const modeDescription = - t(`chatInput.modeDescriptions.${modeOption.id}`, { defaultValue: '' }) || - modeOption.description || - modeOption.name; - const modeName = - t(`chatInput.modeNames.${modeOption.id}`, { defaultValue: '' }) || modeOption.name; - return ( - -
{ - e.stopPropagation(); - requestModeChange(modeOption.id); - }} - > - {modeName} - {modeState.current === modeOption.id && ( - {t('chatInput.current')} - )} -
-
- ); - }) + incrementalCodeModes.map(modeOption => ( + + )) ) : (
{t('chatInput.noIncrementalModes')} diff --git a/src/web-ui/src/flow_chat/components/ModePickerOption.test.tsx b/src/web-ui/src/flow_chat/components/ModePickerOption.test.tsx new file mode 100644 index 000000000..2d96e15ce --- /dev/null +++ b/src/web-ui/src/flow_chat/components/ModePickerOption.test.tsx @@ -0,0 +1,95 @@ +// @vitest-environment jsdom + +import React, { act } from 'react'; +import { createRoot, type Root } from 'react-dom/client'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { ModePickerOption } from './ModePickerOption'; + +vi.mock('@/component-library', () => ({ + Tooltip: ({ + children, + content, + }: { + children: React.ReactNode; + content: React.ReactNode; + }) =>
{children}
, +})); + +function makeTranslator(values: Record) { + return (key: string, options?: { defaultValue?: string }) => values[key] ?? options?.defaultValue ?? ''; +} + +describe('ModePickerOption', () => { + let container: HTMLDivElement; + let root: Root; + + beforeEach(() => { + (globalThis as typeof globalThis & { IS_REACT_ACT_ENVIRONMENT?: boolean }).IS_REACT_ACT_ENVIRONMENT = true; + container = document.createElement('div'); + document.body.appendChild(container); + root = createRoot(container); + }); + + afterEach(() => { + act(() => { + root.unmount(); + }); + container.remove(); + }); + + it('renders localized IntentCoding mode picker entry with description tooltip content', async () => { + await act(async () => { + root.render( + , + ); + }); + + expect(container.textContent).toContain('Intent Coding'); + expect(container.querySelector('[data-tooltip]')?.getAttribute('data-tooltip')).toBe( + 'Intent-aligned coding', + ); + }); + + it('marks the current mode and selects IntentCoding on click', async () => { + const onSelect = vi.fn(); + + await act(async () => { + root.render( + , + ); + }); + + const option = container.querySelector('.bitfun-chat-input__mode-option') as HTMLElement; + expect(option.className).toContain('bitfun-chat-input__mode-option--active'); + expect(container.textContent).toContain('Current'); + + await act(async () => { + option.click(); + }); + + expect(onSelect).toHaveBeenCalledWith('IntentCoding'); + }); +}); diff --git a/src/web-ui/src/flow_chat/components/ModePickerOption.tsx b/src/web-ui/src/flow_chat/components/ModePickerOption.tsx new file mode 100644 index 000000000..9447b02ef --- /dev/null +++ b/src/web-ui/src/flow_chat/components/ModePickerOption.tsx @@ -0,0 +1,56 @@ +import { Tooltip } from '@/component-library'; +import type { KeyboardEvent } from 'react'; +import type { ModeInfo } from '../reducers/modeReducer'; +import { getModeDisplayDescription, getModeDisplayName } from './modeDisplay'; + +type Translate = (key: 
string, options?: { defaultValue?: string }) => string; + +interface ModePickerOptionProps { + t: Translate; + modeOption: Pick; + currentMode: string; + currentLabel: string; + onSelect: (modeId: string) => void; +} + +export function ModePickerOption({ + t, + modeOption, + currentMode, + currentLabel, + onSelect, +}: ModePickerOptionProps) { + const modeDescription = getModeDisplayDescription(t, modeOption); + const modeName = getModeDisplayName(t, modeOption); + const isCurrent = currentMode === modeOption.id; + + const handleKeyDown = (e: KeyboardEvent) => { + if (e.key === 'Enter' || e.key === ' ') { + e.preventDefault(); + e.stopPropagation(); + onSelect(modeOption.id); + } + }; + + return ( + +
{ + e.stopPropagation(); + onSelect(modeOption.id); + }} + onKeyDown={handleKeyDown} + > + {modeName} + {isCurrent && ( + {currentLabel} + )} +
+
+ ); +} diff --git a/src/web-ui/src/flow_chat/components/modeDisplay.test.ts b/src/web-ui/src/flow_chat/components/modeDisplay.test.ts new file mode 100644 index 000000000..7915cfda8 --- /dev/null +++ b/src/web-ui/src/flow_chat/components/modeDisplay.test.ts @@ -0,0 +1,46 @@ +import { describe, expect, it } from 'vitest'; +import { getModeDisplayDescription, getModeDisplayName } from './modeDisplay'; + +function makeTranslator(values: Record) { + return (key: string) => values[key] ?? ''; +} + +describe('modeDisplay', () => { + it('resolves localized IntentCoding mode name and description', () => { + const t = makeTranslator({ + 'chatInput.modeNames.IntentCoding': 'Intent Coding', + 'chatInput.modeDescriptions.IntentCoding': 'Intent-aligned coding', + }); + const mode = { + id: 'IntentCoding', + name: 'Intent Coding backend', + description: 'backend description', + }; + + expect(getModeDisplayName(t, mode)).toBe('Intent Coding'); + expect(getModeDisplayDescription(t, mode)).toBe('Intent-aligned coding'); + }); + + it('falls back to backend values when localization is missing', () => { + const t = makeTranslator({}); + const mode = { + id: 'IntentCoding', + name: 'Intent Coding backend', + description: 'backend description', + }; + + expect(getModeDisplayName(t, mode)).toBe('Intent Coding backend'); + expect(getModeDisplayDescription(t, mode)).toBe('backend description'); + }); + + it('falls back to mode name when description is empty', () => { + const t = makeTranslator({}); + const mode = { + id: 'IntentCoding', + name: 'Intent Coding backend', + description: '', + }; + + expect(getModeDisplayDescription(t, mode)).toBe('Intent Coding backend'); + }); +}); diff --git a/src/web-ui/src/flow_chat/components/modeDisplay.ts b/src/web-ui/src/flow_chat/components/modeDisplay.ts new file mode 100644 index 000000000..f88d288cf --- /dev/null +++ b/src/web-ui/src/flow_chat/components/modeDisplay.ts @@ -0,0 +1,20 @@ +import type { ModeInfo } from '../reducers/modeReducer'; + 
+type Translate = (key: string, options?: { defaultValue?: string }) => string; + +function translatedOrEmpty(t: Translate, key: string): string { + return t(key, { defaultValue: '' }); +} + +export function getModeDisplayName(t: Translate, mode: Pick): string { + return translatedOrEmpty(t, `chatInput.modeNames.${mode.id}`) || mode.name; +} + +export function getModeDisplayDescription( + t: Translate, + mode: Pick, +): string { + return translatedOrEmpty(t, `chatInput.modeDescriptions.${mode.id}`) || + mode.description || + mode.name; +} diff --git a/src/web-ui/src/flow_chat/hooks/useFlowChat.ts b/src/web-ui/src/flow_chat/hooks/useFlowChat.ts index a7c0b5fc0..51c651e26 100644 --- a/src/web-ui/src/flow_chat/hooks/useFlowChat.ts +++ b/src/web-ui/src/flow_chat/hooks/useFlowChat.ts @@ -73,6 +73,8 @@ export const useFlowChat = () => { const remoteSshHost = isRemote ? workspace?.sshHost : undefined; const agentTypeForSession = (config?.agentType || 'agentic').trim() || 'agentic'; + const intentTrackingEnabled = + config?.enableIntentTracking ?? 
agentTypeForSession === 'IntentCoding'; const maxContextTokens = await getModelMaxTokens(config?.modelName, agentTypeForSession); const sessionTitleMode = workspace?.workspaceKind === WorkspaceKind.Assistant @@ -111,6 +113,7 @@ export const useFlowChat = () => { enableContextCompression: true, remoteConnectionId, remoteSshHost, + enableIntentTracking: intentTrackingEnabled, } }); diff --git a/src/web-ui/src/flow_chat/services/BtwThreadService.ts b/src/web-ui/src/flow_chat/services/BtwThreadService.ts index e0d72289d..d9a6b0878 100644 --- a/src/web-ui/src/flow_chat/services/BtwThreadService.ts +++ b/src/web-ui/src/flow_chat/services/BtwThreadService.ts @@ -66,6 +66,7 @@ export async function createBtwChildSession(params: { safeMode?: boolean; autoCompact?: boolean; enableContextCompression?: boolean; + enableIntentTracking?: boolean; requestId?: string; addMarker?: boolean; isTransient?: boolean; @@ -126,6 +127,8 @@ export async function createBtwChildSession(params: { enableContextCompression: params.enableContextCompression ?? true, remoteConnectionId, remoteSshHost, + enableIntentTracking: + params.enableIntentTracking ?? 
agentType === 'IntentCoding', }, }) ).sessionId diff --git a/src/web-ui/src/flow_chat/services/flow-chat-manager/EventHandlerModule.test.ts b/src/web-ui/src/flow_chat/services/flow-chat-manager/EventHandlerModule.test.ts index 8ac37bbf1..44ec56f7e 100644 --- a/src/web-ui/src/flow_chat/services/flow-chat-manager/EventHandlerModule.test.ts +++ b/src/web-ui/src/flow_chat/services/flow-chat-manager/EventHandlerModule.test.ts @@ -13,6 +13,7 @@ import { SessionExecutionEvent, SessionExecutionState } from '../../state-machin import { FlowChatStore } from '../../store/FlowChatStore'; import type { DialogTurn, FlowUserSteeringItem, ModelRound, Session } from '../../types/flow-chat'; import type { FlowChatContext } from './types'; +import { notificationService } from '../../../shared/notification-system/services/NotificationService'; vi.mock('@/infrastructure/i18n/core/I18nService', () => ({ i18nService: { @@ -26,6 +27,24 @@ vi.mock('@/infrastructure/i18n/core/I18nService', () => ({ }, })); +vi.mock('@/infrastructure/theme/integrations/MonacoThemeSync', () => ({ + monacoThemeSync: { + syncTheme: vi.fn(), + }, +})); + +vi.mock('@/shared/helpers/MonacoHelper', () => ({ + MonacoHelper: { + getEditorFromElement: vi.fn(() => null), + getSelection: vi.fn(() => ({ hasSelection: false })), + getCursorPosition: vi.fn(() => null), + getWordAtCursor: vi.fn(() => undefined), + getFileInfo: vi.fn(() => null), + getContextInfo: vi.fn(() => null), + isInMonacoEditor: vi.fn(() => false), + }, +})); + vi.mock('../../../shared/notification-system/services/NotificationService', () => ({ notificationService: { error: vi.fn(), @@ -291,6 +310,107 @@ describe('formatDialogErrorForNotification', () => { }); }); +describe('IntentCoding evidence reminder', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('warns when an IntentCoding turn completes without an evidence signal', () => { + const session = { + ...createFinishingSession(), + mode: 'IntentCoding', + config: { agentType: 
'IntentCoding' }, + }; + const turn = createCompletedTurn(); + + __test_only__.maybeWarnIntentCodingEvidenceMissing(session, turn); + + expect(notificationService.warning).toHaveBeenCalledWith( + expect.stringContaining('intentCodingEvidenceMissing'), + { duration: 6000 }, + ); + }); + + it('does not warn when an IntentCoding turn references an Evidence Package path', () => { + const session = { + ...createFinishingSession(), + mode: 'IntentCoding', + config: { agentType: 'IntentCoding' }, + }; + const turn = { + ...createCompletedTurn(), + modelRounds: [ + makeRound('round-1', [{ + id: 'text-1', + type: 'text', + content: 'Wrote .agent/evidence/evidence-20260525-task.md with results.', + isStreaming: false, + timestamp: 1000, + status: 'completed', + } as any]), + ], + }; + + expect(__test_only__.dialogTurnHasIntentCodingEvidenceSignal(turn)).toBe(true); + __test_only__.maybeWarnIntentCodingEvidenceMissing(session, turn); + + expect(notificationService.warning).not.toHaveBeenCalled(); + }); + + it('does not treat a user-steering message echoing the phrase as evidence', () => { + const turn = { + ...createCompletedTurn(), + modelRounds: [ + makeRound('round-1', [{ + id: 'steering-1', + type: 'user-steering', + steeringId: 'steer-1', + roundIndex: 0, + content: 'Please remember to write an Evidence Package at the end.', + timestamp: 1000, + status: 'completed', + } as any]), + ], + }; + + expect(__test_only__.dialogTurnHasIntentCodingEvidenceSignal(turn)).toBe(false); + }); + + it('does not warn when the turn was cancelled by the user', () => { + const session = { + ...createFinishingSession(), + mode: 'IntentCoding', + config: { agentType: 'IntentCoding' }, + }; + const turn = createCompletedTurn(); + + __test_only__.maybeWarnIntentCodingEvidenceMissing(session, turn, { skipReason: 'cancelled' }); + + expect(notificationService.warning).not.toHaveBeenCalled(); + }); + + it('does not warn when the turn has not yet reached completed status', () => { + const session = 
{ + ...createFinishingSession(), + mode: 'IntentCoding', + config: { agentType: 'IntentCoding' }, + }; + + __test_only__.maybeWarnIntentCodingEvidenceMissing(session, createFinishingTurn()); + + expect(notificationService.warning).not.toHaveBeenCalled(); + }); + + it('does not warn for non-IntentCoding sessions', () => { + __test_only__.maybeWarnIntentCodingEvidenceMissing( + createFinishingSession(), + createCompletedTurn(), + ); + + expect(notificationService.warning).not.toHaveBeenCalled(); + }); +}); + function resetFlowChatStore(): void { FlowChatStore.getInstance().setState(() => ({ sessions: new Map(), @@ -334,6 +454,14 @@ function createFinishingTurn(): DialogTurn { }; } +function createCompletedTurn(): DialogTurn { + return { + ...createFinishingTurn(), + status: 'completed', + endTime: 1000, + }; +} + function createFinishingSession(): Session { return { sessionId: 'session-1', diff --git a/src/web-ui/src/flow_chat/services/flow-chat-manager/EventHandlerModule.ts b/src/web-ui/src/flow_chat/services/flow-chat-manager/EventHandlerModule.ts index 73b121d71..0f09f95f1 100644 --- a/src/web-ui/src/flow_chat/services/flow-chat-manager/EventHandlerModule.ts +++ b/src/web-ui/src/flow_chat/services/flow-chat-manager/EventHandlerModule.ts @@ -39,6 +39,7 @@ import { MCPAPI } from '@/infrastructure/api/service-api/MCPAPI'; import { ACPClientAPI, type AcpPermissionRequestEvent } from '@/infrastructure/api/service-api/ACPClientAPI'; import { globalEventBus } from '@/infrastructure/event-bus'; import type { FlowChatContext, DialogTurn, ModelRound, FlowToolItem } from './types'; +import type { Session } from '../../types/flow-chat'; import { getAiErrorPresentation, normalizeAiErrorDetail, @@ -78,6 +79,11 @@ import { const log = createLogger('EventHandlerModule'); const TURN_COMPLETION_QUIET_WINDOW_MS = 500; +const INTENT_CODING_MODE_ID = 'IntentCoding'; +// Match only file-path style evidence anchors. 
The earlier looser pattern +// (`/Evidence Package/i`) false-positived on any user message echoing the +// phrase, which could either suppress real misses or fire on aborted turns. +const INTENT_CODING_EVIDENCE_SIGNAL = /\.agent\/evidence\/|evidence-[^\s`"')]+\.md/i; interface MCPInteractionRequestEvent { interactionId: string; @@ -121,6 +127,8 @@ function resolveDialogTurnDisplayContent( export const __test_only__ = { resolveDialogTurnDisplayContent, + dialogTurnHasIntentCodingEvidenceSignal, + maybeWarnIntentCodingEvidenceMissing, }; function shouldMarkUnreadCompletion(sessionId: string): boolean { @@ -128,6 +136,69 @@ function shouldMarkUnreadCompletion(sessionId: string): boolean { return sessionId !== activeSessionId || !isAppWindowFocused(); } +function isIntentCodingSession(session: Session): boolean { + return session.mode === INTENT_CODING_MODE_ID || session.config.agentType === INTENT_CODING_MODE_ID; +} + +function itemEvidenceSearchText(item: unknown): string { + if (!item || typeof item !== 'object') { + return ''; + } + + const record = item as Record; + // Skip user-originated items so an end-user message containing the phrase + // can't satisfy the detector or trigger a false positive. + if (record.type === 'user-steering') { + return ''; + } + const textParts = [ + typeof record.content === 'string' ? record.content : '', + typeof record.toolName === 'string' ? record.toolName : '', + ]; + + for (const key of ['toolCall', 'toolResult']) { + const value = record[key]; + if (value !== undefined) { + try { + textParts.push(JSON.stringify(value)); + } catch { + // Ignore non-serializable runtime fields; they are not needed for a soft reminder. 
+ } + } + } + + return textParts.join('\n'); +} + +function dialogTurnHasIntentCodingEvidenceSignal(dialogTurn: DialogTurn): boolean { + return dialogTurn.modelRounds.some(round => + round.items.some(item => INTENT_CODING_EVIDENCE_SIGNAL.test(itemEvidenceSearchText(item))) + ); +} + +function maybeWarnIntentCodingEvidenceMissing( + session: Session, + dialogTurn: DialogTurn, + options: { skipReason?: 'cancelled' | 'errored' | null } = {}, +): void { + if (options.skipReason) { + return; + } + if (dialogTurn.status !== 'completed') { + return; + } + if (!isIntentCodingSession(session) || dialogTurnHasIntentCodingEvidenceSignal(dialogTurn)) { + return; + } + + notificationService.warning( + i18nService.t('flow-chat:chatInput.intentCodingEvidenceMissing', { + defaultValue: 'Intent Coding finished without an Evidence Package signal. Add or reference `.agent/evidence/evidence-*.md` before delivery.', + }), + { duration: 6000 }, + ); +} + function logDroppedDataEvent( eventName: string, sessionId: string, @@ -907,6 +978,10 @@ function finalizeTurnCompletionState( const dialogTurn = store.getState().sessions.get(sessionId)?.dialogTurns.find(t => t.id === turnId); if (dialogTurn) { + const skipReason: 'cancelled' | null = context.userCancelledSessionIds.has(sessionId) + ? 'cancelled' + : null; + maybeWarnIntentCodingEvidenceMissing(session, dialogTurn, { skipReason }); appendPlanDisplayItemsIfNeeded(context, sessionId, turnId, dialogTurn); } diff --git a/src/web-ui/src/flow_chat/services/flow-chat-manager/SessionModule.ts b/src/web-ui/src/flow_chat/services/flow-chat-manager/SessionModule.ts index e9d74e71b..311fd4994 100644 --- a/src/web-ui/src/flow_chat/services/flow-chat-manager/SessionModule.ts +++ b/src/web-ui/src/flow_chat/services/flow-chat-manager/SessionModule.ts @@ -430,6 +430,8 @@ export async function createChatSession( enableContextCompression: true, remoteConnectionId, remoteSshHost, + enableIntentTracking: + config.enableIntentTracking ?? 
agentType === 'IntentCoding', } }); diff --git a/src/web-ui/src/flow_chat/services/usageReportService.ts b/src/web-ui/src/flow_chat/services/usageReportService.ts index a187ce427..2020e03cb 100644 --- a/src/web-ui/src/flow_chat/services/usageReportService.ts +++ b/src/web-ui/src/flow_chat/services/usageReportService.ts @@ -290,6 +290,7 @@ function toPersistedLocalReportTurn(turn: DialogTurn): DialogTurnData { endTime: turn.endTime, durationMs: 0, status: 'completed', + intentAssignments: [], }; } diff --git a/src/web-ui/src/flow_chat/store/FlowChatStore.ts b/src/web-ui/src/flow_chat/store/FlowChatStore.ts index b24623177..8c9734bda 100644 --- a/src/web-ui/src/flow_chat/store/FlowChatStore.ts +++ b/src/web-ui/src/flow_chat/store/FlowChatStore.ts @@ -57,6 +57,7 @@ const VALID_AGENT_TYPES = new Set([ 'agentic', 'Multitask', 'debug', + 'IntentCoding', 'Plan', 'Cowork', 'Claw', diff --git a/src/web-ui/src/flow_chat/tool-cards/TaskToolDisplay.test.tsx b/src/web-ui/src/flow_chat/tool-cards/TaskToolDisplay.test.tsx index 185aaa232..3f2e6ade8 100644 --- a/src/web-ui/src/flow_chat/tool-cards/TaskToolDisplay.test.tsx +++ b/src/web-ui/src/flow_chat/tool-cards/TaskToolDisplay.test.tsx @@ -60,6 +60,7 @@ vi.mock('../store/FlowChatStore', () => ({ }], ]), }), + subscribe: () => () => {}, }, })); diff --git a/src/web-ui/src/flow_chat/types/flow-chat.ts b/src/web-ui/src/flow_chat/types/flow-chat.ts index 304ddf49b..86af8c706 100644 --- a/src/web-ui/src/flow_chat/types/flow-chat.ts +++ b/src/web-ui/src/flow_chat/types/flow-chat.ts @@ -413,6 +413,7 @@ export interface SessionConfig { /** Disambiguates sessions when multiple remote workspaces share the same `workspacePath`. 
*/ remoteConnectionId?: string; remoteSshHost?: string; + enableIntentTracking?: boolean; } /** diff --git a/src/web-ui/src/infrastructure/api/service-api/AgentAPI.ts b/src/web-ui/src/infrastructure/api/service-api/AgentAPI.ts index b674f7e51..517ddd19e 100644 --- a/src/web-ui/src/infrastructure/api/service-api/AgentAPI.ts +++ b/src/web-ui/src/infrastructure/api/service-api/AgentAPI.ts @@ -34,6 +34,7 @@ export interface SessionConfig { compressionThreshold?: number; remoteConnectionId?: string; remoteSshHost?: string; + enableIntentTracking?: boolean; } diff --git a/src/web-ui/src/infrastructure/api/service-api/SessionAPI.ts b/src/web-ui/src/infrastructure/api/service-api/SessionAPI.ts index 9caa356db..f6b8829f3 100644 --- a/src/web-ui/src/infrastructure/api/service-api/SessionAPI.ts +++ b/src/web-ui/src/infrastructure/api/service-api/SessionAPI.ts @@ -12,6 +12,13 @@ export interface SessionUsageReportRequest { export type UsageModelIdentitySource = 'recorded' | 'inferred_session_model' | 'legacy_missing'; +/** Known proactivity buckets emitted by the backend; future variants are + * permitted via the union with `string` in consumer sites. */ +export type ProactivityLevel = 'high' | 'moderate' | 'low' | 'reactive'; + +/** Known completeness buckets emitted by the backend. */ +export type CompletenessLevel = 'full' | 'partial' | 'minimal' | 'incomplete'; + export interface SessionUsageReport { schemaVersion: number; reportId: string; @@ -139,6 +146,29 @@ export interface SessionUsageReport { fileContentsIncluded: boolean; redactedFields: string[]; }; + proactivity?: { + completed: number; + inferred: number; + provided: number; + score: number; + // Backend serializes ProactivityLevel as a string. Kept loose here so a + // newly added backend variant doesn't break TS narrowing in callers. 
+ level: ProactivityLevel | (string & {}); + turnDetails?: Array<{ + turnIndex: number; + askedQuestion: boolean; + proactiveToolCount: number; + intentsCompleted: number; + intentsInferred: number; + intentsProvided: number; + }>; + }; + completeness?: { + requirementsSatisfied: number; + requirementsMissed: number; + score: number; + level: CompletenessLevel | (string & {}); + }; } function remoteSessionFields( diff --git a/src/web-ui/src/locales/en-US/flow-chat.json b/src/web-ui/src/locales/en-US/flow-chat.json index 9d27adf18..6c7bba41b 100644 --- a/src/web-ui/src/locales/en-US/flow-chat.json +++ b/src/web-ui/src/locales/en-US/flow-chat.json @@ -530,8 +530,10 @@ "targetBtw": "Side", "sendingToMain": "Main session: {{title}}", "sendingToBtw": "Side session: {{title}}", + "intentCodingEvidenceMissing": "Intent Coding finished without an Evidence Package signal. Add or reference `.agent/evidence/evidence-*.md` before delivery.", "modeDescriptions": { "agentic": "Full-featured AI assistant with access to all tools for comprehensive software development tasks", + "IntentCoding": "Intent-aligned coding: clarify requirements, record acceptance checks, verify changes, and deliver evidence", "Multitask": "Multitask mode: decompose work into orthogonal branches or a DAG and proactively use subagents in parallel when it helps", "Claw": "Personal assistant mode for dedicated assistant workspaces and everyday task support", "Plan": "Plan first, execute later — clarify requirements and create an implementation plan before coding", @@ -543,6 +545,7 @@ }, "modeNames": { "agentic": "Agentic", + "IntentCoding": "Intent Coding", "Multitask": "Multitask", "Claw": "Claw", "Plan": "Plan", diff --git a/src/web-ui/src/locales/en-US/scenes/agents.json b/src/web-ui/src/locales/en-US/scenes/agents.json index e76f2f319..fb1f4e981 100644 --- a/src/web-ui/src/locales/en-US/scenes/agents.json +++ b/src/web-ui/src/locales/en-US/scenes/agents.json @@ -334,6 +334,7 @@ }, "agentDescriptions": { 
"Agentic": "Autonomous execution mode: automatically analyze requirements, plan tasks, and execute code changes", + "IntentCoding": "Intent Coding mode: align on intent, acceptance checks, verification, and evidence before delivery", "Cowork": "Cowork mode: work alongside you, asking for confirmation at key steps", "ComputerUse": "Computer use mode: capable of operating browsers, desktop apps, and file systems", "DeepResearch": "Deep research agent: conduct systematic investigation and analysis on complex topics", diff --git a/src/web-ui/src/locales/zh-CN/flow-chat.json b/src/web-ui/src/locales/zh-CN/flow-chat.json index 83976af8c..0fffd7d70 100644 --- a/src/web-ui/src/locales/zh-CN/flow-chat.json +++ b/src/web-ui/src/locales/zh-CN/flow-chat.json @@ -524,8 +524,10 @@ "targetBtw": "当前侧问", "sendingToMain": "主会话:{{title}}", "sendingToBtw": "侧问会话:{{title}}", + "intentCodingEvidenceMissing": "意图编码已完成,但未检测到证据包信号。交付前请补充或引用 `.agent/evidence/evidence-*.md`。", "modeDescriptions": { "agentic": "AI 主导执行,自动规划和完成编码任务,拥有完整的工具访问能力", + "IntentCoding": "意图对齐编码:先澄清需求、记录验收项,再验证变更并交付证据", "Multitask": "多任务模式:将工作拆成正交分支或 DAG,并在合适时主动并行调度子 Agent 推进", "Claw": "个人助理模式:面向个人工作区和日常事务,使用独立的助理上下文", "Plan": "先规划后执行,先明确需求并制定实施计划,再进行编码", @@ -537,6 +539,7 @@ }, "modeNames": { "agentic": "Agentic", + "IntentCoding": "意图编码", "Multitask": "Multitask", "Claw": "Claw", "Plan": "Plan", diff --git a/src/web-ui/src/locales/zh-CN/scenes/agents.json b/src/web-ui/src/locales/zh-CN/scenes/agents.json index 72d1b9ce3..031a6a193 100644 --- a/src/web-ui/src/locales/zh-CN/scenes/agents.json +++ b/src/web-ui/src/locales/zh-CN/scenes/agents.json @@ -334,6 +334,7 @@ }, "agentDescriptions": { "Agentic": "自主执行模式:自动分析需求、规划任务并执行代码修改", + "IntentCoding": "意图编码模式:在交付前对齐意图、验收项、验证结果和证据包", "Cowork": "协作模式:与您并肩工作,在关键步骤征求您的确认", "ComputerUse": "计算机使用模式:能够操作浏览器、桌面应用和文件系统", "DeepResearch": "深度研究智能体:对复杂主题进行系统性调研和分析", diff --git a/src/web-ui/src/locales/zh-TW/flow-chat.json b/src/web-ui/src/locales/zh-TW/flow-chat.json index 
95a478ec2..6d8202e1d 100644 --- a/src/web-ui/src/locales/zh-TW/flow-chat.json +++ b/src/web-ui/src/locales/zh-TW/flow-chat.json @@ -524,8 +524,10 @@ "targetBtw": "當前側問", "sendingToMain": "主會話:{{title}}", "sendingToBtw": "側問會話:{{title}}", + "intentCodingEvidenceMissing": "意圖編碼已完成,但未偵測到證據包訊號。交付前請補充或引用 `.agent/evidence/evidence-*.md`。", "modeDescriptions": { "agentic": "AI 主導執行,自動規劃和完成編碼任務,擁有完整的工具訪問能力", + "IntentCoding": "意圖對齊編碼:先澄清需求、記錄驗收項,再驗證變更並交付證據", "Multitask": "多工模式:將工作拆成正交分支或 DAG,並在合適時主動並行調度子 Agent 推進", "Claw": "個人助理模式:面向個人工作區和日常事務,使用獨立的助理上下文", "Plan": "先規劃後執行,先明確需求並制定實施計劃,再進行編碼", @@ -537,6 +539,7 @@ }, "modeNames": { "agentic": "Agentic", + "IntentCoding": "意圖編碼", "Multitask": "Multitask", "Claw": "Claw", "Plan": "Plan", diff --git a/src/web-ui/src/locales/zh-TW/scenes/agents.json b/src/web-ui/src/locales/zh-TW/scenes/agents.json index 6e6f8e2c9..4f55ae051 100644 --- a/src/web-ui/src/locales/zh-TW/scenes/agents.json +++ b/src/web-ui/src/locales/zh-TW/scenes/agents.json @@ -334,6 +334,7 @@ }, "agentDescriptions": { "Agentic": "自主執行模式:自動分析需求、規劃任務並執行程式碼修改", + "IntentCoding": "意圖編碼模式:在交付前對齊意圖、驗收項、驗證結果和證據包", "Cowork": "協作模式:與您並肩工作,在關鍵步驟徵求您的確認", "ComputerUse": "電腦使用模式:能夠操作瀏覽器、桌面應用和檔案系統", "DeepResearch": "深度研究智慧體:對複雜主題進行系統性調研和分析", diff --git a/src/web-ui/src/shared/types/session-history.ts b/src/web-ui/src/shared/types/session-history.ts index c5dc11c83..5670bf013 100644 --- a/src/web-ui/src/shared/types/session-history.ts +++ b/src/web-ui/src/shared/types/session-history.ts @@ -97,6 +97,27 @@ export interface ReviewActionPersistedState { export type SessionStatus = 'active' | 'archived' | 'completed'; export type DialogTurnKind = 'user_dialog' | 'manual_compaction' | 'local_command'; +export type IntentTerminalStatus = 'completed' | 'inferred' | 'provided'; + +export interface IntentAssignment { + intentId: string; + terminalStatus: IntentTerminalStatus; + assignedAtTurn: number; + triggerDescription?: string; + /** True when this is a synthetic proxy generated from 
raw evidence rather than a real hidden-intent evaluation. Defaults to false / omitted. */ + isProxy?: boolean; +} + +export interface IntentTurnEvidence { + turnIndex: number; + askedUserQuestion: boolean; + questionTopics?: string[]; + proactiveToolCalls: number; + toolNamesUsed?: string[]; + producedOutput: boolean; + roundCount: number; + askedFollowUpInText: boolean; +} export type LocalCommandKind = 'usage_report' | 'goal_pending' | 'goal_verifying'; @@ -130,6 +151,8 @@ export interface DialogTurnData { endTime?: number; durationMs?: number; status: TurnStatus; + intentAssignments?: IntentAssignment[]; + intentEvidence?: IntentTurnEvidence; } export interface UserMessageData { diff --git a/src/web-ui/src/test/monaco-editor.mock.ts b/src/web-ui/src/test/monaco-editor.mock.ts new file mode 100644 index 000000000..08a946eb8 --- /dev/null +++ b/src/web-ui/src/test/monaco-editor.mock.ts @@ -0,0 +1,75 @@ +class MockRange { + constructor( + public startLineNumber: number, + public startColumn: number, + public endLineNumber: number, + public endColumn: number, + ) {} +} + +const disposable = { + dispose: () => undefined, +}; + +const mockEditor = { + getDomNode: () => null, + getSelection: () => null, + getModel: () => null, + getPosition: () => null, + getVisibleRanges: () => [], +}; + +export const Range = MockRange; + +export const Uri = { + parse: (value: string) => ({ + toString: () => value, + path: value, + }), + file: (value: string) => ({ + toString: () => `file://${value}`, + path: value, + }), +}; + +export const KeyMod = { + CtrlCmd: 2048, + Shift: 1024, + Alt: 512, + WinCtrl: 256, +}; + +export const KeyCode = {}; + +export const editor = { + defineTheme: () => undefined, + setTheme: () => undefined, + getEditors: () => [], + create: () => mockEditor, + createDiffEditor: () => mockEditor, + createModel: () => mockEditor, + setModelLanguage: () => undefined, + getModel: () => null, + getModels: () => [], + onDidCreateModel: () => disposable, + 
onWillDisposeModel: () => disposable, +}; + +export const languages = { + register: () => undefined, + setMonarchTokensProvider: () => disposable, + setLanguageConfiguration: () => disposable, + registerCompletionItemProvider: () => disposable, + registerHoverProvider: () => disposable, + registerDefinitionProvider: () => disposable, + registerDocumentFormattingEditProvider: () => disposable, +}; + +export default { + Range, + Uri, + KeyMod, + KeyCode, + editor, + languages, +}; diff --git a/src/web-ui/vite.config.ts b/src/web-ui/vite.config.ts index 96015dee5..45e67ec46 100644 --- a/src/web-ui/vite.config.ts +++ b/src/web-ui/vite.config.ts @@ -8,6 +8,7 @@ const host = process.env.TAURI_DEV_HOST; // https://vite.dev/config/ export default defineConfig(({ mode, command }) => { const isProduction = mode === 'production' || (command === 'build' && mode !== 'development'); + const isTest = mode === 'test' || process.env.VITEST === 'true'; return { plugins: [ @@ -20,6 +21,9 @@ export default defineConfig(({ mode, command }) => { dedupe: ['react', 'react-dom'], alias: { "@": path.resolve(__dirname, "./src"), + ...(isTest ? { + "monaco-editor": path.resolve(__dirname, "./src/test/monaco-editor.mock.ts"), + } : {}), "@/shared": path.resolve(__dirname, "./src/shared"), "@/core": path.resolve(__dirname, "./src/core"), "@/tools": path.resolve(__dirname, "./src/tools"),