browseragentprotocol · pyyush · Mar 25, 2026
diff --git a/.gitignore b/.gitignore
@@ -45,3 +45,4 @@ ROADMAP.md
 # Demo recording intermediates
 assets/demos/*-events.json
 assets/demos/*-raw.webm
+.dev-session/
diff --git a/packages/cli/__tests__/commands/trace.test.ts b/packages/cli/__tests__/commands/trace.test.ts
@@ -0,0 +1,247 @@
+import { beforeEach, describe, expect, it, vi } from 'vitest';
+
+const existsSync = vi.fn();
+const readFileSync = vi.fn();
+const readdirSync = vi.fn();
+const statSync = vi.fn();
+const mkdirSync = vi.fn();
+const writeFileSync = vi.fn();
+const register = vi.fn();
+
+vi.mock('node:fs', () => ({
+	default: {
+		existsSync,
+		readFileSync,
+		readdirSync,
+		statSync,
+		mkdirSync,
+		writeFileSync,
+	},
+}));
+
+vi.mock('node:os', () => ({
+	default: {
+		homedir: () => '/tmp/test-home',
+	},
+}));
+
+vi.mock('../../src/commands/registry.js', () => ({
+	register,
+}));
+
+const { buildContractEvidence, traceCommand } = await import('../../src/commands/trace.js');
+
+describe('trace contract evidence export', () => {
+	beforeEach(() => {
+		vi.clearAllMocks();
+	});
+
+	it('normalizes BAP trace entries into contract evidence', () => {
+		const evidence = buildContractEvidence([
+			{
+				ts: '2026-03-24T18:00:00.000Z',
+				sessionId: 'checkout-demo',
+				clientId: 'client-1',
+				method: 'page/navigate',
+				duration: 42,
+				status: 'ok',
+				requestSummary: {
+					url: 'https://example.com/login',
+					observe: {
+						responseTier: 'interactive',
+						stableRefs: true,
+					},
+				},
+				resultSummary: {
+					url: 'https://example.com/login',
+					status: 200,
+				},
+			},
+			{
+				ts: '2026-03-24T18:00:05.000Z',
+				sessionId: 'checkout-demo',
+				clientId: 'client-1',
+				method: 'agent/act',
+				duration: 85,
+				status: 'ok',
+				requestSummary: {
+					actions: ['action/fill', 'action/click'],
+					postObserve: {
+						incremental: true,
+						includeScreenshot: true,
+						stableRefs: true,
+					},
+				},
+				resultSummary: {
+					completed: 2,
+					total: 2,
+				},
+			},
+			{
+				ts: '2026-03-24T18:00:06.000Z',
+				sessionId: 'checkout-demo',
+				clientId: 'client-1',
+				method: 'agent/extract',
+				duration: 33,
+				status: 'ok',
+				requestSummary: {
+					mode: 'list',
+				},
+				resultSummary: {
+					keys: ['data', 'sourceRefs'],
+				},
+			},
+		]);
+
+		expect(evidence).toEqual({
+			adapter: 'bap',
+			version: 1,
+			runtime: {
+				tools: ['act', 'extract', 'navigate', 'observe'],
+				actions: ['click', 'fill'],
+				domains: ['https://example.com'],
+				artifacts: ['json-extraction', 'screenshot', 'trace-jsonl'],
+				approvalsObserved: [],
+			},
+			provenance: {
+				formats: ['bap-trace-jsonl'],
+				replaySupported: true,
+				determinism: 'best-effort',
+				validator: 'bap trace --replay',
+			},
+			grounding: {
+				observationModels: [
+					'incremental-changes',
+					'interactive-elements',
+					'screenshot-observation',
+				],
+				identityMechanisms: ['selector-fallback', 'semantic-selector', 'stable-ref'],
+				stableRefs: true,
+				abstentionSupported: false,
+			},
+		});
+	});
+
+	it('captures direct action/* method calls as tools and actions', () => {
+		const evidence = buildContractEvidence([
+			{
+				ts: '2026-03-24T18:00:00.000Z',
+				sessionId: 'direct-actions',
+				clientId: 'client-1',
+				method: 'action/click',
+				duration: 15,
+				status: 'ok',
+				requestSummary: { selector: 'e5' },
+			},
+			{
+				ts: '2026-03-24T18:00:01.000Z',
+				sessionId: 'direct-actions',
+				clientId: 'client-1',
+				method: 'action/fill',
+				duration: 20,
+				status: 'ok',
+				requestSummary: { selector: 'e8', value: 'test@example.com' },
+			},
+			{
+				ts: '2026-03-24T18:00:02.000Z',
+				sessionId: 'direct-actions',
+				clientId: 'client-1',
+				method: 'action/hover',
+				duration: 10,
+				status: 'ok',
+				requestSummary: { selector: 'e12' },
+			},
+		]);
+
+		// Direct action/* calls should appear in both tools AND actions
+		expect(evidence.runtime?.tools).toContain('click');
+		expect(evidence.runtime?.tools).toContain('fill');
+		expect(evidence.runtime?.tools).toContain('hover');
+		expect(evidence.runtime?.actions).toContain('click');
+		expect(evidence.runtime?.actions).toContain('fill');
+		expect(evidence.runtime?.actions).toContain('hover');
+	});
+
+	it('exports normalized evidence from the trace command', async () => {
+		existsSync.mockReturnValue(true);
+		readdirSync.mockReturnValue(['checkout-demo-123.jsonl']);
+		statSync.mockReturnValue({
+			size: 512,
+			mtime: new Date('2026-03-24T18:01:00.000Z'),
+		});
+		readFileSync.mockReturnValue(
+			[
+				JSON.stringify({
+					ts: '2026-03-24T18:00:00.000Z',
+					sessionId: 'checkout-demo',
+					clientId: 'client-1',
+					method: 'page/navigate',
+					duration: 42,
+					status: 'ok',
+					requestSummary: {
+						url: 'https://example.com/login',
+						observe: { responseTier: 'interactive', stableRefs: true },
+					},
+					resultSummary: { url: 'https://example.com/login', status: 200 },
+				}),
+				JSON.stringify({
+					ts: '2026-03-24T18:00:05.000Z',
+					sessionId: 'checkout-demo',
+					clientId: 'client-1',
+					method: 'agent/act',
+					duration: 85,
+					status: 'ok',
+					requestSummary: {
+						actions: ['action/fill', 'action/click'],
+						postObserve: {
+							incremental: true,
+							includeScreenshot: true,
+							stableRefs: true,
+						},
+					},
+					resultSummary: { completed: 2, total: 2 },
+				}),
+			].join('\n'),
+		);
+
+		const logSpy = vi.spyOn(console, 'log').mockImplementation(() => {});
+		const errorSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
+
+		await traceCommand(['--export-evidence=.bap/trace-evidence.json'], {} as never, {} as never);
+
+		expect(mkdirSync).toHaveBeenCalled();
+		expect(writeFileSync).toHaveBeenCalledTimes(1);
+
+		const [, writtenJson] = writeFileSync.mock.calls[0] as [string, string];
+		expect(JSON.parse(writtenJson)).toEqual({
+			adapter: 'bap',
+			version: 1,
+			runtime: {
+				tools: ['act', 'navigate', 'observe'],
+				actions: ['click', 'fill'],
+				domains: ['https://example.com'],
+				artifacts: ['screenshot', 'trace-jsonl'],
+				approvalsObserved: [],
+			},
+			provenance: {
+				formats: ['bap-trace-jsonl'],
+				replaySupported: true,
+				determinism: 'best-effort',
+				validator: 'bap trace --replay',
+			},
+			grounding: {
+				observationModels: [
+					'incremental-changes',
+					'interactive-elements',
+					'screenshot-observation',
+				],
+				identityMechanisms: ['selector-fallback', 'semantic-selector', 'stable-ref'],
+				stableRefs: true,
+				abstentionSupported: false,
+			},
+		});
+
+		expect(logSpy).toHaveBeenCalledWith('Exported contract evidence to .bap/trace-evidence.json');
+		expect(errorSpy).not.toHaveBeenCalled();
+	});
+});
diff --git a/packages/cli/skills/bap-browser/SKILL.md b/packages/cli/skills/bap-browser/SKILL.md
@@ -2,6 +2,89 @@
 name: bap-browser
 description: "Browser automation CLI with composite actions, semantic selectors, and self-healing selectors. Use when the user needs to visit websites, fill forms, extract data, take screenshots, stream browser events, or automate multi-step browser workflows like login, checkout, or search."
 license: Apache-2.0
+contract:
+  kind: browser-agent
+  version: 1
+  runtime:
+    interfaces:
+      - cli
+    tools:
+      - navigate
+      - go_back
+      - go_forward
+      - reload
+      - observe
+      - screenshot
+      - aria_snapshot
+      - content
+      - act
+      - extract
+      - pages
+      - activate_page
+      - close_page
+    actionClasses:
+      - navigate
+      - observe
+      - click
+      - fill
+      - type
+      - press
+      - hover
+      - scroll
+      - select
+      - extract
+    domainPolicy:
+      mode: report
+    approval:
+      policy: manual
+      requiredFor:
+        - checkout
+        - purchase
+        - delete
+        - upload
+        - submit
+    artifacts:
+      outputs:
+        - trace-jsonl
+        - trace-replay-html
+        - json-extraction
+        - screenshot
+      sensitivity: moderate
+      retention: session
+      redaction:
+        - cookies
+        - auth-tokens
+        - passwords
+  provenance:
+    formats:
+      - bap-trace-jsonl
+    replay:
+      supported: true
+      determinism: best-effort
+      validator: bap trace --replay
+  grounding:
+    observation:
+      models:
+        - interactive-elements
+        - incremental-changes
+        - screenshot-observation
+    identity:
+      mechanisms:
+        - stable-ref
+        - semantic-selector
+        - selector-fallback
+      stableRefs: true
+    abstention:
+      supported: false
+      reasons:
+        - delegated-to-caller
+  extensions:
+    cliAliases:
+      navigate: goto
+      go_back: back
+      go_forward: forward
+      pages: tabs
+      activate_page: tab-select
 ---
 
 # BAP Browser CLI
@@ -168,6 +251,7 @@ bap trace --all                        # Show all traces across sessions
 bap trace --session=<id>               # Traces for a specific session
 bap trace --replay                     # Generate self-contained HTML timeline viewer
 bap trace --export                     # Export traces as JSON
+bap trace --export-evidence=<file>     # Export normalized contract evidence
 bap trace --limit=20                   # Limit number of trace entries shown
 ```
 
@@ -211,3 +295,4 @@ bap recipe wait-for <selector> [--timeout=ms]
 4. Use `bap act` for multi-step flows instead of individual commands — fewer calls, fewer tokens
 5. Use `--diff` for incremental observation after small DOM changes
 6. Check `bap trace` when debugging failures — it records every request with timing
+7. Use `bap trace --export-evidence=<file>` when you need normalized contract audit evidence for skill validation
diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts
@@ -103,6 +103,7 @@ ${pc.cyan("TRACING")}
   bap trace --session=<id>          Show trace for a specific session
   bap trace --replay                Generate HTML timeline viewer
   bap trace --export=<file>         Export trace as JSON
+  bap trace --export-evidence=<f>   Export normalized contract evidence
   bap trace --limit=<N>             Show last N entries (default: 10)
 
 ${pc.cyan("DEBUGGING")}