Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,4 @@ ROADMAP.md
# Demo recording intermediates
assets/demos/*-events.json
assets/demos/*-raw.webm
.dev-session/
247 changes: 247 additions & 0 deletions packages/cli/__tests__/commands/trace.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,247 @@
import { beforeEach, describe, expect, it, vi } from 'vitest';

// Stand-ins for the `node:fs` functions used by these tests. They are grouped in
// one object so the module mock below can hand out exactly these functions,
// and also exposed as individual bindings so tests can stub and inspect them.
const fsMock = {
  existsSync: vi.fn(),
  readFileSync: vi.fn(),
  readdirSync: vi.fn(),
  statSync: vi.fn(),
  mkdirSync: vi.fn(),
  writeFileSync: vi.fn(),
};
const { existsSync, readFileSync, readdirSync, statSync, mkdirSync, writeFileSync } = fsMock;

// Stand-in for the `register` export of the command registry module.
const register = vi.fn();

// Replace the default export of `node:fs` with the stubbed functions above.
vi.mock('node:fs', () => {
  return { default: fsMock };
});

// Pin the home directory reported by `node:os` to a fixed path.
vi.mock('node:os', () => {
  return {
    default: {
      homedir() {
        return '/tmp/test-home';
      },
    },
  };
});

// Replace the command registry with a module that only exposes the stub.
vi.mock('../../src/commands/registry.js', () => {
  return { register };
});

// Imported dynamically, after the mock function bindings above have been
// initialised, because the mock factories close over those bindings.
const traceModule = await import('../../src/commands/trace.js');
const { buildContractEvidence, traceCommand } = traceModule;

/**
 * Tests for contract-evidence export: `buildContractEvidence` is exercised
 * directly on in-memory trace entries, and `traceCommand` is exercised with
 * the `--export-evidence=<file>` flag against the mocked file system.
 */
describe('trace contract evidence export', () => {
  // The provenance and grounding sections are expected to be identical in
  // every full-object assertion below, so they are declared once here.
  const expectedProvenance = {
    formats: ['bap-trace-jsonl'],
    replaySupported: true,
    determinism: 'best-effort',
    validator: 'bap trace --replay',
  };

  const expectedGrounding = {
    observationModels: ['incremental-changes', 'interactive-elements', 'screenshot-observation'],
    identityMechanisms: ['selector-fallback', 'semantic-selector', 'stable-ref'],
    stableRefs: true,
    abstentionSupported: false,
  };

  beforeEach(() => {
    // Clears recorded calls on every mock; stubbed return values are kept.
    vi.clearAllMocks();
  });

  it('normalizes BAP trace entries into contract evidence', () => {
    const evidence = buildContractEvidence([
      {
        ts: '2026-03-24T18:00:00.000Z',
        sessionId: 'checkout-demo',
        clientId: 'client-1',
        method: 'page/navigate',
        duration: 42,
        status: 'ok',
        requestSummary: {
          url: 'https://example.com/login',
          observe: {
            responseTier: 'interactive',
            stableRefs: true,
          },
        },
        resultSummary: {
          url: 'https://example.com/login',
          status: 200,
        },
      },
      {
        ts: '2026-03-24T18:00:05.000Z',
        sessionId: 'checkout-demo',
        clientId: 'client-1',
        method: 'agent/act',
        duration: 85,
        status: 'ok',
        requestSummary: {
          actions: ['action/fill', 'action/click'],
          postObserve: {
            incremental: true,
            includeScreenshot: true,
            stableRefs: true,
          },
        },
        resultSummary: {
          completed: 2,
          total: 2,
        },
      },
      {
        ts: '2026-03-24T18:00:06.000Z',
        sessionId: 'checkout-demo',
        clientId: 'client-1',
        method: 'agent/extract',
        duration: 33,
        status: 'ok',
        requestSummary: {
          mode: 'list',
        },
        resultSummary: {
          keys: ['data', 'sourceRefs'],
        },
      },
    ]);

    expect(evidence).toEqual({
      adapter: 'bap',
      version: 1,
      runtime: {
        tools: ['act', 'extract', 'navigate', 'observe'],
        actions: ['click', 'fill'],
        domains: ['https://example.com'],
        artifacts: ['json-extraction', 'screenshot', 'trace-jsonl'],
        approvalsObserved: [],
      },
      provenance: expectedProvenance,
      grounding: expectedGrounding,
    });
  });

  it('captures direct action/* method calls as tools and actions', () => {
    const evidence = buildContractEvidence([
      {
        ts: '2026-03-24T18:00:00.000Z',
        sessionId: 'direct-actions',
        clientId: 'client-1',
        method: 'action/click',
        duration: 15,
        status: 'ok',
        requestSummary: { selector: 'e5' },
      },
      {
        ts: '2026-03-24T18:00:01.000Z',
        sessionId: 'direct-actions',
        clientId: 'client-1',
        method: 'action/fill',
        duration: 20,
        status: 'ok',
        requestSummary: { selector: 'e8', value: 'test@example.com' },
      },
      {
        ts: '2026-03-24T18:00:02.000Z',
        sessionId: 'direct-actions',
        clientId: 'client-1',
        method: 'action/hover',
        duration: 10,
        status: 'ok',
        requestSummary: { selector: 'e12' },
      },
    ]);

    // Direct action/* calls should appear in both tools AND actions
    for (const name of ['click', 'fill', 'hover']) {
      expect(evidence.runtime?.tools).toContain(name);
    }
    for (const name of ['click', 'fill', 'hover']) {
      expect(evidence.runtime?.actions).toContain(name);
    }
  });

  it('exports normalized evidence from the trace command', async () => {
    // Mocked file system: one JSONL trace file containing two entries.
    existsSync.mockReturnValue(true);
    readdirSync.mockReturnValue(['checkout-demo-123.jsonl']);
    statSync.mockReturnValue({
      size: 512,
      mtime: new Date('2026-03-24T18:01:00.000Z'),
    });
    readFileSync.mockReturnValue(
      [
        JSON.stringify({
          ts: '2026-03-24T18:00:00.000Z',
          sessionId: 'checkout-demo',
          clientId: 'client-1',
          method: 'page/navigate',
          duration: 42,
          status: 'ok',
          requestSummary: {
            url: 'https://example.com/login',
            observe: { responseTier: 'interactive', stableRefs: true },
          },
          resultSummary: { url: 'https://example.com/login', status: 200 },
        }),
        JSON.stringify({
          ts: '2026-03-24T18:00:05.000Z',
          sessionId: 'checkout-demo',
          clientId: 'client-1',
          method: 'agent/act',
          duration: 85,
          status: 'ok',
          requestSummary: {
            actions: ['action/fill', 'action/click'],
            postObserve: {
              incremental: true,
              includeScreenshot: true,
              stableRefs: true,
            },
          },
          resultSummary: { completed: 2, total: 2 },
        }),
      ].join('\n'),
    );

    // Silence and capture console output for the duration of this test only.
    const logSpy = vi.spyOn(console, 'log').mockImplementation(() => {});
    const errorSpy = vi.spyOn(console, 'error').mockImplementation(() => {});

    try {
      await traceCommand(['--export-evidence=.bap/trace-evidence.json'], {} as never, {} as never);

      expect(mkdirSync).toHaveBeenCalled();
      expect(writeFileSync).toHaveBeenCalledTimes(1);

      // The second argument of the single write call is the serialized evidence.
      const [, writtenJson] = writeFileSync.mock.calls[0] as [string, string];
      expect(JSON.parse(writtenJson)).toEqual({
        adapter: 'bap',
        version: 1,
        runtime: {
          // This trace has no `agent/extract` entry; the expectation
          // correspondingly omits `extract` and `json-extraction`.
          tools: ['act', 'navigate', 'observe'],
          actions: ['click', 'fill'],
          domains: ['https://example.com'],
          artifacts: ['screenshot', 'trace-jsonl'],
          approvalsObserved: [],
        },
        provenance: expectedProvenance,
        grounding: expectedGrounding,
      });

      expect(logSpy).toHaveBeenCalledWith('Exported contract evidence to .bap/trace-evidence.json');
      expect(errorSpy).not.toHaveBeenCalled();
    } finally {
      // Restore the real console methods even when an assertion above fails;
      // `vi.clearAllMocks()` alone would leave the console silenced.
      logSpy.mockRestore();
      errorSpy.mockRestore();
    }
  });
});
85 changes: 85 additions & 0 deletions packages/cli/skills/bap-browser/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,89 @@
name: bap-browser
description: "Browser automation CLI with composite actions, semantic selectors, and self-healing selectors. Use when the user needs to visit websites, fill forms, extract data, take screenshots, stream browser events, or automate multi-step browser workflows like login, checkout, or search."
license: Apache-2.0
contract:
kind: browser-agent
version: 1
runtime:
interfaces:
- cli
tools:
- navigate
- go_back
- go_forward
- reload
- observe
- screenshot
- aria_snapshot
- content
- act
- extract
- pages
- activate_page
- close_page
actionClasses:
- navigate
- observe
- click
- fill
- type
- press
- hover
- scroll
- select
- extract
domainPolicy:
mode: report
approval:
policy: manual
requiredFor:
- checkout
- purchase
- delete
- upload
- submit
artifacts:
outputs:
- trace-jsonl
- trace-replay-html
- json-extraction
- screenshot
sensitivity: moderate
retention: session
redaction:
- cookies
- auth-tokens
- passwords
provenance:
formats:
- bap-trace-jsonl
replay:
supported: true
determinism: best-effort
validator: bap trace --replay
grounding:
observation:
models:
- interactive-elements
- incremental-changes
- screenshot-observation
identity:
mechanisms:
- stable-ref
- semantic-selector
- selector-fallback
stableRefs: true
abstention:
supported: false
reasons:
- delegated-to-caller
extensions:
cliAliases:
navigate: goto
go_back: back
go_forward: forward
pages: tabs
activate_page: tab-select
---

# BAP Browser CLI
Expand Down Expand Up @@ -168,6 +251,7 @@ bap trace --all # Show all traces across sessions
bap trace --session=<id> # Traces for a specific session
bap trace --replay # Generate self-contained HTML timeline viewer
bap trace --export # Export traces as JSON
bap trace --export-evidence=evidence.json # Export normalized contract evidence
bap trace --limit=20 # Limit number of trace entries shown
```

Expand Down Expand Up @@ -211,3 +295,4 @@ bap recipe wait-for <selector> [--timeout=ms]
4. Use `bap act` for multi-step flows instead of individual commands — fewer calls, fewer tokens
5. Use `--diff` for incremental observation after small DOM changes
6. Check `bap trace` when debugging failures — it records every request with timing
7. Use `bap trace --export-evidence=<file>` to export normalized contract evidence when you need to audit or validate a skill
1 change: 1 addition & 0 deletions packages/cli/src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ ${pc.cyan("TRACING")}
bap trace --session=<id> Show trace for a specific session
bap trace --replay Generate HTML timeline viewer
bap trace --export=<file> Export trace as JSON
bap trace --export-evidence=<f> Export normalized contract evidence
bap trace --limit=<N> Show last N entries (default: 10)

${pc.cyan("DEBUGGING")}
Expand Down
Loading
Loading