Skip to content

Commit 86e3c55

Browse files
committed
Run freebuff e2e tests on every commit!
1 parent f010bc5 commit 86e3c55

17 files changed

+1402
-1
lines changed

.github/workflows/freebuff-e2e.yml

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
name: Freebuff E2E Tests
2+
3+
on:
4+
push:
5+
branches: ['main']
6+
pull_request:
7+
branches: ['main']
8+
workflow_dispatch: # Manual trigger
9+
10+
concurrency:
11+
group: freebuff-e2e-${{ github.ref }}
12+
cancel-in-progress: true
13+
14+
jobs:
15+
build-freebuff:
16+
runs-on: ubuntu-latest
17+
timeout-minutes: 15
18+
steps:
19+
- name: Checkout repository
20+
uses: actions/checkout@v6
21+
22+
- uses: ./.github/actions/setup-project
23+
24+
- name: Set environment variables
25+
env:
26+
SECRETS_CONTEXT: ${{ toJSON(secrets) }}
27+
run: |
28+
VAR_NAMES=$(bun scripts/generate-ci-env.ts --scope client)
29+
echo "$SECRETS_CONTEXT" | jq -r --argjson vars "$VAR_NAMES" '
30+
to_entries | .[] | select(.key as $k | $vars | index($k)) | .key + "=" + .value
31+
' >> $GITHUB_ENV
32+
echo "FREEBUFF_MODE=true" >> $GITHUB_ENV
33+
echo "NEXT_PUBLIC_CB_ENVIRONMENT=prod" >> $GITHUB_ENV
34+
echo "CODEBUFF_GITHUB_ACTIONS=true" >> $GITHUB_ENV
35+
36+
- name: Build Freebuff binary
37+
run: bun freebuff/cli/build.ts 0.0.0-e2e
38+
39+
- name: Smoke test binary
40+
run: |
41+
chmod +x cli/bin/freebuff
42+
cli/bin/freebuff --version
43+
44+
- name: Upload binary
45+
uses: actions/upload-artifact@v7
46+
with:
47+
name: freebuff-binary
48+
path: cli/bin/freebuff
49+
retention-days: 1
50+
51+
e2e:
52+
needs: build-freebuff
53+
runs-on: ubuntu-latest
54+
timeout-minutes: 10
55+
strategy:
56+
fail-fast: false
57+
matrix:
58+
test:
59+
- version
60+
- startup
61+
- help-command
62+
- slash-commands
63+
- mode-restriction
64+
- ads-behavior
65+
- agent-startup
66+
- code-edit
67+
- terminal-command
68+
name: e2e-${{ matrix.test }}
69+
steps:
70+
- name: Checkout repository
71+
uses: actions/checkout@v6
72+
73+
- uses: ./.github/actions/setup-project
74+
75+
- name: Install tmux
76+
run: sudo apt-get update && sudo apt-get install -y tmux
77+
78+
- name: Download Freebuff binary
79+
uses: actions/download-artifact@v4
80+
with:
81+
name: freebuff-binary
82+
path: cli/bin/
83+
84+
- name: Make binary executable
85+
run: chmod +x cli/bin/freebuff
86+
87+
- name: Set environment variables
88+
env:
89+
SECRETS_CONTEXT: ${{ toJSON(secrets) }}
90+
run: |
91+
VAR_NAMES=$(bun scripts/generate-ci-env.ts)
92+
echo "$SECRETS_CONTEXT" | jq -r --argjson vars "$VAR_NAMES" '
93+
to_entries | .[] | select(.key as $k | $vars | index($k)) | .key + "=" + .value
94+
' >> $GITHUB_ENV
95+
echo "CODEBUFF_GITHUB_ACTIONS=true" >> $GITHUB_ENV
96+
echo "NEXT_PUBLIC_CB_ENVIRONMENT=test" >> $GITHUB_ENV
97+
echo "CODEBUFF_GITHUB_TOKEN=${{ secrets.CODEBUFF_GITHUB_TOKEN }}" >> $GITHUB_ENV
98+
echo "CODEBUFF_API_KEY=${{ secrets.CODEBUFF_API_KEY }}" >> $GITHUB_ENV
99+
100+
- name: Build SDK
101+
run: cd sdk && bun run build
102+
103+
- name: Run e2e test - ${{ matrix.test }}
104+
run: bun test freebuff/e2e/tests/${{ matrix.test }}.e2e.test.ts --timeout=120000
105+
106+
- name: Upload tmux session logs on failure
107+
if: failure()
108+
uses: actions/upload-artifact@v7
109+
with:
110+
name: tmux-logs-${{ matrix.test }}
111+
path: debug/tmux-sessions/
112+
retention-days: 7

freebuff/e2e/README.md

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
# Freebuff E2E Tests
2+
3+
End-to-end tests for the Freebuff CLI binary. Tests verify that the compiled binary works correctly by interacting with it via tmux.
4+
5+
## Architecture
6+
7+
Two testing approaches are supported:
8+
9+
### 1. Direct tmux tests (fast, deterministic)
10+
11+
Use the `FreebuffSession` class to start the binary in tmux, send commands, capture output, and assert directly.
12+
13+
```typescript
14+
import { describe, test, expect, afterEach } from 'bun:test'
15+
import { FreebuffSession, requireFreebuffBinary } from '../utils'
16+
17+
describe('My Feature', () => {
18+
let session: FreebuffSession | null = null
19+
20+
afterEach(async () => {
21+
if (session) await session.stop()
22+
session = null
23+
})
24+
25+
test('works correctly', async () => {
26+
const binary = requireFreebuffBinary()
27+
session = await FreebuffSession.start(binary)
28+
29+
await session.send('/help')
30+
const output = await session.capture(2)
31+
32+
expect(output).toContain('Shortcuts')
33+
}, 60_000)
34+
})
35+
```
36+
37+
### 2. SDK agent-driven tests (AI-powered verification)
38+
39+
Use the Codebuff SDK to run a testing agent that interacts with Freebuff via custom tmux tools. The agent reasons about the CLI output and verifies complex behaviors.
40+
41+
```typescript
42+
import { describe, test, expect, afterEach } from 'bun:test'
43+
import { CodebuffClient } from '@codebuff/sdk'
44+
import { freebuffTesterAgent } from '../agent/freebuff-tester'
45+
import { createFreebuffTmuxTools, requireFreebuffBinary } from '../utils'
46+
47+
describe('Agent Test', () => {
48+
let cleanup: (() => Promise<void>) | null = null
49+
50+
afterEach(async () => {
51+
if (cleanup) await cleanup()
52+
cleanup = null
53+
})
54+
55+
test('verifies startup', async () => {
56+
const apiKey = process.env.CODEBUFF_API_KEY
57+
if (!apiKey) return // Skip if no API key
58+
59+
const binary = requireFreebuffBinary()
60+
const tmuxTools = createFreebuffTmuxTools(binary)
61+
cleanup = tmuxTools.cleanup
62+
63+
const client = new CodebuffClient({ apiKey })
64+
const result = await client.run({
65+
agent: freebuffTesterAgent.id,
66+
prompt: 'Start Freebuff and verify the branding is correct.',
67+
agentDefinitions: [freebuffTesterAgent],
68+
customToolDefinitions: tmuxTools.tools,
69+
handleEvent: () => {},
70+
})
71+
72+
expect(result.output.type).not.toBe('error')
73+
}, 180_000)
74+
})
75+
```
76+
77+
## Prerequisites
78+
79+
- **tmux** must be installed: `brew install tmux` (macOS) or `sudo apt-get install tmux` (Ubuntu)
80+
- **Freebuff binary** must be built: `bun freebuff/cli/build.ts 0.0.0-dev`
81+
- **SDK built** (for agent tests): `cd sdk && bun run build`
82+
- **CODEBUFF_API_KEY** (for agent tests only): Set this environment variable
83+
84+
## Running Tests
85+
86+
### Build the binary first
87+
88+
```bash
89+
bun freebuff/cli/build.ts 0.0.0-dev
90+
```
91+
92+
### Run all tests
93+
94+
```bash
95+
bun test freebuff/e2e/tests/
96+
```
97+
98+
### Run a specific test
99+
100+
```bash
101+
bun test freebuff/e2e/tests/version.e2e.test.ts
102+
bun test freebuff/e2e/tests/startup.e2e.test.ts
103+
bun test freebuff/e2e/tests/help-command.e2e.test.ts
104+
bun test freebuff/e2e/tests/agent-startup.e2e.test.ts
105+
```
106+
107+
### Use a custom binary path
108+
109+
```bash
110+
FREEBUFF_BINARY=/path/to/freebuff bun test freebuff/e2e/tests/
111+
```
112+
113+
## Adding New Tests
114+
115+
1. Create a new file in `freebuff/e2e/tests/` with the naming convention `<feature>.e2e.test.ts`
116+
2. Add the test name (the file name without the `.e2e.test.ts` suffix) to the `.github/workflows/freebuff-e2e.yml` matrix:
117+
118+
```yaml
119+
matrix:
120+
test:
121+
- version
122+
- startup
123+
- help-command
124+
- agent-startup
125+
- your-new-test # <-- add here
126+
```
127+
128+
3. The test will automatically run in parallel with other tests in CI.
129+
130+
## CI Workflow
131+
132+
The `.github/workflows/freebuff-e2e.yml` workflow:
133+
134+
1. **Builds** the Freebuff binary once (linux-x64)
135+
2. **Runs each test file in parallel** via GitHub Actions matrix strategy
136+
3. **Uploads tmux session logs** on failure for debugging
137+
138+
Triggers:
139+
- **Push** to `main` and **pull requests** targeting `main`
140+
- **Manual** via workflow_dispatch
141+
142+
## Utilities Reference
143+
144+
### `FreebuffSession`
145+
146+
| Method | Description |
147+
|--------|-------------|
148+
| `FreebuffSession.start(binaryPath)` | Start binary in tmux, returns session |
149+
| `session.send(text)` | Send text input (presses Enter) |
150+
| `session.sendKey(key)` | Send special key (e.g. `'C-c'`, `'Escape'`) |
151+
| `session.capture(waitSec?)` | Capture terminal output |
152+
| `session.captureLabeled(label, waitSec?)` | Capture and save to session logs |
153+
| `session.waitForText(pattern, timeoutMs?)` | Poll until text appears |
154+
| `session.stop()` | Stop session and clean up |
155+
156+
### `createFreebuffTmuxTools(binaryPath)`
157+
158+
Creates SDK custom tools for agent-driven testing:
159+
- `start_freebuff` - Launch the CLI
160+
- `send_to_freebuff` - Send text input
161+
- `capture_freebuff_output` - Capture terminal output
162+
- `stop_freebuff` - Stop and clean up
163+
164+
### Helper functions
165+
166+
| Function | Description |
167+
|----------|-------------|
168+
| `requireFreebuffBinary()` | Get binary path, throws if not found |
169+
| `getFreebuffBinaryPath()` | Get binary path (may not exist) |
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import type { AgentDefinition } from '@codebuff/sdk'
2+
3+
/**
4+
* Agent definition for testing the Freebuff CLI via tmux.
5+
*
6+
* This agent is designed to be used with the custom tmux tools from
7+
* `createFreebuffTmuxTools()`. It receives a testing task in its prompt
8+
* and uses tmux tools to start Freebuff, interact with it, and verify behavior.
9+
*
10+
* Example usage:
11+
* ```ts
12+
* const { tools, cleanup } = createFreebuffTmuxTools(binaryPath)
13+
* const result = await client.run({
14+
* agent: freebuffTesterAgent.id,
15+
* prompt: 'Start freebuff and verify the welcome screen shows Freebuff branding',
16+
* agentDefinitions: [freebuffTesterAgent],
17+
* customToolDefinitions: tools,
18+
* handleEvent: collector.handleEvent,
19+
* })
20+
* await cleanup()
21+
* ```
22+
*/
23+
export const freebuffTesterAgent: AgentDefinition = {
24+
id: 'freebuff-tester',
25+
displayName: 'Freebuff E2E Tester',
26+
model: 'anthropic/claude-sonnet-4.5',
27+
toolNames: [
28+
'start_freebuff',
29+
'send_to_freebuff',
30+
'capture_freebuff_output',
31+
'stop_freebuff',
32+
],
33+
instructionsPrompt: `You are a QA tester for the Freebuff CLI application.
34+
35+
Your job is to verify that Freebuff behaves correctly by interacting with it
36+
through tmux tools. Follow these steps:
37+
38+
1. Call start_freebuff to launch the CLI
39+
2. Use capture_freebuff_output (with waitSeconds) to see the terminal output
40+
3. Use send_to_freebuff to type commands or text
41+
4. Capture output again to verify behavior
42+
5. ALWAYS call stop_freebuff when done
43+
44+
Key things to verify:
45+
- The CLI starts without errors or crashes
46+
- Branding shows "Freebuff" (not "Codebuff")
47+
- Commands work as expected
48+
- Error messages are user-friendly
49+
50+
Report your findings clearly. State what you tested, what you observed, and
51+
whether each check passed or failed.`,
52+
}

0 commit comments

Comments
 (0)