CodebuffAI
diff --git a/‎.github/workflows/freebuff-e2e.yml‎
Lines changed: 112 additions & 0 deletions b/‎.github/workflows/freebuff-e2e.yml‎
Lines changed: 112 additions & 0 deletions
diff --git a/‎freebuff/e2e/README.md‎
Lines changed: 169 additions & 0 deletions b/‎freebuff/e2e/README.md‎
Lines changed: 169 additions & 0 deletions
diff --git a/‎freebuff/e2e/agent/freebuff-tester.ts‎
Lines changed: 52 additions & 0 deletions b/‎freebuff/e2e/agent/freebuff-tester.ts‎
Lines changed: 52 additions & 0 deletions
@@ -0,0 +1,112 @@
+# Builds the Freebuff binary and runs the e2e test files against it.
+name: Freebuff E2E Tests
+
+# Runs on pushes to main, pull requests targeting main, and manual dispatch.
+on:
+  push:
+    branches: ['main']
+  pull_request:
+    branches: ['main']
+  workflow_dispatch: # Manual trigger
+
+# Runs are grouped per ref; a newer run cancels the in-progress one in its group.
+concurrency:
+  group: freebuff-e2e-${{ github.ref }}
+  cancel-in-progress: true
+jobs:
+  # Builds the Freebuff binary once, smoke-tests it, and uploads it as the
+  # `freebuff-binary` artifact.
+  build-freebuff:
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6
+
+      - uses: ./.github/actions/setup-project
+
+      # Writes KEY=value lines to $GITHUB_ENV for every secret whose name is in
+      # the list printed by generate-ci-env.ts (handed to jq as JSON through
+      # --argjson), then appends the fixed build-time settings.
+      # NOTE(review): assumes the script prints a JSON array of names and that
+      # secret values are single-line; confirm against the script.
+      - name: Set environment variables
+        env:
+          SECRETS_CONTEXT: ${{ toJSON(secrets) }}
+        run: |
+          VAR_NAMES=$(bun scripts/generate-ci-env.ts --scope client)
+          echo "$SECRETS_CONTEXT" | jq -r --argjson vars "$VAR_NAMES" '
+            to_entries | .[] | select(.key as $k | $vars | index($k)) | .key + "=" + .value
+          ' >> $GITHUB_ENV
+          echo "FREEBUFF_MODE=true" >> $GITHUB_ENV
+          echo "NEXT_PUBLIC_CB_ENVIRONMENT=prod" >> $GITHUB_ENV
+          echo "CODEBUFF_GITHUB_ACTIONS=true" >> $GITHUB_ENV
+
+      # The argument is the version string passed to the build script.
+      - name: Build Freebuff binary
+        run: bun freebuff/cli/build.ts 0.0.0-e2e
+
+      # Fails the job early if the freshly built binary cannot even start.
+      - name: Smoke test binary
+        run: |
+          chmod +x cli/bin/freebuff
+          cli/bin/freebuff --version
+
+      # Only needed within this workflow run, hence the one-day retention.
+      - name: Upload binary
+        uses: actions/upload-artifact@v7
+        with:
+          name: freebuff-binary
+          path: cli/bin/freebuff
+          retention-days: 1
+
+  # Runs each e2e test file as its own matrix job against the binary produced
+  # by build-freebuff.
+  e2e:
+    needs: build-freebuff
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    strategy:
+      # Let the remaining matrix entries finish when one of them fails.
+      fail-fast: false
+      matrix:
+        # Each entry maps to freebuff/e2e/tests/<entry>.e2e.test.ts.
+        test:
+          - version
+          - startup
+          - help-command
+          - slash-commands
+          - mode-restriction
+          - ads-behavior
+          - agent-startup
+          - code-edit
+          - terminal-command
+    name: e2e-${{ matrix.test }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6
+
+      - uses: ./.github/actions/setup-project
+
+      - name: Install tmux
+        run: sudo apt-get update && sudo apt-get install -y tmux
+
+      - name: Download Freebuff binary
+        uses: actions/download-artifact@v4
+        with:
+          name: freebuff-binary
+          path: cli/bin/
+
+      # Restore the executable bit; artifacts do not retain file permissions.
+      - name: Make binary executable
+        run: chmod +x cli/bin/freebuff
+
+      # Writes KEY=value lines to $GITHUB_ENV for every secret whose name is in
+      # the list printed by generate-ci-env.ts, then appends the test settings.
+      # The two explicitly exported secrets reach the script through env vars
+      # rather than being expanded inside the shell source, so a value that
+      # contains quotes, `$`, or backticks cannot alter the script.
+      - name: Set environment variables
+        env:
+          SECRETS_CONTEXT: ${{ toJSON(secrets) }}
+          SECRET_CODEBUFF_GITHUB_TOKEN: ${{ secrets.CODEBUFF_GITHUB_TOKEN }}
+          SECRET_CODEBUFF_API_KEY: ${{ secrets.CODEBUFF_API_KEY }}
+        run: |
+          VAR_NAMES=$(bun scripts/generate-ci-env.ts)
+          echo "$SECRETS_CONTEXT" | jq -r --argjson vars "$VAR_NAMES" '
+            to_entries | .[] | select(.key as $k | $vars | index($k)) | .key + "=" + .value
+          ' >> $GITHUB_ENV
+          echo "CODEBUFF_GITHUB_ACTIONS=true" >> $GITHUB_ENV
+          echo "NEXT_PUBLIC_CB_ENVIRONMENT=test" >> $GITHUB_ENV
+          echo "CODEBUFF_GITHUB_TOKEN=$SECRET_CODEBUFF_GITHUB_TOKEN" >> $GITHUB_ENV
+          echo "CODEBUFF_API_KEY=$SECRET_CODEBUFF_API_KEY" >> $GITHUB_ENV
+
+      - name: Build SDK
+        run: cd sdk && bun run build
+
+      # --timeout is bun's per-test timeout in milliseconds.
+      - name: Run e2e test - ${{ matrix.test }}
+        run: bun test freebuff/e2e/tests/${{ matrix.test }}.e2e.test.ts --timeout=120000
+
+      # Session logs are kept for a week to debug failed runs.
+      - name: Upload tmux session logs on failure
+        if: failure()
+        uses: actions/upload-artifact@v7
+        with:
+          name: tmux-logs-${{ matrix.test }}
+          path: debug/tmux-sessions/
+          retention-days: 7
@@ -0,0 +1,169 @@
+# Freebuff E2E Tests
+
+End-to-end tests for the Freebuff CLI binary. Tests verify that the compiled binary works correctly by interacting with it via tmux.
+
+## Architecture
+
+Two testing approaches are supported:
+
+### 1. Direct tmux tests (fast, deterministic)
+
+Use the `FreebuffSession` class to start the binary in tmux, send commands, capture output, and assert directly.
+
+```typescript
+import { describe, test, expect, afterEach } from 'bun:test'
+import { FreebuffSession, requireFreebuffBinary } from '../utils'
+
+describe('My Feature', () => {
+  let session: FreebuffSession | null = null
+
+  afterEach(async () => {
+    if (session) await session.stop()
+    session = null
+  })
+
+  test('works correctly', async () => {
+    const binary = requireFreebuffBinary()
+    session = await FreebuffSession.start(binary)
+
+    await session.send('/help')
+    const output = await session.capture(2)
+
+    expect(output).toContain('Shortcuts')
+  }, 60_000)
+})
+```
+
+### 2. SDK agent-driven tests (AI-powered verification)
+
+Use the Codebuff SDK to run a testing agent that interacts with Freebuff via custom tmux tools. The agent reasons about the CLI output and verifies complex behaviors.
+
+```typescript
+import { describe, test, expect, afterEach } from 'bun:test'
+import { CodebuffClient } from '@codebuff/sdk'
+import { freebuffTesterAgent } from '../agent/freebuff-tester'
+import { createFreebuffTmuxTools, requireFreebuffBinary } from '../utils'
+
+describe('Agent Test', () => {
+  let cleanup: (() => Promise<void>) | null = null
+
+  afterEach(async () => {
+    if (cleanup) await cleanup()
+    cleanup = null
+  })
+
+  test('verifies startup', async () => {
+    const apiKey = process.env.CODEBUFF_API_KEY
+    if (!apiKey) return // No API key: return early (the test is reported as passed, not skipped)
+
+    const binary = requireFreebuffBinary()
+    const tmuxTools = createFreebuffTmuxTools(binary)
+    cleanup = tmuxTools.cleanup
+
+    const client = new CodebuffClient({ apiKey })
+    const result = await client.run({
+      agent: freebuffTesterAgent.id,
+      prompt: 'Start Freebuff and verify the branding is correct.',
+      agentDefinitions: [freebuffTesterAgent],
+      customToolDefinitions: tmuxTools.tools,
+      handleEvent: () => {},
+    })
+
+    expect(result.output.type).not.toBe('error')
+  }, 180_000)
+})
+```
+
+## Prerequisites
+
+- **tmux** must be installed: `brew install tmux` (macOS) or `sudo apt-get install tmux` (Ubuntu)
+- **Freebuff binary** must be built: `bun freebuff/cli/build.ts 0.0.0-dev`
+- **SDK built** (for agent tests): `cd sdk && bun run build`
+- **CODEBUFF_API_KEY** (for agent tests only): Set this environment variable
+
+## Running Tests
+
+### Build the binary first
+
+```bash
+bun freebuff/cli/build.ts 0.0.0-dev
+```
+
+### Run all tests
+
+```bash
+bun test freebuff/e2e/tests/
+```
+
+### Run a specific test
+
+```bash
+bun test freebuff/e2e/tests/version.e2e.test.ts
+bun test freebuff/e2e/tests/startup.e2e.test.ts
+bun test freebuff/e2e/tests/help-command.e2e.test.ts
+bun test freebuff/e2e/tests/agent-startup.e2e.test.ts
+```
+
+### Use a custom binary path
+
+```bash
+FREEBUFF_BINARY=/path/to/freebuff bun test freebuff/e2e/tests/
+```
+
+## Adding New Tests
+
+1. Create a new file in `freebuff/e2e/tests/` with the naming convention `<feature>.e2e.test.ts`
+2. Add the test name to the `matrix.test` list of the `e2e` job in `.github/workflows/freebuff-e2e.yml` (list abbreviated here):
+
+```yaml
+matrix:
+  test:
+    - version
+    - startup
+    - help-command
+    - agent-startup
+    - your-new-test    # <-- add here
+```
+
+3. The test will automatically run in parallel with other tests in CI.
+
+## CI Workflow
+
+The `.github/workflows/freebuff-e2e.yml` workflow:
+
+1. **Builds** the Freebuff binary once (linux-x64)
+2. **Runs each test file in parallel** via GitHub Actions matrix strategy
+3. **Uploads tmux session logs** on failure for debugging
+
+Triggers:
+- **Push** to `main`
+- **Pull requests** targeting `main`
+- **Manual** via workflow_dispatch
+
+## Utilities Reference
+
+### `FreebuffSession`
+
+| Method | Description |
+|--------|-------------|
+| `FreebuffSession.start(binaryPath)` | Start binary in tmux, returns session |
+| `session.send(text)` | Send text input (presses Enter) |
+| `session.sendKey(key)` | Send special key (e.g. `'C-c'`, `'Escape'`) |
+| `session.capture(waitSec?)` | Capture terminal output |
+| `session.captureLabeled(label, waitSec?)` | Capture and save to session logs |
+| `session.waitForText(pattern, timeoutMs?)` | Poll until text appears |
+| `session.stop()` | Stop session and clean up |
+
+### `createFreebuffTmuxTools(binaryPath)`
+
+Creates SDK custom tools for agent-driven testing:
+- `start_freebuff` - Launch the CLI
+- `send_to_freebuff` - Send text input
+- `capture_freebuff_output` - Capture terminal output
+- `stop_freebuff` - Stop and clean up
+
+### Helper functions
+
+| Function | Description |
+|----------|-------------|
+| `requireFreebuffBinary()` | Get binary path, throws if not found |
+| `getFreebuffBinaryPath()` | Get binary path (may not exist) |
@@ -0,0 +1,52 @@
+import type { AgentDefinition } from '@codebuff/sdk'
+
+/**
+ * Agent definition for testing the Freebuff CLI via tmux.
+ *
+ * This agent is designed to be used with the custom tmux tools from
+ * `createFreebuffTmuxTools()`. It receives a testing task in its prompt
+ * and uses tmux tools to start Freebuff, interact with it, and verify behavior.
+ * The names in `toolNames` refer to those custom tools, so pass them to the
+ * run via `customToolDefinitions`.
+ *
+ * Example usage:
+ * ```ts
+ * const { tools, cleanup } = createFreebuffTmuxTools(binaryPath)
+ * try {
+ *   const result = await client.run({
+ *     agent: freebuffTesterAgent.id,
+ *     prompt: 'Start freebuff and verify the welcome screen shows Freebuff branding',
+ *     agentDefinitions: [freebuffTesterAgent],
+ *     customToolDefinitions: tools,
+ *     handleEvent: () => {}, // or your own event collector
+ *   })
+ *   // inspect result.output here
+ * } finally {
+ *   // Run cleanup even when client.run throws.
+ *   await cleanup()
+ * }
+ * ```
+ */
+export const freebuffTesterAgent: AgentDefinition = {
+  id: 'freebuff-tester',
+  displayName: 'Freebuff E2E Tester',
+  model: 'anthropic/claude-sonnet-4.5',
+  // Custom tool names, expected to be supplied by createFreebuffTmuxTools().
+  toolNames: [
+    'start_freebuff',
+    'send_to_freebuff',
+    'capture_freebuff_output',
+    'stop_freebuff',
+  ],
+  // Standing instructions; the concrete testing task arrives in the run prompt.
+  instructionsPrompt: `You are a QA tester for the Freebuff CLI application.
+
+Your job is to verify that Freebuff behaves correctly by interacting with it
+through tmux tools. Follow these steps:
+
+1. Call start_freebuff to launch the CLI
+2. Use capture_freebuff_output (with waitSeconds) to see the terminal output
+3. Use send_to_freebuff to type commands or text
+4. Capture output again to verify behavior
+5. ALWAYS call stop_freebuff when done
+
+Key things to verify:
+- The CLI starts without errors or crashes
+- Branding shows "Freebuff" (not "Codebuff")
+- Commands work as expected
+- Error messages are user-friendly
+
+Report your findings clearly. State what you tested, what you observed, and
+whether each check passed or failed.`,
+}