3 changes: 3 additions & 0 deletions .gitignore
@@ -165,3 +165,6 @@ browseros-server-*
log.txt

.DS_Store

# BAML generated client
**/baml_client/
19 changes: 19 additions & 0 deletions bun.lock
@@ -72,6 +72,7 @@
"@ai-sdk/provider": "2.0.0",
"@ai-sdk/ui-utils": "^1.2.11",
"@anthropic-ai/claude-agent-sdk": "^0.1.11",
"@boundaryml/baml": "^0.214.0",
"@browseros/common": "workspace:*",
"@browseros/server": "workspace:*",
"@browseros/tools": "workspace:*",
@@ -311,6 +312,22 @@

"@bcoe/v8-coverage": ["@bcoe/v8-coverage@0.2.3", "", {}, "sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw=="],

"@boundaryml/baml": ["@boundaryml/baml@0.214.0", "", { "dependencies": { "@scarf/scarf": "^1.3.0" }, "optionalDependencies": { "@boundaryml/baml-darwin-arm64": "0.214.0", "@boundaryml/baml-darwin-x64": "0.214.0", "@boundaryml/baml-linux-arm64-gnu": "0.214.0", "@boundaryml/baml-linux-arm64-musl": "0.214.0", "@boundaryml/baml-linux-x64-gnu": "0.214.0", "@boundaryml/baml-linux-x64-musl": "0.214.0", "@boundaryml/baml-win32-x64-msvc": "0.214.0" }, "bin": { "baml-cli": "cli.js", "baml": "cli.js" } }, "sha512-w2FBsK0LBsFtQ5qSsSoL3Gp+aGg/qefzqSY6Bkyg/Obyj1U4T7WK+HyNTOKHx0pLdXKXGjmfNKLZZXzPb+/KHw=="],

"@boundaryml/baml-darwin-arm64": ["@boundaryml/baml-darwin-arm64@0.214.0", "", { "os": "darwin", "cpu": "arm64" }, "sha512-qCXHwf1VP79jNqhS1/X/XAEPb9jDRDcHkwA7i4t2LJk0uN/j3Yy1dtCj1+VVFq6FW1uOwYSb4ieZLdZ/0w4UVQ=="],

"@boundaryml/baml-darwin-x64": ["@boundaryml/baml-darwin-x64@0.214.0", "", { "os": "darwin", "cpu": "x64" }, "sha512-/2MOM+QTucCbPlAxaTBPG/ZFFaYaV7H2DhDg5VcS1l43JxYpMAili/lslWbfQRa/ZgxfU44Jp4ptLkr+1fNPuw=="],

"@boundaryml/baml-linux-arm64-gnu": ["@boundaryml/baml-linux-arm64-gnu@0.214.0", "", { "os": "linux", "cpu": "arm64" }, "sha512-9suGfkdAOYS49C/Z2YAioaPCpOV6reJmPPDiFSg6CbHD3yMWHYHgeOTWpOdB+xpKZJqiB0Q1wyaXTF/UxF3HdA=="],

"@boundaryml/baml-linux-arm64-musl": ["@boundaryml/baml-linux-arm64-musl@0.214.0", "", { "os": "linux", "cpu": "arm64" }, "sha512-LMH8m8Er/V6x1BsPCdsX0W4zxdMmk+4wE2YjS3DJisjA8/fZTyk4I8wl01Qf3Azq3GSAKtlBeP70+w2KKrQTkg=="],

"@boundaryml/baml-linux-x64-gnu": ["@boundaryml/baml-linux-x64-gnu@0.214.0", "", { "os": "linux", "cpu": "x64" }, "sha512-3JZ/BZeVpgMvcB6rvI4dJ9amCt+cGmyl7DYaNY9csF2vnr8JltRmA4fTSUWq9F5vk9la3juDvLHmUKhQRlVaPw=="],

"@boundaryml/baml-linux-x64-musl": ["@boundaryml/baml-linux-x64-musl@0.214.0", "", { "os": "linux", "cpu": "x64" }, "sha512-XMowDiaqbT4DsOVMCdg3Rc8RjyyXQLXjKSvuj5gsuf09AkMceN7niJO4+OhvWKWH2l3eMjRMaT2Dp32quMzXHA=="],

"@boundaryml/baml-win32-x64-msvc": ["@boundaryml/baml-win32-x64-msvc@0.214.0", "", { "os": "win32", "cpu": "x64" }, "sha512-IUdaaJr4v8PdCY8Te+h7E6sLw+wPicPTvAPBO3FAYz5e3/h7J6pjeBcsmy/MdduNVPUC5AB3V+TbIFsvRJcN9w=="],

"@browseros/agent": ["@browseros/agent@workspace:packages/agent"],

"@browseros/codex-sdk-ts": ["@browseros/codex-sdk-ts@workspace:packages/codex-sdk-ts"],
@@ -685,6 +702,8 @@

"@rtsao/scc": ["@rtsao/scc@1.1.0", "", {}, "sha512-zt6OdqaDoOnJ1ZYsCYGt9YmWzDXl4vQdKTyJev62gFhRGKdx7mcT54V9KIjg+d2wi9EXsPvAPKe7i7WjfVWB8g=="],

"@scarf/scarf": ["@scarf/scarf@1.4.0", "", {}, "sha512-xxeapPiUXdZAE3che6f3xogoJPeZgig6omHEy1rIY5WVsB3H2BHNnZH+gHG6x91SCWyQCzWGsuL2Hh3ClO5/qQ=="],

"@sec-ant/readable-stream": ["@sec-ant/readable-stream@0.4.1", "", {}, "sha512-831qok9r2t8AlxLko40y2ebgSDhenenCatLVeW/uBtnHPyhHOvG0C7TvfgecV+wHzIm5KUICgzmVpWS+IMEAeg=="],

"@selderee/plugin-htmlparser2": ["@selderee/plugin-htmlparser2@0.11.0", "", { "dependencies": { "domhandler": "^5.0.3", "selderee": "^0.11.0" } }, "sha512-P33hHGdldxGabLFjPPpaTxVolMrzrcegejx+0GxjrIb9Zv48D8yAIA/QTDR2dFl7Uz7urX8aX6+5bCZslr+gWQ=="],
5 changes: 3 additions & 2 deletions package.json
@@ -8,9 +8,10 @@
"packages/*"
],
"scripts": {
"start": "bun run build:codex-sdk-ts && CODEX_BINARY_PATH=third_party/bin/codex bun --env-file=.env.dev packages/server/src/index.ts",
"start:debug": "bun run build:codex-sdk-ts && CODEX_BINARY_PATH=third_party/bin/codex bun --inspect-brk --env-file=.env.dev packages/server/src/index.ts",
"start": "bun run build:codex-sdk-ts && bun run build:baml && CODEX_BINARY_PATH=third_party/bin/codex bun --env-file=.env.dev packages/server/src/index.ts",
"start:debug": "bun run build:codex-sdk-ts && bun run build:baml && CODEX_BINARY_PATH=third_party/bin/codex bun --inspect-brk --env-file=.env.dev packages/server/src/index.ts",
"build:codex-sdk-ts": "bun run --filter @browseros/codex-sdk-ts prepare",
"build:baml": "cd packages/agent/src/baml && bunx baml-cli generate",
"test": "bun test; bun run test:cleanup",
"test:all": "bun test --workspace",
"test:common": "bun run --filter @browseros/common test",
1 change: 1 addition & 0 deletions packages/agent/package.json
@@ -37,6 +37,7 @@
"@ai-sdk/provider": "2.0.0",
"@ai-sdk/ui-utils": "^1.2.11",
"@anthropic-ai/claude-agent-sdk": "^0.1.11",
"@boundaryml/baml": "^0.214.0",
"@browseros/common": "workspace:*",
"@browseros/server": "workspace:*",
"@browseros/tools": "workspace:*",
66 changes: 63 additions & 3 deletions packages/agent/src/agent/GeminiAgent.ts
@@ -6,12 +6,14 @@ import {
type GeminiClient,
type ToolCallRequestInfo,
} from '@google/gemini-cli-core';
import type { Part } from '@google/genai';
import type { Part, Content } from '@google/genai';
import { logger, fetchBrowserOSConfig, getLLMConfigFromProvider } from '@browseros/common';
import { VercelAIContentGenerator, AIProvider } from './gemini-vercel-sdk-adapter/index.js';
import type { HonoSSEStream } from './gemini-vercel-sdk-adapter/types.js';
import { AgentExecutionError } from '../errors.js';
import type { AgentConfig } from './types.js';
import { getBAMLExtractor, type JSONSchema } from '../baml/index.js';
import { buildExtractionContext } from './extractionUtils.js';

const MAX_TURNS = 100;

@@ -43,6 +45,7 @@ export class GeminiAgent {
private geminiConfig: GeminiConfig,
private contentGenerator: VercelAIContentGenerator,
private conversationId: string,
private agentConfig: AgentConfig,
) {}

static async create(config: AgentConfig): Promise<GeminiAgent> {
@@ -107,14 +110,19 @@
model: resolvedConfig.model,
});

return new GeminiAgent(client, geminiConfig, contentGenerator, resolvedConfig.conversationId);
return new GeminiAgent(client, geminiConfig, contentGenerator, resolvedConfig.conversationId, resolvedConfig);
}

getHistory() {
return this.client.getHistory();
}

async execute(message: string, honoStream: HonoSSEStream, signal?: AbortSignal): Promise<void> {
async execute(
message: string,
honoStream: HonoSSEStream,
signal?: AbortSignal,
responseSchema?: JSONSchema,
): Promise<void> {
this.contentGenerator.setHonoStream(honoStream);

const abortSignal = signal || new AbortController().signal;
@@ -127,6 +135,7 @@
conversationId: this.conversationId,
message: message.substring(0, 100),
historyLength: this.client.getHistory().length,
hasResponseSchema: !!responseSchema,
});

while (true) {
@@ -210,6 +219,57 @@
});
break;
}

}

// Extract structured output if responseSchema provided
if (responseSchema) {
await this.extractStructuredOutput(message, honoStream, responseSchema);
}
}

private async extractStructuredOutput(
query: string,
honoStream: HonoSSEStream,
responseSchema: JSONSchema,
): Promise<void> {
try {
const history = this.client.getHistory() as Content[];
const context = buildExtractionContext(history, 4);

if (!context) {
logger.warn('No model responses found for extraction', {
conversationId: this.conversationId,
});
return;
}

logger.debug('Extracting structured output', {
conversationId: this.conversationId,
queryLength: query.length,
contextLength: context.length,
});

const extractor = getBAMLExtractor();
const extracted = await extractor.extract(query, context, responseSchema, this.agentConfig);

// Emit structured output as SSE event
const sseData = JSON.stringify({
type: 'structured-output',
data: extracted,
});
await honoStream.write(`d:${sseData}\n`);

logger.info('Structured output extracted', {
conversationId: this.conversationId,
hasData: !!extracted,
});
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
logger.error('Failed to extract structured output', {
conversationId: this.conversationId,
error: errorMessage,
});
}
}
}
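
Review note: a hedged sketch of consuming the new `structured-output` frame on the client. The `d:<json>` framing and event shape come from `extractStructuredOutput` above; the endpoint path and request body are assumptions.

```ts
// Hypothetical caller; endpoint, message, and schema are illustrative.
const res = await fetch('/agent/execute', {
  method: 'POST',
  headers: { 'content-type': 'application/json' },
  body: JSON.stringify({
    message: 'What does the Pro plan cost?',
    responseSchema: {
      type: 'object',
      properties: { price: { type: 'number' }, currency: { type: 'string' } },
    },
  }),
});

const reader = res.body!.pipeThrough(new TextDecoderStream()).getReader();
let buffer = '';
for (;;) {
  const { value, done } = await reader.read();
  if (done) break;
  buffer += value;
  const lines = buffer.split('\n');
  buffer = lines.pop() ?? ''; // keep any trailing partial line for the next read
  for (const line of lines) {
    if (!line.startsWith('d:')) continue; // frames are `d:<json>` per the write above
    const event = JSON.parse(line.slice(2));
    if (event.type === 'structured-output') console.log('extracted:', event.data);
  }
}
```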
51 changes: 51 additions & 0 deletions packages/agent/src/agent/extractionUtils.ts
@@ -0,0 +1,51 @@
import type { Content, Part } from '@google/genai';

const MAX_CONTEXT_LENGTH = 32000; // ~8k tokens

export function extractTextFromPart(part: Part): string {
if ('text' in part && typeof part.text === 'string') {
return part.text;
}
return '';
}

export function extractTextFromContent(content: Content): string {
if (!content.parts) return '';

return content.parts
.map(extractTextFromPart)
.filter(Boolean)
.join('\n');
}

export function buildExtractionContext(
history: Content[],
maxResponses: number = 4,
): string | null {
// Get last N model responses
const modelResponses = history
.filter((msg) => msg.role === 'model')
.slice(-maxResponses);

if (modelResponses.length === 0) {
return null;
}

// Extract text from each model response
const texts = modelResponses
.map(extractTextFromContent)
.filter(Boolean);

if (texts.length === 0) {
return null;
}

let context = texts.join('\n\n---\n\n');

// Truncate from start if too long
if (context.length > MAX_CONTEXT_LENGTH) {
context = context.slice(-MAX_CONTEXT_LENGTH);
}

return context;
}
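
Review note: a minimal illustration of what `buildExtractionContext` returns. Only `model` turns survive, joined by `---` separators; the history contents are invented.

```ts
import type { Content } from '@google/genai';
import { buildExtractionContext } from './extractionUtils.js';

const history: Content[] = [
  { role: 'user', parts: [{ text: 'What does the page say?' }] },
  { role: 'model', parts: [{ text: 'The page lists three plans.' }] },
  { role: 'model', parts: [{ text: 'The Pro plan costs $20/month.' }] },
];

// => "The page lists three plans.\n\n---\n\nThe Pro plan costs $20/month."
console.log(buildExtractionContext(history, 4));
```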
13 changes: 13 additions & 0 deletions packages/agent/src/agent/gemini-vercel-sdk-adapter/index.ts
@@ -171,6 +171,19 @@ export class VercelAIContentGenerator implements ContentGenerator {
);
}

/**
* Simple text generation from a prompt string
* Used by BAML extractor for structured output extraction
*/
async generateTextFromPrompt(prompt: string, temperature = 0.1): Promise<string> {
const result = await generateText({
model: this.providerInstance(this.model) as Parameters<typeof generateText>[0]['model'],
prompt,
temperature,
});
return result.text;
}

/**
* Create provider instance based on config
*/
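
Review note: the extractor itself isn't in this diff, so here is a hedged sketch of how it presumably wires into the new helper; the `declare`d values stand in for state the real extractor owns.

```ts
import { VercelAIContentGenerator } from './gemini-vercel-sdk-adapter/index.js';

// Stand-ins for values the real extractor owns; both are assumptions here.
declare const contentGenerator: VercelAIContentGenerator;
declare const renderedPrompt: string; // e.g. produced by BAML prompt rendering

// Temperature defaults to 0.1, keeping extraction near-deterministic.
const raw = await contentGenerator.generateTextFromPrompt(renderedPrompt);
```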
24 changes: 24 additions & 0 deletions packages/agent/src/baml/baml_src/clients.baml
@@ -0,0 +1,24 @@
// BAML Client Configuration
//
// NOTE: We only use this for b.request to render prompts with ctx.output_format()
// The actual LLM call is made via Vercel AI SDK, not BAML's HTTP client.
// These are dummy configs - credentials are not used at runtime.

retry_policy Exponential {
max_retries 2
strategy {
type exponential_backoff
delay_ms 300
multiplier 2
}
}

// Dummy OpenAI client - used only for prompt rendering via b.request
client<llm> OpenAI {
provider openai
retry_policy Exponential
options {
model env.BAML_OPENAI_MODEL
api_key env.BAML_OPENAI_API_KEY
}
}
37 changes: 37 additions & 0 deletions packages/agent/src/baml/baml_src/extract.baml
@@ -0,0 +1,37 @@
// BAML Extraction Function
//
// Dynamic extraction using @@dynamic types.
// Schema is injected at runtime via TypeBuilder.addBaml()

// Response type with dynamic data field
// The actual schema is injected at runtime
class Response {
@@dynamic
}

// Extraction prompt template
// Uses ctx.output_format() to render the schema in a format optimized for LLMs
template_string ExtractionPrompt(query: string, content: string) #"
You are extracting structured data from an AI assistant's response.

The user originally asked:
{{ query }}

Based on this request, extract the relevant information from the assistant's response below.
Be precise and only extract what is explicitly present in the content.
If a field cannot be determined from the content, use null.

{{ ctx.output_format(prefix="Answer with JSON matching this schema:\n") }}

{{ _.role('user') }}
Assistant's response to extract from:
---
{{ content }}
---
"#

// Extraction function - uses OpenAI client for prompt rendering
function Extract(query: string, content: string) -> Response {
client OpenAI
prompt #"{{ ExtractionPrompt(query, content) }}"#
}
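
Review note: a sketch of the runtime injection the comment above describes. `b.request` usage is confirmed by `clients.baml` in this PR, but the `TypeBuilder` import shape, the `addBaml` syntax, and the field names are assumptions drawn from BAML docs rather than this diff.

```ts
import TypeBuilder from '../baml_client/type_builder';
import { b } from '../baml_client';

const tb = new TypeBuilder();
// Give the @@dynamic Response class its runtime fields (names invented here)
tb.addBaml(`
  dynamic class Response {
    price float?
    currency string?
  }
`);

// Render only; per clients.baml, the dummy OpenAI client never actually runs.
const req = await b.request.Extract(
  'What does the Pro plan cost?',
  'The Pro plan costs $20/month.',
  { tb },
);
console.log(req.body); // rendered prompt, incl. the ctx.output_format() schema text
```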
8 changes: 8 additions & 0 deletions packages/agent/src/baml/baml_src/generators.baml
@@ -0,0 +1,8 @@
// BAML Generator Configuration
// Defines where the TypeScript client is generated

generator target {
output_type typescript
output_dir "../"
version "0.214.0"
}
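
Review note: `output_dir "../"` puts the generated client at `packages/agent/src/baml/baml_client/`, which is exactly the path the new `.gitignore` rule excludes; that is why both `start` scripts chain `build:baml` first, and why a fresh checkout cannot skip it. Minimal sketch (import path assumes a caller inside `packages/agent/src/baml/`):

```ts
// Fails on a fresh checkout until `bun run build:baml` regenerates the client.
import { b } from './baml_client';
```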