From e9682997f9fa549081ac44ca12a484f477cc54c4 Mon Sep 17 00:00:00 2001 From: shivammittal274 Date: Sat, 29 Nov 2025 00:53:07 +0530 Subject: [PATCH 1/4] baml structured output using vercel ai sdk --- .gitignore | 3 + bun.lock | 19 ++ package.json | 5 +- packages/agent/package.json | 1 + packages/agent/src/agent/GeminiAgent.ts | 66 +++++- .../agent/gemini-vercel-sdk-adapter/index.ts | 13 ++ packages/agent/src/baml/baml_src/clients.baml | 24 ++ packages/agent/src/baml/baml_src/extract.baml | 37 +++ .../agent/src/baml/baml_src/generators.baml | 8 + packages/agent/src/baml/extractor.ts | 217 ++++++++++++++++++ packages/agent/src/baml/index.ts | 12 + packages/agent/src/baml/schemaConverter.ts | 111 +++++++++ packages/agent/src/http/HttpServer.ts | 45 +++- packages/agent/src/http/types.ts | 10 + 14 files changed, 563 insertions(+), 8 deletions(-) create mode 100644 packages/agent/src/baml/baml_src/clients.baml create mode 100644 packages/agent/src/baml/baml_src/extract.baml create mode 100644 packages/agent/src/baml/baml_src/generators.baml create mode 100644 packages/agent/src/baml/extractor.ts create mode 100644 packages/agent/src/baml/index.ts create mode 100644 packages/agent/src/baml/schemaConverter.ts diff --git a/.gitignore b/.gitignore index 8da75d8..36ceb88 100644 --- a/.gitignore +++ b/.gitignore @@ -165,3 +165,6 @@ browseros-server-* log.txt .DS_Store + +# BAML generated client +**/baml_client/ diff --git a/bun.lock b/bun.lock index 72a0da1..d14e78e 100644 --- a/bun.lock +++ b/bun.lock @@ -72,6 +72,7 @@ "@ai-sdk/provider": "2.0.0", "@ai-sdk/ui-utils": "^1.2.11", "@anthropic-ai/claude-agent-sdk": "^0.1.11", + "@boundaryml/baml": "^0.214.0", "@browseros/common": "workspace:*", "@browseros/server": "workspace:*", "@browseros/tools": "workspace:*", @@ -311,6 +312,22 @@ "@bcoe/v8-coverage": ["@bcoe/v8-coverage@0.2.3", "", {}, "sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw=="], + "@boundaryml/baml": 
["@boundaryml/baml@0.214.0", "", { "dependencies": { "@scarf/scarf": "^1.3.0" }, "optionalDependencies": { "@boundaryml/baml-darwin-arm64": "0.214.0", "@boundaryml/baml-darwin-x64": "0.214.0", "@boundaryml/baml-linux-arm64-gnu": "0.214.0", "@boundaryml/baml-linux-arm64-musl": "0.214.0", "@boundaryml/baml-linux-x64-gnu": "0.214.0", "@boundaryml/baml-linux-x64-musl": "0.214.0", "@boundaryml/baml-win32-x64-msvc": "0.214.0" }, "bin": { "baml-cli": "cli.js", "baml": "cli.js" } }, "sha512-w2FBsK0LBsFtQ5qSsSoL3Gp+aGg/qefzqSY6Bkyg/Obyj1U4T7WK+HyNTOKHx0pLdXKXGjmfNKLZZXzPb+/KHw=="], + + "@boundaryml/baml-darwin-arm64": ["@boundaryml/baml-darwin-arm64@0.214.0", "", { "os": "darwin", "cpu": "arm64" }, "sha512-qCXHwf1VP79jNqhS1/X/XAEPb9jDRDcHkwA7i4t2LJk0uN/j3Yy1dtCj1+VVFq6FW1uOwYSb4ieZLdZ/0w4UVQ=="], + + "@boundaryml/baml-darwin-x64": ["@boundaryml/baml-darwin-x64@0.214.0", "", { "os": "darwin", "cpu": "x64" }, "sha512-/2MOM+QTucCbPlAxaTBPG/ZFFaYaV7H2DhDg5VcS1l43JxYpMAili/lslWbfQRa/ZgxfU44Jp4ptLkr+1fNPuw=="], + + "@boundaryml/baml-linux-arm64-gnu": ["@boundaryml/baml-linux-arm64-gnu@0.214.0", "", { "os": "linux", "cpu": "arm64" }, "sha512-9suGfkdAOYS49C/Z2YAioaPCpOV6reJmPPDiFSg6CbHD3yMWHYHgeOTWpOdB+xpKZJqiB0Q1wyaXTF/UxF3HdA=="], + + "@boundaryml/baml-linux-arm64-musl": ["@boundaryml/baml-linux-arm64-musl@0.214.0", "", { "os": "linux", "cpu": "arm64" }, "sha512-LMH8m8Er/V6x1BsPCdsX0W4zxdMmk+4wE2YjS3DJisjA8/fZTyk4I8wl01Qf3Azq3GSAKtlBeP70+w2KKrQTkg=="], + + "@boundaryml/baml-linux-x64-gnu": ["@boundaryml/baml-linux-x64-gnu@0.214.0", "", { "os": "linux", "cpu": "x64" }, "sha512-3JZ/BZeVpgMvcB6rvI4dJ9amCt+cGmyl7DYaNY9csF2vnr8JltRmA4fTSUWq9F5vk9la3juDvLHmUKhQRlVaPw=="], + + "@boundaryml/baml-linux-x64-musl": ["@boundaryml/baml-linux-x64-musl@0.214.0", "", { "os": "linux", "cpu": "x64" }, "sha512-XMowDiaqbT4DsOVMCdg3Rc8RjyyXQLXjKSvuj5gsuf09AkMceN7niJO4+OhvWKWH2l3eMjRMaT2Dp32quMzXHA=="], + + "@boundaryml/baml-win32-x64-msvc": ["@boundaryml/baml-win32-x64-msvc@0.214.0", "", { "os": 
"win32", "cpu": "x64" }, "sha512-IUdaaJr4v8PdCY8Te+h7E6sLw+wPicPTvAPBO3FAYz5e3/h7J6pjeBcsmy/MdduNVPUC5AB3V+TbIFsvRJcN9w=="], + "@browseros/agent": ["@browseros/agent@workspace:packages/agent"], "@browseros/codex-sdk-ts": ["@browseros/codex-sdk-ts@workspace:packages/codex-sdk-ts"], @@ -685,6 +702,8 @@ "@rtsao/scc": ["@rtsao/scc@1.1.0", "", {}, "sha512-zt6OdqaDoOnJ1ZYsCYGt9YmWzDXl4vQdKTyJev62gFhRGKdx7mcT54V9KIjg+d2wi9EXsPvAPKe7i7WjfVWB8g=="], + "@scarf/scarf": ["@scarf/scarf@1.4.0", "", {}, "sha512-xxeapPiUXdZAE3che6f3xogoJPeZgig6omHEy1rIY5WVsB3H2BHNnZH+gHG6x91SCWyQCzWGsuL2Hh3ClO5/qQ=="], + "@sec-ant/readable-stream": ["@sec-ant/readable-stream@0.4.1", "", {}, "sha512-831qok9r2t8AlxLko40y2ebgSDhenenCatLVeW/uBtnHPyhHOvG0C7TvfgecV+wHzIm5KUICgzmVpWS+IMEAeg=="], "@selderee/plugin-htmlparser2": ["@selderee/plugin-htmlparser2@0.11.0", "", { "dependencies": { "domhandler": "^5.0.3", "selderee": "^0.11.0" } }, "sha512-P33hHGdldxGabLFjPPpaTxVolMrzrcegejx+0GxjrIb9Zv48D8yAIA/QTDR2dFl7Uz7urX8aX6+5bCZslr+gWQ=="], diff --git a/package.json b/package.json index f089bf3..8b595f9 100644 --- a/package.json +++ b/package.json @@ -8,9 +8,10 @@ "packages/*" ], "scripts": { - "start": "bun run build:codex-sdk-ts && CODEX_BINARY_PATH=third_party/bin/codex bun --env-file=.env.dev packages/server/src/index.ts", - "start:debug": "bun run build:codex-sdk-ts && CODEX_BINARY_PATH=third_party/bin/codex bun --inspect-brk --env-file=.env.dev packages/server/src/index.ts", + "start": "bun run build:codex-sdk-ts && bun run build:baml && CODEX_BINARY_PATH=third_party/bin/codex bun --env-file=.env.dev packages/server/src/index.ts", + "start:debug": "bun run build:codex-sdk-ts && bun run build:baml && CODEX_BINARY_PATH=third_party/bin/codex bun --inspect-brk --env-file=.env.dev packages/server/src/index.ts", "build:codex-sdk-ts": "bun run --filter @browseros/codex-sdk-ts prepare", + "build:baml": "cd packages/agent/src/baml && bunx baml-cli generate", "test": "bun test; bun run test:cleanup", 
"test:all": "bun test --workspace", "test:common": "bun run --filter @browseros/common test", diff --git a/packages/agent/package.json b/packages/agent/package.json index 846d4f2..2b04579 100644 --- a/packages/agent/package.json +++ b/packages/agent/package.json @@ -37,6 +37,7 @@ "@ai-sdk/provider": "2.0.0", "@ai-sdk/ui-utils": "^1.2.11", "@anthropic-ai/claude-agent-sdk": "^0.1.11", + "@boundaryml/baml": "^0.214.0", "@browseros/common": "workspace:*", "@browseros/server": "workspace:*", "@browseros/tools": "workspace:*", diff --git a/packages/agent/src/agent/GeminiAgent.ts b/packages/agent/src/agent/GeminiAgent.ts index a7ef2c4..68f6903 100644 --- a/packages/agent/src/agent/GeminiAgent.ts +++ b/packages/agent/src/agent/GeminiAgent.ts @@ -6,12 +6,14 @@ import { type GeminiClient, type ToolCallRequestInfo, } from '@google/gemini-cli-core'; -import type { Part } from '@google/genai'; +import type { Part, Content } from '@google/genai'; import { logger, fetchBrowserOSConfig, getLLMConfigFromProvider } from '@browseros/common'; import { VercelAIContentGenerator, AIProvider } from './gemini-vercel-sdk-adapter/index.js'; import type { HonoSSEStream } from './gemini-vercel-sdk-adapter/types.js'; import { AgentExecutionError } from '../errors.js'; import type { AgentConfig } from './types.js'; +import { getBAMLExtractor, type JSONSchema } from '../baml/index.js'; +import { buildExtractionContext } from './extractionUtils.js'; const MAX_TURNS = 100; @@ -43,6 +45,7 @@ export class GeminiAgent { private geminiConfig: GeminiConfig, private contentGenerator: VercelAIContentGenerator, private conversationId: string, + private agentConfig: AgentConfig, ) {} static async create(config: AgentConfig): Promise { @@ -107,14 +110,19 @@ export class GeminiAgent { model: resolvedConfig.model, }); - return new GeminiAgent(client, geminiConfig, contentGenerator, resolvedConfig.conversationId); + return new GeminiAgent(client, geminiConfig, contentGenerator, resolvedConfig.conversationId, 
resolvedConfig); } getHistory() { return this.client.getHistory(); } - async execute(message: string, honoStream: HonoSSEStream, signal?: AbortSignal): Promise { + async execute( + message: string, + honoStream: HonoSSEStream, + signal?: AbortSignal, + responseSchema?: JSONSchema, + ): Promise { this.contentGenerator.setHonoStream(honoStream); const abortSignal = signal || new AbortController().signal; @@ -127,6 +135,7 @@ export class GeminiAgent { conversationId: this.conversationId, message: message.substring(0, 100), historyLength: this.client.getHistory().length, + hasResponseSchema: !!responseSchema, }); while (true) { @@ -210,6 +219,57 @@ export class GeminiAgent { }); break; } + + } + + // Extract structured output if responseSchema provided + if (responseSchema) { + await this.extractStructuredOutput(message, honoStream, responseSchema); + } + } + + private async extractStructuredOutput( + query: string, + honoStream: HonoSSEStream, + responseSchema: JSONSchema, + ): Promise { + try { + const history = this.client.getHistory() as Content[]; + const context = buildExtractionContext(history, 4); + + if (!context) { + logger.warn('No model responses found for extraction', { + conversationId: this.conversationId, + }); + return; + } + + logger.debug('Extracting structured output', { + conversationId: this.conversationId, + queryLength: query.length, + contextLength: context.length, + }); + + const extractor = getBAMLExtractor(); + const extracted = await extractor.extract(query, context, responseSchema, this.agentConfig); + + // Emit structured output as SSE event + const sseData = JSON.stringify({ + type: 'structured-output', + data: extracted, + }); + await honoStream.write(`d:${sseData}\n`); + + logger.info('Structured output extracted', { + conversationId: this.conversationId, + hasData: !!extracted, + }); + } catch (error) { + const errorMessage = error instanceof Error ? 
error.message : String(error); + logger.error('Failed to extract structured output', { + conversationId: this.conversationId, + error: errorMessage, + }); } } } diff --git a/packages/agent/src/agent/gemini-vercel-sdk-adapter/index.ts b/packages/agent/src/agent/gemini-vercel-sdk-adapter/index.ts index 35520d7..9787a76 100644 --- a/packages/agent/src/agent/gemini-vercel-sdk-adapter/index.ts +++ b/packages/agent/src/agent/gemini-vercel-sdk-adapter/index.ts @@ -171,6 +171,19 @@ export class VercelAIContentGenerator implements ContentGenerator { ); } + /** + * Simple text generation from a prompt string + * Used by BAML extractor for structured output extraction + */ + async generateTextFromPrompt(prompt: string, temperature = 0.1): Promise { + const result = await generateText({ + model: this.providerInstance(this.model) as Parameters[0]['model'], + prompt, + temperature, + }); + return result.text; + } + /** * Create provider instance based on config */ diff --git a/packages/agent/src/baml/baml_src/clients.baml b/packages/agent/src/baml/baml_src/clients.baml new file mode 100644 index 0000000..db3475d --- /dev/null +++ b/packages/agent/src/baml/baml_src/clients.baml @@ -0,0 +1,24 @@ +// BAML Client Configuration +// +// NOTE: We only use this for b.request to render prompts with ctx.output_format() +// The actual LLM call is made via Vercel AI SDK, not BAML's HTTP client. +// These are dummy configs - credentials are not used at runtime. 
+ +retry_policy Exponential { + max_retries 2 + strategy { + type exponential_backoff + delay_ms 300 + multiplier 2 + } +} + +// Dummy OpenAI client - used only for prompt rendering via b.request +client OpenAI { + provider openai + retry_policy Exponential + options { + model env.BAML_OPENAI_MODEL + api_key env.BAML_OPENAI_API_KEY + } +} diff --git a/packages/agent/src/baml/baml_src/extract.baml b/packages/agent/src/baml/baml_src/extract.baml new file mode 100644 index 0000000..63dfa09 --- /dev/null +++ b/packages/agent/src/baml/baml_src/extract.baml @@ -0,0 +1,37 @@ +// BAML Extraction Function +// +// Dynamic extraction using @@dynamic types. +// Schema is injected at runtime via TypeBuilder.addBaml() + +// Response type with dynamic data field +// The actual schema is injected at runtime +class Response { + @@dynamic +} + +// Extraction prompt template +// Uses ctx.output_format() to render the schema in a format optimized for LLMs +template_string ExtractionPrompt(query: string, content: string) #" + You are extracting structured data from an AI assistant's response. + + The user originally asked: + {{ query }} + + Based on this request, extract the relevant information from the assistant's response below. + Be precise and only extract what is explicitly present in the content. + If a field cannot be determined from the content, use null. 
+ + {{ ctx.output_format(prefix="Answer with JSON matching this schema:\n") }} + + {{ _.role('user') }} + Assistant's response to extract from: + --- + {{ content }} + --- +"# + +// Extraction function - uses OpenAI client for prompt rendering +function Extract(query: string, content: string) -> Response { + client OpenAI + prompt #"{{ ExtractionPrompt(query, content) }}"# +} diff --git a/packages/agent/src/baml/baml_src/generators.baml b/packages/agent/src/baml/baml_src/generators.baml new file mode 100644 index 0000000..cad4284 --- /dev/null +++ b/packages/agent/src/baml/baml_src/generators.baml @@ -0,0 +1,8 @@ +// BAML Generator Configuration +// Defines where the TypeScript client is generated + +generator target { + output_type typescript + output_dir "../" + version "0.214.0" +} diff --git a/packages/agent/src/baml/extractor.ts b/packages/agent/src/baml/extractor.ts new file mode 100644 index 0000000..cf35d1e --- /dev/null +++ b/packages/agent/src/baml/extractor.ts @@ -0,0 +1,217 @@ +/** + * BAML Structured Data Extractor + * + * Uses BAML's Modular API for structured output extraction: + * - b.request.Extract() → Renders prompt with schema via ctx.output_format() + * - VercelAIContentGenerator → Makes LLM call with any provider + * - b.parse.Extract() → Parses response with SAP (~99% success rate) + * + * This approach leverages BAML's SAP parsing without configuring BAML's HTTP client. 
+ */ + +import { logger } from '@browseros/common'; +import { VercelAIContentGenerator } from '../agent/gemini-vercel-sdk-adapter/index.js'; +import type { VercelAIConfig } from '../agent/gemini-vercel-sdk-adapter/types.js'; +import { jsonSchemaToBAML, type JSONSchema } from './schemaConverter.js'; + +// ============================================================================ +// BAML Client Types (loaded dynamically) +// ============================================================================ + +interface ContentPart { + type: 'text'; + text: string; +} + +interface Message { + role: 'system' | 'user' | 'assistant'; + content: string | ContentPart[]; +} + +interface RequestBody { + model: string; + messages: Message[]; +} + +interface HTTPRequest { + body: { + json: () => RequestBody; + }; +} + +interface BAMLOptions { + tb?: TypeBuilderInstance; + env?: Record; +} + +interface ParsedResponse { + data?: unknown; +} + +interface TypeBuilderInstance { + addBaml: (code: string) => void; +} + +interface TypeBuilderConstructor { + new (): TypeBuilderInstance; +} + +interface BAMLClient { + request: { + Extract: (query: string, content: string, options: BAMLOptions) => Promise; + }; + parse: { + Extract: (llmResponse: string, options: BAMLOptions) => ParsedResponse; + }; +} + +// ============================================================================ +// Extractor Implementation +// ============================================================================ + +export class BAMLExtractor { + private initialized = false; + private b!: BAMLClient; + private TypeBuilder!: TypeBuilderConstructor; + + /** + * Initialize BAML client (lazy loaded) + */ + async initialize(): Promise { + if (this.initialized) return; + + try { + const bamlClient = await import('./baml_client/index.js'); + const typeBuilder = await import('./baml_client/type_builder.js'); + + this.b = bamlClient.b as BAMLClient; + this.TypeBuilder = typeBuilder.default as TypeBuilderConstructor; + 
this.initialized = true; + + logger.info('BAML Extractor initialized'); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + logger.error('Failed to initialize BAML client', { error: message }); + throw new Error( + 'BAML client not found. Run `bunx baml-cli generate` in packages/agent/src/baml' + ); + } + } + + /** + * Extract text content from message (handles both string and ContentPart[]) + */ + private extractMessageContent(content: string | ContentPart[]): string { + if (typeof content === 'string') { + return content; + } + return content + .filter((part) => part.type === 'text') + .map((part) => part.text) + .join('\n'); + } + + /** + * Extract prompt from BAML HTTPRequest body + */ + private extractPrompt(body: RequestBody): string { + const parts: string[] = []; + + for (const msg of body.messages) { + if (msg.role === 'system' || msg.role === 'user') { + const text = this.extractMessageContent(msg.content); + if (text) parts.push(text); + } + } + + return parts.join('\n\n'); + } + + /** + * Extract structured data from content using JSON Schema + * + * @param query - Original user query (provides context for extraction) + * @param content - Content to extract from (e.g., LLM response) + * @param schema - JSON Schema defining the structure to extract + * @param providerConfig - Vercel AI SDK provider configuration + * @returns Extracted structured data matching the schema + * + * @example + * const result = await extractor.extract( + * "What are the product details?", + * "The MacBook Pro costs $1999 and is in stock.", + * { type: 'object', properties: { name: { type: 'string' }, price: { type: 'number' } } }, + * { provider: AIProvider.OPENAI, model: 'gpt-4o', apiKey: '...' } + * ); + */ + async extract( + query: string, + content: string, + schema: JSONSchema, + providerConfig: VercelAIConfig, + rootClassName = 'ExtractedData' + ): Promise { + await this.initialize(); + + // 1. 
Build TypeBuilder with dynamic schema + const bamlCode = jsonSchemaToBAML(schema, rootClassName); + const tb = new this.TypeBuilder(); + + try { + tb.addBaml(bamlCode); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + logger.error('Failed to parse BAML schema', { error: message, bamlCode }); + throw new Error(`Invalid BAML schema: ${message}`); + } + + // 2. Get rendered prompt from BAML + // Uses dummy env since we only need the prompt, not HTTP headers + const dummyEnv = { + BAML_OPENAI_API_KEY: 'dummy-for-prompt-rendering', + BAML_OPENAI_MODEL: 'gpt-4o', + }; + + const httpRequest = await this.b.request.Extract(query, content, { tb, env: dummyEnv }); + const prompt = this.extractPrompt(httpRequest.body.json()); + + logger.debug('BAML prompt rendered', { + promptLength: prompt.length, + provider: providerConfig.provider, + }); + + console.log('BAML prompt', prompt); + + // 3. Call LLM via Vercel AI SDK + const contentGenerator = new VercelAIContentGenerator(providerConfig); + const llmResponse = await contentGenerator.generateTextFromPrompt(prompt); + + logger.debug('LLM response received', { + responseLength: llmResponse.length, + provider: providerConfig.provider, + }); + + // 4. Parse with BAML SAP + const parsed = this.b.parse.Extract(llmResponse, { tb }); + + logger.debug('BAML SAP parsing complete', { + hasData: !!parsed?.data, + provider: providerConfig.provider, + }); + + return parsed?.data ?? 
parsed; + } +} + +// ============================================================================ +// Singleton Export +// ============================================================================ + +let instance: BAMLExtractor | null = null; + +export function getBAMLExtractor(): BAMLExtractor { + if (!instance) { + instance = new BAMLExtractor(); + } + return instance; +} diff --git a/packages/agent/src/baml/index.ts b/packages/agent/src/baml/index.ts new file mode 100644 index 0000000..9167228 --- /dev/null +++ b/packages/agent/src/baml/index.ts @@ -0,0 +1,12 @@ +/** + * BAML Structured Output Module + * + * Uses BAML's Modular API for structured data extraction: + * - b.request.* → Renders prompts with schema via ctx.output_format() + * - b.parse.* → Parses responses with SAP (~99% success rate) + * + * LLM calls are made via Vercel AI SDK (not BAML's HTTP client). + */ + +export { BAMLExtractor, getBAMLExtractor } from './extractor.js'; +export { jsonSchemaToBAML, type JSONSchema } from './schemaConverter.js'; diff --git a/packages/agent/src/baml/schemaConverter.ts b/packages/agent/src/baml/schemaConverter.ts new file mode 100644 index 0000000..4dc3095 --- /dev/null +++ b/packages/agent/src/baml/schemaConverter.ts @@ -0,0 +1,111 @@ +/** + * JSON Schema to BAML Type Converter + * + * Converts JSON Schema definitions to BAML class definitions + * for use with BAML's dynamic type system (@@dynamic) + * + * Supports OpenAI Structured Outputs compatible subset: + * - type, properties, items, required, description, enum + * - NO: oneOf, anyOf, allOf, $ref (not supported by OpenAI) + */ + +export interface JSONSchema { + type?: 'object' | 'string' | 'number' | 'integer' | 'boolean' | 'array' | 'null'; + properties?: Record; + items?: JSONSchema; + required?: string[]; + description?: string; + enum?: string[]; + additionalProperties?: boolean; +} + +/** + * Convert a JSON Schema type to BAML type + */ +function jsonTypeToBaml(schema: JSONSchema, isRequired: 
boolean): string { + const nullSuffix = isRequired ? '' : ' | null'; + + if (schema.enum) { + const enumValues = schema.enum.map((v) => `"${v}"`); + return `(${enumValues.join(' | ')})${nullSuffix}`; + } + + switch (schema.type) { + case 'string': + return `string${nullSuffix}`; + case 'number': + case 'integer': + return `float${nullSuffix}`; + case 'boolean': + return `bool${nullSuffix}`; + case 'array': + if (schema.items) { + const itemType = jsonTypeToBaml(schema.items, true); + return `${itemType}[]${nullSuffix}`; + } + return `string[]${nullSuffix}`; + case 'object': + return `map${nullSuffix}`; + default: + return `string${nullSuffix}`; + } +} + +/** + * Convert JSON Schema to BAML class definition + * + * @param schema - JSON Schema object + * @param className - Name for the generated BAML class (default: 'ExtractedData') + * @returns BAML class definition string + * + * @example + * const schema = { + * type: 'object', + * properties: { + * name: { type: 'string', description: 'User name' }, + * age: { type: 'number' } + * }, + * required: ['name'] + * }; + * + * jsonSchemaToBAML(schema, 'User'); + * // Returns: + * // class User { + * // name string @description("User name") + * // age float | null + * // } + * // + * // override Response { + * // data User + * // } + */ +export function jsonSchemaToBAML(schema: JSONSchema, className = 'ExtractedData'): string { + if (schema.type !== 'object' || !schema.properties) { + throw new Error('Schema must be an object type with properties'); + } + + const requiredFields = new Set(schema.required || []); + const lines: string[] = [`class ${className} {`]; + + for (const [fieldName, fieldSchema] of Object.entries(schema.properties)) { + const isRequired = requiredFields.has(fieldName); + const bamlType = jsonTypeToBaml(fieldSchema, isRequired); + + let fieldDef = ` ${fieldName} ${bamlType}`; + + if (fieldSchema.description) { + const escapedDesc = fieldSchema.description.replace(/"/g, '\\"'); + fieldDef += ` 
@description("${escapedDesc}")`; + } + + lines.push(fieldDef); + } + + lines.push('}'); + lines.push(''); + lines.push('dynamic class Response {'); + lines.push(` data ${className}`); + lines.push('}'); + + return lines.join('\n'); +} diff --git a/packages/agent/src/http/HttpServer.ts b/packages/agent/src/http/HttpServer.ts index 5519046..56f5b6c 100644 --- a/packages/agent/src/http/HttpServer.ts +++ b/packages/agent/src/http/HttpServer.ts @@ -10,8 +10,9 @@ import type { z } from 'zod'; import { SessionManager } from '../session/SessionManager.js'; import { HttpAgentError, ValidationError, AgentExecutionError } from '../errors.js'; -import { ChatRequestSchema, HttpServerConfigSchema } from './types.js'; -import type { HttpServerConfig, ValidatedHttpServerConfig, ChatRequest } from './types.js'; +import { ChatRequestSchema, ExtractRequestSchema, HttpServerConfigSchema } from './types.js'; +import type { HttpServerConfig, ValidatedHttpServerConfig, ChatRequest, ExtractRequest } from './types.js'; +import { getBAMLExtractor } from '../baml/index.js'; type AppVariables = { validatedBody: unknown; @@ -95,6 +96,7 @@ export function createHttpServer(config: HttpServerConfig) { conversationId: request.conversationId, provider: request.provider, model: request.model, + hasResponseSchema: !!request.responseSchema, }); c.header('Content-Type', 'text/plain; charset=utf-8'); @@ -125,7 +127,7 @@ export function createHttpServer(config: HttpServerConfig) { mcpServerUrl, }); - await agent.execute(request.message, honoStream, abortSignal); + await agent.execute(request.message, honoStream, abortSignal, request.responseSchema); } catch (error) { const errorMessage = error instanceof Error ? 
error.message : 'Agent execution failed'; logger.error('Agent execution error', { @@ -210,6 +212,43 @@ export function createHttpServer(config: HttpServerConfig) { }); }); + app.post('/extract', validateRequest(ExtractRequestSchema), async (c) => { + const request = c.get('validatedBody') as ExtractRequest; + + logger.info('Extract request received', { + provider: request.provider, + model: request.model, + queryLength: request.query.length, + contentLength: request.content.length, + }); + + try { + const extractor = getBAMLExtractor(); + const result = await extractor.extract( + request.query, + request.content, + request.schema, + { + provider: request.provider, + model: request.model, + apiKey: request.apiKey, + baseUrl: request.baseUrl, + resourceName: request.resourceName, + region: request.region, + accessKeyId: request.accessKeyId, + secretAccessKey: request.secretAccessKey, + sessionToken: request.sessionToken, + } + ); + + return c.json({ success: true, data: result }); + } catch (error) { + const errorMessage = error instanceof Error ? 
error.message : 'Extraction failed'; + logger.error('Extract error', { error: errorMessage }); + return c.json({ success: false, error: errorMessage }, 500); + } + }); + app.delete('/chat/:conversationId', (c) => { const conversationId = c.req.param('conversationId'); const deleted = sessionManager.delete(conversationId); diff --git a/packages/agent/src/http/types.ts b/packages/agent/src/http/types.ts index 6da0379..6b2c266 100644 --- a/packages/agent/src/http/types.ts +++ b/packages/agent/src/http/types.ts @@ -3,14 +3,24 @@ import { VercelAIConfigSchema } from '../agent/gemini-vercel-sdk-adapter/types.j /** * Chat request schema extends VercelAIConfig with request-specific fields + * responseSchema accepts any valid JSON Schema object */ export const ChatRequestSchema = VercelAIConfigSchema.extend({ conversationId: z.string().uuid(), message: z.string().min(1, 'Message cannot be empty'), + responseSchema: z.any().optional(), }); export type ChatRequest = z.infer; +export const ExtractRequestSchema = VercelAIConfigSchema.extend({ + query: z.string().min(1, 'Query cannot be empty'), + content: z.string().min(1, 'Content cannot be empty'), + schema: z.any(), +}); + +export type ExtractRequest = z.infer; + export interface HttpServerConfig { port: number; host?: string; From 327ece78b5a38c969f756b97067be197667a8abb Mon Sep 17 00:00:00 2001 From: shivammittal274 Date: Sat, 29 Nov 2025 00:54:26 +0530 Subject: [PATCH 2/4] baml structured output using vercel ai sdk --- packages/agent/src/baml/extractor.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/agent/src/baml/extractor.ts b/packages/agent/src/baml/extractor.ts index cf35d1e..7a32c6d 100644 --- a/packages/agent/src/baml/extractor.ts +++ b/packages/agent/src/baml/extractor.ts @@ -180,8 +180,6 @@ export class BAMLExtractor { provider: providerConfig.provider, }); - console.log('BAML prompt', prompt); - // 3. 
import type { Content, Part } from '@google/genai';

/** Default cap on the extraction context, in characters (~8k tokens). */
const MAX_CONTEXT_LENGTH = 32000;

/**
 * Return the text carried by a single part, or '' when the part has no
 * string `text` field (e.g. function calls, inline data).
 */
export function extractTextFromPart(part: Part): string {
  return typeof part.text === 'string' ? part.text : '';
}

/**
 * Concatenate the text of every part in a message, one part per line.
 * Parts without text are skipped; a message without parts yields ''.
 */
export function extractTextFromContent(content: Content): string {
  if (!content.parts) return '';

  return content.parts
    .map(extractTextFromPart)
    .filter(Boolean)
    .join('\n');
}

/**
 * Build the text that structured-output extraction runs against.
 *
 * Takes the last `maxResponses` messages whose role is 'model', keeps the
 * ones that contain text, and joins them with a `---` separator. When the
 * result exceeds `maxLength` characters the oldest text is dropped so the
 * most recent output is preserved.
 *
 * @param history - Conversation history, oldest first.
 * @param maxResponses - How many trailing model messages to consider.
 * @param maxLength - Maximum length of the returned string, in UTF-16 code units.
 * @returns The joined context, or null when there is nothing to extract from
 *   (no model messages, no text in them, or a non-positive limit).
 */
export function buildExtractionContext(
  history: Content[],
  maxResponses: number = 4,
  maxLength: number = MAX_CONTEXT_LENGTH,
): string | null {
  // Guard non-positive (and NaN) limits explicitly: slice(-0) is slice(0),
  // which would return the whole array/string instead of nothing.
  if (!(maxResponses >= 1) || !(maxLength >= 1)) {
    return null;
  }

  const texts = history
    .filter((msg) => msg.role === 'model')
    .slice(-maxResponses)
    .map(extractTextFromContent)
    .filter(Boolean);

  if (texts.length === 0) {
    return null;
  }

  let context = texts.join('\n\n---\n\n');

  // Truncate from the start if too long: the tail holds the latest output.
  if (context.length > maxLength) {
    context = context.slice(-maxLength);

    // The cut may land inside a surrogate pair; drop the orphaned low half
    // so the prompt never starts with an invalid code unit.
    const first = context.charCodeAt(0);
    if (first >= 0xdc00 && first <= 0xdfff) {
      context = context.slice(1);
    }
  }

  return context;
}
/**
 * JSON Schema to BAML Type Converter
 *
 * Converts JSON Schema definitions to BAML class definitions
 * for use with BAML's dynamic type system (@@dynamic)
 *
 * Supports OpenAI Structured Outputs compatible subset:
 * - type (single value or array such as ["string", "null"]), properties,
 *   items, required, description, enum
 * - Nested objects are converted to nested BAML classes
 * - NO: oneOf, anyOf, allOf, $ref (not supported by OpenAI)
 */

export type JSONSchemaType =
  | 'object'
  | 'string'
  | 'number'
  | 'integer'
  | 'boolean'
  | 'array'
  | 'null';

export interface JSONSchema {
  type?: JSONSchemaType | JSONSchemaType[];
  properties?: Record<string, JSONSchema>;
  items?: JSONSchema;
  required?: string[];
  description?: string;
  enum?: string[];
  additionalProperties?: boolean;
}

interface ConversionContext {
  /** Finished class definitions keyed by class name, dependencies first. */
  generatedClasses: Map<string, string>;
  /** Every class name handed out so far, including classes still being built. */
  usedNames: Set<string>;
}

interface BamlType {
  /** Type expression that can safely be suffixed with `[]` or ` | null`. */
  base: string;
  /** Whether the schema itself admits null, independent of `required`. */
  nullable: boolean;
}

function capitalize(str: string): string {
  return str.charAt(0).toUpperCase() + str.slice(1);
}

/** Escape a value for use inside a double-quoted BAML string. */
function escapeBamlString(value: string): string {
  return value
    .replace(/\\/g, '\\\\') // backslashes first so later escapes are not doubled
    .replace(/"/g, '\\"')
    .replace(/\r\n|\r|\n/g, '\\n');
}

/** Normalise `schema.type` (absent, single, or array) into a list. */
function declaredTypes(schema: JSONSchema): JSONSchemaType[] {
  if (schema.type === undefined) return [];
  return Array.isArray(schema.type) ? schema.type : [schema.type];
}

/**
 * Pick a class name for a nested object and mark it as taken.
 * `Parent + Field` can coincide for different paths (`fooBar` vs `foo.bar`),
 * so a numeric suffix is appended until the name is unique.
 */
function reserveClassName(fieldName: string, parentName: string, ctx: ConversionContext): string {
  const baseName = `${parentName}${capitalize(fieldName)}`;
  let candidate = baseName;
  for (let suffix = 2; ctx.usedNames.has(candidate); suffix++) {
    candidate = `${baseName}${suffix}`;
  }
  ctx.usedNames.add(candidate);
  return candidate;
}

/** Render one concrete (non-null) JSON Schema type as a BAML type expression. */
function resolveSingleType(
  kind: JSONSchemaType | undefined,
  schema: JSONSchema,
  fieldName: string,
  parentClassName: string,
  ctx: ConversionContext
): string {
  switch (kind) {
    case 'string':
      return 'string';
    case 'number':
      return 'float';
    case 'integer':
      return 'int';
    case 'boolean':
      return 'bool';
    case 'array': {
      if (!schema.items) return 'string[]';
      const item = resolveType(schema.items, `${fieldName}Item`, parentClassName, ctx);
      // Parenthesise so `[]` applies to the whole nullable item type.
      return item.nullable ? `(${item.base} | null)[]` : `${item.base}[]`;
    }
    default: {
      // 'object', a missing type, or a bare 'null': anything with properties
      // becomes its own class.
      if (schema.properties) {
        const nestedClassName = reserveClassName(fieldName, parentClassName, ctx);
        generateClass(nestedClassName, schema, ctx);
        return nestedClassName;
      }
      return kind === 'object' ? 'map<string, string>' : 'string';
    }
  }
}

/** Convert a property schema into its BAML type and intrinsic nullability. */
function resolveType(
  schema: JSONSchema,
  fieldName: string,
  parentClassName: string,
  ctx: ConversionContext
): BamlType {
  const declared = declaredTypes(schema);
  const concrete = declared.filter((t) => t !== 'null');
  // Typed as string[], but request bodies are unvalidated JSON and nullable
  // enums conventionally list null as a member.
  const enumValues: readonly unknown[] = schema.enum ?? [];
  const nullable = concrete.length < declared.length || enumValues.includes(null);

  const literals = enumValues
    .filter((v) => v !== null)
    .map((v) => `"${escapeBamlString(String(v))}"`);
  if (literals.length > 0) {
    return { base: `(${literals.join(' | ')})`, nullable };
  }

  const kinds: (JSONSchemaType | undefined)[] = concrete.length > 0 ? concrete : [undefined];
  const variants = [
    ...new Set(kinds.map((k) => resolveSingleType(k, schema, fieldName, parentClassName, ctx))),
  ];
  const joined = variants.join(' | ');
  return { base: variants.length > 1 ? `(${joined})` : joined, nullable };
}

/**
 * Emit the BAML class for an object schema (and, recursively, for every
 * nested object) into `ctx.generatedClasses`. Nested classes are stored
 * before the class that references them.
 */
function generateClass(className: string, schema: JSONSchema, ctx: ConversionContext): void {
  if (ctx.generatedClasses.has(className)) return;

  const requiredFields = new Set(schema.required ?? []);
  const lines: string[] = [`class ${className} {`];

  for (const [fieldName, fieldSchema] of Object.entries(schema.properties ?? {})) {
    const { base, nullable } = resolveType(fieldSchema, fieldName, className, ctx);
    const optional = nullable || !requiredFields.has(fieldName);

    let fieldDef = `  ${fieldName} ${base}${optional ? ' | null' : ''}`;

    if (fieldSchema.description) {
      fieldDef += ` @description("${escapeBamlString(fieldSchema.description)}")`;
    }

    lines.push(fieldDef);
  }

  lines.push('}');
  ctx.generatedClasses.set(className, lines.join('\n'));
}

/**
 * Convert JSON Schema to BAML class definitions
 *
 * @param schema - JSON Schema object (root must be an object with properties)
 * @param className - Name for the root BAML class (default: 'ExtractedData')
 * @returns BAML source: one class per (nested) object, dependencies first,
 *   followed by the `dynamic class Response` that exposes the root as `data`
 * @throws Error when the root schema is not an object type with properties
 *
 * @example
 * const schema = {
 *   type: 'object',
 *   properties: {
 *     name: { type: 'string' },
 *     age: { type: 'integer' },
 *     address: {
 *       type: 'object',
 *       properties: {
 *         city: { type: 'string' },
 *         zip: { type: 'string' }
 *       }
 *     }
 *   },
 *   required: ['name']
 * };
 *
 * jsonSchemaToBAML(schema, 'User');
 * // Returns:
 * // class UserAddress {
 * //   city string | null
 * //   zip string | null
 * // }
 * //
 * // class User {
 * //   name string
 * //   age int | null
 * //   address UserAddress | null
 * // }
 * //
 * // dynamic class Response {
 * //   data User
 * // }
 */
export function jsonSchemaToBAML(schema: JSONSchema, className = 'ExtractedData'): string {
  if (!schema || !schema.properties || !declaredTypes(schema).includes('object')) {
    throw new Error('Schema must be an object type with properties');
  }

  const ctx: ConversionContext = {
    generatedClasses: new Map(),
    // `Response` is the dynamic wrapper class; never hand it to a nested object.
    usedNames: new Set(['Response', className]),
  };

  generateClass(className, schema, ctx);

  // Map insertion order is already dependency-first: a nested class is stored
  // before the class that references it.
  const parts: string[] = [...ctx.generatedClasses.values()];
  parts.push(['dynamic class Response {', `  data ${className}`, '}'].join('\n'));

  return parts.join('\n\n');
}