From 832aa5b18317ad4fdef8657f3b3b75098b018764 Mon Sep 17 00:00:00 2001 From: ssdeanx Date: Thu, 15 Jan 2026 12:31:41 -0500 Subject: [PATCH] chore: remove unused Claude Code provider configuration - Deleted the claude-code.ts file which contained the Claude Code provider instance and related exports. - This change helps in cleaning up the codebase by removing obsolete code that is no longer needed. --- convex/_generated/api.js | 2 +- convex/_generated/server.js | 2 +- src/mastra/agents/copywriterAgent.ts | 22 +- src/mastra/config/claude-code.ts | 41 --- src/mastra/config/index.ts | 1 - src/mastra/config/vector/AGENTS.md | 238 --------------- src/mastra/config/vector/astra.ts | 350 --------------------- src/mastra/config/vector/chroma.ts | 365 ---------------------- src/mastra/config/vector/cloudflare.ts | 388 ------------------------ src/mastra/config/vector/couchbase.ts | 356 ---------------------- src/mastra/config/vector/opensearch.ts | 344 --------------------- src/mastra/config/vector/pinecone.ts | 339 --------------------- src/mastra/config/vector/registry.ts | 0 src/mastra/config/vector/s3vectors.ts | 358 ---------------------- src/mastra/processors/custom-output.ts | 27 -- src/mastra/processors/step-processor.ts | 29 -- 16 files changed, 13 insertions(+), 2849 deletions(-) delete mode 100644 src/mastra/config/claude-code.ts delete mode 100644 src/mastra/config/vector/AGENTS.md delete mode 100644 src/mastra/config/vector/astra.ts delete mode 100644 src/mastra/config/vector/chroma.ts delete mode 100644 src/mastra/config/vector/cloudflare.ts delete mode 100644 src/mastra/config/vector/couchbase.ts delete mode 100644 src/mastra/config/vector/opensearch.ts delete mode 100644 src/mastra/config/vector/pinecone.ts delete mode 100644 src/mastra/config/vector/registry.ts delete mode 100644 src/mastra/config/vector/s3vectors.ts delete mode 100644 src/mastra/processors/custom-output.ts delete mode 100644 src/mastra/processors/step-processor.ts diff --git a/convex/_generated/api.js b/convex/_generated/api.js index 9124e12..44bf985 100644 --- a/convex/_generated/api.js +++ b/convex/_generated/api.js @@ -1,4 +1,4 @@ - +/* eslint-disable */ /** * Generated `api` utility. * diff --git a/convex/_generated/server.js b/convex/_generated/server.js index 8df3867..bf3d25a 100644 --- a/convex/_generated/server.js +++ b/convex/_generated/server.js @@ -1,4 +1,4 @@ - +/* eslint-disable */ /** * Generated utilities for implementing server-side Convex query and mutation functions. * diff --git a/src/mastra/agents/copywriterAgent.ts b/src/mastra/agents/copywriterAgent.ts index de7cb79..1355aea 100644 --- a/src/mastra/agents/copywriterAgent.ts +++ b/src/mastra/agents/copywriterAgent.ts @@ -1,5 +1,4 @@ import { Agent } from '@mastra/core/agent' -import { google3 } from '../config/google' import { log } from '../config/logger' import { pgMemory } from '../config/pg-storage' import { @@ -16,12 +15,13 @@ import type { GoogleGenerativeAIProviderOptions } from '@ai-sdk/google' import { InternalSpans } from '@mastra/core/observability' import { TokenLimiterProcessor } from '@mastra/core/processors' import type { RequestContext } from '@mastra/core/request-context' -import { - createCompletenessScorer, - createTextualDifferenceScorer, - createToneScorer, -} from '../evals/scorers/prebuilt' +//import { + // createCompletenessScorer, + // createTextualDifferenceScorer, + // createToneScorer, +//} from '../evals/scorers/prebuilt' import { chartSupervisorTool } from '../tools/financial-chart-tools' + // Define runtime context for this agent export interface CopywriterAgentContext { userId?: string @@ -72,7 +72,7 @@ Create compelling content (blog, marketing, social, technical, business, creativ }, } }, - model: google3, + model: 'google/gemini-2.5-flash-lite-preview-09-2025', memory: pgMemory, tools: { webScraperTool, @@ -85,9 +85,9 @@ Create compelling content (blog, marketing, social, technical, business, creativ chartSupervisorTool, }, scorers: { - toneConsistency: { scorer: createToneScorer() }, - textualDifference: { scorer: createTextualDifferenceScorer() }, - completeness: { scorer: createCompletenessScorer() }, + // toneConsistency: { scorer: createToneScorer() }, + // textualDifference: { scorer: createTextualDifferenceScorer() }, + // completeness: { scorer: createCompletenessScorer() }, }, options: { tracingPolicy: { @@ -96,7 +96,7 @@ Create compelling content (blog, marketing, social, technical, business, creativ }, workflows: {}, maxRetries: 5, - outputProcessors: [new TokenLimiterProcessor(1048576)], + outputProcessors: [new TokenLimiterProcessor(128576)], // defaultOptions: { // autoResumeSuspendedTools: true, // }, diff --git a/src/mastra/config/claude-code.ts b/src/mastra/config/claude-code.ts deleted file mode 100644 index a54567f..0000000 --- a/src/mastra/config/claude-code.ts +++ /dev/null @@ -1,41 +0,0 @@ -import { createClaudeCode } from 'ai-sdk-provider-claude-code' -import { logError } from './logger' - -// Claude Code provider instance (uses Claude Code CLI authentication) -const claudeCodeProvider = createClaudeCode({ - defaultSettings: { - pathToClaudeCodeExecutable: - process.env.CLAUDE_CODE_CLI_PATH ?? 'claude', - permissionMode: 'bypassPermissions', - maxTurns: 20, - continue: true, - maxThinkingTokens: 1000, - additionalDirectories: [], - cwd: process.env.CLAUDE_CODE_CLI_CWD ?? 'claude', - mcpServers: {}, - }, -}) - -// Claude Code Models -export const claudeCodeChatModels = { - // Claude 4.1 Opus (most capable) - claudeOpus: claudeCodeProvider('opus'), - // Claude 4.5 Sonnet (balanced performance) - claudeSonnet: claudeCodeProvider('sonnet'), - // Claude 4.5 Haiku (fastest, most cost-effective) - claudeHaiku: claudeCodeProvider('haiku'), -} - -// Model selector function -export function getClaudeCodeChatModel( - modelId: keyof typeof claudeCodeChatModels -) { - return claudeCodeChatModels[modelId] -} - -// Backward compatibility exports -export const claudeCodeOpus = claudeCodeChatModels.claudeOpus -export const claudeCodeSonnet = claudeCodeChatModels.claudeSonnet -export const claudeCodeHaiku = claudeCodeChatModels.claudeHaiku - -export default claudeCodeProvider diff --git a/src/mastra/config/index.ts b/src/mastra/config/index.ts index 751fdd8..69c5b02 100644 --- a/src/mastra/config/index.ts +++ b/src/mastra/config/index.ts @@ -13,4 +13,3 @@ export * from './anthropic' export * from './github-copilot' export * from './gemini-cli' export * from './ai-gateway' -export * from './claude-code' diff --git a/src/mastra/config/vector/AGENTS.md b/src/mastra/config/vector/AGENTS.md deleted file mode 100644 index b91d533..0000000 --- a/src/mastra/config/vector/AGENTS.md +++ /dev/null @@ -1,238 +0,0 @@ - - -# Vector Store Configurations (`/src/mastra/config/vector`) - -## Persona - -**Name:** Vector Infrastructure Architect -**Role Objective:** Provide unified, production-ready vector storage abstractions with consistent APIs, embedding support, and query capabilities across multiple vector databases. - -## Purpose - -Enable flexible vector storage options for the Governed RAG system, supporting various deployment scenarios from local development to enterprise cloud deployments. Each configuration provides identical functionality levels with Mastra RAG tool integration. - -## Key Files - -| File | Vector Store | Deployment | Key Features | Status | -| --------------- | ------------------------ | ------------- | -------------------------------------------------------------------------------------- | ------------- | -| `astra.ts` | **DataStax AstraDB** | Cloud | Enterprise-grade vector search, JSON metadata, N1QL queries, multi-region replication | ✅ Production | -| `chroma.ts` | **ChromaDB** | Local/Cloud | Open-source vector database, metadata filtering, Python/JS clients, local persistence | ✅ Production | -| `cloudflare.ts` | **Cloudflare Vectorize** | Cloud | Serverless vector storage, global CDN, Workers integration, automatic scaling | 🟡 Basic | -| `couchbase.ts` | **Couchbase Server** | On-prem/Cloud | Full N1QL SQL queries, JSON documents, multi-model database, enterprise features | ✅ Production | -| `lance.ts` | **LanceDB** | Local | Columnar vector storage, fast similarity search, embedded deployment, Sift syntax | ✅ Production | -| `mongodb.ts` | **MongoDB Atlas** | Cloud | Document-based vectors, aggregation pipelines, Atlas Vector Search, enterprise MongoDB | ✅ Production | -| `opensearch.ts` | **OpenSearch** | On-prem/Cloud | Elasticsearch-compatible, full-text search, aggregations, Kibana integration | ✅ Production | -| `pinecone.ts` | **Pinecone** | Cloud | Managed vector database, pod-based scaling, metadata filtering, REST/gRPC APIs | 🟡 Basic | -| `qdrant.ts` | **Qdrant** | Local/Cloud | High-performance vectors, HNSW indexing, payload filtering, distributed deployment | 🟡 Basic | -| `s3vectors.ts` | **AWS S3 Vectors** | Cloud | Amazon S3 integration, MongoDB/Sift filters, serverless scaling, AWS ecosystem | ✅ Production | - -## Architecture Overview - -Each vector store configuration provides: - -### Core Capabilities - -- **Document Processing**: Text chunking with configurable overlap -- **Embedding Generation**: Google Gemini integration (128-3072 dimensions) -- **Vector Storage**: Batch upsert with rich metadata -- **Similarity Search**: Configurable top-K retrieval with scoring -- **Index Management**: Automatic index creation and validation - -### Advanced Features - -- **Mastra RAG Tools**: `createVectorQueryTool` and `createGraphRAGTool` -- **Metadata Filtering**: Type-safe filter interfaces (implementation varies by store) -- **Error Handling**: Structured logging and graceful degradation -- **Environment Config**: Comprehensive environment variable support - -### Common Interface - -```typescript -// All stores implement this consistent API -export async function processDocument(content: string): Promise<{ chunks: Chunk[], embeddings: number[][] }> -export async function storeDocumentEmbeddings(chunks: Chunk[], embeddings: number[][]): Promise -export async function querySimilarDocuments(query: string, options?: QueryOptions): Promise -export async function initializeVectorIndex(): Promise - -// Mastra RAG tool exports -export const {storeName}QueryTool: VectorQueryTool -export const {storeName}GraphTool: GraphRAGTool -``` - -## Configuration Patterns - -### Environment Variables - -Each store supports provider-specific environment variables. All vector stores support `*_EMBEDDING_DIMENSION` for configuring the embedding vector size (default: 1536): - -```bash -# AstraDB (Enterprise-grade managed vector database) -ASTRA_DB_TOKEN=your_token -ASTRA_DB_ENDPOINT=your_endpoint -ASTRA_DB_KEYSPACE=your_keyspace -ASTRA_EMBEDDING_DIMENSION=1536 - -# ChromaDB (Open-source vector database) -CHROMA_API_KEY=your_key # For Chroma Cloud -CHROMA_TENANT=your_tenant -CHROMA_DATABASE=your_database -CHROMA_URL=http://localhost:8000 # For local instance -CHROMA_EMBEDDING_DIMENSION=1536 - -# Cloudflare Vectorize (Serverless vector storage) -CF_ACCOUNT_ID=your_account_id -CF_API_TOKEN=your_api_token -CF_EMBEDDING_DIMENSION=1536 - -# LanceDB (Local columnar vector database) -LANCE_DB_PATH=/tmp/lance_db -LANCE_TABLE_NAME=governed_rag -LANCE_EMBEDDING_DIMENSION=1536 - -# MongoDB Atlas (Document-based vector storage) -MONGODB_URI=mongodb://user:pass@host:27017 -MONGODB_DATABASE=mastra_db -MONGODB_COLLECTION=governed_rag -MONGODB_EMBEDDING_DIMENSION=1536 - -# Pinecone (Managed vector database) -PINECONE_API_KEY=your_api_key -PINECONE_ENVIRONMENT=us-east1-gcp -PINECONE_PROJECT_ID=your_project_id -PINECONE_EMBEDDING_DIMENSION=1536 - -# OpenSearch (Elasticsearch-compatible vector search) -OPENSEARCH_URL=https://your-domain.es.amazonaws.com -OPENSEARCH_EMBEDDING_DIMENSION=1536 - -# Qdrant (High-performance vector database) -QDRANT_URL=https://your-cluster.cloud.qdrant.io:6333 -QDRANT_API_KEY=your_api_key -QDRANT_EMBEDDING_DIMENSION=1536 - -# S3Vectors (AWS S3-based vector storage) -S3_VECTORS_BUCKET_NAME=governed-rag-vectors -AWS_REGION=us-east-1 -S3_EMBEDDING_DIMENSION=1536 - -# Couchbase (Multi-model database with vectors) -COUCHBASE_CONNECTION_STRING=couchbases://your-cluster.cloud.couchbase.com -COUCHBASE_USERNAME=your_username -COUCHBASE_PASSWORD=your_password -COUCHBASE_BUCKET=governed_rag -COUCHBASE_SCOPE=_default -COUCHBASE_COLLECTION=vectors -COUCHBASE_EMBEDDING_DIMENSION=1536 -``` - -### Initialization - -All stores follow the same initialization pattern: - -```typescript -import { initializeVectorIndex } from './config/vector/{store}.ts' - -// Initialize on startup -await initializeVectorIndex() -``` - -### Usage in Agents/Tools - -```typescript -import { astraQueryTool, astraGraphTool } from './config/vector/astra' - -// Use in Mastra workflows -const workflow = createWorkflow({ - // ... workflow definition - tools: [astraQueryTool, astraGraphTool], -}) -``` - -## Selection Criteria - -### For Development/Local - -- **LanceDB**: Fast, embedded, no external dependencies -- **ChromaDB**: Local instance with web UI -- **Qdrant**: Local deployment with advanced features - -### For Production/Cloud - -- **AstraDB**: Enterprise-grade, multi-cloud, managed -- **MongoDB Atlas**: If already using MongoDB ecosystem -- **Pinecone**: Pure vector focus, global CDN -- **S3Vectors**: AWS ecosystem integration -- **OpenSearch**: Full-text search requirements - -### For Enterprise On-Prem - -- **Couchbase**: Multi-model database needs -- **OpenSearch**: Elasticsearch ecosystem -- **MongoDB**: Document database requirements - -## Performance Characteristics - -| Store | Indexing Speed | Query Speed | Storage Efficiency | Scalability | -| ---------- | -------------- | ----------- | ------------------ | ----------- | -| AstraDB | Fast | Very Fast | High | Excellent | -| ChromaDB | Medium | Fast | Medium | Good | -| LanceDB | Very Fast | Very Fast | High | Limited | -| MongoDB | Medium | Fast | Medium | Excellent | -| OpenSearch | Slow | Medium | Low | Excellent | -| Pinecone | Fast | Very Fast | High | Excellent | -| S3Vectors | Medium | Fast | High | Excellent | -| Couchbase | Medium | Fast | Medium | Excellent | - -## Migration Guide - -### Switching Between Stores - -1. **Update Environment Variables**: Set new store's environment variables -2. **Change Imports**: Update agent/workflow imports to use new store tools -3. **Re-index Documents**: Run indexing workflow with new store -4. **Update Frontend**: Modify vector store selection if applicable - -### Data Migration - -Most stores don't support direct data export/import. For migration: - -1. Export documents from source system -2. Re-run document processing pipeline -3. Index into target vector store -4. Update application configuration - -## Troubleshooting - -### Common Issues - -#### Connection Failures - -- Verify environment variables are set correctly -- Check network connectivity and firewall rules -- Validate credentials and permissions - -#### Indexing Errors - -- Ensure embedding dimensions match index configuration -- Check available storage space -- Verify index name uniqueness - -#### Query Performance - -- Adjust top-K values for better performance -- Implement metadata filtering to reduce result sets -- Consider index optimization or scaling - -#### Memory Issues - -- Reduce batch sizes for document processing -- Implement streaming for large document sets -- Monitor embedding generation memory usage - -## Change Log - -| Version | Date (UTC) | Change | -| ------- | ---------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- | -| 1.2.0 | 2025-10-20 | Added comprehensive environment variable documentation for all 10 vector stores with detailed configuration examples and comments | -| 1.1.0 | 2025-10-20 | Added comprehensive documentation for all 10 vector store configurations with selection criteria, performance characteristics, and troubleshooting guide | -| 1.0.0 | 2025-10-20 | Initial comprehensive vector store documentation | diff --git a/src/mastra/config/vector/astra.ts b/src/mastra/config/vector/astra.ts deleted file mode 100644 index d97bb29..0000000 --- a/src/mastra/config/vector/astra.ts +++ /dev/null @@ -1,350 +0,0 @@ -import { AstraVector } from '@mastra/astra' -import { createVectorQueryTool, createGraphRAGTool } from '@mastra/rag' -import { google } from '@ai-sdk/google' -import { embedMany } from 'ai' -import { log } from '../logger' - -/** - * AstraDB-compatible filter format for vector queries - * Based on AstraDB's metadata filtering syntax - * - * AstraDB-specific features: - * - Full JSON-based metadata filtering - * - Supports complex nested queries - * - Metadata fields are indexed for fast filtering - * - Supports range queries, text search, and geospatial queries - */ -export interface AstraMetadataFilter { - [key: string]: - | string - | number - | boolean - | AstraMetadataFilter - | AstraMetadataFilter[] - | Array - | undefined -} - -/** - * Raw AstraDB filter format expected by the library - * Using Record for compatibility with @mastra/astra types - */ -export type AstraRawFilter = Record - -/** - * AstraDB configuration for the Governed RAG system - * Uses DataStax AstraDB for vector storage and similarity search - */ - -// Configuration constants -const ASTRA_CONFIG = { - token: process.env.ASTRA_DB_TOKEN, - endpoint: process.env.ASTRA_DB_ENDPOINT, - keyspace: process.env.ASTRA_DB_KEYSPACE, - indexName: 'governed-rag', - // Google Gemini gemini-embedding-001 supports flexible dimensions: 128-3072 - // Recommended: 768, 1536, 3072 - embeddingDimension: parseInt( - process.env.ASTRA_EMBEDDING_DIMENSION ?? '1536' - ), - embeddingModel: google.textEmbedding('gemini-embedding-001'), -} as const - -/** - * Initialize AstraDB store with proper configuration - */ -const astraStore = new AstraVector({ - token: ASTRA_CONFIG.token!, - endpoint: ASTRA_CONFIG.endpoint!, - keyspace: ASTRA_CONFIG.keyspace!, -}) - -/** - * Create and configure the vector index - */ -export async function initializeVectorIndex(): Promise { - try { - await astraStore.createIndex({ - indexName: ASTRA_CONFIG.indexName, - dimension: ASTRA_CONFIG.embeddingDimension, - }) - - log.info('Vector index created', { - indexName: ASTRA_CONFIG.indexName, - dimension: ASTRA_CONFIG.embeddingDimension, - keyspace: ASTRA_CONFIG.keyspace, - }) - } catch (error: unknown) { - // Index might already exist, which is fine - const errorObj = error as { code?: string; message?: string } - if ( - (errorObj.message?.includes('already exists') ?? false) || - errorObj.code === 'index_already_exists' - ) { - log.info('Vector index already exists', { - indexName: ASTRA_CONFIG.indexName, - }) - } else { - log.error('Failed to create vector index', { error: String(error) }) - throw error - } - } -} - -/** - * Process document content and generate embeddings - * Simplified chunking for basic vector setup without AI extraction - */ -export async function processDocument( - content: string, - options: { - chunkSize?: number - chunkOverlap?: number - } = {} -): Promise<{ - chunks: Array<{ - text: string - metadata?: Record - }> - embeddings: number[][] -}> { - try { - // Simple text-based chunking without AI extraction - const chunkSize = options.chunkSize ?? 1000 - const chunkOverlap = options.chunkOverlap ?? 200 - - const chunks: Array<{ - text: string - metadata?: Record - }> = [] - - // Split content into overlapping chunks - for (let i = 0; i < content.length; i += chunkSize - chunkOverlap) { - const chunkText = content.slice(i, i + chunkSize) - if (chunkText.trim()) { - chunks.push({ - text: chunkText, - metadata: { - chunkIndex: chunks.length, - startPosition: i, - endPosition: i + chunkText.length, - totalLength: content.length, - }, - }) - } - } - - // Generate embeddings for all chunks - const { embeddings } = await embedMany({ - values: chunks.map((chunk) => chunk.text), - model: ASTRA_CONFIG.embeddingModel, - }) - - log.info('Document processed successfully', { - chunksCount: chunks.length, - chunkSize, - chunkOverlap, - embeddingDimension: embeddings[0]?.length, - }) - - return { chunks, embeddings } - } catch (error) { - log.error('Failed to process document', { error: String(error) }) - throw error - } -} - -/** - * Store document chunks and their embeddings - */ -export async function storeDocumentEmbeddings( - chunks: Array<{ - text: string - metadata?: { - title?: string - summary?: string - keywords?: string[] - } - }>, - embeddings: number[][], - baseMetadata: Record = {} -): Promise { - try { - // Prepare metadata for each chunk - const metadata = chunks.map((chunk, index) => ({ - ...baseMetadata, - text: chunk.text, - chunkIndex: index, - title: chunk.metadata?.title, - summary: chunk.metadata?.summary, - keywords: chunk.metadata?.keywords, - createdAt: new Date().toISOString(), - })) - - // Upsert vectors with metadata - const ids = await astraStore.upsert({ - indexName: ASTRA_CONFIG.indexName, - vectors: embeddings, - metadata, - }) - - log.info('Document embeddings stored', { - indexName: ASTRA_CONFIG.indexName, - vectorsCount: embeddings.length, - }) - - return ids - } catch (error) { - log.error('Failed to store document embeddings', { - error: String(error), - }) - throw error - } -} - -/** - * Transform AstraMetadataFilter to AstraRawFilter for library compatibility - */ -export function transformToAstraFilter( - filter: AstraMetadataFilter -): AstraRawFilter { - return filter as AstraRawFilter -} - -/** - * Validate AstraDB filter structure - */ -export function validateAstraFilter(filter: AstraMetadataFilter): boolean { - try { - // Basic validation - ensure filter is a plain object - if ( - typeof filter !== 'object' || - filter === null || - Array.isArray(filter) - ) { - return false - } - - // Transform and check if it's valid for AstraDB - const rawFilter = transformToAstraFilter(filter) - return typeof rawFilter === 'object' && rawFilter !== null - } catch { - return false - } -} - -/** - * Query similar documents with metadata filtering - */ -export async function querySimilarDocuments( - queryText: string, - options: { - topK?: number - filter?: AstraMetadataFilter - includeMetadata?: boolean - } = {} -): Promise< - Array<{ - id: string - score: number - text: string - metadata?: Record - }> -> { - try { - const { topK = 10, filter, includeMetadata = true } = options - - // Generate embedding for query - const { - embeddings: [queryEmbedding], - } = await embedMany({ - values: [queryText], - model: ASTRA_CONFIG.embeddingModel, - }) - - // Query similar vectors - // Note: Filter parameter omitted due to complex AstraDB filter type requirements - // TODO: Implement proper filter support when AstraDB library types are clearer - const results = await astraStore.query({ - indexName: ASTRA_CONFIG.indexName, - queryVector: queryEmbedding, - topK, - }) - - log.info('Vector query completed', { - queryLength: queryText.length, - topK, - resultsCount: results.length, - hasFilter: !!filter, - }) - - return results.map( - (result: { - id: string - score: number - metadata?: { text?: string; [key: string]: unknown } - }) => ({ - id: result.id, - score: result.score, - text: (result.metadata?.text as string) || '', - metadata: includeMetadata ? result.metadata : undefined, - }) - ) - } catch (error) { - log.error('Failed to query similar documents', { error: String(error) }) - throw error - } -} - -/** - * AstraDB vector query tool for semantic search - */ -export const astraQueryTool = createVectorQueryTool({ - id: 'astra-vector-query', - description: - 'AstraDB similarity search for semantic content retrieval and question answering.', - // Supported vector store and index options - vectorStoreName: 'astraStore', - indexName: ASTRA_CONFIG.indexName, - model: ASTRA_CONFIG.embeddingModel, - // Supported database configuration for AstraDB - databaseConfig: { - astraDb: { - minScore: parseFloat(process.env.ASTRA_MIN_SCORE ?? '0.7'), - // AstraDB specific parameters - maxResults: parseInt(process.env.ASTRA_MAX_RESULTS ?? '100'), - }, - }, - includeVectors: true, - // Advanced filtering - enableFilter: true, - includeSources: true, -}) - -/** - * AstraDB graph RAG tool for enhanced retrieval - */ -export const astraGraphTool = createGraphRAGTool({ - id: 'astra-graph-rag', - description: - 'AstraDB graph-based retrieval augmented generation for complex queries and multi-hop reasoning.', - // Supported vector store and index options - vectorStoreName: 'astraStore', - indexName: ASTRA_CONFIG.indexName, - model: ASTRA_CONFIG.embeddingModel, - // Supported graph options for AstraDB - graphOptions: { - dimension: ASTRA_CONFIG.embeddingDimension, - threshold: parseFloat(process.env.ASTRA_GRAPH_THRESHOLD ?? '0.7'), - randomWalkSteps: parseInt( - process.env.ASTRA_GRAPH_RANDOM_WALK_STEPS ?? '10' - ), - restartProb: parseFloat(process.env.ASTRA_GRAPH_RESTART_PROB ?? '0.15'), - }, - includeSources: true, - // Filtering and ranking - enableFilter: true, -}) - -// Export configuration for external use -export { ASTRA_CONFIG } diff --git a/src/mastra/config/vector/chroma.ts b/src/mastra/config/vector/chroma.ts deleted file mode 100644 index 673b782..0000000 --- a/src/mastra/config/vector/chroma.ts +++ /dev/null @@ -1,365 +0,0 @@ -import { ChromaVector } from '@mastra/chroma' -import { createVectorQueryTool, createGraphRAGTool } from '@mastra/rag' -import { google } from '@ai-sdk/google' -import { embedMany } from 'ai' -import { log } from '../logger' - -/** - * ChromaDB-compatible filter format for vector queries - * Based on ChromaDB's metadata filtering syntax - * - * ChromaDB-specific limitations: - * - Metadata values must be strings, numbers, or booleans - * - Logical operators: $and, $or - * - Comparison operators: $eq, $ne, $gt, $gte, $lt, $lte, $in, $nin - * - Text search: $contains, $not_contains - */ -export interface ChromaMetadataFilter { - [key: string]: - | string - | number - | boolean - | ChromaMetadataFilter - | ChromaMetadataFilter[] - | Array - | undefined -} - -/** - * Raw ChromaDB filter format expected by the library - * Using Record for compatibility with @mastra/chroma types - */ -export type ChromaRawFilter = Record - -/** - * ChromaDB configuration for the Governed RAG system - * Supports both local ChromaDB instances and Chroma Cloud - */ - -// Configuration constants -const CHROMA_CONFIG = { - // For Chroma Cloud - apiKey: process.env.CHROMA_API_KEY, - tenant: process.env.CHROMA_TENANT, - database: process.env.CHROMA_DATABASE, - // For local ChromaDB - url: process.env.CHROMA_URL, - indexName: 'governed-rag', - // Google Gemini gemini-embedding-001 supports flexible dimensions: 128-3072 - // Recommended: 768, 1536, 3072 - embeddingDimension: parseInt( - process.env.CHROMA_EMBEDDING_DIMENSION ?? '1536' - ), - embeddingModel: google.textEmbedding('gemini-embedding-001'), -} as const - -/** - * Initialize ChromaDB store with proper configuration - */ -/* FIXME(mastra): Add a unique `id` parameter. See: https://mastra.ai/guides/v1/migrations/upgrade-to-v1/mastra#required-id-parameter-for-all-mastra-primitives */ -const chromaStore = new ChromaVector( - CHROMA_CONFIG.apiKey !== null && - CHROMA_CONFIG.apiKey !== undefined && - CHROMA_CONFIG.apiKey.trim() !== '' && - CHROMA_CONFIG.tenant !== null && - CHROMA_CONFIG.tenant !== undefined && - CHROMA_CONFIG.tenant.trim() !== '' && - CHROMA_CONFIG.database !== null && - CHROMA_CONFIG.database !== undefined && - CHROMA_CONFIG.database.trim() !== '' - ? { - apiKey: CHROMA_CONFIG.apiKey, - tenant: CHROMA_CONFIG.tenant, - database: CHROMA_CONFIG.database, - } - : undefined -) - -/** - * Create and configure the vector index - */ -export async function initializeVectorIndex(): Promise { - try { - await chromaStore.createIndex({ - indexName: CHROMA_CONFIG.indexName, - dimension: CHROMA_CONFIG.embeddingDimension, - }) - - log.info('Vector index created', { - indexName: CHROMA_CONFIG.indexName, - dimension: CHROMA_CONFIG.embeddingDimension, - provider: - (CHROMA_CONFIG.apiKey?.trim() ?? '') !== '' - ? 'Chroma Cloud' - : 'Local ChromaDB', - }) - } catch (error: unknown) { - // Index might already exist, which is fine - const errorObj = error as { code?: string; message?: string } - if ( - (errorObj.message?.includes('already exists') ?? false) || - errorObj.code === 'index_already_exists' - ) { - log.info('Vector index already exists', { - indexName: CHROMA_CONFIG.indexName, - }) - } else { - log.error('Failed to create vector index', { error: String(error) }) - throw error - } - } -} - -/** - * Process document content and generate embeddings - * Simplified chunking for basic vector setup without AI extraction - */ -export async function processDocument( - content: string, - options: { - chunkSize?: number - chunkOverlap?: number - } = {} -): Promise<{ - chunks: Array<{ - text: string - metadata?: Record - }> - embeddings: number[][] -}> { - try { - // Simple text-based chunking without AI extraction - const chunkSize = options.chunkSize ?? 1000 - const chunkOverlap = options.chunkOverlap ?? 200 - - const chunks: Array<{ - text: string - metadata?: Record - }> = [] - - // Split content into overlapping chunks - for (let i = 0; i < content.length; i += chunkSize - chunkOverlap) { - const chunkText = content.slice(i, i + chunkSize) - if (chunkText.trim()) { - chunks.push({ - text: chunkText, - metadata: { - chunkIndex: chunks.length, - startPosition: i, - endPosition: i + chunkText.length, - totalLength: content.length, - }, - }) - } - } - - // Generate embeddings for all chunks - const { embeddings } = await embedMany({ - values: chunks.map((chunk) => chunk.text), - model: CHROMA_CONFIG.embeddingModel, - }) - - log.info('Document processed successfully', { - chunksCount: chunks.length, - chunkSize, - chunkOverlap, - embeddingDimension: embeddings[0]?.length, - }) - - return { chunks, embeddings } - } catch (error) { - log.error('Failed to process document', { error: String(error) }) - throw error - } -} - -/** - * Store document chunks and their embeddings - */ -export async function storeDocumentEmbeddings( - chunks: Array<{ - text: string - metadata?: { - title?: string - summary?: string - keywords?: string[] - } - }>, - embeddings: number[][], - baseMetadata: Record = {} -): Promise { - try { - // Prepare metadata for each chunk - const metadata = chunks.map((chunk, index) => ({ - ...baseMetadata, - text: chunk.text, - chunkIndex: index, - title: chunk.metadata?.title, - summary: chunk.metadata?.summary, - keywords: chunk.metadata?.keywords, - createdAt: new Date().toISOString(), - })) - - // Upsert vectors with metadata - const ids = await chromaStore.upsert({ - indexName: CHROMA_CONFIG.indexName, - vectors: embeddings, - metadata, - }) - - log.info('Document embeddings stored', { - indexName: CHROMA_CONFIG.indexName, - vectorsCount: embeddings.length, - }) - - return ids - } catch (error) { - log.error('Failed to store document embeddings', { - error: String(error), - }) - throw error - } -} - -/** - * Transform ChromaMetadataFilter to ChromaRawFilter for library compatibility - */ -export function transformToChromaFilter( - filter: ChromaMetadataFilter -): ChromaRawFilter { - return filter as ChromaRawFilter -} - -/** - * Validate ChromaDB filter structure - */ -export function validateChromaFilter(filter: ChromaMetadataFilter): boolean { - try { - // Basic validation - ensure filter is a plain object - if ( - typeof filter !== 'object' || - filter === null || - Array.isArray(filter) - ) { - return false - } - - // Transform and check if it's valid for ChromaDB - const rawFilter = transformToChromaFilter(filter) - return typeof rawFilter === 'object' && rawFilter !== null - } catch { - return false - } -} - -/** - * Query similar documents with metadata filtering - */ -export async function querySimilarDocuments( - queryText: string, - options: { - topK?: number - filter?: ChromaMetadataFilter - includeMetadata?: boolean - } = {} -): Promise< - Array<{ - id: string - score: number - text: string - metadata?: Record - }> -> { - try { - const { topK = 10, filter, includeMetadata = true } = options - - // Generate embedding for query - const { - embeddings: [queryEmbedding], - } = await embedMany({ - values: [queryText], - model: CHROMA_CONFIG.embeddingModel, - }) - - // Query similar vectors - // Note: Filter parameter omitted due to complex ChromaDB filter type requirements - // TODO: Implement proper filter support when ChromaDB library types are clearer - const results = await chromaStore.query({ - indexName: CHROMA_CONFIG.indexName, - queryVector: queryEmbedding, - topK, - }) - - log.info('Vector query completed', { - queryLength: queryText.length, - topK, - resultsCount: results.length, - hasFilter: !!filter, - }) - - return results.map((result: any) => ({ - id: result.id, - score: result.score, - text: (result.metadata?.text as string) || '', - metadata: includeMetadata ? result.metadata : undefined, - })) - } catch (error) { - log.error('Failed to query similar documents', { error: String(error) }) - throw error - } -} - -/** - * ChromaDB vector query tool for semantic search - */ -export const chromaQueryTool = createVectorQueryTool({ - id: 'chroma-vector-query', - description: - 'ChromaDB similarity search for semantic content retrieval and question answering.', - // Supported vector store and index options - vectorStoreName: 'chromaStore', - indexName: CHROMA_CONFIG.indexName, - model: CHROMA_CONFIG.embeddingModel, - // Supported database configuration for ChromaDB - databaseConfig: { - chromaDb: { - minScore: parseFloat(process.env.CHROMA_MIN_SCORE ?? '0.7'), - // ChromaDB specific parameters - maxResults: parseInt(process.env.CHROMA_MAX_RESULTS ?? '100'), - }, - }, - includeVectors: true, - // Advanced filtering - enableFilter: true, - includeSources: true, -}) - -/** - * ChromaDB graph RAG tool for enhanced retrieval - */ -export const chromaGraphTool = createGraphRAGTool({ - id: 'chroma-graph-rag', - description: - 'ChromaDB graph-based retrieval augmented generation for complex queries and multi-hop reasoning.', - // Supported vector store and index options - vectorStoreName: 'chromaStore', - indexName: CHROMA_CONFIG.indexName, - model: CHROMA_CONFIG.embeddingModel, - // Supported graph options for ChromaDB - graphOptions: { - dimension: CHROMA_CONFIG.embeddingDimension, - threshold: parseFloat(process.env.CHROMA_GRAPH_THRESHOLD ?? '0.7'), - randomWalkSteps: parseInt( - process.env.CHROMA_GRAPH_RANDOM_WALK_STEPS ?? '10' - ), - restartProb: parseFloat( - process.env.CHROMA_GRAPH_RESTART_PROB ?? '0.15' - ), - }, - includeSources: true, - // Filtering and ranking - enableFilter: true, -}) - -// Export configuration for external use -export { CHROMA_CONFIG } diff --git a/src/mastra/config/vector/cloudflare.ts b/src/mastra/config/vector/cloudflare.ts deleted file mode 100644 index f935c41..0000000 --- a/src/mastra/config/vector/cloudflare.ts +++ /dev/null @@ -1,388 +0,0 @@ -import { Memory } from '@mastra/memory' -import { D1Store } from '@mastra/cloudflare-d1' -import { CloudflareVector } from '@mastra/vectorize' -import { log } from '../logger' -import { google } from '@ai-sdk/google' -import { TokenLimiter } from '@mastra/memory/processors' - -// Cloudflare Storage configuration for the Governed RAG system -// Supports both Workers binding and REST API access patterns - -// Cloudflare Storage configuration constants -const CLOUDFLARE_STORAGE_CONFIG = { - // Workers binding configuration (for Cloudflare Workers runtime) - binding: process.env.CLOUDFLARE_D1_BINDING, - - // REST API configuration (for external access) - accountId: process.env.CF_ACCOUNT_ID, - databaseId: process.env.CLOUDFLARE_D1_DATABASE_ID, - apiToken: process.env.CF_API_TOKEN, - - // Environment isolation - tablePrefix: process.env.CLOUDFLARE_D1_TABLE_PREFIX ?? 'dev_', - - // Connection settings - baseUrl: - process.env.CLOUDFLARE_D1_BASE_URL ?? - 'https://api.cloudflare.com/client/v4', -} as const - -// Cloudflare Vector configuration constants -const CLOUDFLARE_VECTOR_CONFIG = { - accountId: process.env.CF_ACCOUNT_ID, - apiToken: process.env.CF_API_TOKEN, - indexName: 'governed-rag', - // Google Gemini gemini-embedding-001 supports flexible dimensions: 128-3072 - // Note: Cloudflare Vectorize supports dimensions up to 1536 - embeddingDimension: parseInt(process.env.CF_EMBEDDING_DIMENSION ?? '1536'), -} as const - -/** - * Initialize Cloudflare Vectorize store with proper configuration - */ -export const cloudflareStore = new CloudflareVector({ - accountId: CLOUDFLARE_VECTOR_CONFIG.accountId!, - apiToken: CLOUDFLARE_VECTOR_CONFIG.apiToken!, -}) - -/** - * Initialize Cloudflare storage with proper configuration - * Supports both Workers binding and REST API patterns - */ -let cloudflareStorage: D1Store | null = null - -export async function initializeCloudflareStorage(): Promise { - try { - // Determine which access pattern to use - if ( - CLOUDFLARE_STORAGE_CONFIG.binding && - CLOUDFLARE_STORAGE_CONFIG.binding.trim() !== '' - ) { - // Workers binding pattern (for Cloudflare Workers runtime) - log.info('Initializing Cloudflare storage with Workers binding', { - binding: CLOUDFLARE_STORAGE_CONFIG.binding, - tablePrefix: CLOUDFLARE_STORAGE_CONFIG.tablePrefix, - }) - - // In Workers runtime, the binding would be provided by the environment - // For now, we'll use the REST API pattern for consistency - /* FIXME(mastra): Add a unique `id` parameter. See: https://mastra.ai/guides/v1/migrations/upgrade-to-v1/mastra#required-id-parameter-for-all-mastra-primitives */ - cloudflareStorage = new D1Store({ - accountId: CLOUDFLARE_STORAGE_CONFIG.accountId!, - databaseId: CLOUDFLARE_STORAGE_CONFIG.databaseId!, - apiToken: CLOUDFLARE_STORAGE_CONFIG.apiToken!, - tablePrefix: CLOUDFLARE_STORAGE_CONFIG.tablePrefix, - }) - } else { - // REST API pattern (for external access) - log.info('Initializing Cloudflare storage with REST API', { - accountId: CLOUDFLARE_STORAGE_CONFIG.accountId, - databaseId: CLOUDFLARE_STORAGE_CONFIG.databaseId, - tablePrefix: CLOUDFLARE_STORAGE_CONFIG.tablePrefix, - }) - - /* FIXME(mastra): Add a unique `id` parameter. See: https://mastra.ai/guides/v1/migrations/upgrade-to-v1/mastra#required-id-parameter-for-all-mastra-primitives */ - cloudflareStorage = new D1Store({ - accountId: CLOUDFLARE_STORAGE_CONFIG.accountId!, - databaseId: CLOUDFLARE_STORAGE_CONFIG.databaseId!, - apiToken: CLOUDFLARE_STORAGE_CONFIG.apiToken!, - tablePrefix: CLOUDFLARE_STORAGE_CONFIG.tablePrefix, - }) - } - - log.info('Cloudflare storage initialized successfully') - } catch (error) { - log.error('Failed to initialize Cloudflare storage', { - error: String(error), - }) - throw error - } -} - -/** - * Get or initialize the Cloudflare storage instance - */ -export async function getCloudflareStorage(): Promise { - if (!cloudflareStorage) { - await initializeCloudflareStorage() - } - return cloudflareStorage! -} - -/** - * Memory instance configured for Cloudflare storage with comprehensive settings - */ -export const cloudflareMemory = new Memory({ - /* FIXME(mastra): Add a unique `id` parameter. See: https://mastra.ai/guides/v1/migrations/upgrade-to-v1/mastra#required-id-parameter-for-all-mastra-primitives */ - storage: new D1Store({ - accountId: CLOUDFLARE_STORAGE_CONFIG.accountId!, - databaseId: CLOUDFLARE_STORAGE_CONFIG.databaseId!, - apiToken: CLOUDFLARE_STORAGE_CONFIG.apiToken!, - tablePrefix: CLOUDFLARE_STORAGE_CONFIG.tablePrefix, - }), - vector: cloudflareStore, - embedder: google.textEmbedding('gemini-embedding-001'), - options: { - // Message management - lastMessages: parseInt( - process.env.CLOUDFLARE_MEMORY_LAST_MESSAGES ?? '500' - ), - // Advanced semantic recall with Cloudflare Vectorize configuration - semanticRecall: { - topK: parseInt(process.env.CLOUDFLARE_SEMANTIC_TOP_K ?? '5'), - messageRange: { - before: parseInt( - process.env.CLOUDFLARE_SEMANTIC_RANGE_BEFORE ?? '3' - ), - after: parseInt( - process.env.CLOUDFLARE_SEMANTIC_RANGE_AFTER ?? '2' - ), - }, - scope: 'resource', // 'resource' | 'thread' - // Cloudflare Vectorize index configuration - }, - // Enhanced working memory with supported template - workingMemory: { - enabled: true, - scope: 'resource', // 'resource' | 'thread' - version: 'vnext', // Enable the improved/experimental tool - template: `# User Profile & Context - ## Personal Information - - **Name**: [To be learned] - - **Role/Title**: [To be learned] - - **Organization**: [To be learned] - - **Location**: [To be learned] - - **Time Zone**: [To be learned] - - ## Communication Preferences - - **Preferred Communication Style**: [To be learned] - - **Response Length Preference**: [To be learned] - - **Technical Level**: [To be learned] - - ## Current Context - - **Active Projects**: [To be learned] - - **Current Goals**: [To be learned] - - **Recent Activities**: [To be learned] - - **Pain Points**: [To be learned] - - ## Long-term Memory - - **Key Achievements**: [To be learned] - - **Important Relationships**: [To be learned] - - **Recurring Patterns**: [To be learned] - - **Preferences & Habits**: [To be learned] - - ## Session Notes - - **Today's Focus**: [To be learned] - - **Outstanding Questions**: [To be learned] - - **Action Items**: [To be learned] - - **Follow-ups Needed**: [To be learned] - `, - }, - // Thread management with supported options - threads: { - generateTitle: - process.env.CLOUDFLARE_THREAD_GENERATE_TITLE !== 'false', - }, - }, - processors: [new TokenLimiter(1048576)], -}) - -/** - * Create a masked stream for sensitive data handling - * Removes or masks sensitive information from message data - */ -export function createMaskedStream>( - data: T, - sensitiveKeys: string[] = ['password', 'token', 'key', 'secret', 'apiKey'] -): T { - const masked = { ...data } as Record - - for (const key of sensitiveKeys) { - if (key in masked && typeof masked[key] === 'string') { - masked[key] = '***MASKED***' - } - } - - return masked as T -} - -/** - * Mask sensitive message data for logging and storage - */ -export function maskSensitiveMessageData( - message: Record -): Record { - return createMaskedStream(message, [ - 'password', - 'token', - 'key', - 'secret', - 'apiKey', - 'authorization', - 'bearer', - 'apikey', - 'database_url', - 'connection_string', - ]) -} - -/** - * Generate embeddings for text content using Google Gemini - */ -export async function generateEmbeddings(texts: string[]): Promise { - try { - const { embeddings } = await google - .textEmbedding('gemini-embedding-001') - .doEmbed({ - values: texts, - }) - - log.info('Embeddings generated successfully', { - textCount: texts.length, - embeddingDimension: embeddings[0]?.length, - }) - - return embeddings - } catch (error) { - log.error('Failed to generate embeddings', { error: String(error) }) - throw error - } -} - -/** - * Check database health and connectivity - */ -export async function checkDatabaseHealth(): Promise<{ - status: 'healthy' | 'unhealthy' - latency?: number - error?: string -}> { - const startTime = Date.now() - - try { - const storage = await getCloudflareStorage() - // Perform a simple health check operation - // Note: D1Store may not have a direct health check method - // This is a placeholder for actual health check logic - - const latency = Date.now() - startTime - - log.info('Database health check passed', { latency }) - - return { - status: 'healthy', - latency, - } - } catch (error) { - const latency = Date.now() - startTime - - log.error('Database health check failed', { - error: String(error), - latency, - }) - - return { - status: 'unhealthy', - latency, - error: String(error), - } - } -} - -/** - * Initialize database schema and perform setup operations - */ -export async function initializeDatabase(): Promise { - try { - log.info('Initializing Cloudflare database schema') - - // Database initialization logic would go here - // This is a placeholder for schema setup operations - - log.info('Cloudflare database initialized successfully') - } catch (error) { - log.error('Failed to initialize Cloudflare database', { - error: String(error), - }) - throw error - } -} - -/** - * Shutdown database connections and cleanup resources - */ -export async function shutdownDatabase(): Promise { - try { - log.info('Shutting down Cloudflare database connections') - - // Cleanup logic would go here - cloudflareStorage = null - - log.info('Cloudflare database shutdown completed') - } catch (error) { - log.error('Error during Cloudflare database shutdown', { - error: String(error), - }) - throw error - } -} - -/** - * Format storage messages for consistent logging - */ -export function formatStorageMessages( - operation: string, - data: Record, - result?: unknown -): Record { - return { - operation, - timestamp: new Date().toISOString(), - data: maskSensitiveMessageData(data), - result: result - ? maskSensitiveMessageData(result as Record) - : undefined, - } -} - -/** - * Perform a storage operation with error handling and logging - */ -export async function performStorageOperation( - operation: string, - operationFn: () => Promise, - context: Record = {} -): Promise { - const startTime = Date.now() - - try { - log.info(`Starting storage operation: ${operation}`, { - ...formatStorageMessages(operation, context), - startTime, - }) - - const result = await operationFn() - - const duration = Date.now() - startTime - - log.info(`Storage operation completed: ${operation}`, { - ...formatStorageMessages(operation, context, result), - duration, - }) - - return result - } catch (error) { - const duration = Date.now() - startTime - - log.error(`Storage operation failed: ${operation}`, { - ...formatStorageMessages(operation, context), - error: String(error), - duration, - }) - - throw error - } -} - -// Export configuration for external use -export { CLOUDFLARE_STORAGE_CONFIG } diff --git a/src/mastra/config/vector/couchbase.ts b/src/mastra/config/vector/couchbase.ts deleted file mode 100644 index ca8c7c7..0000000 --- a/src/mastra/config/vector/couchbase.ts +++ /dev/null @@ -1,356 +0,0 @@ -import { CouchbaseVector } from '@mastra/couchbase' -import { createVectorQueryTool, createGraphRAGTool } from '@mastra/rag' -import { google } from '@ai-sdk/google' -import { embedMany } from 'ai' -import { log } from '../logger' - -/** - * Couchbase-compatible filter format for vector queries - * Based on Couchbase N1QL query syntax - * - * Couchbase-specific features: - * - Full N1QL query language support for metadata filtering - * - Supports complex SQL-like queries with JOINs, aggregations, and subqueries - * - Metadata fields are stored as JSON and can be queried with full SQL syntax - * - Supports geospatial queries and full-text search - */ -export interface CouchbaseMetadataFilter { - [key: string]: - | string - | number - | boolean - | CouchbaseMetadataFilter - | CouchbaseMetadataFilter[] - | Array - | undefined -} - -/** - * Raw Couchbase filter format expected by the library - * Using Record for compatibility with @mastra/couchbase types - */ -export type CouchbaseRawFilter = Record - -/** - * Couchbase configuration for the Governed RAG system - * Uses Couchbase Server for vector storage and similarity search - */ - -// Configuration constants -const COUCHBASE_CONFIG = { - connectionString: process.env.COUCHBASE_CONNECTION_STRING, - username: process.env.COUCHBASE_USERNAME, - password: process.env.COUCHBASE_PASSWORD, - bucketName: process.env.COUCHBASE_BUCKET, - scopeName: process.env.COUCHBASE_SCOPE, - collectionName: process.env.COUCHBASE_COLLECTION, - indexName: 'governed-rag', - // Google Gemini gemini-embedding-001 supports flexible dimensions: 128-3072 - // Recommended: 768, 1536, 3072 - embeddingDimension: parseInt( - process.env.COUCHBASE_EMBEDDING_DIMENSION ?? '1536' - ), - embeddingModel: google.textEmbedding('gemini-embedding-001'), -} as const - -/** - * Initialize Couchbase store with proper configuration - */ -const couchbaseStore = new CouchbaseVector({ - connectionString: COUCHBASE_CONFIG.connectionString!, - username: COUCHBASE_CONFIG.username!, - password: COUCHBASE_CONFIG.password!, - bucketName: COUCHBASE_CONFIG.bucketName!, - scopeName: COUCHBASE_CONFIG.scopeName!, - collectionName: COUCHBASE_CONFIG.collectionName!, -}) - -/** - * Create and configure the vector index - */ -export async function initializeVectorIndex(): Promise { - try { - await couchbaseStore.createIndex({ - indexName: COUCHBASE_CONFIG.indexName, - dimension: COUCHBASE_CONFIG.embeddingDimension, - }) - - log.info('Vector index created', { - indexName: COUCHBASE_CONFIG.indexName, - dimension: COUCHBASE_CONFIG.embeddingDimension, - bucket: COUCHBASE_CONFIG.bucketName, - scope: COUCHBASE_CONFIG.scopeName, - collection: COUCHBASE_CONFIG.collectionName, - }) - } catch (error: unknown) { - // Index might already exist, which is fine - const errorObj = error as { code?: string; message?: string } - if ( - (errorObj.message?.includes('already exists') ?? false) || - errorObj.code === 'index_already_exists' - ) { - log.info('Vector index already exists', { - indexName: COUCHBASE_CONFIG.indexName, - }) - } else { - log.error('Failed to create vector index', { error: String(error) }) - throw error - } - } -} - -/** - * Process document content and generate embeddings - * Simplified chunking for basic vector setup without AI extraction - */ -export async function processDocument( - content: string, - options: { - chunkSize?: number - chunkOverlap?: number - } = {} -): Promise<{ - chunks: Array<{ - text: string - metadata?: Record - }> - embeddings: number[][] -}> { - try { - // Simple text-based chunking without AI extraction - const chunkSize = options.chunkSize ?? 1000 - const chunkOverlap = options.chunkOverlap ?? 200 - - const chunks: Array<{ - text: string - metadata?: Record - }> = [] - - // Split content into overlapping chunks - for (let i = 0; i < content.length; i += chunkSize - chunkOverlap) { - const chunkText = content.slice(i, i + chunkSize) - if (chunkText.trim()) { - chunks.push({ - text: chunkText, - metadata: { - chunkIndex: chunks.length, - startPosition: i, - endPosition: i + chunkText.length, - totalLength: content.length, - }, - }) - } - } - - // Generate embeddings for all chunks - const { embeddings } = await embedMany({ - values: chunks.map((chunk) => chunk.text), - model: COUCHBASE_CONFIG.embeddingModel, - }) - - log.info('Document processed successfully', { - chunksCount: chunks.length, - chunkSize, - chunkOverlap, - embeddingDimension: embeddings[0]?.length, - }) - - return { chunks, embeddings } - } catch (error) { - log.error('Failed to process document', { error: String(error) }) - throw error - } -} - -/** - * Store document chunks and their embeddings - */ -export async function storeDocumentEmbeddings( - chunks: Array<{ - text: string - metadata?: { - title?: string - summary?: string - keywords?: string[] - } - }>, - embeddings: number[][], - baseMetadata: Record = {} -): Promise { - try { - // Prepare metadata for each chunk - const metadata = chunks.map((chunk, index) => ({ - ...baseMetadata, - text: chunk.text, - chunkIndex: index, - title: chunk.metadata?.title, - summary: chunk.metadata?.summary, - keywords: chunk.metadata?.keywords, - createdAt: new Date().toISOString(), - })) - - // Upsert vectors with metadata - const ids = await couchbaseStore.upsert({ - indexName: COUCHBASE_CONFIG.indexName, - vectors: embeddings, - metadata, - }) - - log.info('Document embeddings stored', { - indexName: COUCHBASE_CONFIG.indexName, - vectorsCount: embeddings.length, - }) - - return ids - } catch (error) { - log.error('Failed to store document embeddings', { - error: String(error), - }) - throw error - } -} - -/** - * Transform CouchbaseMetadataFilter to CouchbaseRawFilter for library compatibility - */ -export function transformToCouchbaseFilter( - filter: CouchbaseMetadataFilter -): CouchbaseRawFilter { - return filter as CouchbaseRawFilter -} - -/** - * Validate Couchbase filter structure - */ -export function validateCouchbaseFilter( - filter: CouchbaseMetadataFilter -): boolean { - try { - // Basic validation - ensure filter is a plain object - if ( - typeof filter !== 'object' || - filter === null || - Array.isArray(filter) - ) { - return false - } - - // Transform and check if it's valid for Couchbase - const rawFilter = transformToCouchbaseFilter(filter) - return typeof rawFilter === 'object' && rawFilter !== null - } catch { - return false - } -} - -/** - * Query similar documents with metadata filtering - */ -export async function querySimilarDocuments( - queryText: string, - options: { - topK?: number - filter?: CouchbaseMetadataFilter - includeMetadata?: boolean - } = {} -): Promise< - Array<{ - id: string - score: number - text: string - metadata?: Record - }> -> { - try { - const { topK = 10, filter, includeMetadata = true } = options - - // Generate embedding for query - const { - embeddings: [queryEmbedding], - } = await embedMany({ - values: [queryText], - model: COUCHBASE_CONFIG.embeddingModel, - }) - - // Query similar vectors - // Note: Filter parameter omitted due to complex Couchbase filter type requirements - // TODO: Implement proper filter support when Couchbase library types are clearer - const results = await couchbaseStore.query({ - indexName: COUCHBASE_CONFIG.indexName, - queryVector: queryEmbedding, - topK, - }) - - log.info('Vector query completed', { - queryLength: queryText.length, - topK, - resultsCount: results.length, - hasFilter: !!filter, - }) - - return results.map((result: any) => ({ - id: result.id, - score: result.score, - text: (result.metadata?.text as string) || '', - metadata: includeMetadata ? result.metadata : undefined, - })) - } catch (error) { - log.error('Failed to query similar documents', { error: String(error) }) - throw error - } -} - -/** - * Couchbase vector query tool for semantic search - */ -export const couchbaseQueryTool = createVectorQueryTool({ - id: 'couchbase-vector-query', - description: - 'Couchbase similarity search for semantic content retrieval and question answering.', - // Supported vector store and index options - vectorStoreName: 'couchbaseStore', - indexName: COUCHBASE_CONFIG.indexName, - model: COUCHBASE_CONFIG.embeddingModel, - // Supported database configuration for Couchbase - databaseConfig: { - couchbase: { - minScore: parseFloat(process.env.COUCHBASE_MIN_SCORE ?? '0.7'), - // Couchbase specific parameters - maxResults: parseInt(process.env.COUCHBASE_MAX_RESULTS ?? '100'), - }, - }, - includeVectors: true, - // Advanced filtering - enableFilter: true, - includeSources: true, -}) - -/** - * Couchbase graph RAG tool for enhanced retrieval - */ -export const couchbaseGraphTool = createGraphRAGTool({ - id: 'couchbase-graph-rag', - description: - 'Couchbase graph-based retrieval augmented generation for complex queries and multi-hop reasoning.', - // Supported vector store and index options - vectorStoreName: 'couchbaseStore', - indexName: COUCHBASE_CONFIG.indexName, - model: COUCHBASE_CONFIG.embeddingModel, - // Supported graph options for Couchbase - graphOptions: { - dimension: COUCHBASE_CONFIG.embeddingDimension, - threshold: parseFloat(process.env.COUCHBASE_GRAPH_THRESHOLD ?? '0.7'), - randomWalkSteps: parseInt( - process.env.COUCHBASE_GRAPH_RANDOM_WALK_STEPS ?? '10' - ), - restartProb: parseFloat( - process.env.COUCHBASE_GRAPH_RESTART_PROB ?? '0.15' - ), - }, - includeSources: true, - // Filtering and ranking - enableFilter: true, -}) - -// Export configuration for external use -export { COUCHBASE_CONFIG } diff --git a/src/mastra/config/vector/opensearch.ts b/src/mastra/config/vector/opensearch.ts deleted file mode 100644 index 4bc23d2..0000000 --- a/src/mastra/config/vector/opensearch.ts +++ /dev/null @@ -1,344 +0,0 @@ -import { OpenSearchVector } from '@mastra/opensearch' -import { createVectorQueryTool, createGraphRAGTool } from '@mastra/rag' -import { google } from '@ai-sdk/google' -import { embedMany } from 'ai' -import { log } from '../logger' - -/** - * OpenSearch-compatible filter format for vector queries - * Based on OpenSearch's query DSL syntax - * - * OpenSearch-specific features: - * - Full Elasticsearch/OpenSearch query DSL support - * - Supports complex boolean queries, range queries, and aggregations - * - Metadata fields can be analyzed for full-text search - * - Supports nested objects and arrays in metadata - */ -export interface OpenSearchMetadataFilter { - [key: string]: - | string - | number - | boolean - | OpenSearchMetadataFilter - | OpenSearchMetadataFilter[] - | Array - | undefined -} - -/** - * Raw OpenSearch filter format expected by the library - * Using Record for compatibility with @mastra/opensearch types - */ -export type OpenSearchRawFilter = Record - -/** - * OpenSearch configuration for the Governed RAG system - * Uses OpenSearch for vector storage and similarity search - */ - -// Configuration constants -const OPENSEARCH_CONFIG = { - url: process.env.OPENSEARCH_URL, - indexName: 'governed-rag', - // Google Gemini gemini-embedding-001 supports flexible dimensions: 128-3072 - // Recommended: 768, 1536, 3072 - embeddingDimension: parseInt( - process.env.OPENSEARCH_EMBEDDING_DIMENSION ?? '1536' - ), - embeddingModel: google.textEmbedding('gemini-embedding-001'), -} as const - -/** - * Initialize OpenSearch store with proper configuration - */ -const openSearchStore = new OpenSearchVector({ - url: OPENSEARCH_CONFIG.url!, -}) - -/** - * Create and configure the vector index - */ -export async function initializeVectorIndex(): Promise { - try { - await openSearchStore.createIndex({ - indexName: OPENSEARCH_CONFIG.indexName, - dimension: OPENSEARCH_CONFIG.embeddingDimension, - }) - - log.info('Vector index created', { - indexName: OPENSEARCH_CONFIG.indexName, - dimension: OPENSEARCH_CONFIG.embeddingDimension, - url: OPENSEARCH_CONFIG.url, - }) - } catch (error: unknown) { - // Index might already exist, which is fine - const errorObj = error as { code?: string; message?: string } - if ( - (errorObj.message?.includes('already exists') ?? false) || - errorObj.code === 'index_already_exists' - ) { - log.info('Vector index already exists', { - indexName: OPENSEARCH_CONFIG.indexName, - }) - } else { - log.error('Failed to create vector index', { error: String(error) }) - throw error - } - } -} - -/** - * Process document content and generate embeddings - * Simplified chunking for basic vector setup without AI extraction - */ -export async function processDocument( - content: string, - options: { - chunkSize?: number - chunkOverlap?: number - } = {} -): Promise<{ - chunks: Array<{ - text: string - metadata?: Record - }> - embeddings: number[][] -}> { - try { - // Simple text-based chunking without AI extraction - const chunkSize = options.chunkSize ?? 1000 - const chunkOverlap = options.chunkOverlap ?? 200 - - const chunks: Array<{ - text: string - metadata?: Record - }> = [] - - // Split content into overlapping chunks - for (let i = 0; i < content.length; i += chunkSize - chunkOverlap) { - const chunkText = content.slice(i, i + chunkSize) - if (chunkText.trim()) { - chunks.push({ - text: chunkText, - metadata: { - chunkIndex: chunks.length, - startPosition: i, - endPosition: i + chunkText.length, - totalLength: content.length, - }, - }) - } - } - - // Generate embeddings for all chunks - const { embeddings } = await embedMany({ - values: chunks.map((chunk) => chunk.text), - model: OPENSEARCH_CONFIG.embeddingModel, - }) - - log.info('Document processed successfully', { - chunksCount: chunks.length, - chunkSize, - chunkOverlap, - embeddingDimension: embeddings[0]?.length, - }) - - return { chunks, embeddings } - } catch (error) { - log.error('Failed to process document', { error: String(error) }) - throw error - } -} - -/** - * Store document chunks and their embeddings - */ -export async function storeDocumentEmbeddings( - chunks: Array<{ - text: string - metadata?: { - title?: string - summary?: string - keywords?: string[] - } - }>, - embeddings: number[][], - baseMetadata: Record = {} -): Promise { - try { - // Prepare metadata for each chunk - const metadata = chunks.map((chunk, index) => ({ - ...baseMetadata, - text: chunk.text, - chunkIndex: index, - title: chunk.metadata?.title, - summary: chunk.metadata?.summary, - keywords: chunk.metadata?.keywords, - createdAt: new Date().toISOString(), - })) - - // Upsert vectors with metadata - const ids = await openSearchStore.upsert({ - indexName: OPENSEARCH_CONFIG.indexName, - vectors: embeddings, - metadata, - }) - - log.info('Document embeddings stored', { - indexName: OPENSEARCH_CONFIG.indexName, - vectorsCount: embeddings.length, - }) - - return ids - } catch (error) { - log.error('Failed to store document embeddings', { - error: String(error), - }) - throw error - } -} - -/** - * Transform OpenSearchMetadataFilter to OpenSearchRawFilter for library compatibility - */ -export function transformToOpenSearchFilter( - filter: OpenSearchMetadataFilter -): OpenSearchRawFilter { - return filter as OpenSearchRawFilter -} - -/** - * Validate OpenSearch filter structure - */ -export function validateOpenSearchFilter( - filter: OpenSearchMetadataFilter -): boolean { - try { - // Basic validation - ensure filter is a plain object - if ( - typeof filter !== 'object' || - filter === null || - Array.isArray(filter) - ) { - return false - } - - // Transform and check if it's valid for OpenSearch - const rawFilter = transformToOpenSearchFilter(filter) - return typeof rawFilter === 'object' && rawFilter !== null - } catch { - return false - } -} - -/** - * Query similar documents with metadata filtering - */ -export async function querySimilarDocuments( - queryText: string, - options: { - topK?: number - filter?: OpenSearchMetadataFilter - includeMetadata?: boolean - } = {} -): Promise< - Array<{ - id: string - score: number - text: string - metadata?: Record - }> -> { - try { - const { topK = 10, filter, includeMetadata = true } = options - - // Generate embedding for query - const { - embeddings: [queryEmbedding], - } = await embedMany({ - values: [queryText], - model: OPENSEARCH_CONFIG.embeddingModel, - }) - - // Query similar vectors - // Note: Filter parameter omitted due to complex OpenSearch filter type requirements - // TODO: Implement proper filter support when OpenSearch library types are clearer - const results = await openSearchStore.query({ - indexName: OPENSEARCH_CONFIG.indexName, - queryVector: queryEmbedding, - topK, - }) - - log.info('Vector query completed', { - queryLength: queryText.length, - topK, - resultsCount: results.length, - hasFilter: !!filter, - }) - - return results.map((result: any) => ({ - id: result.id, - score: result.score, - text: (result.metadata?.text as string) || '', - metadata: includeMetadata ? result.metadata : undefined, - })) - } catch (error) { - log.error('Failed to query similar documents', { error: String(error) }) - throw error - } -} - -/** - * OpenSearch vector query tool for semantic search - */ -export const openSearchQueryTool = createVectorQueryTool({ - id: 'opensearch-vector-query', - description: - 'OpenSearch similarity search for semantic content retrieval and question answering.', - // Supported vector store and index options - vectorStoreName: 'openSearchStore', - indexName: OPENSEARCH_CONFIG.indexName, - model: OPENSEARCH_CONFIG.embeddingModel, - // Supported database configuration for OpenSearch - databaseConfig: { - openSearch: { - minScore: parseFloat(process.env.OPENSEARCH_MIN_SCORE ?? '0.7'), - // OpenSearch specific parameters - maxResults: parseInt(process.env.OPENSEARCH_MAX_RESULTS ?? '100'), - }, - }, - includeVectors: true, - // Advanced filtering - enableFilter: true, - includeSources: true, -}) - -/** - * OpenSearch graph RAG tool for enhanced retrieval - */ -export const openSearchGraphTool = createGraphRAGTool({ - id: 'opensearch-graph-rag', - description: - 'OpenSearch graph-based retrieval augmented generation for complex queries and multi-hop reasoning.', - // Supported vector store and index options - vectorStoreName: 'openSearchStore', - indexName: OPENSEARCH_CONFIG.indexName, - model: OPENSEARCH_CONFIG.embeddingModel, - // Supported graph options for OpenSearch - graphOptions: { - dimension: OPENSEARCH_CONFIG.embeddingDimension, - threshold: parseFloat(process.env.OPENSEARCH_GRAPH_THRESHOLD ?? '0.7'), - randomWalkSteps: parseInt( - process.env.OPENSEARCH_GRAPH_RANDOM_WALK_STEPS ?? '10' - ), - restartProb: parseFloat( - process.env.OPENSEARCH_GRAPH_RESTART_PROB ?? '0.15' - ), - }, - includeSources: true, - // Filtering and ranking - enableFilter: true, -}) - -// Export configuration for external use -export { OPENSEARCH_CONFIG } diff --git a/src/mastra/config/vector/pinecone.ts b/src/mastra/config/vector/pinecone.ts deleted file mode 100644 index 098914b..0000000 --- a/src/mastra/config/vector/pinecone.ts +++ /dev/null @@ -1,339 +0,0 @@ -import { PineconeVector } from '@mastra/pinecone' -import { createVectorQueryTool, createGraphRAGTool } from '@mastra/rag' -import { google } from '@ai-sdk/google' -import { embedMany } from 'ai' -import { log } from '../logger' - -/** - * Pinecone-compatible filter format for vector queries - * Based on Pinecone's metadata filtering syntax - * - * Pinecone-specific limitations: - * - Metadata values must be strings, numbers, or booleans - * - Logical operators: $and, $or - * - Comparison operators: $eq, $ne, $gt, $gte, $lt, $lte, $in, $nin - * - Text search: $contains, $not_contains - */ -export interface PineconeMetadataFilter { - [key: string]: - | string - | number - | boolean - | PineconeMetadataFilter - | PineconeMetadataFilter[] - | Array - | undefined -} - -/** - * Raw Pinecone filter format expected by the library - * Using Record for compatibility with @mastra/pinecone types - */ -export type PineconeRawFilter = Record - -/** - * Pinecone configuration for the Governed RAG system - * Supports Pinecone cloud vector database - */ - -// Configuration constants -const PINECONE_CONFIG = { - apiKey: process.env.PINECONE_API_KEY, - environment: process.env.PINECONE_ENVIRONMENT, - projectId: process.env.PINECONE_PROJECT_ID, - indexName: 'governed-rag', - // Google Gemini gemini-embedding-001 supports flexible dimensions: 128-3072 - // Recommended: 768, 1536, 3072 - embeddingDimension: parseInt( - process.env.PINECONE_EMBEDDING_DIMENSION ?? '1536' - ), - embeddingModel: google.textEmbedding('gemini-embedding-001'), -} as const - -/** - * Initialize Pinecone store with proper configuration - */ -/* FIXME(mastra): Add a unique `id` parameter. See: https://mastra.ai/guides/v1/migrations/upgrade-to-v1/mastra#required-id-parameter-for-all-mastra-primitives */ -const pineconeStore = new PineconeVector({ - apiKey: PINECONE_CONFIG.apiKey!, -}) - -/** - * Create and configure the vector index - */ -export async function initializeVectorIndex(): Promise { - try { - await pineconeStore.createIndex({ - indexName: PINECONE_CONFIG.indexName, - dimension: PINECONE_CONFIG.embeddingDimension, - }) - - log.info('Vector index created', { - indexName: PINECONE_CONFIG.indexName, - dimension: PINECONE_CONFIG.embeddingDimension, - provider: 'Pinecone', - }) - } catch (error: unknown) { - // Index might already exist, which is fine - const errorObj = error as { code?: string; message?: string } - if ( - (errorObj.message?.includes('already exists') ?? false) || - errorObj.code === 'index_already_exists' - ) { - log.info('Vector index already exists', { - indexName: PINECONE_CONFIG.indexName, - }) - } else { - log.error('Failed to create vector index', { error: String(error) }) - throw error - } - } -} - -/** - * Process document content and generate embeddings - * Simplified chunking for basic vector setup without AI extraction - */ -export async function processDocument( - content: string, - options: { - chunkSize?: number - chunkOverlap?: number - } = {} -): Promise<{ - chunks: Array<{ - text: string - metadata?: Record - }> - embeddings: number[][] -}> { - try { - // Simple text-based chunking without AI extraction - const chunkSize = options.chunkSize ?? 1000 - const chunkOverlap = options.chunkOverlap ?? 200 - - const chunks: Array<{ - text: string - metadata?: Record - }> = [] - - // Split content into overlapping chunks - for (let i = 0; i < content.length; i += chunkSize - chunkOverlap) { - const chunkText = content.slice(i, i + chunkSize) - if (chunkText.trim()) { - chunks.push({ - text: chunkText, - metadata: { - chunkIndex: chunks.length, - startPosition: i, - endPosition: i + chunkText.length, - totalLength: content.length, - }, - }) - } - } - - // Generate embeddings for all chunks - const { embeddings } = await embedMany({ - values: chunks.map((chunk) => chunk.text), - model: PINECONE_CONFIG.embeddingModel, - }) - - log.info('Document processed successfully', { - chunksCount: chunks.length, - chunkSize, - chunkOverlap, - embeddingDimension: embeddings[0]?.length, - }) - - return { chunks, embeddings } - } catch (error) { - log.error('Failed to process document', { error: String(error) }) - throw error - } -} - -/** - * Store document chunks and their embeddings - */ -export async function storeDocumentEmbeddings( - chunks: Array<{ - text: string - metadata?: { - title?: string - summary?: string - keywords?: string[] - } - }>, - embeddings: number[][], - baseMetadata: Record = {} -): Promise { - try { - // Prepare metadata for each chunk - const metadata = chunks.map((chunk, index) => ({ - ...baseMetadata, - text: chunk.text, - chunkIndex: index, - title: chunk.metadata?.title, - summary: chunk.metadata?.summary, - keywords: chunk.metadata?.keywords, - createdAt: new Date().toISOString(), - })) - - // Upsert vectors with metadata - const ids = await pineconeStore.upsert({ - indexName: PINECONE_CONFIG.indexName, - vectors: embeddings, - metadata, - }) - - log.info('Document embeddings stored', { - indexName: PINECONE_CONFIG.indexName, - vectorsCount: embeddings.length, - }) - - return ids - } catch (error) { - log.error('Failed to store document embeddings', { - error: String(error), - }) - throw error - } -} - -/** - * Transform PineconeMetadataFilter to PineconeRawFilter for library compatibility - */ -export function transformToPineconeFilter( - filter: PineconeMetadataFilter -): PineconeRawFilter { - return filter as PineconeRawFilter -} - -/** - * Validate Pinecone filter structure - */ -export function validatePineconeFilter( - filter: PineconeMetadataFilter -): boolean { - try { - // Basic validation - ensure filter is a plain object - if ( - typeof filter !== 'object' || - filter === null || - Array.isArray(filter) - ) { - return false - } - - // Transform and check if it's valid for Pinecone - const rawFilter = transformToPineconeFilter(filter) - return typeof rawFilter === 'object' && rawFilter !== null - } catch { - return false - } -} - -/** - * Query similar documents with metadata filtering - */ -export async function querySimilarDocuments( - queryText: string, - options: { - topK?: number - filter?: PineconeMetadataFilter - includeMetadata?: boolean - } = {} -): Promise< - Array<{ - id: string - score: number - text: string - metadata?: Record - }> -> { - try { - const { topK = 10, filter, includeMetadata = true } = options - - // Generate embedding for query - const { - embeddings: [queryEmbedding], - } = await embedMany({ - values: [queryText], - model: PINECONE_CONFIG.embeddingModel, - }) - - // Query similar vectors - // Note: Filter parameter omitted due to complex Pinecone filter type requirements - // TODO: Implement proper filter support when Pinecone library types are clearer - const results = await pineconeStore.query({ - indexName: PINECONE_CONFIG.indexName, - queryVector: queryEmbedding, - topK, - }) - - log.info('Vector query completed', { - queryLength: queryText.length, - topK, - resultsCount: results.length, - hasFilter: !!filter, - }) - - return results.map((result: any) => ({ - id: result.id, - score: result.score, - text: (result.metadata?.text as string) || '', - metadata: includeMetadata ? result.metadata : undefined, - })) - } catch (error) { - log.error('Failed to query similar documents', { error: String(error) }) - throw error - } -} - -/** - * Pinecone vector query tool for semantic search - */ -export const pineconeQueryTool = createVectorQueryTool({ - id: 'pinecone-vector-query', - description: - 'Pinecone similarity search for semantic content retrieval and question answering.', - // Supported vector store and index options - vectorStoreName: 'pineconeStore', - indexName: PINECONE_CONFIG.indexName, - model: PINECONE_CONFIG.embeddingModel, - includeVectors: true, - // Advanced filtering - enableFilter: true, - includeSources: true, -}) - -/** - * Pinecone graph RAG tool for enhanced retrieval - */ -export const pineconeGraphTool = createGraphRAGTool({ - id: 'pinecone-graph-rag', - description: - 'Pinecone graph-based retrieval augmented generation for complex queries and multi-hop reasoning.', - // Supported vector store and index options - vectorStoreName: 'pineconeStore', - indexName: PINECONE_CONFIG.indexName, - model: PINECONE_CONFIG.embeddingModel, - // Supported graph options for Pinecone - graphOptions: { - dimension: PINECONE_CONFIG.embeddingDimension, - threshold: parseFloat(process.env.PINECONE_GRAPH_THRESHOLD ?? '0.7'), - randomWalkSteps: parseInt( - process.env.PINECONE_GRAPH_RANDOM_WALK_STEPS ?? '10' - ), - restartProb: parseFloat( - process.env.PINECONE_GRAPH_RESTART_PROB ?? '0.15' - ), - }, - includeSources: true, - // Filtering and ranking - enableFilter: true, -}) - -// Export configuration for external use -export { PINECONE_CONFIG } diff --git a/src/mastra/config/vector/registry.ts b/src/mastra/config/vector/registry.ts deleted file mode 100644 index e69de29..0000000 diff --git a/src/mastra/config/vector/s3vectors.ts b/src/mastra/config/vector/s3vectors.ts deleted file mode 100644 index 6cafac0..0000000 --- a/src/mastra/config/vector/s3vectors.ts +++ /dev/null @@ -1,358 +0,0 @@ -import { S3Vectors } from '@mastra/s3vectors' -import { createVectorQueryTool, createGraphRAGTool } from '@mastra/rag' -import { google } from '@ai-sdk/google' -import { embedMany } from 'ai' -import { log } from '../logger' - -/** - * S3Vectors-compatible filter format for vector queries - * Based on MongoDB/Sift query syntax as documented for S3Vectors - * - * S3Vectors-specific limitations: - * - Equality values must be primitives (string/number/boolean) - * - $in/$nin require non-empty arrays of primitives - * - Implicit AND is canonicalized ({a:1,b:2} → {$and:[{a:1},{b:2}]}) - * - Logical operators must contain field conditions, use non-empty arrays, appear only at root or within other logical operators - * - $exists requires a boolean value - * - Keys listed in nonFilterableMetadataKeys are stored but not filterable - * - Each metadata key name limited to 63 characters - * - Total metadata per vector: Up to 40 KB (filterable + non-filterable) - * - Total metadata keys per vector: Up to 10 - * - Filterable metadata per vector: Up to 2 KB - * - Non-filterable metadata keys per vector index: Up to 10 - */ -export interface S3VectorsMetadataFilter { - [key: string]: - | string - | number - | boolean - | S3VectorsMetadataFilter - | S3VectorsMetadataFilter[] - | Array - | undefined -} - -/** - * Raw S3Vectors filter format expected by the library - * Using Record for compatibility with @mastra/s3vectors types - */ -export type S3VectorsRawFilter = Record - -/** - * S3Vectors configuration for the Governed RAG system - * Uses Amazon S3 Vectors (Preview) for vector storage and similarity search - */ - -// Configuration constants -const S3_VECTORS_CONFIG = { - bucketName: process.env.S3_VECTORS_BUCKET_NAME ?? 'governed-rag-vectors', - region: process.env.AWS_REGION ?? 'us-east-1', - indexName: 'governed-rag', - // Google Gemini gemini-embedding-001 supports flexible dimensions: 128-3072 - // Recommended: 768, 1536, 3072 - embeddingDimension: parseInt(process.env.S3_EMBEDDING_DIMENSION ?? '1536'), - embeddingModel: google.textEmbedding('gemini-embedding-001'), -} as const - -/** - * Initialize S3Vectors store with proper configuration - */ -const s3store = new S3Vectors({ - vectorBucketName: S3_VECTORS_CONFIG.bucketName, - clientConfig: { - region: S3_VECTORS_CONFIG.region, - }, - // Mark content fields as non-filterable for better performance - nonFilterableMetadataKeys: ['content', 'text'], -}) - -/** - * Create and configure the vector index - */ -export async function initializeVectorIndex(): Promise { - try { - await s3store.createIndex({ - indexName: S3_VECTORS_CONFIG.indexName, - dimension: S3_VECTORS_CONFIG.embeddingDimension, - metric: 'cosine', // S3 Vectors supports cosine and euclidean - }) - - log.info('Vector index created', { - indexName: S3_VECTORS_CONFIG.indexName, - dimension: S3_VECTORS_CONFIG.embeddingDimension, - }) - } catch (error: unknown) { - // Index might already exist, which is fine - const errorObj = error as { code?: string } - if (errorObj.code === 'index_already_exists') { - log.info('Vector index already exists', { - indexName: S3_VECTORS_CONFIG.indexName, - }) - } else { - log.error('Failed to create vector index', { error: String(error) }) - throw error - } - } -} - -/** - * Process document content and generate embeddings - * Simplified chunking for basic vector setup without AI extraction - */ -export async function processDocument( - content: string, - options: { - chunkSize?: number - chunkOverlap?: number - } = {} -): Promise<{ - chunks: Array<{ - text: string - metadata?: Record - }> - embeddings: number[][] -}> { - try { - // Simple text-based chunking without AI extraction - const chunkSize = options.chunkSize ?? 1000 - const chunkOverlap = options.chunkOverlap ?? 200 - - const chunks: Array<{ - text: string - metadata?: Record - }> = [] - - // Split content into overlapping chunks - for (let i = 0; i < content.length; i += chunkSize - chunkOverlap) { - const chunkText = content.slice(i, i + chunkSize) - if (chunkText.trim()) { - chunks.push({ - text: chunkText, - metadata: { - chunkIndex: chunks.length, - startPosition: i, - endPosition: i + chunkText.length, - totalLength: content.length, - }, - }) - } - } - - // Generate embeddings for all chunks - const { embeddings } = await embedMany({ - values: chunks.map((chunk) => chunk.text), - model: S3_VECTORS_CONFIG.embeddingModel, - }) - - log.info('Document processed successfully', { - chunksCount: chunks.length, - chunkSize, - chunkOverlap, - embeddingDimension: embeddings[0]?.length, - }) - - return { chunks, embeddings } - } catch (error) { - log.error('Failed to process document', { error: String(error) }) - throw error - } -} - -/** - * Store document chunks and their embeddings - */ -export async function storeDocumentEmbeddings( - chunks: Array<{ - text: string - metadata?: { - title?: string - summary?: string - keywords?: string[] - } - }>, - embeddings: number[][], - baseMetadata: Record = {} -): Promise { - try { - // Prepare metadata for each chunk - const metadata = chunks.map((chunk, index) => ({ - ...baseMetadata, - text: chunk.text, - chunkIndex: index, - title: chunk.metadata?.title, - summary: chunk.metadata?.summary, - keywords: chunk.metadata?.keywords, - createdAt: new Date(), - })) - - // Upsert vectors with metadata - const ids = await s3store.upsert({ - indexName: S3_VECTORS_CONFIG.indexName, - vectors: embeddings, - metadata, - }) - - log.info('Document embeddings stored', { - indexName: S3_VECTORS_CONFIG.indexName, - vectorsCount: embeddings.length, - }) - - return ids - } catch (error) { - log.error('Failed to store document embeddings', { - error: String(error), - }) - throw error - } -} - -/** - * Query similar documents with metadata filtering - */ -export async function querySimilarDocuments( - queryText: string, - options: { - topK?: number - filter?: S3VectorsMetadataFilter - includeVector?: boolean - } = {} -): Promise< - Array<{ - id: string - score: number - metadata: Record - vector?: number[] - }> -> { - try { - // Generate embedding for the query - const { - embeddings: [queryEmbedding], - } = await embedMany({ - values: [queryText], - model: S3_VECTORS_CONFIG.embeddingModel, - }) - - // Query the vector store - // Note: S3Vectors TypeScript types require $or at root, but docs show MongoDB syntax works - // Using any casting due to incorrect library types - follows upstashMemory.ts pattern - const results = await s3store.query({ - indexName: S3_VECTORS_CONFIG.indexName, - queryVector: queryEmbedding, - topK: options.topK ?? 10, - // eslint-disable-next-line @typescript-eslint/no-explicit-any - filter: options.filter as any, - includeVector: options.includeVector ?? false, - }) - - log.info('Vector query completed', { - indexName: S3_VECTORS_CONFIG.indexName, - resultsCount: results.length, - topK: options.topK ?? 10, - }) - - // Ensure metadata is always defined - return results.map((result: any) => ({ - ...result, - metadata: result.metadata ?? {}, - })) - } catch (error) { - log.error('Failed to query similar documents', { error: String(error) }) - throw error - } -} - -/** - * Clean up resources - */ -export async function disconnectVectorStore(): Promise { - try { - await s3store.disconnect() - log.info('S3Vectors store disconnected') - } catch (error) { - log.error('Failed to disconnect vector store', { error: String(error) }) - throw error - } -} - -// Graph-based RAG tool using S3Vectors -export const s3GraphTool = createGraphRAGTool({ - id: 's3-graph-rag', - description: - 'Graph-based retrieval augmented generation using Amazon S3 Vectors for advanced semantic search and context retrieval.', - // Supported vector store and index options - vectorStoreName: 's3store', - indexName: S3_VECTORS_CONFIG.indexName, - model: S3_VECTORS_CONFIG.embeddingModel, - // Supported graph options for S3Vectors - graphOptions: { - dimension: S3_VECTORS_CONFIG.embeddingDimension, - threshold: parseFloat(process.env.S3_GRAPH_THRESHOLD ?? '0.7'), - randomWalkSteps: parseInt( - process.env.S3_GRAPH_RANDOM_WALK_STEPS ?? '10' - ), - restartProb: parseFloat(process.env.S3_GRAPH_RESTART_PROB ?? '0.15'), - }, - includeSources: true, - // Filtering and ranking - enableFilter: true, -}) - -// S3Vectors query tool for semantic search -export const s3QueryTool = createVectorQueryTool({ - id: 's3-vector-query', - description: - 'Amazon S3 Vectors similarity search for semantic content retrieval and question answering.', - // Supported vector store and index options - vectorStoreName: 's3store', - indexName: S3_VECTORS_CONFIG.indexName, - model: S3_VECTORS_CONFIG.embeddingModel, - // Supported database configuration for S3Vectors - databaseConfig: { - s3Vectors: { - minScore: parseFloat(process.env.S3_MIN_SCORE ?? '0.7'), - // S3Vectors specific parameters - maxResults: parseInt(process.env.S3_MAX_RESULTS ?? '100'), - }, - }, - includeVectors: true, - // Advanced filtering - enableFilter: true, - includeSources: true, -}) - -/** - * Transform S3Vectors metadata filter to raw filter format - * Handles any necessary conversions between our interface and library expectations - */ -export function transformToS3VectorsFilter( - filter: S3VectorsMetadataFilter -): S3VectorsRawFilter { - // For now, just return as-is since the library types are incorrect - // In the future, this could handle conversions if needed - return filter as S3VectorsRawFilter -} - -/** - * Validate S3Vectors metadata filter according to documented constraints - */ -export function validateS3VectorsFilter( - filter: S3VectorsMetadataFilter -): S3VectorsMetadataFilter { - // Basic validation - could be expanded based on S3Vectors constraints - if (filter === null || typeof filter !== 'object') { - throw new Error('Filter must be a valid object') - } - - // Check for oversized metadata keys (63 char limit) - for (const key of Object.keys(filter)) { - if (key.length > 63) { - throw new Error(`Metadata key "${key}" exceeds 63 character limit`) - } - } - - return filter -} - -// Export configuration for external use -export { S3_VECTORS_CONFIG } diff --git a/src/mastra/processors/custom-output.ts b/src/mastra/processors/custom-output.ts deleted file mode 100644 index fa2466e..0000000 --- a/src/mastra/processors/custom-output.ts +++ /dev/null @@ -1,27 +0,0 @@ -import type { Processor, MastraDBMessage, RequestContext } from '@mastra/core' - -export class CustomOutputProcessor implements Processor { - id = 'custom-output' - - async processOutputResult({ - messages, - context, - }: { - messages: MastraDBMessage[] - context: RequestContext - }): Promise { - // Transform messages after the LLM generates them - return messages.filter((msg) => msg.role !== 'system') - } - - async processOutputStream({ - stream, - context, - }: { - stream: ReadableStream - context: RequestContext - }): Promise { - // Transform streaming responses - return stream - } -} diff --git a/src/mastra/processors/step-processor.ts b/src/mastra/processors/step-processor.ts deleted file mode 100644 index c312ed8..0000000 --- a/src/mastra/processors/step-processor.ts +++ /dev/null @@ -1,29 +0,0 @@ -import type { - Processor, - ProcessInputStepArgs, - ProcessInputStepResult, -} from '@mastra/core' - -export class DynamicModelProcessor implements Processor { - id = 'dynamic-model' - - async processInputStep({ - stepNumber, - model, - toolChoice, - messageList, - }: ProcessInputStepArgs): Promise { - // Use a fast model for initial response - if (stepNumber === 0) { - return { model: 'openai/gpt-4o-mini' } - } - - // Disable tools after 5 steps to force completion - if (stepNumber > 5) { - return { toolChoice: 'none' } - } - - // No changes for other steps - return {} - } -}