From 4a61cc3959cc4b63c5d5526d8733bdea4bb284d7 Mon Sep 17 00:00:00 2001 From: Bernhard Merkle Date: Sat, 21 Feb 2026 23:05:27 +0100 Subject: [PATCH] - added plan for pydantic-ai migration (see PYDANTIC_AI.md) - enhance implementation and test to use either openai or azure - refactored provider selection into one place (instead of several) --- .env-example | 34 ++++ PYDANTIC_AI.md | 244 ++++++++++++++++++++++++ src/typeagent/aitools/embeddings.py | 5 +- src/typeagent/aitools/model_adapters.py | 53 ++++- src/typeagent/aitools/utils.py | 71 ++++--- tests/conftest.py | 4 +- tests/test_model_adapters.py | 132 ++++++++++++- tests/test_utils.py | 4 +- 8 files changed, 505 insertions(+), 42 deletions(-) create mode 100644 .env-example create mode 100644 PYDANTIC_AI.md diff --git a/.env-example b/.env-example new file mode 100644 index 0000000..e131564 --- /dev/null +++ b/.env-example @@ -0,0 +1,34 @@ +####################### +# openAI +####################### +#OPENAI_API_KEY=... +#OPENAI_MODEL=gpt-4o + +####################### +# azure openAI +####################### +# gpt-4o +AZURE_OPENAI_API_KEY= +AZURE_OPENAI_ENDPOINT= +# text-embedding-ada-002 +#AZURE_OPENAI_API_KEY_EMBEDDING= +#AZURE_OPENAI_ENDPOINT_EMBEDDING= +# text-embedding-3-small +AZURE_OPENAI_API_KEY_EMBEDDING= +AZURE_OPENAI_ENDPOINT_EMBEDDING= + +####################### +# Outlook mail dump (tools/mail/outlook_dump.py) +####################### +# User email address for login (pre-fills the sign-in page) +OUTLOOK_CLIENT_ID= +# Azure AD app registration client ID (GUID) +OUTLOOK_APPLICATION_CLIENT_ID= +# Azure AD tenant ID (GUID) +OUTLOOK_TENANT_ID= + +####################### +# pydantic-ai setup +####################### +PYDANTIC_AI_MODEL=azure:gpt-4o +PYDANTIC_AI_EMBEDDING_MODEL=azure:text-embedding-3-small \ No newline at end of file diff --git a/PYDANTIC_AI.md b/PYDANTIC_AI.md new file mode 100644 index 0000000..e899e2e --- /dev/null +++ b/PYDANTIC_AI.md @@ -0,0 +1,244 @@ +# Pydantic AI Migration Plan + 
+## Overview + +typeagent-py currently uses **TypeChat** as its primary structured-output LLM +library and **pydantic_ai** as an adapter layer for multi-provider model +wiring. This document proposes a phased plan for replacing TypeChat with +pydantic_ai Agents wherever it makes sense, while preserving the existing +query pipeline architecture. + +--- + +## Current State + +### Libraries in play + +| Library | Role | Dependency | +|---|---|---| +| `typechat` | Structured LLM output (JSON schema → prompt → validate → repair) | production | +| `pydantic-ai-slim[openai]` | Model adapter layer, embedding adapter, `make_agent()` (unused) | dev-only | +| `openai` | Direct embedding API calls (`AsyncOpenAI` / `AsyncAzureOpenAI`) | production | +| `pydantic` | Dataclass serialization/validation, schema classes | production | + +### TypeChat call sites (migration targets) + +There are **3 TypeChat translator** patterns, each wrapping a +`pydantic.dataclass` schema: + +| Call Site | Schema | Purpose | +|---|---|---| +| `convknowledge.py` → `KnowledgeExtractor.extract()` | `KnowledgeResponse` | Extract entities, actions, topics from messages | +| `searchlang.py` → `search_query_from_language()` | `SearchQuery` | Translate NL question → structured search query | +| `answers.py` → `generate_answer()` / `combine_answers()` | `AnswerResponse` | Generate answer from context | + +These 3 translators are created in: +- `ConversationBase.query()` (`conversation_base.py` – lazy init of query + answer translators) +- `KnowledgeExtractor.__init__()` (`convknowledge.py` – knowledge translator) +- `EmailMemorySettings.__init__()` (`email_memory.py` – query + answer translators) +- `MCPTypeChatModel` usage in MCP server (`mcp/server.py`) + +### pydantic_ai already used + +- `model_adapters.py` — `PydanticAIChatModel` wraps a pydantic_ai `Model` + back into TypeChat's `TypeChatLanguageModel` interface. This adapter is + a stepping stone; once TypeChat is gone, it is unnecessary. 
+- `model_adapters.py` — `PydanticAIEmbeddingModel` wraps `pydantic_ai.Embedder` + into `IEmbeddingModel`. This is clean and should be kept. +- `utils.py` — `make_agent()` creates a `pydantic_ai.Agent` with structured + output but **is never called** anywhere in the codebase. + +--- + +## Where Pydantic AI Services Make Sense + +### 1. Structured LLM Output (replace TypeChat translators) + +**Why**: pydantic_ai `Agent[None, T]` with `output_type=T` provides the same +structured-output flow TypeChat gives (schema → prompt → validate → retry) +but with: +- Native structured-output support (OpenAI JSON mode / tool calling) +- Built-in retries with configurable count +- No separate validator/translator/repair machinery +- Direct Pydantic model validation (our schemas already use `pydantic.dataclass`) +- Multi-provider support via `infer_model()` (25+ providers) +- Built-in logfire observability + +**What to replace**: + +| Current | Replacement | +|---|---| +| `TypeChatJsonTranslator[KnowledgeResponse]` | `Agent[None, KnowledgeResponse]` | +| `TypeChatJsonTranslator[SearchQuery]` | `Agent[None, SearchQuery]` | +| `TypeChatJsonTranslator[AnswerResponse]` | `Agent[None, AnswerResponse]` | +| `typechat.Result[T]` return type | native return or exception handling | +| `TypeChatLanguageModel` protocol | `pydantic_ai.models.Model` | +| `ModelWrapper` (Azure token refresh) | Handled by pydantic_ai provider | + +### 2. Model Configuration / Provider Wiring + +**Why**: `create_typechat_model()` manually parses env vars, constructs +OpenAI/Azure clients, handles token refresh. pydantic_ai's `infer_model()` +does all of this from a single `"provider:model"` string. + +**Already partially done**: `model_adapters.create_chat_model()` wraps +`infer_model()` → `PydanticAIChatModel` → TypeChat interface. After +removing TypeChat, this adapter layer collapses. + +### 3. Embedding Model Abstraction + +**Status**: Already done via `PydanticAIEmbeddingModel`. Keep as-is. 
+ +The existing `embeddings.py` (`OpenAIEmbeddingModel`) should remain +available as a fallback for direct OpenAI/Azure embedding calls with +fine-grained batching and token-aware truncation that pydantic_ai's +`Embedder` does not provide. + +### 4. MCP Server LLM Routing + +**Why**: `MCPTypeChatModel` converts TypeChat prompts to MCP sampling +messages. With pydantic_ai, this could be replaced by a custom +pydantic_ai `Model` implementation that routes through MCP, eliminating +the TypeChat intermediate format. + +### 5. Observability / Tracing + +**Why**: `setup_logfire()` manually instruments pydantic_ai and httpx. +With pydantic_ai as the primary LLM layer, logfire integration is +native — every agent call is automatically traced with input/output, +token usage, retries. + +--- + +## Where Pydantic AI Does NOT Make Sense + +- **Search execution** (`search.py`, `searchlib.py`): Pure index queries, + no LLM calls. No change needed. +- **Data ingestion** (`podcast_ingest.py`, `transcript_ingest.py`, + `email_import.py`): Regex/MIME parsing, no LLM calls. No change. +- **Core data structures** (`interfaces_core.py`, `kplib.py`): Pydantic + dataclasses for entities, actions, etc. Keep as-is (they become + pydantic_ai `output_type` directly). +- **Token counting / truncation** (`embeddings.py`): tiktoken-based logic + specific to OpenAI. Keep as-is. + +--- + +## Migration Plan + +### Phase 0: Preparation + +- [ ] Move `pydantic-ai-slim[openai]` from dev dependency to production + dependency in `pyproject.toml`. +- [ ] Decide on Azure identity integration approach: pydantic_ai's + `AzureProvider` vs keeping `AzureTokenProvider` wrapper. +- [ ] Add integration test fixtures that verify structured output with + pydantic_ai Agents (can start from the unused `make_agent()` pattern). + +### Phase 1: Agent Abstraction Layer + +Build a thin abstraction that wraps pydantic_ai `Agent` to match the +current call patterns, making the rest of the migration mechanical. 
+ +- [ ] Create `aitools/agents.py` with: + - `create_agent(model_spec, output_type, system_prompt, retries)` factory + - Agent wrapper that returns `typechat.Result[T]` for backward compat + during transition + - Azure identity token refresh support (if not handled by provider) +- [ ] Update `IKnowledgeExtractor` protocol to accept either TypeChat or + Agent-based implementations. + +### Phase 2: Knowledge Extraction Migration + +Replace `KnowledgeExtractor` in `convknowledge.py`. + +- [ ] Create `PydanticAIKnowledgeExtractor` as alternative implementation of + `IKnowledgeExtractor`. +- [ ] Port the custom system prompt from `create_request_prompt()` to + pydantic_ai `system_prompt`. +- [ ] Wire through `ConversationSettings` so callers can choose the + implementation. +- [ ] Validate output equivalence against existing TypeChat results on + test podcasts. +- [ ] Remove `TypeChatJsonTranslator[KnowledgeResponse]` once validated. + +### Phase 3: Search Query Translation Migration + +Replace translator in `searchlang.py`. + +- [ ] Create `SearchQueryAgent` that wraps `Agent[None, SearchQuery]`. +- [ ] Port `prompt_preamble` / time-range context to pydantic_ai + `system_prompt` or `user_prompt` construction. +- [ ] Update `search_query_from_language()` to use the agent. +- [ ] Update `SearchQueryTranslator` type alias. +- [ ] Test against existing query test suite (`test_query.py`, + `test_searchlib.py`, etc.). + +### Phase 4: Answer Generation Migration + +Replace translator in `answers.py`. + +- [ ] Create `AnswerAgent` that wraps `Agent[None, AnswerResponse]`. +- [ ] Port `create_question_prompt()` and `create_context_prompt()` to + pydantic_ai prompt sections. +- [ ] Update `generate_answer()` and `combine_answers()`. +- [ ] Test against existing answer tests. + +### Phase 5: Wiring / Cleanup + +- [ ] Update `ConversationBase.query()` to create pydantic_ai agents + instead of TypeChat translators. +- [ ] Update `EmailMemorySettings` to use pydantic_ai agents. 
+- [ ] Port `MCPTypeChatModel` to a pydantic_ai custom `Model` + implementation. +- [ ] Remove `PydanticAIChatModel` adapter (no longer needed). +- [ ] Remove `create_typechat_model()`, `create_translator()`, and + `ModelWrapper` from `utils.py`. +- [ ] Remove unused `make_agent()` from `utils.py`. +- [ ] Remove `typechat` from production dependencies. +- [ ] Update `convsettings.py` to accept a `pydantic_ai.models.Model` + (or `str` spec) for chat model configuration. +- [ ] Update all imports and type annotations. + +### Phase 6: Enhanced Capabilities (Optional) + +Once on pydantic_ai, these become easy to add: + +- [ ] **Multi-provider support**: Switch models per task (e.g. cheaper + model for knowledge extraction, stronger model for answers). +- [ ] **Streaming answers**: pydantic_ai supports streamed structured + output for real-time UX. +- [ ] **Tool use in agents**: Give the answer agent tools to look up + additional context on demand (agentic RAG). +- [ ] **Conversation memory**: Use pydantic_ai's message history for + multi-turn query refinement. +- [ ] **Cost tracking**: pydantic_ai exposes token usage per call. 
+ +--- + +## Risk Assessment + +| Risk | Mitigation | +|---|---| +| Schema compatibility | All 3 schemas already use `pydantic.dataclass` — direct `output_type` use | +| Prompt regression | Port prompts 1:1; compare outputs on test corpus before switching | +| TypeChat repair loop loss | pydantic_ai `retries=N` provides equivalent retry; native structured output reduces need for repair | +| Azure identity auth | Test `AzureProvider` with identity tokens; keep `AzureTokenProvider` as fallback | +| MCP server compat | Build custom pydantic_ai `Model` for MCP sampling before removing TypeChat | +| `CamelCaseField` serialization | Verify pydantic_ai preserves alias config from the custom dataclass wrapper | + +--- + +## Estimated Effort + +| Phase | Effort | Dependencies | +|---|---|---| +| Phase 0 (Preparation) | 1 day | None | +| Phase 1 (Abstraction) | 2 days | Phase 0 | +| Phase 2 (Knowledge) | 2 days | Phase 1 | +| Phase 3 (Search Query) | 1-2 days | Phase 1 | +| Phase 4 (Answers) | 1-2 days | Phase 1 | +| Phase 5 (Cleanup) | 2 days | Phases 2-4 | +| Phase 6 (Enhanced) | Ongoing | Phase 5 | + +**Total core migration: ~9-11 days** diff --git a/src/typeagent/aitools/embeddings.py b/src/typeagent/aitools/embeddings.py index ce8ea06..0da898c 100644 --- a/src/typeagent/aitools/embeddings.py +++ b/src/typeagent/aitools/embeddings.py @@ -15,7 +15,7 @@ from tiktoken.core import Encoding from .auth import AzureTokenProvider, get_shared_token_provider -from .utils import timelog +from .utils import prefers_azure, timelog type NormalizedEmbedding = NDArray[np.float32] # A single embedding type NormalizedEmbeddings = NDArray[np.float32] # An array of embeddings @@ -127,8 +127,7 @@ def __init__( if use_azure is not None: self.use_azure = use_azure else: - # Prefer OpenAI if both are set, use Azure if only Azure is set - self.use_azure = bool(azure_api_key) and not bool(openai_api_key) + self.use_azure = prefers_azure() if endpoint_envvar is None: # Check if OpenAI credentials 
are available, prefer OpenAI over Azure diff --git a/src/typeagent/aitools/model_adapters.py b/src/typeagent/aitools/model_adapters.py index 6d8f06a..d60151e 100644 --- a/src/typeagent/aitools/model_adapters.py +++ b/src/typeagent/aitools/model_adapters.py @@ -22,6 +22,8 @@ required environment variables. """ +import os + import numpy as np from numpy.typing import NDArray @@ -159,35 +161,61 @@ async def get_embeddings(self, keys: list[str]) -> NormalizedEmbeddings: return np.array([self._cache[k] for k in keys], dtype=np.float32) +# --------------------------------------------------------------------------- +# Env-var defaults +# --------------------------------------------------------------------------- + +DEFAULT_CHAT_MODEL_NAME = "gpt-4o" +DEFAULT_EMBEDDING_MODEL_NAME = "text-embedding-3-small" + +# Re-export from utils for backward compatibility and convenience. +from .utils import infer_provider_prefix as infer_provider_prefix # noqa: E402 + # --------------------------------------------------------------------------- # Public API # --------------------------------------------------------------------------- def create_chat_model( - model_spec: str, + model_spec: str | None = None, ) -> PydanticAIChatModel: """Create a chat model from a ``provider:model`` spec. + If *model_spec* is not given it falls back to the + ``PYDANTIC_AI_MODEL`` environment variable, then auto-detects the + provider from ``OPENAI_API_KEY`` / ``AZURE_OPENAI_API_KEY``. + Delegates to :func:`pydantic_ai.models.infer_model` for provider wiring. 
Examples:: model = create_chat_model("openai:gpt-4o") model = create_chat_model("anthropic:claude-sonnet-4-20250514") - model = create_chat_model("google:gemini-2.0-flash") + model = create_chat_model() # auto-detects provider """ + if model_spec is None: + from dotenv import load_dotenv + + load_dotenv() + model_spec = os.getenv( + "PYDANTIC_AI_MODEL", + f"{infer_provider_prefix()}:{DEFAULT_CHAT_MODEL_NAME}", + ) model = infer_model(model_spec) return PydanticAIChatModel(model) def create_embedding_model( - model_spec: str, + model_spec: str | None = None, *, embedding_size: int = 0, ) -> PydanticAIEmbeddingModel: """Create an embedding model from a ``provider:model`` spec. + If *model_spec* is not given it falls back to the + ``PYDANTIC_AI_EMBEDDING_MODEL`` environment variable, then auto-detects + the provider from ``OPENAI_API_KEY`` / ``AZURE_OPENAI_API_KEY``. + Delegates to :class:`pydantic_ai.Embedder` for provider wiring. If *embedding_size* is not given, it will be probed automatically @@ -197,21 +225,34 @@ def create_embedding_model( model = create_embedding_model("openai:text-embedding-3-small") model = create_embedding_model("cohere:embed-english-v3.0") - model = create_embedding_model("google:text-embedding-004") + model = create_embedding_model() # auto-detects provider """ + if model_spec is None: + from dotenv import load_dotenv + + load_dotenv() + model_spec = os.getenv( + "PYDANTIC_AI_EMBEDDING_MODEL", + f"{infer_provider_prefix()}:{DEFAULT_EMBEDDING_MODEL_NAME}", + ) model_name = model_spec.split(":")[-1] if ":" in model_spec else model_spec embedder = _PydanticAIEmbedder(model_spec) return PydanticAIEmbeddingModel(embedder, model_name, embedding_size) def configure_models( - chat_model_spec: str, - embedding_model_spec: str, + chat_model_spec: str | None = None, + embedding_model_spec: str | None = None, *, embedding_size: int = 0, ) -> tuple[PydanticAIChatModel, PydanticAIEmbeddingModel]: """Configure both a chat model and an embedding model at 
once. + Both specs fall back to their respective environment variables + (``PYDANTIC_AI_MODEL``, ``PYDANTIC_AI_EMBEDDING_MODEL``) and + then auto-detect the provider from ``OPENAI_API_KEY`` / + ``AZURE_OPENAI_API_KEY``. + Delegates to pydantic_ai's model registry for provider wiring. Example:: diff --git a/src/typeagent/aitools/utils.py b/src/typeagent/aitools/utils.py index 9d57b85..0d1474f 100644 --- a/src/typeagent/aitools/utils.py +++ b/src/typeagent/aitools/utils.py @@ -19,6 +19,38 @@ from .auth import AzureTokenProvider, get_shared_token_provider +# --------------------------------------------------------------------------- +# Provider / API-key detection helpers +# --------------------------------------------------------------------------- + + +def infer_provider_prefix() -> str: + """Return ``"openai"`` or ``"azure"`` based on available API keys. + + Checks ``OPENAI_API_KEY`` first (preferred), then ``AZURE_OPENAI_API_KEY``. + Raises :class:`RuntimeError` if neither is set. + + This is the single canonical check used throughout the codebase. + """ + if os.getenv("OPENAI_API_KEY"): + return "openai" + if os.getenv("AZURE_OPENAI_API_KEY"): + return "azure" + raise RuntimeError( + "Neither OPENAI_API_KEY nor AZURE_OPENAI_API_KEY is set. " + "Set one of them or pass an explicit model spec (e.g. 'openai:gpt-4o')." 
+ ) + + +def has_api_key() -> bool: + """Return ``True`` if at least one API key is available.""" + return bool(os.getenv("OPENAI_API_KEY") or os.getenv("AZURE_OPENAI_API_KEY")) + + +def prefers_azure() -> bool: + """Return ``True`` when Azure should be used (no OpenAI key, but Azure key is set).""" + return not os.getenv("OPENAI_API_KEY") and bool(os.getenv("AZURE_OPENAI_API_KEY")) + @contextmanager def timelog(label: str, verbose: bool = True): @@ -299,23 +331,20 @@ def create_async_openai_client( """ from openai import AsyncAzureOpenAI, AsyncOpenAI - if openai_api_key := os.getenv("OPENAI_API_KEY"): - return AsyncOpenAI(api_key=openai_api_key, base_url=base_url) + provider = infer_provider_prefix() # raises RuntimeError if no key - elif azure_api_key := os.getenv("AZURE_OPENAI_API_KEY"): - azure_api_key = get_azure_api_key(azure_api_key) - azure_endpoint, api_version = parse_azure_endpoint(endpoint_envvar) + if provider == "openai": + return AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"), base_url=base_url) - return AsyncAzureOpenAI( - api_version=api_version, - azure_endpoint=azure_endpoint, - api_key=azure_api_key, - ) + # provider == "azure" + azure_api_key = get_azure_api_key(os.environ["AZURE_OPENAI_API_KEY"]) + azure_endpoint, api_version = parse_azure_endpoint(endpoint_envvar) - else: - raise RuntimeError( - "Neither OPENAI_API_KEY nor AZURE_OPENAI_API_KEY was provided." - ) + return AsyncAzureOpenAI( + api_version=api_version, + azure_endpoint=azure_endpoint, + api_key=azure_api_key, + ) # The true return type is pydantic_ai.Agent[T], but that's an optional dependency. @@ -325,14 +354,15 @@ def make_agent[T](cls: type[T]): from pydantic_ai.models.openai import OpenAIChatModel from pydantic_ai.providers.azure import AzureProvider - # Prefer straight OpenAI over Azure OpenAI. 
- if os.getenv("OPENAI_API_KEY"): + provider = infer_provider_prefix() # raises RuntimeError if no key + + if provider == "openai": Wrapper = NativeOutput print(f"## Using OpenAI with {Wrapper.__name__} ##") model = OpenAIChatModel("gpt-4o") # Retrieves OPENAI_API_KEY again. - elif azure_api_key := os.getenv("AZURE_OPENAI_API_KEY"): - azure_api_key = get_azure_api_key(azure_api_key) + else: # provider == "azure" + azure_api_key = get_azure_api_key(os.environ["AZURE_OPENAI_API_KEY"]) azure_endpoint, api_version = parse_azure_endpoint("AZURE_OPENAI_ENDPOINT") print(f"## {azure_endpoint} ##") @@ -348,9 +378,4 @@ def make_agent[T](cls: type[T]): ), ) - else: - raise RuntimeError( - "Neither OPENAI_API_KEY nor AZURE_OPENAI_API_KEY was provided." - ) - return Agent(model, output_type=Wrapper(cls, strict=True), retries=3) diff --git a/tests/conftest.py b/tests/conftest.py index 7f7ce21..66ad78b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -20,6 +20,7 @@ IEmbeddingModel, TEST_MODEL_NAME, ) +from typeagent.aitools.utils import has_api_key from typeagent.aitools.vectorbase import TextEmbeddingIndexSettings from typeagent.knowpro.convsettings import ( ConversationSettings, @@ -88,8 +89,7 @@ def needs_auth() -> None: @pytest.fixture(scope="session") def really_needs_auth() -> None: load_dotenv() - # Check if any of the supported API keys is set - if not (os.getenv("OPENAI_API_KEY") or os.getenv("AZURE_OPENAI_API_KEY")): + if not has_api_key(): pytest.skip("No API key found") diff --git a/tests/test_model_adapters.py b/tests/test_model_adapters.py index 33bd78c..2f747e2 100644 --- a/tests/test_model_adapters.py +++ b/tests/test_model_adapters.py @@ -11,10 +11,20 @@ configure_models, create_chat_model, create_embedding_model, + DEFAULT_CHAT_MODEL_NAME, + DEFAULT_EMBEDDING_MODEL_NAME, + infer_provider_prefix, PydanticAIChatModel, PydanticAIEmbeddingModel, ) + +@pytest.fixture +def provider() -> str: + """Current provider prefix based on available API keys.""" + 
return infer_provider_prefix() + + # --------------------------------------------------------------------------- # Spec format # --------------------------------------------------------------------------- @@ -32,15 +42,17 @@ def test_spec_uses_colon_separator() -> None: # --------------------------------------------------------------------------- -def test_explicit_embedding_size() -> None: +def test_explicit_embedding_size(provider: str) -> None: """Passing embedding_size= sets it immediately.""" - model = create_embedding_model("openai:text-embedding-3-small", embedding_size=42) + model = create_embedding_model( + f"{provider}:text-embedding-3-small", embedding_size=42 + ) assert model.embedding_size == 42 -def test_default_embedding_size_is_zero() -> None: +def test_default_embedding_size_is_zero(provider: str) -> None: """Without embedding_size=, it defaults to 0 (probed on first call).""" - model = create_embedding_model("openai:text-embedding-3-small") + model = create_embedding_model(f"{provider}:text-embedding-3-small") assert model.embedding_size == 0 @@ -244,9 +256,117 @@ async def test_embedding_adapter_empty_batch() -> None: # --------------------------------------------------------------------------- -def test_configure_models_returns_correct_types() -> None: +def test_configure_models_returns_correct_types(provider: str) -> None: """configure_models creates both adapters.""" - chat, embedder = configure_models("openai:gpt-4o", "openai:text-embedding-3-small") + chat, embedder = configure_models( + f"{provider}:gpt-4o", f"{provider}:text-embedding-3-small" + ) assert isinstance(chat, PydanticAIChatModel) assert isinstance(embedder, PydanticAIEmbeddingModel) assert typechat.TypeChatLanguageModel in type(chat).__mro__ + + +# --------------------------------------------------------------------------- +# Environment-variable defaults +# --------------------------------------------------------------------------- + + +def test_create_chat_model_env_default( 
+ monkeypatch: pytest.MonkeyPatch, provider: str +) -> None: + """create_chat_model() reads PYDANTIC_AI_MODEL from the environment.""" + monkeypatch.setenv("PYDANTIC_AI_MODEL", f"{provider}:gpt-4o-mini") + model = create_chat_model() + assert isinstance(model, PydanticAIChatModel) + + +def test_create_chat_model_builtin_default(monkeypatch: pytest.MonkeyPatch) -> None: + """Without PYDANTIC_AI_MODEL, falls back to DEFAULT_CHAT_MODEL.""" + monkeypatch.delenv("PYDANTIC_AI_MODEL", raising=False) + model = create_chat_model() + assert isinstance(model, PydanticAIChatModel) + + +def test_create_embedding_model_env_default( + monkeypatch: pytest.MonkeyPatch, provider: str +) -> None: + """create_embedding_model() reads PYDANTIC_AI_EMBEDDING_MODEL from the environment.""" + monkeypatch.setenv( + "PYDANTIC_AI_EMBEDDING_MODEL", f"{provider}:text-embedding-3-small" + ) + model = create_embedding_model() + assert isinstance(model, PydanticAIEmbeddingModel) + + +def test_create_embedding_model_builtin_default( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Without PYDANTIC_AI_EMBEDDING_MODEL, falls back to auto-detected provider.""" + monkeypatch.delenv("PYDANTIC_AI_EMBEDDING_MODEL", raising=False) + model = create_embedding_model() + assert isinstance(model, PydanticAIEmbeddingModel) + assert model.model_name == DEFAULT_EMBEDDING_MODEL_NAME + + +def test_configure_models_env_defaults( + monkeypatch: pytest.MonkeyPatch, provider: str +) -> None: + """configure_models() falls back to env vars when specs are omitted.""" + monkeypatch.setenv("PYDANTIC_AI_MODEL", f"{provider}:gpt-4o-mini") + monkeypatch.setenv( + "PYDANTIC_AI_EMBEDDING_MODEL", f"{provider}:text-embedding-3-small" + ) + chat, embedder = configure_models() + assert isinstance(chat, PydanticAIChatModel) + assert isinstance(embedder, PydanticAIEmbeddingModel) + + +def test_explicit_spec_overrides_env( + monkeypatch: pytest.MonkeyPatch, provider: str +) -> None: + """An explicit model_spec takes precedence over 
the env var.""" + monkeypatch.setenv("PYDANTIC_AI_MODEL", f"{provider}:gpt-4o-mini") + model = create_chat_model(f"{provider}:gpt-4o") + assert isinstance(model, PydanticAIChatModel) + + +def test_default_constants() -> None: + """Verify the built-in default model name constants.""" + assert DEFAULT_CHAT_MODEL_NAME == "gpt-4o" + assert DEFAULT_EMBEDDING_MODEL_NAME == "text-embedding-3-small" + + +# --------------------------------------------------------------------------- +# Provider auto-detection +# --------------------------------------------------------------------------- + + +def test_infer_provider_openai(monkeypatch: pytest.MonkeyPatch) -> None: + """Prefers openai when OPENAI_API_KEY is set.""" + monkeypatch.setenv("OPENAI_API_KEY", "sk-test") + monkeypatch.delenv("AZURE_OPENAI_API_KEY", raising=False) + assert infer_provider_prefix() == "openai" + + +def test_infer_provider_openai_over_azure(monkeypatch: pytest.MonkeyPatch) -> None: + """Prefers openai when both keys are set.""" + monkeypatch.setenv("OPENAI_API_KEY", "sk-test") + monkeypatch.setenv("AZURE_OPENAI_API_KEY", "az-test") + assert infer_provider_prefix() == "openai" + + +def test_infer_provider_azure(monkeypatch: pytest.MonkeyPatch) -> None: + """Falls back to azure when only AZURE_OPENAI_API_KEY is set.""" + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.setenv("AZURE_OPENAI_API_KEY", "az-test") + assert infer_provider_prefix() == "azure" + + +def test_infer_provider_missing(monkeypatch: pytest.MonkeyPatch) -> None: + """Raises RuntimeError when no API key is available.""" + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("AZURE_OPENAI_API_KEY", raising=False) + with pytest.raises( + RuntimeError, match="Neither OPENAI_API_KEY nor AZURE_OPENAI_API_KEY" + ): + infer_provider_prefix() diff --git a/tests/test_utils.py b/tests/test_utils.py index cb95c93..bccb3eb 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -3,11 +3,11 @@ from contextlib 
import redirect_stderr, redirect_stdout from io import StringIO -import os from dotenv import load_dotenv import typeagent.aitools.utils as utils +from typeagent.aitools.utils import has_api_key def test_timelog(): @@ -33,7 +33,7 @@ def test_pretty_print(): def test_load_dotenv(really_needs_auth): # Call load_dotenv and check for at least one expected key load_dotenv() - assert "OPENAI_API_KEY" in os.environ or "AZURE_OPENAI_API_KEY" in os.environ + assert has_api_key() def test_create_translator():