From 11d1a22d7df860d91924d9ac93560f1eeced47cb Mon Sep 17 00:00:00 2001 From: Vishwa Vignan Date: Mon, 4 May 2026 17:26:21 +0530 Subject: [PATCH 1/3] sk/python/connectors/ai/anthropic | chatcompletion with cache options MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds AnthropicCacheSettings and a `cache` field on AnthropicChatPromptExecutionSettings to enable opt-in prompt caching via the Anthropic cache_control API. When enabled, prepare_settings_dict() injects cache_control blocks on the system message and the last tool definition before the request is sent. No changes to AnthropicChatCompletion — caching is fully contained in the settings layer. Off by default; opt in with cache=AnthropicCacheSettings.on(). Convenience constructors: .on() .off() .system() .tools() .short() .long() TTL: "5m" -> {"type":"ephemeral"}, "1h" -> {"type":"ephemeral","ttl":3600} Includes 18 new unit tests and a usage sample at samples/concepts/caching/anthropic_prompt_caching.py. --- .../caching/anthropic_prompt_caching.py | 99 ++++++++++ .../connectors/ai/anthropic/__init__.py | 2 + .../anthropic_prompt_execution_settings.py | 119 +++++++++++- .../test_anthropic_request_settings.py | 170 ++++++++++++++++++ 4 files changed, 387 insertions(+), 3 deletions(-) create mode 100644 python/samples/concepts/caching/anthropic_prompt_caching.py diff --git a/python/samples/concepts/caching/anthropic_prompt_caching.py b/python/samples/concepts/caching/anthropic_prompt_caching.py new file mode 100644 index 000000000000..4015064f1de2 --- /dev/null +++ b/python/samples/concepts/caching/anthropic_prompt_caching.py @@ -0,0 +1,99 @@ +# Copyright (c) Microsoft. All rights reserved. + +# This sample demonstrates Anthropic prompt caching with Semantic Kernel. +# Prompt caching lets you mark parts of a request (system message, tool definitions) +# as cacheable so that repeated calls reuse the cached tokens at 0.1x read cost. 
+# +# Prerequisites: +# - Set ANTHROPIC_API_KEY and ANTHROPIC_CHAT_MODEL_ID in your environment or a .env file. +# - Model must support caching (claude-haiku-4-5, claude-sonnet-4-x, claude-opus-4-x). +# - Minimum tokens to activate cache: 4,096 (Haiku), 1,024 (Sonnet/Opus). +# +# Run: +# uv run python samples/concepts/caching/anthropic_prompt_caching.py + +import asyncio + +from semantic_kernel import Kernel +from semantic_kernel.connectors.ai.anthropic import ( + AnthropicCacheSettings, + AnthropicChatCompletion, + AnthropicChatPromptExecutionSettings, +) +from semantic_kernel.contents import ChatHistory + +# A long system prompt that exceeds the minimum token threshold for caching. +# In production this would typically be a large instruction set, persona, or +# document that stays the same across many turns. +SYSTEM_PROMPT = ( + """ +You are an expert software engineer specializing in Python and distributed systems. +You provide precise, production-quality answers. When writing code you follow these rules: + - Use type hints throughout. + - Prefer composition over inheritance. + - Write small, single-purpose functions. + - Handle errors explicitly; never silence exceptions. + - Include a brief docstring only when the intent is non-obvious. + - Use async/await for all I/O-bound operations. + - Prefer dataclasses or Pydantic models for structured data. + +You are also familiar with the following internal guidelines: + - All public APIs must be versioned. + - Services communicate over gRPC with Protobuf schemas checked into the repo. + - Secrets are injected at runtime via environment variables; never hardcoded. + - Observability: every service emits structured JSON logs and OpenTelemetry traces. + - Deployments use Kubernetes with Helm charts; no raw manifests. + +When asked to review code, structure your response as: + 1. Summary (1-2 sentences) + 2. Issues (bulleted, severity labeled) + 3. 
Suggested fix (code block if applicable) +""" + * 3 +) # repeat to ensure we comfortably exceed the 1,024-token minimum + + +async def chat_with_caching() -> None: + """Run a multi-turn chat with prompt caching enabled on the system message.""" + kernel = Kernel() + + service = AnthropicChatCompletion(service_id="anthropic") + kernel.add_service(service) + + # AnthropicCacheSettings.on() enables caching for both the system message and + # tool definitions. Use .system() or .tools() to cache only one section. + # Use .long() for 1-hour TTL when calls are infrequent. + settings = AnthropicChatPromptExecutionSettings( + service_id="anthropic", + max_tokens=512, + cache=AnthropicCacheSettings.on(), + ) + + chat_history = ChatHistory(system_message=SYSTEM_PROMPT) + + questions = [ + "What is the difference between asyncio.gather and asyncio.TaskGroup?", + "When would you choose gRPC over REST for an internal service?", + "How do you structure a Pydantic settings class for a twelve-factor app?", + ] + + print("Anthropic Prompt Caching Demo") + print("=" * 50) + print("System prompt is marked for caching. The first call writes the cache;") + print("subsequent calls read from it at 0.1x token cost.\n") + + for i, question in enumerate(questions, start=1): + print(f"Turn {i}: {question}") + chat_history.add_user_message(question) + + response = await service.get_chat_message_content( + chat_history=chat_history, + settings=settings, + ) + if response: + print(f"Assistant: {response}\n") + chat_history.add_message(response) + + +if __name__ == "__main__": + asyncio.run(chat_with_caching()) diff --git a/python/semantic_kernel/connectors/ai/anthropic/__init__.py b/python/semantic_kernel/connectors/ai/anthropic/__init__.py index c5d96ddd147f..93521cf393b2 100644 --- a/python/semantic_kernel/connectors/ai/anthropic/__init__.py +++ b/python/semantic_kernel/connectors/ai/anthropic/__init__.py @@ -1,11 +1,13 @@ # Copyright (c) Microsoft. All rights reserved. 
from semantic_kernel.connectors.ai.anthropic.prompt_execution_settings.anthropic_prompt_execution_settings import ( + AnthropicCacheSettings, AnthropicChatPromptExecutionSettings, ) from semantic_kernel.connectors.ai.anthropic.services.anthropic_chat_completion import AnthropicChatCompletion __all__ = [ + "AnthropicCacheSettings", "AnthropicChatCompletion", "AnthropicChatPromptExecutionSettings", ] diff --git a/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py index c18fcb30c732..316ba7b60a83 100644 --- a/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py @@ -1,9 +1,10 @@ # Copyright (c) Microsoft. All rights reserved. +import copy import logging -from typing import Annotated, Any +from typing import Annotated, Any, Literal -from pydantic import Field, model_validator +from pydantic import BaseModel, Field, model_validator from semantic_kernel.connectors.ai.function_choice_type import FunctionChoiceType from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings @@ -12,6 +13,88 @@ logger = logging.getLogger(__name__) +class AnthropicCacheSettings(BaseModel): + """Configuration for Anthropic prompt caching. + + Controls which parts of the request receive cache_control injection. 
+ + Anthropic minimum token thresholds for cache activation: + - claude-haiku-4-5 : 4,096 tokens + - claude-sonnet-4-x: 1,024 tokens + - claude-opus-4-x : 1,024 tokens + + TTL options: + - "5m": ephemeral 5-minute cache (1.25x write cost, 0.1x read cost) + - "1h": extended 1-hour cache (2x write cost, 0.1x read cost) + + Use the classmethods for common configurations:: + + AnthropicCacheSettings.on() # enable system + tools caching + AnthropicCacheSettings.off() # disable all caching (default) + AnthropicCacheSettings.system() # cache system message only + AnthropicCacheSettings.tools() # cache tool definitions only + """ + + enabled: Annotated[ + bool, + Field(description="Master switch — disabling skips all cache_control injection regardless of other flags."), + ] = False + cache_system: Annotated[ + bool, + Field(description="Inject cache_control on the system message content block."), + ] = False + cache_tools: Annotated[ + bool, + Field(description="Inject cache_control on the last tool definition, caching the entire tools array prefix."), + ] = False + ttl: Annotated[ + Literal["5m", "1h"], + Field(description="Cache TTL. '5m' = 5-minute ephemeral (default). 
'1h' = 1-hour extended."), + ] = "5m" + + def _cache_control(self) -> dict[str, Any]: + """Return the cache_control block for the configured TTL.""" + if self.ttl == "1h": + return {"type": "ephemeral", "ttl": 3600} + return {"type": "ephemeral"} + + @classmethod + def on(cls, ttl: Literal["5m", "1h"] = "5m") -> "AnthropicCacheSettings": + """Enable caching for all supported request sections (system + tools).""" + return cls(enabled=True, cache_system=True, cache_tools=True, ttl=ttl) + + @classmethod + def off(cls) -> "AnthropicCacheSettings": + """Disable all cache_control injection.""" + return cls(enabled=False) + + @classmethod + def system(cls, ttl: Literal["5m", "1h"] = "5m") -> "AnthropicCacheSettings": + """Enable caching for the system message only.""" + return cls(enabled=True, cache_system=True, cache_tools=False, ttl=ttl) + + @classmethod + def tools(cls, ttl: Literal["5m", "1h"] = "5m") -> "AnthropicCacheSettings": + """Enable caching for tool definitions only.""" + return cls(enabled=True, cache_system=False, cache_tools=True, ttl=ttl) + + @classmethod + def short(cls) -> "AnthropicCacheSettings": + """5-minute TTL. Use for tight agentic loops where the same prompt repeats within minutes. + + Write cost: 1.25x. Read cost: 0.1x. Breaks even after a single cache hit. + """ + return cls(enabled=True, cache_system=True, cache_tools=True, ttl="5m") + + @classmethod + def long(cls) -> "AnthropicCacheSettings": + """1-hour TTL. Use for batch jobs or scheduled tasks with long gaps between calls. + + Write cost: 2x. Read cost: 0.1x. Needs at least 2 cache hits to break even. 
+ """ + return cls(enabled=True, cache_system=True, cache_tools=True, ttl="1h") + + class AnthropicPromptExecutionSettings(PromptExecutionSettings): """Common request settings for Anthropic services.""" @@ -23,7 +106,7 @@ class AnthropicChatPromptExecutionSettings(AnthropicPromptExecutionSettings): messages: list[dict[str, Any]] | None = None stream: bool | None = None - system: str | None = None + system: str | list[dict[str, Any]] | None = None max_tokens: Annotated[int, Field(gt=0)] = 1024 temperature: Annotated[float | None, Field(ge=0.0, le=2.0)] = None stop_sequences: list[str] | None = None @@ -43,6 +126,13 @@ class AnthropicChatPromptExecutionSettings(AnthropicPromptExecutionSettings): description="Do not set this manually. It is set by the service based on the function choice configuration." ), ] = None + cache: Annotated[ + AnthropicCacheSettings, + Field( + description="Prompt caching configuration. Disabled by default.", + exclude=True, + ), + ] = Field(default_factory=AnthropicCacheSettings) @model_validator(mode="after") def validate_tool_choice(self) -> "AnthropicChatPromptExecutionSettings": @@ -53,3 +143,26 @@ def validate_tool_choice(self) -> "AnthropicChatPromptExecutionSettings": raise ServiceInvalidExecutionSettingsError("Tool choice 'none' is not supported by Anthropic.") return self + + def prepare_settings_dict(self, **kwargs: Any) -> dict[str, Any]: + """Prepare the settings dictionary, injecting cache_control blocks when caching is enabled.""" + data = super().prepare_settings_dict(**kwargs) + + if not self.cache.enabled: + return data + + cache_control = self.cache._cache_control() + + if self.cache.cache_system: + system = data.get("system") + if isinstance(system, str) and system: + data["system"] = [{"type": "text", "text": system, "cache_control": cache_control}] + + if self.cache.cache_tools: + tools: list[dict[str, Any]] | None = data.get("tools") + if tools: + tools = copy.deepcopy(tools) + tools[-1]["cache_control"] = 
cache_control + data["tools"] = tools + + return data diff --git a/python/tests/unit/connectors/ai/anthropic/test_anthropic_request_settings.py b/python/tests/unit/connectors/ai/anthropic/test_anthropic_request_settings.py index 531823281ae2..525acfdab1d1 100644 --- a/python/tests/unit/connectors/ai/anthropic/test_anthropic_request_settings.py +++ b/python/tests/unit/connectors/ai/anthropic/test_anthropic_request_settings.py @@ -3,6 +3,7 @@ import pytest from semantic_kernel.connectors.ai.anthropic.prompt_execution_settings.anthropic_prompt_execution_settings import ( + AnthropicCacheSettings, AnthropicChatPromptExecutionSettings, ) from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior @@ -127,3 +128,172 @@ def test_tool_choice_none(): }, function_choice_behavior=FunctionChoiceBehavior.NoneInvoke(), ) + + +# region AnthropicCacheSettings + + +def test_cache_settings_default_is_off(): + settings = AnthropicCacheSettings() + assert settings.enabled is False + assert settings.cache_system is False + assert settings.cache_tools is False + assert settings.ttl == "5m" + + +def test_cache_settings_on(): + settings = AnthropicCacheSettings.on() + assert settings.enabled is True + assert settings.cache_system is True + assert settings.cache_tools is True + assert settings.ttl == "5m" + + +def test_cache_settings_on_with_1h_ttl(): + settings = AnthropicCacheSettings.on(ttl="1h") + assert settings.enabled is True + assert settings.ttl == "1h" + + +def test_cache_settings_off(): + settings = AnthropicCacheSettings.off() + assert settings.enabled is False + + +def test_cache_settings_system_only(): + settings = AnthropicCacheSettings.system() + assert settings.enabled is True + assert settings.cache_system is True + assert settings.cache_tools is False + + +def test_cache_settings_tools_only(): + settings = AnthropicCacheSettings.tools() + assert settings.enabled is True + assert settings.cache_system is False + assert settings.cache_tools is 
True + + +def test_cache_control_5m(): + ctrl = AnthropicCacheSettings.on(ttl="5m")._cache_control() + assert ctrl == {"type": "ephemeral"} + + +def test_cache_control_1h(): + ctrl = AnthropicCacheSettings.on(ttl="1h")._cache_control() + assert ctrl == {"type": "ephemeral", "ttl": 3600} + + +def test_cache_settings_short(): + settings = AnthropicCacheSettings.short() + assert settings.enabled is True + assert settings.cache_system is True + assert settings.cache_tools is True + assert settings.ttl == "5m" + + +def test_cache_settings_long(): + settings = AnthropicCacheSettings.long() + assert settings.enabled is True + assert settings.cache_system is True + assert settings.cache_tools is True + assert settings.ttl == "1h" + + +# endregion + +# region prepare_settings_dict with caching + + +def test_prepare_settings_dict_cache_off_no_injection(): + settings = AnthropicChatPromptExecutionSettings( + system="You are a helpful assistant.", + tools=[{"name": "search", "description": "Search the web"}], + cache=AnthropicCacheSettings.off(), + ) + data = settings.prepare_settings_dict() + assert data["system"] == "You are a helpful assistant." 
+ assert "cache_control" not in data["tools"][-1] + + +def test_prepare_settings_dict_cache_system_only(): + settings = AnthropicChatPromptExecutionSettings( + system="You are a helpful assistant.", + cache=AnthropicCacheSettings.system(), + ) + data = settings.prepare_settings_dict() + assert isinstance(data["system"], list) + assert data["system"] == [ + {"type": "text", "text": "You are a helpful assistant.", "cache_control": {"type": "ephemeral"}} + ] + + +def test_prepare_settings_dict_cache_tools_only(): + tools = [ + {"name": "tool_a", "description": "Tool A"}, + {"name": "tool_b", "description": "Tool B"}, + ] + settings = AnthropicChatPromptExecutionSettings( + tools=tools, + cache=AnthropicCacheSettings.tools(), + ) + data = settings.prepare_settings_dict() + assert "cache_control" not in data["tools"][0] + assert data["tools"][-1]["cache_control"] == {"type": "ephemeral"} + # original tools list must not be mutated + assert "cache_control" not in tools[-1] + + +def test_prepare_settings_dict_cache_on_system_and_tools(): + tools = [{"name": "search", "description": "Search the web"}] + settings = AnthropicChatPromptExecutionSettings( + system="You are a helpful assistant.", + tools=tools, + cache=AnthropicCacheSettings.on(), + ) + data = settings.prepare_settings_dict() + assert isinstance(data["system"], list) + assert data["system"][0]["cache_control"] == {"type": "ephemeral"} + assert data["tools"][-1]["cache_control"] == {"type": "ephemeral"} + + +def test_prepare_settings_dict_cache_on_1h_ttl(): + tools = [{"name": "search", "description": "Search the web"}] + settings = AnthropicChatPromptExecutionSettings( + system="You are a helpful assistant.", + tools=tools, + cache=AnthropicCacheSettings.on(ttl="1h"), + ) + data = settings.prepare_settings_dict() + assert data["system"][0]["cache_control"] == {"type": "ephemeral", "ttl": 3600} + assert data["tools"][-1]["cache_control"] == {"type": "ephemeral", "ttl": 3600} + + +def 
test_prepare_settings_dict_cache_system_empty_string_no_injection(): + """Empty system string should not be wrapped in a cache block.""" + settings = AnthropicChatPromptExecutionSettings( + system="", + cache=AnthropicCacheSettings.system(), + ) + data = settings.prepare_settings_dict() + # empty string — no injection expected + assert not isinstance(data.get("system"), list) + + +def test_prepare_settings_dict_cache_tools_empty_no_injection(): + """No tools present — cache_tools flag should be a no-op.""" + settings = AnthropicChatPromptExecutionSettings( + cache=AnthropicCacheSettings.tools(), + ) + data = settings.prepare_settings_dict() + assert data.get("tools") is None + + +def test_prepare_settings_dict_cache_excluded_from_serialization(): + """The cache field must not appear in the serialized API payload.""" + settings = AnthropicChatPromptExecutionSettings(cache=AnthropicCacheSettings.on()) + data = settings.prepare_settings_dict() + assert "cache" not in data + + +# endregion From da6de642219aab494db28dabb7756a0b3336e750 Mon Sep 17 00:00:00 2001 From: Vishwa Vignan Date: Tue, 5 May 2026 15:13:58 +0530 Subject: [PATCH 2/3] fix: correct cache_control TTL value and handle pre-structured system blocks - _cache_control() now emits {"ttl":"1h"} string per CacheControlEphemeralParam spec instead of integer 3600 - prepare_settings_dict() now injects cache_control on list[dict] system blocks in addition to plain strings, closing the silent no-op design gap - add test covering cache injection when system is pre-structured as list[dict] - update 1h TTL test assertions to match corrected string value --- .../anthropic_prompt_execution_settings.py | 6 ++++- .../test_anthropic_request_settings.py | 24 ++++++++++++++++--- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py 
b/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py index 316ba7b60a83..9d518e88c73a 100644 --- a/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py @@ -55,7 +55,7 @@ class AnthropicCacheSettings(BaseModel): def _cache_control(self) -> dict[str, Any]: """Return the cache_control block for the configured TTL.""" if self.ttl == "1h": - return {"type": "ephemeral", "ttl": 3600} + return {"type": "ephemeral", "ttl": "1h"} return {"type": "ephemeral"} @classmethod @@ -157,6 +157,10 @@ def prepare_settings_dict(self, **kwargs: Any) -> dict[str, Any]: system = data.get("system") if isinstance(system, str) and system: data["system"] = [{"type": "text", "text": system, "cache_control": cache_control}] + elif isinstance(system, list) and system: + system = copy.deepcopy(system) + system[-1]["cache_control"] = cache_control + data["system"] = system if self.cache.cache_tools: tools: list[dict[str, Any]] | None = data.get("tools") diff --git a/python/tests/unit/connectors/ai/anthropic/test_anthropic_request_settings.py b/python/tests/unit/connectors/ai/anthropic/test_anthropic_request_settings.py index 525acfdab1d1..2143bdcbaedd 100644 --- a/python/tests/unit/connectors/ai/anthropic/test_anthropic_request_settings.py +++ b/python/tests/unit/connectors/ai/anthropic/test_anthropic_request_settings.py @@ -181,7 +181,7 @@ def test_cache_control_5m(): def test_cache_control_1h(): ctrl = AnthropicCacheSettings.on(ttl="1h")._cache_control() - assert ctrl == {"type": "ephemeral", "ttl": 3600} + assert ctrl == {"type": "ephemeral", "ttl": "1h"} def test_cache_settings_short(): @@ -265,8 +265,26 @@ def test_prepare_settings_dict_cache_on_1h_ttl(): cache=AnthropicCacheSettings.on(ttl="1h"), ) data = settings.prepare_settings_dict() - assert 
data["system"][0]["cache_control"] == {"type": "ephemeral", "ttl": 3600} - assert data["tools"][-1]["cache_control"] == {"type": "ephemeral", "ttl": 3600} + assert data["system"][0]["cache_control"] == {"type": "ephemeral", "ttl": "1h"} + assert data["tools"][-1]["cache_control"] == {"type": "ephemeral", "ttl": "1h"} + + +def test_prepare_settings_dict_cache_system_already_list(): + """When system is pre-structured as list[dict], cache_control is injected on the last block.""" + system_blocks = [ + {"type": "text", "text": "First block."}, + {"type": "text", "text": "Second block."}, + ] + settings = AnthropicChatPromptExecutionSettings( + system=system_blocks, + cache=AnthropicCacheSettings.system(), + ) + data = settings.prepare_settings_dict() + assert isinstance(data["system"], list) + assert "cache_control" not in data["system"][0] + assert data["system"][-1]["cache_control"] == {"type": "ephemeral"} + # original list must not be mutated + assert "cache_control" not in system_blocks[-1] def test_prepare_settings_dict_cache_system_empty_string_no_injection(): From f71799f4dde0a54e5dbc6a513e63834b916c00ee Mon Sep 17 00:00:00 2001 From: Vishwa Vignan Date: Tue, 5 May 2026 21:54:57 +0530 Subject: [PATCH 3/3] =?UTF-8?q?fix:=20address=20Copilot=20review=20?= =?UTF-8?q?=E2=80=94=20KernelBaseSettings,=20field=20rename,=20shallow=20c?= =?UTF-8?q?opy,=20no-overwrite?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - AnthropicCacheSettings now inherits KernelBaseSettings (consistent with rest of SDK; enables validate_assignment, populate_by_name) - Added env_prefix = "ANTHROPIC_CACHE_" so caching can be toggled via environment variables (ANTHROPIC_CACHE_ENABLED, ANTHROPIC_CACHE_TTL, etc.) 
- Renamed cache_system/cache_tools fields to include_system/include_tools (removes redundant "cache" prefix on fields inside a cache settings class) - Replaced copy.deepcopy with shallow list + dict spread — cheaper for large tool catalogs where caching is most beneficial - inject now skips if cache_control already present on last block — avoids silently clobbering a caller's explicit setting - Replaced two _cache_control() private-method tests with prepare_settings_dict() equivalents; added env-var tests (monkeypatch) and no-overwrite test Co-Authored-By: Claude Sonnet 4.6 --- .../anthropic_prompt_execution_settings.py | 48 ++++---- .../test_anthropic_request_settings.py | 110 ++++++++++++++---- 2 files changed, 113 insertions(+), 45 deletions(-) diff --git a/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py index 9d518e88c73a..e175d2240a02 100644 --- a/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py @@ -1,22 +1,30 @@ # Copyright (c) Microsoft. All rights reserved. 
-import copy import logging -from typing import Annotated, Any, Literal +from typing import Annotated, Any, ClassVar, Literal -from pydantic import BaseModel, Field, model_validator +from pydantic import Field, model_validator from semantic_kernel.connectors.ai.function_choice_type import FunctionChoiceType from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.exceptions import ServiceInvalidExecutionSettingsError +from semantic_kernel.kernel_pydantic import KernelBaseSettings logger = logging.getLogger(__name__) -class AnthropicCacheSettings(BaseModel): +class AnthropicCacheSettings(KernelBaseSettings): """Configuration for Anthropic prompt caching. Controls which parts of the request receive cache_control injection. + Settings are loaded from environment variables with the prefix 'ANTHROPIC_CACHE_', + then from a .env file, then from defaults. Explicit constructor arguments always win. + + Environment variables (prefix 'ANTHROPIC_CACHE_'): + - ANTHROPIC_CACHE_ENABLED — master switch, bool (default: false) + - ANTHROPIC_CACHE_INCLUDE_SYSTEM — cache system message, bool (default: false) + - ANTHROPIC_CACHE_INCLUDE_TOOLS — cache tool definitions, bool (default: false) + - ANTHROPIC_CACHE_TTL — cache TTL, "5m" or "1h" (default: "5m") Anthropic minimum token thresholds for cache activation: - claude-haiku-4-5 : 4,096 tokens @@ -35,15 +43,17 @@ class AnthropicCacheSettings(BaseModel): AnthropicCacheSettings.tools() # cache tool definitions only """ + env_prefix: ClassVar[str] = "ANTHROPIC_CACHE_" + enabled: Annotated[ bool, Field(description="Master switch — disabling skips all cache_control injection regardless of other flags."), ] = False - cache_system: Annotated[ + include_system: Annotated[ bool, Field(description="Inject cache_control on the system message content block."), ] = False - cache_tools: Annotated[ + include_tools: Annotated[ bool, Field(description="Inject cache_control on the last tool 
definition, caching the entire tools array prefix."), ] = False @@ -61,7 +71,7 @@ def _cache_control(self) -> dict[str, Any]: @classmethod def on(cls, ttl: Literal["5m", "1h"] = "5m") -> "AnthropicCacheSettings": """Enable caching for all supported request sections (system + tools).""" - return cls(enabled=True, cache_system=True, cache_tools=True, ttl=ttl) + return cls(enabled=True, include_system=True, include_tools=True, ttl=ttl) @classmethod def off(cls) -> "AnthropicCacheSettings": @@ -71,12 +81,12 @@ def off(cls) -> "AnthropicCacheSettings": @classmethod def system(cls, ttl: Literal["5m", "1h"] = "5m") -> "AnthropicCacheSettings": """Enable caching for the system message only.""" - return cls(enabled=True, cache_system=True, cache_tools=False, ttl=ttl) + return cls(enabled=True, include_system=True, include_tools=False, ttl=ttl) @classmethod def tools(cls, ttl: Literal["5m", "1h"] = "5m") -> "AnthropicCacheSettings": """Enable caching for tool definitions only.""" - return cls(enabled=True, cache_system=False, cache_tools=True, ttl=ttl) + return cls(enabled=True, include_system=False, include_tools=True, ttl=ttl) @classmethod def short(cls) -> "AnthropicCacheSettings": @@ -84,7 +94,7 @@ def short(cls) -> "AnthropicCacheSettings": Write cost: 1.25x. Read cost: 0.1x. Breaks even after a single cache hit. """ - return cls(enabled=True, cache_system=True, cache_tools=True, ttl="5m") + return cls(enabled=True, include_system=True, include_tools=True, ttl="5m") @classmethod def long(cls) -> "AnthropicCacheSettings": @@ -92,7 +102,7 @@ def long(cls) -> "AnthropicCacheSettings": Write cost: 2x. Read cost: 0.1x. Needs at least 2 cache hits to break even. 
""" - return cls(enabled=True, cache_system=True, cache_tools=True, ttl="1h") + return cls(enabled=True, include_system=True, include_tools=True, ttl="1h") class AnthropicPromptExecutionSettings(PromptExecutionSettings): @@ -153,20 +163,16 @@ def prepare_settings_dict(self, **kwargs: Any) -> dict[str, Any]: cache_control = self.cache._cache_control() - if self.cache.cache_system: + if self.cache.include_system: system = data.get("system") if isinstance(system, str) and system: data["system"] = [{"type": "text", "text": system, "cache_control": cache_control}] - elif isinstance(system, list) and system: - system = copy.deepcopy(system) - system[-1]["cache_control"] = cache_control - data["system"] = system + elif isinstance(system, list) and system and "cache_control" not in system[-1]: + data["system"] = [*system[:-1], {**system[-1], "cache_control": cache_control}] - if self.cache.cache_tools: + if self.cache.include_tools: tools: list[dict[str, Any]] | None = data.get("tools") - if tools: - tools = copy.deepcopy(tools) - tools[-1]["cache_control"] = cache_control - data["tools"] = tools + if tools and "cache_control" not in tools[-1]: + data["tools"] = [*tools[:-1], {**tools[-1], "cache_control": cache_control}] return data diff --git a/python/tests/unit/connectors/ai/anthropic/test_anthropic_request_settings.py b/python/tests/unit/connectors/ai/anthropic/test_anthropic_request_settings.py index 2143bdcbaedd..885915d1e3ae 100644 --- a/python/tests/unit/connectors/ai/anthropic/test_anthropic_request_settings.py +++ b/python/tests/unit/connectors/ai/anthropic/test_anthropic_request_settings.py @@ -136,16 +136,16 @@ def test_tool_choice_none(): def test_cache_settings_default_is_off(): settings = AnthropicCacheSettings() assert settings.enabled is False - assert settings.cache_system is False - assert settings.cache_tools is False + assert settings.include_system is False + assert settings.include_tools is False assert settings.ttl == "5m" def 
test_cache_settings_on(): settings = AnthropicCacheSettings.on() assert settings.enabled is True - assert settings.cache_system is True - assert settings.cache_tools is True + assert settings.include_system is True + assert settings.include_tools is True assert settings.ttl == "5m" @@ -163,40 +163,50 @@ def test_cache_settings_off(): def test_cache_settings_system_only(): settings = AnthropicCacheSettings.system() assert settings.enabled is True - assert settings.cache_system is True - assert settings.cache_tools is False + assert settings.include_system is True + assert settings.include_tools is False def test_cache_settings_tools_only(): settings = AnthropicCacheSettings.tools() assert settings.enabled is True - assert settings.cache_system is False - assert settings.cache_tools is True + assert settings.include_system is False + assert settings.include_tools is True -def test_cache_control_5m(): - ctrl = AnthropicCacheSettings.on(ttl="5m")._cache_control() - assert ctrl == {"type": "ephemeral"} +def test_cache_control_5m_via_prepare(): + """5m TTL emits ephemeral block without a ttl key.""" + settings = AnthropicChatPromptExecutionSettings( + system="Hello.", + cache=AnthropicCacheSettings.on(ttl="5m"), + ) + data = settings.prepare_settings_dict() + assert data["system"][0]["cache_control"] == {"type": "ephemeral"} -def test_cache_control_1h(): - ctrl = AnthropicCacheSettings.on(ttl="1h")._cache_control() - assert ctrl == {"type": "ephemeral", "ttl": "1h"} +def test_cache_control_1h_via_prepare(): + """1h TTL emits ephemeral block with ttl string '1h'.""" + settings = AnthropicChatPromptExecutionSettings( + system="Hello.", + cache=AnthropicCacheSettings.on(ttl="1h"), + ) + data = settings.prepare_settings_dict() + assert data["system"][0]["cache_control"] == {"type": "ephemeral", "ttl": "1h"} def test_cache_settings_short(): settings = AnthropicCacheSettings.short() assert settings.enabled is True - assert settings.cache_system is True - assert 
settings.cache_tools is True + assert settings.include_system is True + assert settings.include_tools is True assert settings.ttl == "5m" def test_cache_settings_long(): settings = AnthropicCacheSettings.long() assert settings.enabled is True - assert settings.cache_system is True - assert settings.cache_tools is True + assert settings.include_system is True + assert settings.include_tools is True assert settings.ttl == "1h" @@ -216,7 +226,7 @@ def test_prepare_settings_dict_cache_off_no_injection(): assert "cache_control" not in data["tools"][-1] -def test_prepare_settings_dict_cache_system_only(): +def test_prepare_settings_dict_include_system_only(): settings = AnthropicChatPromptExecutionSettings( system="You are a helpful assistant.", cache=AnthropicCacheSettings.system(), @@ -228,7 +238,7 @@ def test_prepare_settings_dict_cache_system_only(): ] -def test_prepare_settings_dict_cache_tools_only(): +def test_prepare_settings_dict_include_tools_only(): tools = [ {"name": "tool_a", "description": "Tool A"}, {"name": "tool_b", "description": "Tool B"}, @@ -269,7 +279,7 @@ def test_prepare_settings_dict_cache_on_1h_ttl(): assert data["tools"][-1]["cache_control"] == {"type": "ephemeral", "ttl": "1h"} -def test_prepare_settings_dict_cache_system_already_list(): +def test_prepare_settings_dict_include_system_already_list(): """When system is pre-structured as list[dict], cache_control is injected on the last block.""" system_blocks = [ {"type": "text", "text": "First block."}, @@ -287,7 +297,7 @@ def test_prepare_settings_dict_cache_system_already_list(): assert "cache_control" not in system_blocks[-1] -def test_prepare_settings_dict_cache_system_empty_string_no_injection(): +def test_prepare_settings_dict_include_system_empty_string_no_injection(): """Empty system string should not be wrapped in a cache block.""" settings = AnthropicChatPromptExecutionSettings( system="", @@ -298,8 +308,8 @@ def test_prepare_settings_dict_cache_system_empty_string_no_injection(): 
assert not isinstance(data.get("system"), list) -def test_prepare_settings_dict_cache_tools_empty_no_injection(): - """No tools present — cache_tools flag should be a no-op.""" +def test_prepare_settings_dict_include_tools_empty_no_injection(): + """No tools present — include_tools flag should be a no-op.""" settings = AnthropicChatPromptExecutionSettings( cache=AnthropicCacheSettings.tools(), ) @@ -314,4 +324,56 @@ def test_prepare_settings_dict_cache_excluded_from_serialization(): assert "cache" not in data +def test_prepare_settings_dict_existing_cache_control_not_overwritten(): + """cache_control already present on the last tool/system block must not be clobbered.""" + existing_ctrl = {"type": "ephemeral", "ttl": "1h"} + tools = [{"name": "t", "description": "d", "cache_control": existing_ctrl}] + settings = AnthropicChatPromptExecutionSettings( + tools=tools, + cache=AnthropicCacheSettings.tools(ttl="5m"), + ) + data = settings.prepare_settings_dict() + assert data["tools"][-1]["cache_control"] == existing_ctrl + + +# endregion + +# region AnthropicCacheSettings — environment variable support + + +def test_cache_settings_from_env(monkeypatch): + """Settings are populated from ANTHROPIC_CACHE_* env vars.""" + monkeypatch.setenv("ANTHROPIC_CACHE_ENABLED", "true") + monkeypatch.setenv("ANTHROPIC_CACHE_INCLUDE_SYSTEM", "true") + monkeypatch.setenv("ANTHROPIC_CACHE_INCLUDE_TOOLS", "false") + monkeypatch.setenv("ANTHROPIC_CACHE_TTL", "1h") + settings = AnthropicCacheSettings() + assert settings.enabled is True + assert settings.include_system is True + assert settings.include_tools is False + assert settings.ttl == "1h" + + +def test_cache_settings_explicit_overrides_env(monkeypatch): + """Explicit constructor arguments take priority over environment variables.""" + monkeypatch.setenv("ANTHROPIC_CACHE_ENABLED", "true") + monkeypatch.setenv("ANTHROPIC_CACHE_TTL", "1h") + settings = AnthropicCacheSettings(enabled=False, ttl="5m") + assert settings.enabled is False + 
assert settings.ttl == "5m" + + +def test_cache_settings_env_disabled_by_default(monkeypatch): + """With no env vars set, cache is disabled by default.""" + for key in ( + "ANTHROPIC_CACHE_ENABLED", + "ANTHROPIC_CACHE_INCLUDE_SYSTEM", + "ANTHROPIC_CACHE_INCLUDE_TOOLS", + "ANTHROPIC_CACHE_TTL", + ): + monkeypatch.delenv(key, raising=False) + settings = AnthropicCacheSettings() + assert settings.enabled is False + + # endregion