From 11d1a22d7df860d91924d9ac93560f1eeced47cb Mon Sep 17 00:00:00 2001
From: Vishwa Vignan <iamvish83@gmail.com>
Date: Mon, 4 May 2026 17:26:21 +0530
Subject: [PATCH 1/3] sk/python/connectors/ai/anthropic | chatcompletion with
 cache options
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds AnthropicCacheSettings and a `cache` field on AnthropicChatPromptExecutionSettings
to enable opt-in prompt caching via the Anthropic cache_control API.

When enabled, prepare_settings_dict() injects cache_control blocks on the system
message and the last tool definition before the request is sent. No changes to
AnthropicChatCompletion — caching is fully contained in the settings layer.

Off by default; opt in with cache=AnthropicCacheSettings.on().
Convenience constructors: .on() .off() .system() .tools() .short() .long()
TTL: "5m" -> {"type":"ephemeral"}, "1h" -> {"type":"ephemeral","ttl":3600}

Includes 18 new unit tests and a usage sample at
samples/concepts/caching/anthropic_prompt_caching.py.
---
 .../caching/anthropic_prompt_caching.py       |  99 ++++++++++
 .../connectors/ai/anthropic/__init__.py       |   2 +
 .../anthropic_prompt_execution_settings.py    | 119 +++++++++++-
 .../test_anthropic_request_settings.py        | 170 ++++++++++++++++++
 4 files changed, 387 insertions(+), 3 deletions(-)
 create mode 100644 python/samples/concepts/caching/anthropic_prompt_caching.py

diff --git a/python/samples/concepts/caching/anthropic_prompt_caching.py b/python/samples/concepts/caching/anthropic_prompt_caching.py
new file mode 100644
index 000000000000..4015064f1de2
--- /dev/null
+++ b/python/samples/concepts/caching/anthropic_prompt_caching.py
@@ -0,0 +1,99 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+# This sample demonstrates Anthropic prompt caching with Semantic Kernel.
+# Prompt caching lets you mark parts of a request (system message, tool definitions)
+# as cacheable so that repeated calls reuse the cached tokens at 0.1x read cost.
+#
+# Prerequisites:
+#   - Set ANTHROPIC_API_KEY and ANTHROPIC_CHAT_MODEL_ID in your environment or a .env file.
+#   - Model must support caching (claude-haiku-4-5, claude-sonnet-4-x, claude-opus-4-x).
+#   - Minimum tokens to activate cache: 4,096 (Haiku), 1,024 (Sonnet/Opus).
+#
+# Run:
+#   uv run python samples/concepts/caching/anthropic_prompt_caching.py
+
+import asyncio
+
+from semantic_kernel import Kernel
+from semantic_kernel.connectors.ai.anthropic import (
+    AnthropicCacheSettings,
+    AnthropicChatCompletion,
+    AnthropicChatPromptExecutionSettings,
+)
+from semantic_kernel.contents import ChatHistory
+
+# A long system prompt that exceeds the minimum token threshold for caching.
+# In production this would typically be a large instruction set, persona, or
+# document that stays the same across many turns.
+SYSTEM_PROMPT = (
+    """
+You are an expert software engineer specializing in Python and distributed systems.
+You provide precise, production-quality answers. When writing code you follow these rules:
+  - Use type hints throughout.
+  - Prefer composition over inheritance.
+  - Write small, single-purpose functions.
+  - Handle errors explicitly; never silence exceptions.
+  - Include a brief docstring only when the intent is non-obvious.
+  - Use async/await for all I/O-bound operations.
+  - Prefer dataclasses or Pydantic models for structured data.
+
+You are also familiar with the following internal guidelines:
+  - All public APIs must be versioned.
+  - Services communicate over gRPC with Protobuf schemas checked into the repo.
+  - Secrets are injected at runtime via environment variables; never hardcoded.
+  - Observability: every service emits structured JSON logs and OpenTelemetry traces.
+  - Deployments use Kubernetes with Helm charts; no raw manifests.
+
+When asked to review code, structure your response as:
+  1. Summary (1-2 sentences)
+  2. Issues (bulleted, severity labeled)
+  3. Suggested fix (code block if applicable)
+"""
+    * 3
+)  # repeat to ensure we comfortably exceed the 1,024-token minimum
+
+
+async def chat_with_caching() -> None:
+    """Run a multi-turn chat with prompt caching enabled on the system message."""
+    kernel = Kernel()
+
+    service = AnthropicChatCompletion(service_id="anthropic")
+    kernel.add_service(service)
+
+    # AnthropicCacheSettings.on() enables caching for both the system message and
+    # tool definitions. Use .system() or .tools() to cache only one section.
+    # Use .long() for 1-hour TTL when calls are infrequent.
+    settings = AnthropicChatPromptExecutionSettings(
+        service_id="anthropic",
+        max_tokens=512,
+        cache=AnthropicCacheSettings.on(),
+    )
+
+    chat_history = ChatHistory(system_message=SYSTEM_PROMPT)
+
+    questions = [
+        "What is the difference between asyncio.gather and asyncio.TaskGroup?",
+        "When would you choose gRPC over REST for an internal service?",
+        "How do you structure a Pydantic settings class for a twelve-factor app?",
+    ]
+
+    print("Anthropic Prompt Caching Demo")
+    print("=" * 50)
+    print("System prompt is marked for caching. The first call writes the cache;")
+    print("subsequent calls read from it at 0.1x token cost.\n")
+
+    for i, question in enumerate(questions, start=1):
+        print(f"Turn {i}: {question}")
+        chat_history.add_user_message(question)
+
+        response = await service.get_chat_message_content(
+            chat_history=chat_history,
+            settings=settings,
+        )
+        if response:
+            print(f"Assistant: {response}\n")
+            chat_history.add_message(response)
+
+
+if __name__ == "__main__":
+    asyncio.run(chat_with_caching())
diff --git a/python/semantic_kernel/connectors/ai/anthropic/__init__.py b/python/semantic_kernel/connectors/ai/anthropic/__init__.py
index c5d96ddd147f..93521cf393b2 100644
--- a/python/semantic_kernel/connectors/ai/anthropic/__init__.py
+++ b/python/semantic_kernel/connectors/ai/anthropic/__init__.py
@@ -1,11 +1,13 @@
 # Copyright (c) Microsoft. All rights reserved.
 
 from semantic_kernel.connectors.ai.anthropic.prompt_execution_settings.anthropic_prompt_execution_settings import (
+    AnthropicCacheSettings,
     AnthropicChatPromptExecutionSettings,
 )
 from semantic_kernel.connectors.ai.anthropic.services.anthropic_chat_completion import AnthropicChatCompletion
 
 __all__ = [
+    "AnthropicCacheSettings",
     "AnthropicChatCompletion",
     "AnthropicChatPromptExecutionSettings",
 ]
diff --git a/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py
index c18fcb30c732..316ba7b60a83 100644
--- a/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py
+++ b/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py
@@ -1,9 +1,10 @@
 # Copyright (c) Microsoft. All rights reserved.
 
+import copy
 import logging
-from typing import Annotated, Any
+from typing import Annotated, Any, Literal
 
-from pydantic import Field, model_validator
+from pydantic import BaseModel, Field, model_validator
 
 from semantic_kernel.connectors.ai.function_choice_type import FunctionChoiceType
 from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
@@ -12,6 +13,88 @@
 logger = logging.getLogger(__name__)
 
 
+class AnthropicCacheSettings(BaseModel):
+    """Configuration for Anthropic prompt caching.
+
+    Controls which parts of the request receive cache_control injection.
+
+    Anthropic minimum token thresholds for cache activation:
+      - claude-haiku-4-5 : 4,096 tokens
+      - claude-sonnet-4-x: 1,024 tokens
+      - claude-opus-4-x  : 1,024 tokens
+
+    TTL options:
+      - "5m": ephemeral 5-minute cache (1.25x write cost, 0.1x read cost)
+      - "1h": extended 1-hour cache (2x write cost, 0.1x read cost)
+
+    Use the classmethods for common configurations::
+
+        AnthropicCacheSettings.on()  # enable system + tools caching
+        AnthropicCacheSettings.off()  # disable all caching (default)
+        AnthropicCacheSettings.system()  # cache system message only
+        AnthropicCacheSettings.tools()  # cache tool definitions only
+    """
+
+    enabled: Annotated[
+        bool,
+        Field(description="Master switch — disabling skips all cache_control injection regardless of other flags."),
+    ] = False
+    cache_system: Annotated[
+        bool,
+        Field(description="Inject cache_control on the system message content block."),
+    ] = False
+    cache_tools: Annotated[
+        bool,
+        Field(description="Inject cache_control on the last tool definition, caching the entire tools array prefix."),
+    ] = False
+    ttl: Annotated[
+        Literal["5m", "1h"],
+        Field(description="Cache TTL. '5m' = 5-minute ephemeral (default). '1h' = 1-hour extended."),
+    ] = "5m"
+
+    def _cache_control(self) -> dict[str, Any]:
+        """Return the cache_control block for the configured TTL."""
+        if self.ttl == "1h":
+            return {"type": "ephemeral", "ttl": 3600}
+        return {"type": "ephemeral"}
+
+    @classmethod
+    def on(cls, ttl: Literal["5m", "1h"] = "5m") -> "AnthropicCacheSettings":
+        """Enable caching for all supported request sections (system + tools)."""
+        return cls(enabled=True, cache_system=True, cache_tools=True, ttl=ttl)
+
+    @classmethod
+    def off(cls) -> "AnthropicCacheSettings":
+        """Disable all cache_control injection."""
+        return cls(enabled=False)
+
+    @classmethod
+    def system(cls, ttl: Literal["5m", "1h"] = "5m") -> "AnthropicCacheSettings":
+        """Enable caching for the system message only."""
+        return cls(enabled=True, cache_system=True, cache_tools=False, ttl=ttl)
+
+    @classmethod
+    def tools(cls, ttl: Literal["5m", "1h"] = "5m") -> "AnthropicCacheSettings":
+        """Enable caching for tool definitions only."""
+        return cls(enabled=True, cache_system=False, cache_tools=True, ttl=ttl)
+
+    @classmethod
+    def short(cls) -> "AnthropicCacheSettings":
+        """5-minute TTL. Use for tight agentic loops where the same prompt repeats within minutes.
+
+        Write cost: 1.25x. Read cost: 0.1x. Breaks even after a single cache hit.
+        """
+        return cls(enabled=True, cache_system=True, cache_tools=True, ttl="5m")
+
+    @classmethod
+    def long(cls) -> "AnthropicCacheSettings":
+        """1-hour TTL. Use for batch jobs or scheduled tasks with long gaps between calls.
+
+        Write cost: 2x. Read cost: 0.1x. Needs at least 2 cache hits to break even.
+        """
+        return cls(enabled=True, cache_system=True, cache_tools=True, ttl="1h")
+
+
 class AnthropicPromptExecutionSettings(PromptExecutionSettings):
     """Common request settings for Anthropic services."""
 
@@ -23,7 +106,7 @@ class AnthropicChatPromptExecutionSettings(AnthropicPromptExecutionSettings):
 
     messages: list[dict[str, Any]] | None = None
     stream: bool | None = None
-    system: str | None = None
+    system: str | list[dict[str, Any]] | None = None
     max_tokens: Annotated[int, Field(gt=0)] = 1024
     temperature: Annotated[float | None, Field(ge=0.0, le=2.0)] = None
     stop_sequences: list[str] | None = None
@@ -43,6 +126,13 @@ class AnthropicChatPromptExecutionSettings(AnthropicPromptExecutionSettings):
             description="Do not set this manually. It is set by the service based on the function choice configuration."
         ),
     ] = None
+    cache: Annotated[
+        AnthropicCacheSettings,
+        Field(
+            description="Prompt caching configuration. Disabled by default.",
+            exclude=True,
+        ),
+    ] = Field(default_factory=AnthropicCacheSettings)
 
     @model_validator(mode="after")
     def validate_tool_choice(self) -> "AnthropicChatPromptExecutionSettings":
@@ -53,3 +143,26 @@ def validate_tool_choice(self) -> "AnthropicChatPromptExecutionSettings":
             raise ServiceInvalidExecutionSettingsError("Tool choice 'none' is not supported by Anthropic.")
 
         return self
+
+    def prepare_settings_dict(self, **kwargs: Any) -> dict[str, Any]:
+        """Prepare the settings dictionary, injecting cache_control blocks when caching is enabled."""
+        data = super().prepare_settings_dict(**kwargs)
+
+        if not self.cache.enabled:
+            return data
+
+        cache_control = self.cache._cache_control()
+
+        if self.cache.cache_system:
+            system = data.get("system")
+            if isinstance(system, str) and system:
+                data["system"] = [{"type": "text", "text": system, "cache_control": cache_control}]
+
+        if self.cache.cache_tools:
+            tools: list[dict[str, Any]] | None = data.get("tools")
+            if tools:
+                tools = copy.deepcopy(tools)
+                tools[-1]["cache_control"] = cache_control
+                data["tools"] = tools
+
+        return data
diff --git a/python/tests/unit/connectors/ai/anthropic/test_anthropic_request_settings.py b/python/tests/unit/connectors/ai/anthropic/test_anthropic_request_settings.py
index 531823281ae2..525acfdab1d1 100644
--- a/python/tests/unit/connectors/ai/anthropic/test_anthropic_request_settings.py
+++ b/python/tests/unit/connectors/ai/anthropic/test_anthropic_request_settings.py
@@ -3,6 +3,7 @@
 import pytest
 
 from semantic_kernel.connectors.ai.anthropic.prompt_execution_settings.anthropic_prompt_execution_settings import (
+    AnthropicCacheSettings,
     AnthropicChatPromptExecutionSettings,
 )
 from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
@@ -127,3 +128,172 @@ def test_tool_choice_none():
             },
             function_choice_behavior=FunctionChoiceBehavior.NoneInvoke(),
         )
+
+
+# region AnthropicCacheSettings
+
+
+def test_cache_settings_default_is_off():
+    settings = AnthropicCacheSettings()
+    assert settings.enabled is False
+    assert settings.cache_system is False
+    assert settings.cache_tools is False
+    assert settings.ttl == "5m"
+
+
+def test_cache_settings_on():
+    settings = AnthropicCacheSettings.on()
+    assert settings.enabled is True
+    assert settings.cache_system is True
+    assert settings.cache_tools is True
+    assert settings.ttl == "5m"
+
+
+def test_cache_settings_on_with_1h_ttl():
+    settings = AnthropicCacheSettings.on(ttl="1h")
+    assert settings.enabled is True
+    assert settings.ttl == "1h"
+
+
+def test_cache_settings_off():
+    settings = AnthropicCacheSettings.off()
+    assert settings.enabled is False
+
+
+def test_cache_settings_system_only():
+    settings = AnthropicCacheSettings.system()
+    assert settings.enabled is True
+    assert settings.cache_system is True
+    assert settings.cache_tools is False
+
+
+def test_cache_settings_tools_only():
+    settings = AnthropicCacheSettings.tools()
+    assert settings.enabled is True
+    assert settings.cache_system is False
+    assert settings.cache_tools is True
+
+
+def test_cache_control_5m():
+    ctrl = AnthropicCacheSettings.on(ttl="5m")._cache_control()
+    assert ctrl == {"type": "ephemeral"}
+
+
+def test_cache_control_1h():
+    ctrl = AnthropicCacheSettings.on(ttl="1h")._cache_control()
+    assert ctrl == {"type": "ephemeral", "ttl": 3600}
+
+
+def test_cache_settings_short():
+    settings = AnthropicCacheSettings.short()
+    assert settings.enabled is True
+    assert settings.cache_system is True
+    assert settings.cache_tools is True
+    assert settings.ttl == "5m"
+
+
+def test_cache_settings_long():
+    settings = AnthropicCacheSettings.long()
+    assert settings.enabled is True
+    assert settings.cache_system is True
+    assert settings.cache_tools is True
+    assert settings.ttl == "1h"
+
+
+# endregion
+
+# region prepare_settings_dict with caching
+
+
+def test_prepare_settings_dict_cache_off_no_injection():
+    settings = AnthropicChatPromptExecutionSettings(
+        system="You are a helpful assistant.",
+        tools=[{"name": "search", "description": "Search the web"}],
+        cache=AnthropicCacheSettings.off(),
+    )
+    data = settings.prepare_settings_dict()
+    assert data["system"] == "You are a helpful assistant."
+    assert "cache_control" not in data["tools"][-1]
+
+
+def test_prepare_settings_dict_cache_system_only():
+    settings = AnthropicChatPromptExecutionSettings(
+        system="You are a helpful assistant.",
+        cache=AnthropicCacheSettings.system(),
+    )
+    data = settings.prepare_settings_dict()
+    assert isinstance(data["system"], list)
+    assert data["system"] == [
+        {"type": "text", "text": "You are a helpful assistant.", "cache_control": {"type": "ephemeral"}}
+    ]
+
+
+def test_prepare_settings_dict_cache_tools_only():
+    tools = [
+        {"name": "tool_a", "description": "Tool A"},
+        {"name": "tool_b", "description": "Tool B"},
+    ]
+    settings = AnthropicChatPromptExecutionSettings(
+        tools=tools,
+        cache=AnthropicCacheSettings.tools(),
+    )
+    data = settings.prepare_settings_dict()
+    assert "cache_control" not in data["tools"][0]
+    assert data["tools"][-1]["cache_control"] == {"type": "ephemeral"}
+    # original tools list must not be mutated
+    assert "cache_control" not in tools[-1]
+
+
+def test_prepare_settings_dict_cache_on_system_and_tools():
+    tools = [{"name": "search", "description": "Search the web"}]
+    settings = AnthropicChatPromptExecutionSettings(
+        system="You are a helpful assistant.",
+        tools=tools,
+        cache=AnthropicCacheSettings.on(),
+    )
+    data = settings.prepare_settings_dict()
+    assert isinstance(data["system"], list)
+    assert data["system"][0]["cache_control"] == {"type": "ephemeral"}
+    assert data["tools"][-1]["cache_control"] == {"type": "ephemeral"}
+
+
+def test_prepare_settings_dict_cache_on_1h_ttl():
+    tools = [{"name": "search", "description": "Search the web"}]
+    settings = AnthropicChatPromptExecutionSettings(
+        system="You are a helpful assistant.",
+        tools=tools,
+        cache=AnthropicCacheSettings.on(ttl="1h"),
+    )
+    data = settings.prepare_settings_dict()
+    assert data["system"][0]["cache_control"] == {"type": "ephemeral", "ttl": 3600}
+    assert data["tools"][-1]["cache_control"] == {"type": "ephemeral", "ttl": 3600}
+
+
+def test_prepare_settings_dict_cache_system_empty_string_no_injection():
+    """Empty system string should not be wrapped in a cache block."""
+    settings = AnthropicChatPromptExecutionSettings(
+        system="",
+        cache=AnthropicCacheSettings.system(),
+    )
+    data = settings.prepare_settings_dict()
+    # empty string — no injection expected
+    assert not isinstance(data.get("system"), list)
+
+
+def test_prepare_settings_dict_cache_tools_empty_no_injection():
+    """No tools present — cache_tools flag should be a no-op."""
+    settings = AnthropicChatPromptExecutionSettings(
+        cache=AnthropicCacheSettings.tools(),
+    )
+    data = settings.prepare_settings_dict()
+    assert data.get("tools") is None
+
+
+def test_prepare_settings_dict_cache_excluded_from_serialization():
+    """The cache field must not appear in the serialized API payload."""
+    settings = AnthropicChatPromptExecutionSettings(cache=AnthropicCacheSettings.on())
+    data = settings.prepare_settings_dict()
+    assert "cache" not in data
+
+
+# endregion

From da6de642219aab494db28dabb7756a0b3336e750 Mon Sep 17 00:00:00 2001
From: Vishwa Vignan <iamvish83@gmail.com>
Date: Tue, 5 May 2026 15:13:58 +0530
Subject: [PATCH 2/3] fix: correct cache_control TTL value and handle
 pre-structured system blocks

- _cache_control() now emits {"ttl":"1h"} string per CacheControlEphemeralParam
  spec instead of integer 3600
- prepare_settings_dict() now injects cache_control on the last block of a
  list[dict] system prompt as well as on plain strings; previously a
  pre-structured system prompt was silently sent without cache_control
- add test covering cache injection when system is pre-structured as list[dict]
- update 1h TTL test assertions to match corrected string value
---
 .../anthropic_prompt_execution_settings.py    |  6 ++++-
 .../test_anthropic_request_settings.py        | 24 ++++++++++++++++---
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py
index 316ba7b60a83..9d518e88c73a 100644
--- a/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py
+++ b/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py
@@ -55,7 +55,7 @@ class AnthropicCacheSettings(BaseModel):
     def _cache_control(self) -> dict[str, Any]:
         """Return the cache_control block for the configured TTL."""
         if self.ttl == "1h":
-            return {"type": "ephemeral", "ttl": 3600}
+            return {"type": "ephemeral", "ttl": "1h"}
         return {"type": "ephemeral"}
 
     @classmethod
@@ -157,6 +157,10 @@ def prepare_settings_dict(self, **kwargs: Any) -> dict[str, Any]:
             system = data.get("system")
             if isinstance(system, str) and system:
                 data["system"] = [{"type": "text", "text": system, "cache_control": cache_control}]
+            elif isinstance(system, list) and system:
+                system = copy.deepcopy(system)
+                system[-1]["cache_control"] = cache_control
+                data["system"] = system
 
         if self.cache.cache_tools:
             tools: list[dict[str, Any]] | None = data.get("tools")
diff --git a/python/tests/unit/connectors/ai/anthropic/test_anthropic_request_settings.py b/python/tests/unit/connectors/ai/anthropic/test_anthropic_request_settings.py
index 525acfdab1d1..2143bdcbaedd 100644
--- a/python/tests/unit/connectors/ai/anthropic/test_anthropic_request_settings.py
+++ b/python/tests/unit/connectors/ai/anthropic/test_anthropic_request_settings.py
@@ -181,7 +181,7 @@ def test_cache_control_5m():
 
 def test_cache_control_1h():
     ctrl = AnthropicCacheSettings.on(ttl="1h")._cache_control()
-    assert ctrl == {"type": "ephemeral", "ttl": 3600}
+    assert ctrl == {"type": "ephemeral", "ttl": "1h"}
 
 
 def test_cache_settings_short():
@@ -265,8 +265,26 @@ def test_prepare_settings_dict_cache_on_1h_ttl():
         cache=AnthropicCacheSettings.on(ttl="1h"),
     )
     data = settings.prepare_settings_dict()
-    assert data["system"][0]["cache_control"] == {"type": "ephemeral", "ttl": 3600}
-    assert data["tools"][-1]["cache_control"] == {"type": "ephemeral", "ttl": 3600}
+    assert data["system"][0]["cache_control"] == {"type": "ephemeral", "ttl": "1h"}
+    assert data["tools"][-1]["cache_control"] == {"type": "ephemeral", "ttl": "1h"}
+
+
+def test_prepare_settings_dict_cache_system_already_list():
+    """When system is pre-structured as list[dict], cache_control is injected on the last block."""
+    system_blocks = [
+        {"type": "text", "text": "First block."},
+        {"type": "text", "text": "Second block."},
+    ]
+    settings = AnthropicChatPromptExecutionSettings(
+        system=system_blocks,
+        cache=AnthropicCacheSettings.system(),
+    )
+    data = settings.prepare_settings_dict()
+    assert isinstance(data["system"], list)
+    assert "cache_control" not in data["system"][0]
+    assert data["system"][-1]["cache_control"] == {"type": "ephemeral"}
+    # original list must not be mutated
+    assert "cache_control" not in system_blocks[-1]
 
 
 def test_prepare_settings_dict_cache_system_empty_string_no_injection():

From f71799f4dde0a54e5dbc6a513e63834b916c00ee Mon Sep 17 00:00:00 2001
From: Vishwa Vignan <iamvish83@gmail.com>
Date: Tue, 5 May 2026 21:54:57 +0530
Subject: [PATCH 3/3] =?UTF-8?q?fix:=20address=20Copilot=20review=20?=
 =?UTF-8?q?=E2=80=94=20KernelBaseSettings,=20field=20rename,=20shallow=20c?=
 =?UTF-8?q?opy,=20no-overwrite?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- AnthropicCacheSettings now inherits from KernelBaseSettings (consistent
  with the rest of the SDK; enables validate_assignment, populate_by_name)
- Added env_prefix = "ANTHROPIC_CACHE_" so caching can be toggled via
  environment variables (ANTHROPIC_CACHE_ENABLED, ANTHROPIC_CACHE_TTL, etc.)
- Renamed cache_system/cache_tools fields to include_system/include_tools
  (removes redundant "cache" prefix on fields inside a cache settings class)
- Replaced copy.deepcopy with shallow list + dict spread — cheaper for large
  tool catalogs where caching is most beneficial
- Injection is now skipped when cache_control is already present on the last
  block, so a caller's explicit setting is never silently overwritten
- Replaced two _cache_control() private-method tests with prepare_settings_dict()
  equivalents; added env-var tests (monkeypatch) and no-overwrite test

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../anthropic_prompt_execution_settings.py    |  48 ++++----
 .../test_anthropic_request_settings.py        | 110 ++++++++++++++----
 2 files changed, 113 insertions(+), 45 deletions(-)

diff --git a/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py
index 9d518e88c73a..e175d2240a02 100644
--- a/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py
+++ b/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py
@@ -1,22 +1,30 @@
 # Copyright (c) Microsoft. All rights reserved.
 
-import copy
 import logging
-from typing import Annotated, Any, Literal
+from typing import Annotated, Any, ClassVar, Literal
 
-from pydantic import BaseModel, Field, model_validator
+from pydantic import Field, model_validator
 
 from semantic_kernel.connectors.ai.function_choice_type import FunctionChoiceType
 from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
 from semantic_kernel.exceptions import ServiceInvalidExecutionSettingsError
+from semantic_kernel.kernel_pydantic import KernelBaseSettings
 
 logger = logging.getLogger(__name__)
 
 
-class AnthropicCacheSettings(BaseModel):
+class AnthropicCacheSettings(KernelBaseSettings):
     """Configuration for Anthropic prompt caching.
 
     Controls which parts of the request receive cache_control injection.
+    Settings are loaded from environment variables with the prefix 'ANTHROPIC_CACHE_',
+    then from a .env file, then from defaults. Explicit constructor arguments always win.
+
+    Environment variables (prefix 'ANTHROPIC_CACHE_'):
+      - ANTHROPIC_CACHE_ENABLED        — master switch, bool (default: false)
+      - ANTHROPIC_CACHE_INCLUDE_SYSTEM — cache system message, bool (default: false)
+      - ANTHROPIC_CACHE_INCLUDE_TOOLS  — cache tool definitions, bool (default: false)
+      - ANTHROPIC_CACHE_TTL            — cache TTL, "5m" or "1h" (default: "5m")
 
     Anthropic minimum token thresholds for cache activation:
       - claude-haiku-4-5 : 4,096 tokens
@@ -35,15 +43,17 @@ class AnthropicCacheSettings(BaseModel):
         AnthropicCacheSettings.tools()  # cache tool definitions only
     """
 
+    env_prefix: ClassVar[str] = "ANTHROPIC_CACHE_"
+
     enabled: Annotated[
         bool,
         Field(description="Master switch — disabling skips all cache_control injection regardless of other flags."),
     ] = False
-    cache_system: Annotated[
+    include_system: Annotated[
         bool,
         Field(description="Inject cache_control on the system message content block."),
     ] = False
-    cache_tools: Annotated[
+    include_tools: Annotated[
         bool,
         Field(description="Inject cache_control on the last tool definition, caching the entire tools array prefix."),
     ] = False
@@ -61,7 +71,7 @@ def _cache_control(self) -> dict[str, Any]:
     @classmethod
     def on(cls, ttl: Literal["5m", "1h"] = "5m") -> "AnthropicCacheSettings":
         """Enable caching for all supported request sections (system + tools)."""
-        return cls(enabled=True, cache_system=True, cache_tools=True, ttl=ttl)
+        return cls(enabled=True, include_system=True, include_tools=True, ttl=ttl)
 
     @classmethod
     def off(cls) -> "AnthropicCacheSettings":
@@ -71,12 +81,12 @@ def off(cls) -> "AnthropicCacheSettings":
     @classmethod
     def system(cls, ttl: Literal["5m", "1h"] = "5m") -> "AnthropicCacheSettings":
         """Enable caching for the system message only."""
-        return cls(enabled=True, cache_system=True, cache_tools=False, ttl=ttl)
+        return cls(enabled=True, include_system=True, include_tools=False, ttl=ttl)
 
     @classmethod
     def tools(cls, ttl: Literal["5m", "1h"] = "5m") -> "AnthropicCacheSettings":
         """Enable caching for tool definitions only."""
-        return cls(enabled=True, cache_system=False, cache_tools=True, ttl=ttl)
+        return cls(enabled=True, include_system=False, include_tools=True, ttl=ttl)
 
     @classmethod
     def short(cls) -> "AnthropicCacheSettings":
@@ -84,7 +94,7 @@ def short(cls) -> "AnthropicCacheSettings":
 
         Write cost: 1.25x. Read cost: 0.1x. Breaks even after a single cache hit.
         """
-        return cls(enabled=True, cache_system=True, cache_tools=True, ttl="5m")
+        return cls(enabled=True, include_system=True, include_tools=True, ttl="5m")
 
     @classmethod
     def long(cls) -> "AnthropicCacheSettings":
@@ -92,7 +102,7 @@ def long(cls) -> "AnthropicCacheSettings":
 
         Write cost: 2x. Read cost: 0.1x. Needs at least 2 cache hits to break even.
         """
-        return cls(enabled=True, cache_system=True, cache_tools=True, ttl="1h")
+        return cls(enabled=True, include_system=True, include_tools=True, ttl="1h")
 
 
 class AnthropicPromptExecutionSettings(PromptExecutionSettings):
@@ -153,20 +163,16 @@ def prepare_settings_dict(self, **kwargs: Any) -> dict[str, Any]:
 
         cache_control = self.cache._cache_control()
 
-        if self.cache.cache_system:
+        if self.cache.include_system:
             system = data.get("system")
             if isinstance(system, str) and system:
                 data["system"] = [{"type": "text", "text": system, "cache_control": cache_control}]
-            elif isinstance(system, list) and system:
-                system = copy.deepcopy(system)
-                system[-1]["cache_control"] = cache_control
-                data["system"] = system
+            elif isinstance(system, list) and system and "cache_control" not in system[-1]:
+                data["system"] = [*system[:-1], {**system[-1], "cache_control": cache_control}]
 
-        if self.cache.cache_tools:
+        if self.cache.include_tools:
             tools: list[dict[str, Any]] | None = data.get("tools")
-            if tools:
-                tools = copy.deepcopy(tools)
-                tools[-1]["cache_control"] = cache_control
-                data["tools"] = tools
+            if tools and "cache_control" not in tools[-1]:
+                data["tools"] = [*tools[:-1], {**tools[-1], "cache_control": cache_control}]
 
         return data
diff --git a/python/tests/unit/connectors/ai/anthropic/test_anthropic_request_settings.py b/python/tests/unit/connectors/ai/anthropic/test_anthropic_request_settings.py
index 2143bdcbaedd..885915d1e3ae 100644
--- a/python/tests/unit/connectors/ai/anthropic/test_anthropic_request_settings.py
+++ b/python/tests/unit/connectors/ai/anthropic/test_anthropic_request_settings.py
@@ -136,16 +136,16 @@ def test_tool_choice_none():
 def test_cache_settings_default_is_off():
     settings = AnthropicCacheSettings()
     assert settings.enabled is False
-    assert settings.cache_system is False
-    assert settings.cache_tools is False
+    assert settings.include_system is False
+    assert settings.include_tools is False
     assert settings.ttl == "5m"
 
 
 def test_cache_settings_on():
     settings = AnthropicCacheSettings.on()
     assert settings.enabled is True
-    assert settings.cache_system is True
-    assert settings.cache_tools is True
+    assert settings.include_system is True
+    assert settings.include_tools is True
     assert settings.ttl == "5m"
 
 
@@ -163,40 +163,50 @@ def test_cache_settings_off():
 def test_cache_settings_system_only():
     settings = AnthropicCacheSettings.system()
     assert settings.enabled is True
-    assert settings.cache_system is True
-    assert settings.cache_tools is False
+    assert settings.include_system is True
+    assert settings.include_tools is False
 
 
 def test_cache_settings_tools_only():
     settings = AnthropicCacheSettings.tools()
     assert settings.enabled is True
-    assert settings.cache_system is False
-    assert settings.cache_tools is True
+    assert settings.include_system is False
+    assert settings.include_tools is True
 
 
-def test_cache_control_5m():
-    ctrl = AnthropicCacheSettings.on(ttl="5m")._cache_control()
-    assert ctrl == {"type": "ephemeral"}
+def test_cache_control_5m_via_prepare():
+    """5m TTL emits ephemeral block without a ttl key."""
+    settings = AnthropicChatPromptExecutionSettings(
+        system="Hello.",
+        cache=AnthropicCacheSettings.on(ttl="5m"),
+    )
+    data = settings.prepare_settings_dict()
+    assert data["system"][0]["cache_control"] == {"type": "ephemeral"}
 
 
-def test_cache_control_1h():
-    ctrl = AnthropicCacheSettings.on(ttl="1h")._cache_control()
-    assert ctrl == {"type": "ephemeral", "ttl": "1h"}
+def test_cache_control_1h_via_prepare():
+    """1h TTL emits ephemeral block with ttl string '1h'."""
+    settings = AnthropicChatPromptExecutionSettings(
+        system="Hello.",
+        cache=AnthropicCacheSettings.on(ttl="1h"),
+    )
+    data = settings.prepare_settings_dict()
+    assert data["system"][0]["cache_control"] == {"type": "ephemeral", "ttl": "1h"}
 
 
 def test_cache_settings_short():
     settings = AnthropicCacheSettings.short()
     assert settings.enabled is True
-    assert settings.cache_system is True
-    assert settings.cache_tools is True
+    assert settings.include_system is True
+    assert settings.include_tools is True
     assert settings.ttl == "5m"
 
 
 def test_cache_settings_long():
     settings = AnthropicCacheSettings.long()
     assert settings.enabled is True
-    assert settings.cache_system is True
-    assert settings.cache_tools is True
+    assert settings.include_system is True
+    assert settings.include_tools is True
     assert settings.ttl == "1h"
 
 
@@ -216,7 +226,7 @@ def test_prepare_settings_dict_cache_off_no_injection():
     assert "cache_control" not in data["tools"][-1]
 
 
-def test_prepare_settings_dict_cache_system_only():
+def test_prepare_settings_dict_include_system_only():
     settings = AnthropicChatPromptExecutionSettings(
         system="You are a helpful assistant.",
         cache=AnthropicCacheSettings.system(),
@@ -228,7 +238,7 @@ def test_prepare_settings_dict_cache_system_only():
     ]
 
 
-def test_prepare_settings_dict_cache_tools_only():
+def test_prepare_settings_dict_include_tools_only():
     tools = [
         {"name": "tool_a", "description": "Tool A"},
         {"name": "tool_b", "description": "Tool B"},
@@ -269,7 +279,7 @@ def test_prepare_settings_dict_cache_on_1h_ttl():
     assert data["tools"][-1]["cache_control"] == {"type": "ephemeral", "ttl": "1h"}
 
 
-def test_prepare_settings_dict_cache_system_already_list():
+def test_prepare_settings_dict_include_system_already_list():
     """When system is pre-structured as list[dict], cache_control is injected on the last block."""
     system_blocks = [
         {"type": "text", "text": "First block."},
@@ -287,7 +297,7 @@ def test_prepare_settings_dict_cache_system_already_list():
     assert "cache_control" not in system_blocks[-1]
 
 
-def test_prepare_settings_dict_cache_system_empty_string_no_injection():
+def test_prepare_settings_dict_include_system_empty_string_no_injection():
     """Empty system string should not be wrapped in a cache block."""
     settings = AnthropicChatPromptExecutionSettings(
         system="",
@@ -298,8 +308,8 @@ def test_prepare_settings_dict_cache_system_empty_string_no_injection():
     assert not isinstance(data.get("system"), list)
 
 
-def test_prepare_settings_dict_cache_tools_empty_no_injection():
-    """No tools present — cache_tools flag should be a no-op."""
+def test_prepare_settings_dict_include_tools_empty_no_injection():
+    """No tools present — include_tools flag should be a no-op."""
     settings = AnthropicChatPromptExecutionSettings(
         cache=AnthropicCacheSettings.tools(),
     )
@@ -314,4 +324,56 @@ def test_prepare_settings_dict_cache_excluded_from_serialization():
     assert "cache" not in data
 
 
+def test_prepare_settings_dict_existing_cache_control_not_overwritten():
+    """cache_control already present on the last tool block must not be clobbered."""
+    existing_ctrl = {"type": "ephemeral", "ttl": "1h"}
+    tools = [{"name": "t", "description": "d", "cache_control": existing_ctrl}]
+    settings = AnthropicChatPromptExecutionSettings(
+        tools=tools,
+        cache=AnthropicCacheSettings.tools(ttl="5m"),
+    )
+    data = settings.prepare_settings_dict()
+    assert data["tools"][-1]["cache_control"] == existing_ctrl
+
+
+# endregion
+
+# region AnthropicCacheSettings — environment variable support
+
+
+def test_cache_settings_from_env(monkeypatch):
+    """Settings are populated from ANTHROPIC_CACHE_* env vars."""
+    monkeypatch.setenv("ANTHROPIC_CACHE_ENABLED", "true")
+    monkeypatch.setenv("ANTHROPIC_CACHE_INCLUDE_SYSTEM", "true")
+    monkeypatch.setenv("ANTHROPIC_CACHE_INCLUDE_TOOLS", "false")
+    monkeypatch.setenv("ANTHROPIC_CACHE_TTL", "1h")
+    settings = AnthropicCacheSettings()
+    assert settings.enabled is True
+    assert settings.include_system is True
+    assert settings.include_tools is False
+    assert settings.ttl == "1h"
+
+
+def test_cache_settings_explicit_overrides_env(monkeypatch):
+    """Explicit constructor arguments take priority over environment variables."""
+    monkeypatch.setenv("ANTHROPIC_CACHE_ENABLED", "true")
+    monkeypatch.setenv("ANTHROPIC_CACHE_TTL", "1h")
+    settings = AnthropicCacheSettings(enabled=False, ttl="5m")
+    assert settings.enabled is False
+    assert settings.ttl == "5m"
+
+
+def test_cache_settings_env_disabled_by_default(monkeypatch):
+    """With no env vars set, cache is disabled by default."""
+    for key in (
+        "ANTHROPIC_CACHE_ENABLED",
+        "ANTHROPIC_CACHE_INCLUDE_SYSTEM",
+        "ANTHROPIC_CACHE_INCLUDE_TOOLS",
+        "ANTHROPIC_CACHE_TTL",
+    ):
+        monkeypatch.delenv(key, raising=False)
+    settings = AnthropicCacheSettings()
+    assert settings.enabled is False
+
+
 # endregion