# Copyright (c) Microsoft. All rights reserved.

# Demonstrates Anthropic prompt caching through Semantic Kernel.
#
# Prompt caching marks parts of a request (system message, tool definitions) as
# cacheable, so repeated calls reuse the cached tokens at 0.1x read cost.
#
# Prerequisites:
# - ANTHROPIC_API_KEY and ANTHROPIC_CHAT_MODEL_ID set in the environment or a .env file.
# - A model that supports caching (claude-haiku-4-5, claude-sonnet-4-x, claude-opus-4-x).
# - Enough prompt tokens to activate the cache: 4,096 (Haiku), 1,024 (Sonnet/Opus).
#
# Run:
# uv run python samples/concepts/caching/anthropic_prompt_caching.py

import asyncio

from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.anthropic import (
    AnthropicCacheSettings,
    AnthropicChatCompletion,
    AnthropicChatPromptExecutionSettings,
)
from semantic_kernel.contents import ChatHistory

# A long system prompt that exceeds the minimum token threshold for caching.
# In production this would typically be a large instruction set, persona, or
# document that stays the same across many turns.
SYSTEM_PROMPT = (
    """
You are an expert software engineer specializing in Python and distributed systems.
You provide precise, production-quality answers. When writing code you follow these rules:
 - Use type hints throughout.
 - Prefer composition over inheritance.
 - Write small, single-purpose functions.
 - Handle errors explicitly; never silence exceptions.
 - Include a brief docstring only when the intent is non-obvious.
 - Use async/await for all I/O-bound operations.
 - Prefer dataclasses or Pydantic models for structured data.

You are also familiar with the following internal guidelines:
 - All public APIs must be versioned.
 - Services communicate over gRPC with Protobuf schemas checked into the repo.
 - Secrets are injected at runtime via environment variables; never hardcoded.
 - Observability: every service emits structured JSON logs and OpenTelemetry traces.
 - Deployments use Kubernetes with Helm charts; no raw manifests.

When asked to review code, structure your response as:
 1. Summary (1-2 sentences)
 2. Issues (bulleted, severity labeled)
 3. Suggested fix (code block if applicable)
"""
    * 3
)  # repeat to ensure we comfortably exceed the 1,024-token minimum


async def chat_with_caching() -> None:
    """Run a multi-turn chat with prompt caching enabled on the system message."""
    kernel = Kernel()

    chat_service = AnthropicChatCompletion(service_id="anthropic")
    kernel.add_service(chat_service)

    # AnthropicCacheSettings.on() enables caching for both the system message and
    # tool definitions. Use .system() or .tools() to cache only one section.
    # Use .long() for 1-hour TTL when calls are infrequent.
    execution_settings = AnthropicChatPromptExecutionSettings(
        service_id="anthropic",
        max_tokens=512,
        cache=AnthropicCacheSettings.on(),
    )

    history = ChatHistory(system_message=SYSTEM_PROMPT)

    questions = [
        "What is the difference between asyncio.gather and asyncio.TaskGroup?",
        "When would you choose gRPC over REST for an internal service?",
        "How do you structure a Pydantic settings class for a twelve-factor app?",
    ]

    print("Anthropic Prompt Caching Demo")
    print("=" * 50)
    print("System prompt is marked for caching. The first call writes the cache;")
    print("subsequent calls read from it at 0.1x token cost.\n")

    for turn, question in enumerate(questions, start=1):
        print(f"Turn {turn}: {question}")
        history.add_user_message(question)

        reply = await chat_service.get_chat_message_content(
            chat_history=history,
            settings=execution_settings,
        )
        if reply:
            print(f"Assistant: {reply}\n")
            # Keep the assistant turn in history so later turns share context.
            history.add_message(reply)


if __name__ == "__main__":
    asyncio.run(chat_with_caching())
import logging
from typing import Annotated, Any, ClassVar, Literal

from pydantic import Field, model_validator

from semantic_kernel.connectors.ai.function_choice_type import FunctionChoiceType
from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
from semantic_kernel.exceptions import ServiceInvalidExecutionSettingsError
from semantic_kernel.kernel_pydantic import KernelBaseSettings

logger = logging.getLogger(__name__)


class AnthropicCacheSettings(KernelBaseSettings):
    """Configuration for Anthropic prompt caching.

    Decides which sections of a request get a ``cache_control`` block injected.
    Values are resolved in priority order: explicit constructor arguments, then
    environment variables with the ``ANTHROPIC_CACHE_`` prefix, then a .env
    file, then the field defaults.

    Recognized environment variables:
        - ``ANTHROPIC_CACHE_ENABLED`` — master switch, bool (default: false)
        - ``ANTHROPIC_CACHE_INCLUDE_SYSTEM`` — cache system message, bool (default: false)
        - ``ANTHROPIC_CACHE_INCLUDE_TOOLS`` — cache tool definitions, bool (default: false)
        - ``ANTHROPIC_CACHE_TTL`` — cache TTL, "5m" or "1h" (default: "5m")

    Anthropic only activates the cache above a minimum prompt size:
        - claude-haiku-4-5 : 4,096 tokens
        - claude-sonnet-4-x: 1,024 tokens
        - claude-opus-4-x  : 1,024 tokens

    TTL trade-off:
        - "5m": ephemeral 5-minute cache (1.25x write cost, 0.1x read cost)
        - "1h": extended 1-hour cache (2x write cost, 0.1x read cost)

    Convenience constructors for the common setups::

        AnthropicCacheSettings.on()      # enable system + tools caching
        AnthropicCacheSettings.off()     # disable all caching (default)
        AnthropicCacheSettings.system()  # cache system message only
        AnthropicCacheSettings.tools()   # cache tool definitions only
    """

    env_prefix: ClassVar[str] = "ANTHROPIC_CACHE_"

    enabled: Annotated[
        bool,
        Field(description="Master switch — disabling skips all cache_control injection regardless of other flags."),
    ] = False
    include_system: Annotated[
        bool,
        Field(description="Inject cache_control on the system message content block."),
    ] = False
    include_tools: Annotated[
        bool,
        Field(description="Inject cache_control on the last tool definition, caching the entire tools array prefix."),
    ] = False
    ttl: Annotated[
        Literal["5m", "1h"],
        Field(description="Cache TTL. '5m' = 5-minute ephemeral (default). '1h' = 1-hour extended."),
    ] = "5m"

    def _cache_control(self) -> dict[str, Any]:
        """Build the cache_control payload for the configured TTL.

        The 5-minute tier is Anthropic's default, so the ``ttl`` key is only
        emitted for the extended 1-hour tier.
        """
        control: dict[str, Any] = {"type": "ephemeral"}
        if self.ttl == "1h":
            control["ttl"] = "1h"
        return control

    @classmethod
    def on(cls, ttl: Literal["5m", "1h"] = "5m") -> "AnthropicCacheSettings":
        """Enable caching for every supported request section (system + tools)."""
        return cls(enabled=True, include_system=True, include_tools=True, ttl=ttl)

    @classmethod
    def off(cls) -> "AnthropicCacheSettings":
        """Disable all cache_control injection."""
        return cls(enabled=False)

    @classmethod
    def system(cls, ttl: Literal["5m", "1h"] = "5m") -> "AnthropicCacheSettings":
        """Enable caching for the system message only."""
        return cls(enabled=True, include_system=True, include_tools=False, ttl=ttl)

    @classmethod
    def tools(cls, ttl: Literal["5m", "1h"] = "5m") -> "AnthropicCacheSettings":
        """Enable caching for tool definitions only."""
        return cls(enabled=True, include_system=False, include_tools=True, ttl=ttl)

    @classmethod
    def short(cls) -> "AnthropicCacheSettings":
        """Full caching with the 5-minute TTL.

        Suited to tight agentic loops where the same prompt repeats within
        minutes. Write cost 1.25x, read cost 0.1x — breaks even after a single
        cache hit.
        """
        return cls.on(ttl="5m")

    @classmethod
    def long(cls) -> "AnthropicCacheSettings":
        """Full caching with the 1-hour TTL.

        Suited to batch jobs or scheduled tasks with long gaps between calls.
        Write cost 2x, read cost 0.1x — needs at least 2 cache hits to break
        even.
        """
        return cls.on(ttl="1h")
+ + Write cost: 2x. Read cost: 0.1x. Needs at least 2 cache hits to break even. + """ + return cls(enabled=True, include_system=True, include_tools=True, ttl="1h") + + class AnthropicPromptExecutionSettings(PromptExecutionSettings): """Common request settings for Anthropic services.""" @@ -23,7 +116,7 @@ class AnthropicChatPromptExecutionSettings(AnthropicPromptExecutionSettings): messages: list[dict[str, Any]] | None = None stream: bool | None = None - system: str | None = None + system: str | list[dict[str, Any]] | None = None max_tokens: Annotated[int, Field(gt=0)] = 1024 temperature: Annotated[float | None, Field(ge=0.0, le=2.0)] = None stop_sequences: list[str] | None = None @@ -43,6 +136,13 @@ class AnthropicChatPromptExecutionSettings(AnthropicPromptExecutionSettings): description="Do not set this manually. It is set by the service based on the function choice configuration." ), ] = None + cache: Annotated[ + AnthropicCacheSettings, + Field( + description="Prompt caching configuration. 
Disabled by default.", + exclude=True, + ), + ] = Field(default_factory=AnthropicCacheSettings) @model_validator(mode="after") def validate_tool_choice(self) -> "AnthropicChatPromptExecutionSettings": @@ -53,3 +153,26 @@ def validate_tool_choice(self) -> "AnthropicChatPromptExecutionSettings": raise ServiceInvalidExecutionSettingsError("Tool choice 'none' is not supported by Anthropic.") return self + + def prepare_settings_dict(self, **kwargs: Any) -> dict[str, Any]: + """Prepare the settings dictionary, injecting cache_control blocks when caching is enabled.""" + data = super().prepare_settings_dict(**kwargs) + + if not self.cache.enabled: + return data + + cache_control = self.cache._cache_control() + + if self.cache.include_system: + system = data.get("system") + if isinstance(system, str) and system: + data["system"] = [{"type": "text", "text": system, "cache_control": cache_control}] + elif isinstance(system, list) and system and "cache_control" not in system[-1]: + data["system"] = [*system[:-1], {**system[-1], "cache_control": cache_control}] + + if self.cache.include_tools: + tools: list[dict[str, Any]] | None = data.get("tools") + if tools and "cache_control" not in tools[-1]: + data["tools"] = [*tools[:-1], {**tools[-1], "cache_control": cache_control}] + + return data diff --git a/python/tests/unit/connectors/ai/anthropic/test_anthropic_request_settings.py b/python/tests/unit/connectors/ai/anthropic/test_anthropic_request_settings.py index 531823281ae2..885915d1e3ae 100644 --- a/python/tests/unit/connectors/ai/anthropic/test_anthropic_request_settings.py +++ b/python/tests/unit/connectors/ai/anthropic/test_anthropic_request_settings.py @@ -3,6 +3,7 @@ import pytest from semantic_kernel.connectors.ai.anthropic.prompt_execution_settings.anthropic_prompt_execution_settings import ( + AnthropicCacheSettings, AnthropicChatPromptExecutionSettings, ) from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior @@ -127,3 +128,252 @@ 
# region AnthropicCacheSettings


def test_cache_settings_default_is_off():
    cfg = AnthropicCacheSettings()
    assert (cfg.enabled, cfg.include_system, cfg.include_tools, cfg.ttl) == (False, False, False, "5m")


def test_cache_settings_on():
    cfg = AnthropicCacheSettings.on()
    assert (cfg.enabled, cfg.include_system, cfg.include_tools, cfg.ttl) == (True, True, True, "5m")


def test_cache_settings_on_with_1h_ttl():
    cfg = AnthropicCacheSettings.on(ttl="1h")
    assert cfg.enabled is True
    assert cfg.ttl == "1h"


def test_cache_settings_off():
    assert AnthropicCacheSettings.off().enabled is False


def test_cache_settings_system_only():
    cfg = AnthropicCacheSettings.system()
    assert (cfg.enabled, cfg.include_system, cfg.include_tools) == (True, True, False)


def test_cache_settings_tools_only():
    cfg = AnthropicCacheSettings.tools()
    assert (cfg.enabled, cfg.include_system, cfg.include_tools) == (True, False, True)


def test_cache_control_5m_via_prepare():
    """5m TTL emits ephemeral block without a ttl key."""
    cfg = AnthropicChatPromptExecutionSettings(
        system="Hello.",
        cache=AnthropicCacheSettings.on(ttl="5m"),
    )
    payload = cfg.prepare_settings_dict()
    assert payload["system"][0]["cache_control"] == {"type": "ephemeral"}


def test_cache_control_1h_via_prepare():
    """1h TTL emits ephemeral block with ttl string '1h'."""
    cfg = AnthropicChatPromptExecutionSettings(
        system="Hello.",
        cache=AnthropicCacheSettings.on(ttl="1h"),
    )
    payload = cfg.prepare_settings_dict()
    assert payload["system"][0]["cache_control"] == {"type": "ephemeral", "ttl": "1h"}


def test_cache_settings_short():
    cfg = AnthropicCacheSettings.short()
    assert (cfg.enabled, cfg.include_system, cfg.include_tools, cfg.ttl) == (True, True, True, "5m")


def test_cache_settings_long():
    cfg = AnthropicCacheSettings.long()
    assert (cfg.enabled, cfg.include_system, cfg.include_tools, cfg.ttl) == (True, True, True, "1h")


# endregion

# region prepare_settings_dict with caching


def test_prepare_settings_dict_cache_off_no_injection():
    cfg = AnthropicChatPromptExecutionSettings(
        system="You are a helpful assistant.",
        tools=[{"name": "search", "description": "Search the web"}],
        cache=AnthropicCacheSettings.off(),
    )
    payload = cfg.prepare_settings_dict()
    assert payload["system"] == "You are a helpful assistant."
    assert "cache_control" not in payload["tools"][-1]


def test_prepare_settings_dict_include_system_only():
    cfg = AnthropicChatPromptExecutionSettings(
        system="You are a helpful assistant.",
        cache=AnthropicCacheSettings.system(),
    )
    payload = cfg.prepare_settings_dict()
    assert isinstance(payload["system"], list)
    assert payload["system"] == [
        {"type": "text", "text": "You are a helpful assistant.", "cache_control": {"type": "ephemeral"}}
    ]


def test_prepare_settings_dict_include_tools_only():
    tool_defs = [
        {"name": "tool_a", "description": "Tool A"},
        {"name": "tool_b", "description": "Tool B"},
    ]
    cfg = AnthropicChatPromptExecutionSettings(
        tools=tool_defs,
        cache=AnthropicCacheSettings.tools(),
    )
    payload = cfg.prepare_settings_dict()
    assert "cache_control" not in payload["tools"][0]
    assert payload["tools"][-1]["cache_control"] == {"type": "ephemeral"}
    # original tools list must not be mutated
    assert "cache_control" not in tool_defs[-1]


def test_prepare_settings_dict_cache_on_system_and_tools():
    tool_defs = [{"name": "search", "description": "Search the web"}]
    cfg = AnthropicChatPromptExecutionSettings(
        system="You are a helpful assistant.",
        tools=tool_defs,
        cache=AnthropicCacheSettings.on(),
    )
    payload = cfg.prepare_settings_dict()
    assert isinstance(payload["system"], list)
    assert payload["system"][0]["cache_control"] == {"type": "ephemeral"}
    assert payload["tools"][-1]["cache_control"] == {"type": "ephemeral"}


def test_prepare_settings_dict_cache_on_1h_ttl():
    tool_defs = [{"name": "search", "description": "Search the web"}]
    cfg = AnthropicChatPromptExecutionSettings(
        system="You are a helpful assistant.",
        tools=tool_defs,
        cache=AnthropicCacheSettings.on(ttl="1h"),
    )
    payload = cfg.prepare_settings_dict()
    assert payload["system"][0]["cache_control"] == {"type": "ephemeral", "ttl": "1h"}
    assert payload["tools"][-1]["cache_control"] == {"type": "ephemeral", "ttl": "1h"}


def test_prepare_settings_dict_include_system_already_list():
    """When system is pre-structured as list[dict], cache_control is injected on the last block."""
    system_blocks = [
        {"type": "text", "text": "First block."},
        {"type": "text", "text": "Second block."},
    ]
    cfg = AnthropicChatPromptExecutionSettings(
        system=system_blocks,
        cache=AnthropicCacheSettings.system(),
    )
    payload = cfg.prepare_settings_dict()
    assert isinstance(payload["system"], list)
    assert "cache_control" not in payload["system"][0]
    assert payload["system"][-1]["cache_control"] == {"type": "ephemeral"}
    # original list must not be mutated
    assert "cache_control" not in system_blocks[-1]


def test_prepare_settings_dict_include_system_empty_string_no_injection():
    """Empty system string should not be wrapped in a cache block."""
    cfg = AnthropicChatPromptExecutionSettings(
        system="",
        cache=AnthropicCacheSettings.system(),
    )
    payload = cfg.prepare_settings_dict()
    # empty string — no injection expected
    assert not isinstance(payload.get("system"), list)


def test_prepare_settings_dict_include_tools_empty_no_injection():
    """No tools present — include_tools flag should be a no-op."""
    cfg = AnthropicChatPromptExecutionSettings(
        cache=AnthropicCacheSettings.tools(),
    )
    payload = cfg.prepare_settings_dict()
    assert payload.get("tools") is None


def test_prepare_settings_dict_cache_excluded_from_serialization():
    """The cache field must not appear in the serialized API payload."""
    cfg = AnthropicChatPromptExecutionSettings(cache=AnthropicCacheSettings.on())
    payload = cfg.prepare_settings_dict()
    assert "cache" not in payload


def test_prepare_settings_dict_existing_cache_control_not_overwritten():
    """cache_control already present on the last tool/system block must not be clobbered."""
    existing_ctrl = {"type": "ephemeral", "ttl": "1h"}
    tool_defs = [{"name": "t", "description": "d", "cache_control": existing_ctrl}]
    cfg = AnthropicChatPromptExecutionSettings(
        tools=tool_defs,
        cache=AnthropicCacheSettings.tools(ttl="5m"),
    )
    payload = cfg.prepare_settings_dict()
    assert payload["tools"][-1]["cache_control"] == existing_ctrl


# endregion

# region AnthropicCacheSettings — environment variable support


def test_cache_settings_from_env(monkeypatch):
    """Settings are populated from ANTHROPIC_CACHE_* env vars."""
    monkeypatch.setenv("ANTHROPIC_CACHE_ENABLED", "true")
    monkeypatch.setenv("ANTHROPIC_CACHE_INCLUDE_SYSTEM", "true")
    monkeypatch.setenv("ANTHROPIC_CACHE_INCLUDE_TOOLS", "false")
    monkeypatch.setenv("ANTHROPIC_CACHE_TTL", "1h")
    cfg = AnthropicCacheSettings()
    assert (cfg.enabled, cfg.include_system, cfg.include_tools, cfg.ttl) == (True, True, False, "1h")


def test_cache_settings_explicit_overrides_env(monkeypatch):
    """Explicit constructor arguments take priority over environment variables."""
    monkeypatch.setenv("ANTHROPIC_CACHE_ENABLED", "true")
    monkeypatch.setenv("ANTHROPIC_CACHE_TTL", "1h")
    cfg = AnthropicCacheSettings(enabled=False, ttl="5m")
    assert cfg.enabled is False
    assert cfg.ttl == "5m"


def test_cache_settings_env_disabled_by_default(monkeypatch):
    """With no env vars set, cache is disabled by default."""
    env_keys = (
        "ANTHROPIC_CACHE_ENABLED",
        "ANTHROPIC_CACHE_INCLUDE_SYSTEM",
        "ANTHROPIC_CACHE_INCLUDE_TOOLS",
        "ANTHROPIC_CACHE_TTL",
    )
    for key in env_keys:
        monkeypatch.delenv(key, raising=False)
    assert AnthropicCacheSettings().enabled is False


# endregion