From 748da1d36f732cdaae201c41a68c7f4701c6c606 Mon Sep 17 00:00:00 2001 From: Yufeng He <40085740+he-yufeng@users.noreply.github.com> Date: Sat, 20 Jun 2026 08:07:01 +0800 Subject: [PATCH 1/5] Python: surface Gemini cached and thinking token counts in usage details --- .../agent_framework_gemini/_chat_client.py | 4 +++ .../gemini/tests/test_gemini_client.py | 25 +++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/python/packages/gemini/agent_framework_gemini/_chat_client.py b/python/packages/gemini/agent_framework_gemini/_chat_client.py index fee44ee368c..f1aa36a9e7c 100644 --- a/python/packages/gemini/agent_framework_gemini/_chat_client.py +++ b/python/packages/gemini/agent_framework_gemini/_chat_client.py @@ -1051,6 +1051,10 @@ def _parse_usage(self, usage: types.GenerateContentResponseUsageMetadata | None) details["output_token_count"] = v if (v := usage.total_token_count) is not None: details["total_token_count"] = v + if (v := usage.cached_content_token_count) is not None: + details["cache_read_input_token_count"] = v + if (v := usage.thoughts_token_count) is not None: + details["reasoning_output_token_count"] = v return details or None def _map_finish_reason(self, reason: str | None) -> FinishReasonLiteral | None: diff --git a/python/packages/gemini/tests/test_gemini_client.py b/python/packages/gemini/tests/test_gemini_client.py index 32da66f56fa..c85d1bbec3b 100644 --- a/python/packages/gemini/tests/test_gemini_client.py +++ b/python/packages/gemini/tests/test_gemini_client.py @@ -93,6 +93,8 @@ def _make_response( prompt_tokens: int | None = 10, output_tokens: int | None = 5, total_tokens: int | None = 15, + cached_tokens: int | None = None, + thoughts_tokens: int | None = None, ) -> MagicMock: """Build a mock types.GenerateContentResponse.""" response = MagicMock() @@ -113,6 +115,8 @@ def _make_response( usage.prompt_token_count = prompt_tokens usage.candidates_token_count = output_tokens usage.total_token_count = total_tokens + usage.cached_content_token_count = cached_tokens + usage.thoughts_token_count = thoughts_tokens response.usage_metadata = usage else: response.usage_metadata = None @@ -374,6 +378,27 @@ async def test_get_response_usage_details() -> None: assert response.usage_details["total_token_count"] == 28 +async def test_get_response_usage_details_includes_cached_and_reasoning_tokens() -> None: + """Surfaces Gemini cached-content and thinking token counts into the canonical usage fields.""" + client, mock = _make_gemini_client() + mock.aio.models.generate_content = AsyncMock( + return_value=_make_response( + [_make_part(text="Hi")], + prompt_tokens=20, + output_tokens=8, + total_tokens=28, + cached_tokens=12, + thoughts_tokens=6, + ) + ) + + response = await client.get_response(messages=[Message(role="user", contents=[Content.from_text("Hi")])]) + + assert response.usage_details is not None + assert response.usage_details["cache_read_input_token_count"] == 12 + assert response.usage_details["reasoning_output_token_count"] == 6 + + async def test_get_response_no_usage_when_metadata_absent() -> None: """Returns None for usage_details when the API response includes no usage metadata.""" client, mock = _make_gemini_client() From dd895b146d6f3556eb5027ac631e2e1f967fc8cf Mon Sep 17 00:00:00 2001 From: Yufeng He <40085740+he-yufeng@users.noreply.github.com> Date: Sat, 20 Jun 2026 10:59:28 +0800 Subject: [PATCH 2/5] Python: surface Bedrock cache token counts in usage details --- .../agent_framework_bedrock/_chat_client.py | 5 +++++ .../bedrock/tests/test_bedrock_client.py | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/python/packages/bedrock/agent_framework_bedrock/_chat_client.py b/python/packages/bedrock/agent_framework_bedrock/_chat_client.py index cf8b3c562ae..269b0902bf5 100644 --- a/python/packages/bedrock/agent_framework_bedrock/_chat_client.py +++ b/python/packages/bedrock/agent_framework_bedrock/_chat_client.py @@ -689,6 +689,11 @@ def _parse_usage(self, usage: dict[str, Any] | None) -> UsageDetails | None: details["output_token_count"] = output_tokens if (total_tokens := usage.get("totalTokens")) is not None: details["total_token_count"] = total_tokens + # Bedrock Converse reports these when prompt caching is active. + if (cache_read := usage.get("cacheReadInputTokens")) is not None: + details["cache_read_input_token_count"] = cache_read + if (cache_write := usage.get("cacheWriteInputTokens")) is not None: + details["cache_creation_input_token_count"] = cache_write return details def _parse_message_contents(self, content_blocks: Sequence[dict[str, Any]]) -> list[Any]: diff --git a/python/packages/bedrock/tests/test_bedrock_client.py b/python/packages/bedrock/tests/test_bedrock_client.py index 9e1b42ea251..839dd4371b9 100644 --- a/python/packages/bedrock/tests/test_bedrock_client.py +++ b/python/packages/bedrock/tests/test_bedrock_client.py @@ -169,3 +169,21 @@ def test_prepare_options_tool_choice_required_without_tools_raises() -> None: with pytest.raises(ValueError, match="tool_choice='required' requires at least one tool"): client._prepare_options(messages, options) + + +def test_parse_usage_surfaces_cache_tokens() -> None: + """Bedrock Converse reports cache token counts when prompt caching is used.""" + client = _make_client() + + details = client._parse_usage({ + "inputTokens": 10, + "outputTokens": 5, + "totalTokens": 15, + "cacheReadInputTokens": 8, + "cacheWriteInputTokens": 3, + }) + + assert details is not None + assert details["input_token_count"] == 10 + assert details["cache_read_input_token_count"] == 8 + assert details["cache_creation_input_token_count"] == 3 From 5e23a09132ade7fa6109225758860a8604f81f5f Mon Sep 17 00:00:00 2001 From: Yufeng He <40085740+he-yufeng@users.noreply.github.com> Date: Sat, 20 Jun 2026 08:07:01 +0800 Subject: [PATCH 3/5] Python: surface Gemini cached and thinking token counts in usage details --- .../agent_framework_gemini/_chat_client.py | 4 +++ .../gemini/tests/test_gemini_client.py | 25 +++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/python/packages/gemini/agent_framework_gemini/_chat_client.py b/python/packages/gemini/agent_framework_gemini/_chat_client.py index fee44ee368c..f1aa36a9e7c 100644 --- a/python/packages/gemini/agent_framework_gemini/_chat_client.py +++ b/python/packages/gemini/agent_framework_gemini/_chat_client.py @@ -1051,6 +1051,10 @@ def _parse_usage(self, usage: types.GenerateContentResponseUsageMetadata | None) details["output_token_count"] = v if (v := usage.total_token_count) is not None: details["total_token_count"] = v + if (v := usage.cached_content_token_count) is not None: + details["cache_read_input_token_count"] = v + if (v := usage.thoughts_token_count) is not None: + details["reasoning_output_token_count"] = v return details or None def _map_finish_reason(self, reason: str | None) -> FinishReasonLiteral | None: diff --git a/python/packages/gemini/tests/test_gemini_client.py b/python/packages/gemini/tests/test_gemini_client.py index 32da66f56fa..c85d1bbec3b 100644 --- a/python/packages/gemini/tests/test_gemini_client.py +++ b/python/packages/gemini/tests/test_gemini_client.py @@ -93,6 +93,8 @@ def _make_response( prompt_tokens: int | None = 10, output_tokens: int | None = 5, total_tokens: int | None = 15, + cached_tokens: int | None = None, + thoughts_tokens: int | None = None, ) -> MagicMock: """Build a mock types.GenerateContentResponse.""" response = MagicMock() @@ -113,6 +115,8 @@ def _make_response( usage.prompt_token_count = prompt_tokens usage.candidates_token_count = output_tokens usage.total_token_count = total_tokens + usage.cached_content_token_count = cached_tokens + usage.thoughts_token_count = thoughts_tokens response.usage_metadata = usage else: response.usage_metadata = None @@ -374,6 +378,27 @@ async def test_get_response_usage_details() -> None: assert response.usage_details["total_token_count"] == 28 +async def test_get_response_usage_details_includes_cached_and_reasoning_tokens() -> None: + """Surfaces Gemini cached-content and thinking token counts into the canonical usage fields.""" + client, mock = _make_gemini_client() + mock.aio.models.generate_content = AsyncMock( + return_value=_make_response( + [_make_part(text="Hi")], + prompt_tokens=20, + output_tokens=8, + total_tokens=28, + cached_tokens=12, + thoughts_tokens=6, + ) + ) + + response = await client.get_response(messages=[Message(role="user", contents=[Content.from_text("Hi")])]) + + assert response.usage_details is not None + assert response.usage_details["cache_read_input_token_count"] == 12 + assert response.usage_details["reasoning_output_token_count"] == 6 + + async def test_get_response_no_usage_when_metadata_absent() -> None: """Returns None for usage_details when the API response includes no usage metadata.""" client, mock = _make_gemini_client() From 3daf7cc3d31a00a005a812e000d49112730b3208 Mon Sep 17 00:00:00 2001 From: Yufeng He <40085740+he-yufeng@users.noreply.github.com> Date: Sat, 20 Jun 2026 10:59:28 +0800 Subject: [PATCH 4/5] Python: surface Bedrock cache token counts in usage details --- .../agent_framework_bedrock/_chat_client.py | 5 +++++ .../bedrock/tests/test_bedrock_client.py | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/python/packages/bedrock/agent_framework_bedrock/_chat_client.py b/python/packages/bedrock/agent_framework_bedrock/_chat_client.py index cf8b3c562ae..269b0902bf5 100644 --- a/python/packages/bedrock/agent_framework_bedrock/_chat_client.py +++ b/python/packages/bedrock/agent_framework_bedrock/_chat_client.py @@ -689,6 +689,11 @@ def _parse_usage(self, usage: dict[str, Any] | None) -> UsageDetails | None: details["output_token_count"] = output_tokens if (total_tokens := usage.get("totalTokens")) is not None: details["total_token_count"] = total_tokens + # Bedrock Converse reports these when prompt caching is active. + if (cache_read := usage.get("cacheReadInputTokens")) is not None: + details["cache_read_input_token_count"] = cache_read + if (cache_write := usage.get("cacheWriteInputTokens")) is not None: + details["cache_creation_input_token_count"] = cache_write return details def _parse_message_contents(self, content_blocks: Sequence[dict[str, Any]]) -> list[Any]: diff --git a/python/packages/bedrock/tests/test_bedrock_client.py b/python/packages/bedrock/tests/test_bedrock_client.py index 9e1b42ea251..839dd4371b9 100644 --- a/python/packages/bedrock/tests/test_bedrock_client.py +++ b/python/packages/bedrock/tests/test_bedrock_client.py @@ -169,3 +169,21 @@ def test_prepare_options_tool_choice_required_without_tools_raises() -> None: with pytest.raises(ValueError, match="tool_choice='required' requires at least one tool"): client._prepare_options(messages, options) + + +def test_parse_usage_surfaces_cache_tokens() -> None: + """Bedrock Converse reports cache token counts when prompt caching is used.""" + client = _make_client() + + details = client._parse_usage({ + "inputTokens": 10, + "outputTokens": 5, + "totalTokens": 15, + "cacheReadInputTokens": 8, + "cacheWriteInputTokens": 3, + }) + + assert details is not None + assert details["input_token_count"] == 10 + assert details["cache_read_input_token_count"] == 8 + assert details["cache_creation_input_token_count"] == 3 From ab5bdf574fb1435223af43538f26ada7e2d58149 Mon Sep 17 00:00:00 2001 From: Yufeng He <40085740+he-yufeng@users.noreply.github.com> Date: Thu, 25 Jun 2026 00:46:57 +0800 Subject: [PATCH 5/5] Return None from Bedrock _parse_usage when no token counts are present Matches the UsageDetails | None return annotation and the Gemini connector's behavior, so a usage payload with no recognized keys no longer propagates an empty mapping. Adds a regression test. --- .../bedrock/agent_framework_bedrock/_chat_client.py | 2 +- python/packages/bedrock/tests/test_bedrock_client.py | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/python/packages/bedrock/agent_framework_bedrock/_chat_client.py b/python/packages/bedrock/agent_framework_bedrock/_chat_client.py index 269b0902bf5..99207508b61 100644 --- a/python/packages/bedrock/agent_framework_bedrock/_chat_client.py +++ b/python/packages/bedrock/agent_framework_bedrock/_chat_client.py @@ -694,7 +694,7 @@ def _parse_usage(self, usage: dict[str, Any] | None) -> UsageDetails | None: details["cache_read_input_token_count"] = cache_read if (cache_write := usage.get("cacheWriteInputTokens")) is not None: details["cache_creation_input_token_count"] = cache_write - return details + return details or None def _parse_message_contents(self, content_blocks: Sequence[dict[str, Any]]) -> list[Any]: contents: list[Any] = [] diff --git a/python/packages/bedrock/tests/test_bedrock_client.py b/python/packages/bedrock/tests/test_bedrock_client.py index 839dd4371b9..211046cc816 100644 --- a/python/packages/bedrock/tests/test_bedrock_client.py +++ b/python/packages/bedrock/tests/test_bedrock_client.py @@ -187,3 +187,12 @@ def test_parse_usage_surfaces_cache_tokens() -> None: assert details["input_token_count"] == 10 assert details["cache_read_input_token_count"] == 8 assert details["cache_creation_input_token_count"] == 3 + + +def test_parse_usage_returns_none_when_no_recognized_keys() -> None: + """A truthy usage payload with no recognized keys yields None, not an empty mapping.""" + client = _make_client() + + assert client._parse_usage({"unexpected": 1}) is None + assert client._parse_usage({}) is None + assert client._parse_usage(None) is None