From 7696e5c345a1ad9ae60401230622fde8304b47f5 Mon Sep 17 00:00:00 2001 From: Mohamad Osman <35007892+MohamedOthman1@users.noreply.github.com> Date: Fri, 1 May 2026 12:18:07 +0500 Subject: [PATCH 1/4] Fix Gemini streaming token usage metrics --- .../Clients/GeminiChatStreamingTests.cs | 59 +++++++++++++++++++ .../Clients/GeminiChatCompletionClient.cs | 28 ++++++++- 2 files changed, 84 insertions(+), 3 deletions(-) diff --git a/dotnet/src/Connectors/Connectors.Google.UnitTests/Core/Gemini/Clients/GeminiChatStreamingTests.cs b/dotnet/src/Connectors/Connectors.Google.UnitTests/Core/Gemini/Clients/GeminiChatStreamingTests.cs index 90588f80d66e..896f0a111491 100644 --- a/dotnet/src/Connectors/Connectors.Google.UnitTests/Core/Gemini/Clients/GeminiChatStreamingTests.cs +++ b/dotnet/src/Connectors/Connectors.Google.UnitTests/Core/Gemini/Clients/GeminiChatStreamingTests.cs @@ -2,6 +2,7 @@ using System; using System.Collections.Generic; +using System.Diagnostics.Metrics; using System.IO; using System.Linq; using System.Net.Http; @@ -469,6 +470,64 @@ public async Task ShouldHandleStreamingThoughtPartsAsync() Assert.Equal(" 42.", thirdMessage.Content); } + [Fact] + public async Task ShouldEmitUsageMetricsOnceForStreamingResponseAsync() + { + // Arrange + var streamingResponse = """ + data: {"candidates": [{"content": {"parts": [{"text": "One"}], "role": "model"}, "index": 0}], "usageMetadata": {"promptTokenCount": 101, "candidatesTokenCount": 202, "totalTokenCount": 303}} + + data: {"candidates": [{"content": {"parts": [{"text": " two"}], "role": "model"}, "index": 0}], "usageMetadata": {"promptTokenCount": 101, "candidatesTokenCount": 202, "totalTokenCount": 303}} + + data: {"candidates": [{"content": {"parts": [{"text": " three"}], "role": "model"}, "index": 0}], "usageMetadata": {"promptTokenCount": 101, "candidatesTokenCount": 202, "totalTokenCount": 303}} + + data: {"candidates": [{"content": {"parts": [{"text": " four"}], "role": "model"}, "index": 0}], "usageMetadata": {"promptTokenCount": 101, "candidatesTokenCount": 202, "totalTokenCount": 303}} + + data: {"candidates": [{"content": {"parts": [{"text": " five"}], "role": "model"}, "finishReason": "STOP", "index": 0}], "usageMetadata": {"promptTokenCount": 101, "candidatesTokenCount": 202, "totalTokenCount": 303}} + + data: {"candidates": [{"content": {"parts": [{"text": ""}], "role": "model"}, "finishReason": "STOP", "index": 0}]} + + """; + + this._messageHandlerStub.ResponseToReturn.Content = new StringContent(streamingResponse); + + var measurements = new Dictionary> + { + ["Microsoft.SemanticKernel.Connectors.Google.tokens.prompt"] = [], + ["Microsoft.SemanticKernel.Connectors.Google.tokens.completion"] = [], + ["Microsoft.SemanticKernel.Connectors.Google.tokens.total"] = [], + }; + + using MeterListener listener = new(); + listener.InstrumentPublished = (instrument, listener) => + { + if (measurements.ContainsKey(instrument.Name)) + { + listener.EnableMeasurementEvents(instrument); + } + }; + listener.SetMeasurementEventCallback((instrument, measurement, tags, state) => + { + if (measurements.TryGetValue(instrument.Name, out var instrumentMeasurements)) + { + instrumentMeasurements.Add(measurement); + } + }); + listener.Start(); + + var client = this.CreateChatCompletionClient(); + var chatHistory = CreateSampleChatHistory(); + + // Act + var messages = await client.StreamGenerateChatMessageAsync(chatHistory).ToListAsync(); + + // Assert + Assert.Equal(6, messages.Count); + Assert.Equal(1, measurements["Microsoft.SemanticKernel.Connectors.Google.tokens.prompt"].Count(measurement => measurement == 101)); + Assert.Equal(1, measurements["Microsoft.SemanticKernel.Connectors.Google.tokens.completion"].Count(measurement => measurement == 202)); + Assert.Equal(1, measurements["Microsoft.SemanticKernel.Connectors.Google.tokens.total"].Count(measurement => measurement == 303)); + } + private static ChatHistory CreateSampleChatHistory() { var chatHistory = new ChatHistory(); diff --git a/dotnet/src/Connectors/Connectors.Google/Core/Gemini/Clients/GeminiChatCompletionClient.cs b/dotnet/src/Connectors/Connectors.Google/Core/Gemini/Clients/GeminiChatCompletionClient.cs index 3c3501622b74..17d8ca04a467 100644 --- a/dotnet/src/Connectors/Connectors.Google/Core/Gemini/Clients/GeminiChatCompletionClient.cs +++ b/dotnet/src/Connectors/Connectors.Google/Core/Gemini/Clients/GeminiChatCompletionClient.cs @@ -772,13 +772,27 @@ private async IAsyncEnumerable ProcessChatResponseStre Stream responseStream, [EnumeratorCancellation] CancellationToken ct) { + List? lastChatMessageContents = null; + List? lastChatMessageContentsWithUsage = null; + await foreach (var response in this.ParseResponseStreamAsync(responseStream, ct: ct).ConfigureAwait(false)) { - foreach (var messageContent in this.ProcessChatResponse(response)) + lastChatMessageContents = this.ProcessChatResponse(response, logUsage: false); + if (HasTokenUsage(lastChatMessageContents)) + { + lastChatMessageContentsWithUsage = lastChatMessageContents; + } + + foreach (var messageContent in lastChatMessageContents) { yield return messageContent; } } + + if ((lastChatMessageContentsWithUsage ?? lastChatMessageContents) is { } chatMessageContents) + { + this.LogUsage(chatMessageContents); + } } private async IAsyncEnumerable ParseResponseStreamAsync( @@ -791,15 +805,23 @@ private async IAsyncEnumerable ParseResponseStreamAsync( } } - private List ProcessChatResponse(GeminiResponse geminiResponse) + private List ProcessChatResponse(GeminiResponse geminiResponse, bool logUsage = true) { ValidateGeminiResponse(geminiResponse); var chatMessageContents = this.GetChatMessageContentsFromResponse(geminiResponse); - this.LogUsage(chatMessageContents); + + if (logUsage) + { + this.LogUsage(chatMessageContents); + } + return chatMessageContents; } + private static bool HasTokenUsage(List chatMessageContents) + => chatMessageContents.FirstOrDefault()?.Metadata is { TotalTokenCount: > 0 }; + private static void ValidateGeminiResponse(GeminiResponse geminiResponse) { if (geminiResponse.PromptFeedback?.BlockReason is not null) From df8b63ef6ea17f56ae1d9ccb8bc3314ae233faf0 Mon Sep 17 00:00:00 2001 From: Mohamad Osman <35007892+MohamedOthman1@users.noreply.github.com> Date: Fri, 1 May 2026 12:34:02 +0500 Subject: [PATCH 2/4] Handle empty Gemini usage content --- .../Core/Gemini/Clients/GeminiChatCompletionClient.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dotnet/src/Connectors/Connectors.Google/Core/Gemini/Clients/GeminiChatCompletionClient.cs b/dotnet/src/Connectors/Connectors.Google/Core/Gemini/Clients/GeminiChatCompletionClient.cs index 17d8ca04a467..45a2d6933719 100644 --- a/dotnet/src/Connectors/Connectors.Google/Core/Gemini/Clients/GeminiChatCompletionClient.cs +++ b/dotnet/src/Connectors/Connectors.Google/Core/Gemini/Clients/GeminiChatCompletionClient.cs @@ -833,7 +833,7 @@ private static void ValidateGeminiResponse(GeminiResponse geminiResponse) private void LogUsage(List chatMessageContents) { - GeminiMetadata? metadata = chatMessageContents[0].Metadata; + GeminiMetadata? metadata = chatMessageContents.FirstOrDefault()?.Metadata; if (metadata is null || metadata.TotalTokenCount <= 0) { From 114d215b4cc7b06a7a2a51dd0a79866f7337b9ea Mon Sep 17 00:00:00 2001 From: Mohamad Osman <35007892+MohamedOthman1@users.noreply.github.com> Date: Fri, 1 May 2026 12:38:41 +0500 Subject: [PATCH 3/4] Refine Gemini usage response processing --- .../Clients/GeminiChatCompletionClient.cs | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/dotnet/src/Connectors/Connectors.Google/Core/Gemini/Clients/GeminiChatCompletionClient.cs b/dotnet/src/Connectors/Connectors.Google/Core/Gemini/Clients/GeminiChatCompletionClient.cs index 45a2d6933719..de09a4572f15 100644 --- a/dotnet/src/Connectors/Connectors.Google/Core/Gemini/Clients/GeminiChatCompletionClient.cs +++ b/dotnet/src/Connectors/Connectors.Google/Core/Gemini/Clients/GeminiChatCompletionClient.cs @@ -777,7 +777,7 @@ private async IAsyncEnumerable ProcessChatResponseStre await foreach (var response in this.ParseResponseStreamAsync(responseStream, ct: ct).ConfigureAwait(false)) { - lastChatMessageContents = this.ProcessChatResponse(response, logUsage: false); + lastChatMessageContents = this.CreateChatMessageContents(response); if (HasTokenUsage(lastChatMessageContents)) { lastChatMessageContentsWithUsage = lastChatMessageContents; @@ -805,18 +805,18 @@ private async IAsyncEnumerable ParseResponseStreamAsync( } } - private List ProcessChatResponse(GeminiResponse geminiResponse, bool logUsage = true) + private List ProcessChatResponse(GeminiResponse geminiResponse) { - ValidateGeminiResponse(geminiResponse); - - var chatMessageContents = this.GetChatMessageContentsFromResponse(geminiResponse); + var chatMessageContents = this.CreateChatMessageContents(geminiResponse); + this.LogUsage(chatMessageContents); + return chatMessageContents; + } - if (logUsage) - { - this.LogUsage(chatMessageContents); - } + private List CreateChatMessageContents(GeminiResponse geminiResponse) + { + ValidateGeminiResponse(geminiResponse); - return chatMessageContents; + return this.GetChatMessageContentsFromResponse(geminiResponse); } private static bool HasTokenUsage(List chatMessageContents) From fc5198e7c61d2d783fe1a4c2a8d1f6c6e02f0f45 Mon Sep 17 00:00:00 2001 From: Mohamad Osman <35007892+MohamedOthman1@users.noreply.github.com> Date: Fri, 1 May 2026 12:39:32 +0500 Subject: [PATCH 4/4] Simplify Gemini streaming usage logging --- .../Gemini/Clients/GeminiChatCompletionClient.cs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/dotnet/src/Connectors/Connectors.Google/Core/Gemini/Clients/GeminiChatCompletionClient.cs b/dotnet/src/Connectors/Connectors.Google/Core/Gemini/Clients/GeminiChatCompletionClient.cs index de09a4572f15..29e2ac22f01d 100644 --- a/dotnet/src/Connectors/Connectors.Google/Core/Gemini/Clients/GeminiChatCompletionClient.cs +++ b/dotnet/src/Connectors/Connectors.Google/Core/Gemini/Clients/GeminiChatCompletionClient.cs @@ -772,26 +772,25 @@ private async IAsyncEnumerable ProcessChatResponseStre Stream responseStream, [EnumeratorCancellation] CancellationToken ct) { - List? lastChatMessageContents = null; List? lastChatMessageContentsWithUsage = null; await foreach (var response in this.ParseResponseStreamAsync(responseStream, ct: ct).ConfigureAwait(false)) { - lastChatMessageContents = this.CreateChatMessageContents(response); - if (HasTokenUsage(lastChatMessageContents)) + var chatMessageContents = this.CreateChatMessageContents(response); + if (HasTokenUsage(chatMessageContents)) { - lastChatMessageContentsWithUsage = lastChatMessageContents; + lastChatMessageContentsWithUsage = chatMessageContents; } - foreach (var messageContent in lastChatMessageContents) + foreach (var messageContent in chatMessageContents) { yield return messageContent; } } - if ((lastChatMessageContentsWithUsage ?? lastChatMessageContents) is { } chatMessageContents) + if (lastChatMessageContentsWithUsage is { } chatMessageContentsWithUsage) { - this.LogUsage(chatMessageContents); + this.LogUsage(chatMessageContentsWithUsage); } }