From 6d30bf4923e7319ddee1083217c0d15ef23489f0 Mon Sep 17 00:00:00 2001
From: Patrick Ruddiman <86851465+PatrickRuddiman@users.noreply.github.com>
Date: Sun, 29 Jun 2025 11:41:12 -0400
Subject: [PATCH 1/3] feat: improve token counting and add brevity pattern

---
 Constants/FabricPatterns.cs            |  5 +++
 Constants/PatternNames.cs              |  5 +++
 README.md                              |  3 ++
 Services/OpenAIService.cs              | 42 +++++++++++++++++++++++++-
 Services/SemanticCoherenceAnalyzer.cs  |  6 ++--
 Services/TokenHelper.cs                | 26 ++++++++++++++++
 WriteCommit.csproj                     |  1 +
 patterns/brief_chunk_summary/system.md | 11 +++++++
 8 files changed, 95 insertions(+), 4 deletions(-)
 create mode 100644 Services/TokenHelper.cs
 create mode 100644 patterns/brief_chunk_summary/system.md
diff --git a/Constants/FabricPatterns.cs b/Constants/FabricPatterns.cs
index 2f9e923..7eb456f 100644
--- a/Constants/FabricPatterns.cs
+++ b/Constants/FabricPatterns.cs
@@ -11,5 +11,10 @@ public static class FabricPatterns
         /// Default pattern used for generating commit messages
         /// </summary>
         public const string CommitPattern = "write_commit_message";
+
+        /// <summary>
+        /// Pattern used when context overflow requires extra summarization
+        /// </summary>
+        public const string BrevityPattern = "brief_chunk_summary";
     }
 }
diff --git a/Constants/PatternNames.cs b/Constants/PatternNames.cs
index 41fb083..744fc1f 100644
--- a/Constants/PatternNames.cs
+++ b/Constants/PatternNames.cs
@@ -11,5 +11,10 @@ public static class PatternNames
         /// Default pattern used for generating commit messages
         /// </summary>
         public const string CommitPattern = "write_commit_message";
+
+        /// <summary>
+        /// Used when context overflow requires extra summarization
+        /// </summary>
+        public const string BrevityPattern = "brief_chunk_summary";
     }
 }
diff --git a/README.md b/README.md
index 0baca01..91acec8 100644
--- a/README.md
+++ b/README.md
@@ -85,6 +85,9 @@ write-commit --verbose
 # Custom AI parameters
 write-commit --temperature 0.7 --topp 0.9 --pattern custom_pattern
 
+# Built-in brevity pattern for overflowing contexts
+write-commit --pattern brief_chunk_summary
+
 # Force reinstall all patterns
 write-commit --reinstall-patterns
 
diff --git a/Services/OpenAIService.cs b/Services/OpenAIService.cs
index 56d4b2d..61a19b3 100644
--- a/Services/OpenAIService.cs
+++ b/Services/OpenAIService.cs
@@ -9,6 +9,7 @@ public class OpenAIService
 {
     private readonly string _apiKey;
     private readonly string _patternsDirectory;
+    private const int MaxContextTokens = 128000;
 
     public OpenAIService(string apiKey)
     {
@@ -227,6 +228,44 @@ bool verbose
             throw new InvalidOperationException($"Failed to load pattern: {pattern}");
         }
 
+        var combinedContent = string.Join("\n\n", chunkMessages);
+        var estimatedTokens = TokenHelper.EstimateTokens(systemPrompt, model) + TokenHelper.EstimateTokens(combinedContent, model);
+
+        if (estimatedTokens > MaxContextTokens && chunkMessages.Count > 1)
+        {
+            if (verbose)
+            {
+                Console.WriteLine("Context length exceeded, re-chunking summaries...");
+            }
+
+            var groupedSummaries = new List<string>();
+            var currentGroup = new List<string>();
+            var currentTokens = TokenHelper.EstimateTokens(systemPrompt, model);
+
+            foreach (var msg in chunkMessages)
+            {
+                var msgTokens = TokenHelper.EstimateTokens(msg, model);
+                if (currentTokens + msgTokens > MaxContextTokens / 2 && currentGroup.Count > 0)
+                {
+                    var summary = await CombineChunkMessagesAsync(currentGroup, PatternNames.BrevityPattern, temperature, topP, presence, frequency, model, verbose);
+                    groupedSummaries.Add(summary);
+                    currentGroup.Clear();
+                    currentTokens = TokenHelper.EstimateTokens(systemPrompt, model);
+                }
+
+                currentGroup.Add(msg);
+                currentTokens += msgTokens;
+            }
+
+            if (currentGroup.Count > 0)
+            {
+                var summary = await CombineChunkMessagesAsync(currentGroup, PatternNames.BrevityPattern, temperature, topP, presence, frequency, model, verbose);
+                groupedSummaries.Add(summary);
+            }
+
+            return await CombineChunkMessagesAsync(groupedSummaries, PatternNames.BrevityPattern, temperature, topP, presence, frequency, model, verbose);
+        }
+
         // Create a client for this specific model
         var chatClient = new ChatClient(model, _apiKey);
 
@@ -234,7 +273,7 @@ bool verbose
         var messages = new List<ChatMessage>
         {
             new SystemChatMessage(systemPrompt),
-            new UserChatMessage(string.Join("\n\n", chunkMessages)),
+            new UserChatMessage(combinedContent),
         };
 
         // Create chat completion options
@@ -313,4 +352,5 @@ private float ConvertPenalty(int penalty)
         // OpenAI uses -2 to 2 for penalties
         return Math.Clamp((float)penalty, -2f, 2f);
     }
+
 }
diff --git a/Services/SemanticCoherenceAnalyzer.cs b/Services/SemanticCoherenceAnalyzer.cs
index 64b5044..6b8dae6 100644
--- a/Services/SemanticCoherenceAnalyzer.cs
+++ b/Services/SemanticCoherenceAnalyzer.cs
@@ -2,6 +2,7 @@
 using System.Text.RegularExpressions;
 using Microsoft.Extensions.Logging;
 using WriteCommit.Models;
+using WriteCommit.Services;
 
 namespace WriteCommit.Services;
 
@@ -311,8 +312,7 @@ private string DetermineChangeType(string content)
 
     private int EstimateTokenCount(string text)
     {
-        // Rough estimation: ~4 characters per token for code
-        // This is conservative to ensure we don't exceed LLM limits
-        return Math.Max(1, text.Length / 4);
+        // Use token encoder for more accurate results
+        return TokenHelper.EstimateTokens(text, "gpt-4o-mini");
     }
 }
diff --git a/Services/TokenHelper.cs b/Services/TokenHelper.cs
new file mode 100644
index 0000000..f6776f0
--- /dev/null
+++ b/Services/TokenHelper.cs
@@ -0,0 +1,64 @@
+using TiktokenSharp;
+
+namespace WriteCommit.Services;
+
+/// <summary>
+/// Estimates how many tokens a piece of text occupies for a given model.
+/// </summary>
+public static class TokenHelper
+{
+    // Encoders already resolved, keyed by model name.
+    // NOTE(review): neither cache is synchronized - confirm callers never run concurrently.
+    private static readonly Dictionary<string, TikToken> Encoders = new();
+
+    // Models whose encoder could not be resolved. Remembered so the failing
+    // lookup is attempted once instead of on every call.
+    private static readonly HashSet<string> UnsupportedModels = new();
+
+    /// <summary>
+    /// Counts the tokens in <paramref name="text"/> with the encoder for
+    /// <paramref name="model"/>. When no encoder can be resolved, or encoding
+    /// throws, falls back to one token per four characters (minimum 1).
+    /// </summary>
+    /// <param name="text">Text to measure; null is treated as empty.</param>
+    /// <param name="model">Model name used to pick the encoder.</param>
+    /// <returns>The encoder's token count, or the heuristic estimate.</returns>
+    public static int EstimateTokens(string text, string model)
+    {
+        // The fallback reads text.Length, so normalize null before anything can throw.
+        text ??= string.Empty;
+
+        if (model is null || UnsupportedModels.Contains(model))
+        {
+            return HeuristicEstimate(text);
+        }
+
+        if (!Encoders.TryGetValue(model, out var encoder))
+        {
+            try
+            {
+                encoder = TikToken.EncodingForModel(model);
+            }
+            catch (Exception)
+            {
+                // Best effort by design: an unresolvable model is not fatal.
+                UnsupportedModels.Add(model);
+                return HeuristicEstimate(text);
+            }
+
+            Encoders[model] = encoder;
+        }
+
+        try
+        {
+            return encoder.Encode(text).Count;
+        }
+        catch (Exception)
+        {
+            return HeuristicEstimate(text);
+        }
+    }
+
+    /// <summary>Rough estimate: one token per four characters, never below 1.</summary>
+    private static int HeuristicEstimate(string text) => Math.Max(1, text.Length / 4);
+}
diff --git a/WriteCommit.csproj b/WriteCommit.csproj
index 2ee0b89..5eafe29 100644
--- a/WriteCommit.csproj
+++ b/WriteCommit.csproj
@@ -24,6 +24,7 @@
     <PackageReference Include="Microsoft.Extensions.Logging" Version="8.0.0" />
     <PackageReference Include="Microsoft.Extensions.Logging.Console" Version="8.0.0" />
     <PackageReference Include="OpenAI" Version="2.1.0" />
+    <PackageReference Include="TiktokenSharp" Version="1.1.7" />
   </ItemGroup>
   <ItemGroup>
     <None Include="patterns\**\*">
diff --git a/patterns/brief_chunk_summary/system.md b/patterns/brief_chunk_summary/system.md
new file mode 100644
index 0000000..607cdb8
--- /dev/null
+++ b/patterns/brief_chunk_summary/system.md
@@ -0,0 +1,11 @@
+# IDENTITY and PURPOSE
+You are called when earlier summaries exceed the model context limit.
+Condense the provided summaries into one very short commit summary.
+
+# STEPS
+- Read the given summaries carefully.
+- Produce an extremely concise summary focusing only on key changes.
+- Keep the text brief and under typical commit message length.
+
+# OUTPUT
+Return the single brief summary only.

From d9d3c571fe81fe41fa233eebf3ef9059a88cfc33 Mon Sep 17 00:00:00 2001
From: Patrick Ruddiman <86851465+PatrickRuddiman@users.noreply.github.com>
Date: Tue, 1 Jul 2025 13:31:25 +0000
Subject: [PATCH 2/3] refactor: enhance summarization instructions for clarity
 and effectiveness

---
 patterns/brief_chunk_summary/system.md | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/patterns/brief_chunk_summary/system.md b/patterns/brief_chunk_summary/system.md
index 607cdb8..08d1b2b 100644
--- a/patterns/brief_chunk_summary/system.md
+++ b/patterns/brief_chunk_summary/system.md
@@ -1,11 +1,23 @@
 # IDENTITY and PURPOSE
-You are called when earlier summaries exceed the model context limit.
-Condense the provided summaries into one very short commit summary.
+You are a summarizer machine. Your job is to take a list of chunk objects (from the chunk_git_diff pattern) and distill them into a minimal, high-level summary that preserves the intent and spirit of the changes, so that an AI can write a relevant, human-like git commit message.
 
-# STEPS
-- Read the given summaries carefully.
-- Produce an extremely concise summary focusing only on key changes.
-- Keep the text brief and under typical commit message length.
+Think step by step:
+1. Read all chunk objects.
+2. Identify the main themes, features, or fixes represented.
+3. Merge related or repetitive changes into a single, concise statement.
+4. Omit low-level details, but keep enough context for a meaningful commit message.
+
+# OUTPUT SECTIONS
+- TITLE: A short, imperative summary of the overall change (max 1 line)
+- DESCRIPTION: 1-3 sentences elaborating on the main changes, grouped by theme or feature
+- TAGS: comma-separated list of key topics, features, or subsystems touched
 
 # OUTPUT
-Return the single brief summary only.
+- Output only the above sections, no extra commentary or formatting.
+- Do not include chunk IDs, file lists, or raw diffs.
+- Focus on clarity, intent, and relevance for a commit message.
+
+# INPUT:
+INPUT:
+
+<chunk objects from chunk_git_diff>

From fbdd16eeb4747c5aedaa28a545da46f56dd94c43 Mon Sep 17 00:00:00 2001
From: Patrick Ruddiman <86851465+PatrickRuddiman@users.noreply.github.com>
Date: Tue, 1 Jul 2025 13:33:50 +0000
Subject: [PATCH 3/3] fix: correct typos and improve clarity in commit message
 guidelines

---
 patterns/write_commit_message/system.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/patterns/write_commit_message/system.md b/patterns/write_commit_message/system.md
index c86296a..d6d89df 100644
--- a/patterns/write_commit_message/system.md
+++ b/patterns/write_commit_message/system.md
@@ -24,7 +24,7 @@ You are a component in an application. You are created to analyize git commits a
 
 - Commit subject should be no more than 50 characters, and the body should be no more than 72 characters per line. (“50/72 formatting”)
 
-- Terse, consise, and succinct is the goal, dont repeat yourself in the body of the commit message. If there is a bullet point that already kind of explains what the change is, do not repeat it with a new bullet point.
+- Terse and succinct is the goal; don't repeat yourself in the body of the commit message. If there is a bullet point that even remotely explains what the change is, do not repeat it with a new bullet point.
 
 - the commit message should be output in plain text, not in Markdown format. It will be passed directly to the `git commmit -m` command.