Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Constants/FabricPatterns.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,10 @@ public static class FabricPatterns
/// Default pattern used for generating commit messages
/// </summary>
public const string CommitPattern = "write_commit_message";

/// <summary>
/// Pattern used when context overflow requires extra summarization
/// </summary>
public const string BrevityPattern = "brief_chunk_summary";
}
}
5 changes: 5 additions & 0 deletions Constants/PatternNames.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,10 @@ public static class PatternNames
/// Default pattern used for generating commit messages
/// </summary>
public const string CommitPattern = "write_commit_message";

/// <summary>
/// Used when context overflow requires extra summarization
/// </summary>
public const string BrevityPattern = "brief_chunk_summary";
}
}
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ write-commit --verbose
# Custom AI parameters
write-commit --temperature 0.7 --topp 0.9 --pattern custom_pattern

# Built-in brevity pattern for overflowing contexts
write-commit --pattern brief_chunk_summary

# Force reinstall all patterns
write-commit --reinstall-patterns

Expand Down
42 changes: 41 additions & 1 deletion Services/OpenAIService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ public class OpenAIService
{
private readonly string _apiKey;
private readonly string _patternsDirectory;
private const int MaxContextTokens = 128000;

public OpenAIService(string apiKey)
{
Expand Down Expand Up @@ -227,14 +228,52 @@ bool verbose
throw new InvalidOperationException($"Failed to load pattern: {pattern}");
}

var combinedContent = string.Join("\n\n", chunkMessages);
var estimatedTokens = TokenHelper.EstimateTokens(systemPrompt, model) + TokenHelper.EstimateTokens(combinedContent, model);

if (estimatedTokens > MaxContextTokens && chunkMessages.Count > 1)
{
if (verbose)
{
Console.WriteLine("Context length exceeded, re-chunking summaries...");
}

var groupedSummaries = new List<string>();
var currentGroup = new List<string>();
var currentTokens = TokenHelper.EstimateTokens(systemPrompt, model);
Copy link

Copilot AI Jun 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since systemPrompt remains unchanged throughout the method, consider caching its token estimate once rather than repeatedly recalculating it to improve maintainability.

Suggested change
var currentTokens = TokenHelper.EstimateTokens(systemPrompt, model);
var currentTokens = systemPromptTokens;

Copilot uses AI. Check for mistakes.

foreach (var msg in chunkMessages)
{
var msgTokens = TokenHelper.EstimateTokens(msg, model);
if (currentTokens + msgTokens > MaxContextTokens / 2 && currentGroup.Count > 0)
{
var summary = await CombineChunkMessagesAsync(currentGroup, PatternNames.BrevityPattern, temperature, topP, presence, frequency, model, verbose);
groupedSummaries.Add(summary);
currentGroup.Clear();
currentTokens = TokenHelper.EstimateTokens(systemPrompt, model);
}

currentGroup.Add(msg);
currentTokens += msgTokens;
}

if (currentGroup.Count > 0)
{
var summary = await CombineChunkMessagesAsync(currentGroup, PatternNames.BrevityPattern, temperature, topP, presence, frequency, model, verbose);
groupedSummaries.Add(summary);
}

return await CombineChunkMessagesAsync(groupedSummaries, PatternNames.BrevityPattern, temperature, topP, presence, frequency, model, verbose);
}

// Create a client for this specific model
var chatClient = new ChatClient(model, _apiKey);

// Create the chat messages
var messages = new List<ChatMessage>
{
new SystemChatMessage(systemPrompt),
new UserChatMessage(string.Join("\n\n", chunkMessages)),
new UserChatMessage(combinedContent),
};

// Create chat completion options
Expand Down Expand Up @@ -313,4 +352,5 @@ private float ConvertPenalty(int penalty)
// OpenAI uses -2 to 2 for penalties
return Math.Clamp((float)penalty, -2f, 2f);
}

}
6 changes: 3 additions & 3 deletions Services/SemanticCoherenceAnalyzer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using System.Text.RegularExpressions;
using Microsoft.Extensions.Logging;
using WriteCommit.Models;
using WriteCommit.Services;

namespace WriteCommit.Services;

Expand Down Expand Up @@ -311,8 +312,7 @@ private string DetermineChangeType(string content)

/// <summary>
/// Estimates how many tokens <paramref name="text"/> occupies, delegating to the
/// shared TikToken-backed helper with the gpt-4o-mini encoding. The helper itself
/// falls back to a ~4-characters-per-token heuristic when no encoder is available,
/// so this method never throws for ordinary inputs.
/// </summary>
/// <param name="text">The text to measure.</param>
/// <returns>The estimated token count.</returns>
private int EstimateTokenCount(string text)
{
    return TokenHelper.EstimateTokens(text, "gpt-4o-mini");
}
}
26 changes: 26 additions & 0 deletions Services/TokenHelper.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
using System.Collections.Concurrent;

using TiktokenSharp;

namespace WriteCommit.Services;

public static class TokenHelper
{
    /// <summary>
    /// Per-model encoder cache. A ConcurrentDictionary is required because this
    /// static cache can be populated from multiple threads; a plain Dictionary
    /// would corrupt under concurrent writes. A null value records a model for
    /// which no encoder could be resolved, so the (exception-throwing) lookup
    /// failure is paid only once per model instead of on every call.
    /// </summary>
    private static readonly ConcurrentDictionary<string, TikToken?> Encoders = new();

    /// <summary>
    /// Estimates the number of tokens <paramref name="text"/> occupies for the
    /// given <paramref name="model"/>, using the TikToken encoder when one is
    /// available and a ~4-characters-per-token heuristic otherwise.
    /// </summary>
    /// <param name="text">The text to measure. Null is treated as the heuristic floor.</param>
    /// <param name="model">The OpenAI model name used to select the encoder.</param>
    /// <returns>The estimated token count (at least 1 on the fallback path).</returns>
    public static int EstimateTokens(string text, string model)
    {
        if (text is null)
        {
            // The original fallback (text.Length / 4) would have thrown
            // NullReferenceException here; return the heuristic floor instead.
            return 1;
        }

        var encoder = Encoders.GetOrAdd(model, static m =>
        {
            try
            {
                return TikToken.EncodingForModel(m);
            }
            catch
            {
                // Unknown model: cache the failure rather than re-throwing
                // (and re-catching) on every subsequent call.
                return null;
            }
        });

        if (encoder is not null)
        {
            try
            {
                return encoder.Encode(text).Count;
            }
            catch
            {
                // Fall through to the heuristic on any encoding failure.
            }
        }

        // Fallback heuristic: roughly 4 characters per token.
        return Math.Max(1, text.Length / 4);
    }
}
1 change: 1 addition & 0 deletions WriteCommit.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
<PackageReference Include="Microsoft.Extensions.Logging" Version="8.0.0" />
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="8.0.0" />
<PackageReference Include="OpenAI" Version="2.1.0" />
<PackageReference Include="TiktokenSharp" Version="1.1.7" />
</ItemGroup>
<ItemGroup>
<None Include="patterns\**\*">
Expand Down
23 changes: 23 additions & 0 deletions patterns/brief_chunk_summary/system.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# IDENTITY and PURPOSE
You are a summarizer machine. Your job is to take a list of chunk objects (from the chunk_git_diff pattern) and distill them into a minimal, high-level summary that preserves the intent and spirit of the changes, so that an AI can write a relevant, human-like git commit message.

Think step by step:
1. Read all chunk objects.
2. Identify the main themes, features, or fixes represented.
3. Merge related or repetitive changes into a single, concise statement.
4. Omit low-level details, but keep enough context for a meaningful commit message.

# OUTPUT SECTIONS
- TITLE: A short, imperative summary of the overall change (max 1 line)
- DESCRIPTION: 1-3 sentences elaborating on the main changes, grouped by theme or feature
- TAGS: comma-separated list of key topics, features, or subsystems touched

# OUTPUT
- Output only the above sections, no extra commentary or formatting.
- Do not include chunk IDs, file lists, or raw diffs.
- Focus on clarity, intent, and relevance for a commit message.

# INPUT:
INPUT:

<chunk objects from chunk_git_diff>
2 changes: 1 addition & 1 deletion patterns/write_commit_message/system.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ You are a component in an application. You are created to analyize git commits a

- Commit subject should be no more than 50 characters, and the body should be no more than 72 characters per line. (“50/72 formatting”)

- Terse, consise, and succinct is the goal, dont repeat yourself in the body of the commit message. If there is a bullet point that already kind of explains what the change is, do not repeat it with a new bullet point.
- Terse and succinct is the goal, don't repeat yourself in the body of the commit message. If there is a bullet point that even remotely explains what the change is, do not repeat it with a new bullet point.

- The commit message should be output in plain text, not in Markdown format. It will be passed directly to the `git commit -m` command.

Expand Down