Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/usage-cache-breakdown.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@moonshot-ai/kimi-code": minor
---

Show the prompt cache hit rate and a read/other token breakdown in the /usage command.
29 changes: 25 additions & 4 deletions apps/kimi-code/src/tui/components/messages/usage-panel.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import type { ColorPalette } from '#/tui/theme/colors';
const LEFT_MARGIN = 2;
const SIDE_PADDING = 1;
const MIN_INTERIOR_WIDTH = 20;
const PROGRESS_BAR_WIDTH = 20;

type Colorize = (text: string) => string;

Expand Down Expand Up @@ -80,20 +81,40 @@ function buildSessionUsageSection(
const lines: string[] = [];
let totalInput = 0;
let totalOutput = 0;
// Compute max model name width for alignment (include "total" for multi-model)
const maxModelWidth =
entries.length === 0
? 0
: Math.max(
...entries.map(([model]) => model.length),
entries.length > 1 ? 'total'.length : 0,
);
for (const [model, row] of entries) {
const input = usageInputTotal(row);
const output = usageNumber(row.output);
totalInput += input;
totalOutput += output;
const paddedModel = model.padEnd(maxModelWidth);
lines.push(
` ${muted(model)} input ${value(formatTokenCount(input))} output ${value(
` ${muted(paddedModel)} input ${value(formatTokenCount(input))} output ${value(
formatTokenCount(output),
)} total ${value(formatTokenCount(input + output))}`,
);
// Cache breakdown subline
const cacheIndent = ' '.repeat(maxModelWidth + 4); // " model " → 2 + maxModelWidth + 2
const cacheRatio = input > 0 ? usageNumber(row.inputCacheRead) / input : 0;
const bar = renderProgressBar(cacheRatio, PROGRESS_BAR_WIDTH);
const pct = `${(cacheRatio * 100).toFixed(1).replace(/\.0$/, '')}%`;
lines.push(
`${cacheIndent}${muted('cache')} ${bar} ${value(pct)} ${muted('hit')} ` +
`(${value(formatTokenCount(usageNumber(row.inputCacheRead)))} ${muted('read')} ` +
`· ${value(formatTokenCount(usageNumber(row.inputOther)))} ${muted('other')})`,
Comment on lines +110 to +111
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Include cache-creation tokens in the cache breakdown

When a provider reports inputCacheCreation, those input tokens are included in the model's input total and denominator for the hit percentage, but this new subline only prints inputCacheRead and inputOther. In sessions that create prompt-cache entries (for example inputCacheCreation > 0 and little/no inputOther), /usage can show thousands of input tokens while the breakdown says 0 read · 0 other, hiding the cache-write/miss portion users need to understand cache effectiveness. Include cache-creation as its own field or fold it into the non-hit count.

Useful? React with 👍 / 👎.

);
Comment on lines +105 to +112
}
if (entries.length > 1) {
const paddedTotal = 'total'.padEnd(maxModelWidth);
lines.push(
` ${muted('total')} input ${value(formatTokenCount(totalInput))} output ${value(
` ${muted(paddedTotal)} input ${value(formatTokenCount(totalInput))} output ${value(
formatTokenCount(totalOutput),
)} total ${value(formatTokenCount(totalInput + totalOutput))}`,
);
Expand Down Expand Up @@ -127,7 +148,7 @@ function buildManagedUsageSection(
const out: string[] = [accent('Plan usage')];
for (const row of rows) {
const ratioUsed = usedRatio(row);
const bar = renderProgressBar(ratioUsed, 20);
const bar = renderProgressBar(ratioUsed, PROGRESS_BAR_WIDTH);
const pct = `${Math.round(ratioUsed * 100)}% used`;
const barColoured = chalk.hex(severityHex(ratioSeverity(ratioUsed)))(bar);
const label = row.label.padEnd(labelWidth, ' ');
Expand Down Expand Up @@ -179,7 +200,7 @@ export function buildUsageReportLines(options: UsageReportOptions): string[] {

if (options.maxContextTokens > 0) {
const ratio = safeUsageRatio(options.contextUsage);
const bar = renderProgressBar(ratio, 20);
const bar = renderProgressBar(ratio, PROGRESS_BAR_WIDTH);
const pct = `${(ratio * 100).toFixed(1)}%`;
const barColoured = chalk.hex(severityHex(ratioSeverity(ratio)))(bar);
lines.push('');
Expand Down
110 changes: 109 additions & 1 deletion apps/kimi-code/test/tui/components/messages/usage-panel.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,61 @@ function strip(text: string): string {
}

describe('UsagePanelComponent', () => {
it('shows cache hit ratio bar and read/other breakdown below each model line', () => {
  // Single-model fixture; the expected model row below reports 2.0k input
  // for these three input buckets combined.
  const sessionUsage = {
    byModel: {
      kimi: {
        inputOther: 1000,
        inputCacheRead: 500,
        inputCacheCreation: 500,
        output: 250,
      },
    },
  } as never;

  const rendered = buildUsageReportLines({
    colors: darkColors,
    sessionUsage,
    contextUsage: 0.25,
    contextTokens: 2500,
    maxContextTokens: 10000,
  }).map(strip);

  // The per-model summary row keeps its existing text.
  expect(rendered).toContain(' kimi input 2.0k output 250 total 2.3k');

  // The cache subline is identified by carrying both the "cache" and "hit" labels.
  const cacheRow = rendered.find((row) => row.includes('cache') && row.includes('hit'));
  expect(cacheRow).toBeDefined();

  // 500 read out of 2000 input → 25% hit, rendered as 5 of 20 bar cells filled.
  const expectedFragments = ['25% hit', '500 read', '1.0k other', '█████░░░░░░░░░░░░░░░'];
  for (const fragment of expectedFragments) {
    expect(cacheRow).toContain(fragment);
  }
});

it('shows zero cache hit ratio when no cache reads occurred', () => {
  // Every input token is "other": nothing was read from or written to the cache.
  const sessionUsage = {
    byModel: {
      kimi: {
        inputOther: 3000,
        inputCacheRead: 0,
        inputCacheCreation: 0,
        output: 1000,
      },
    },
  } as never;

  const rendered = buildUsageReportLines({
    colors: darkColors,
    sessionUsage,
    contextUsage: 0,
    contextTokens: 0,
    maxContextTokens: 0,
  }).map(strip);

  const cacheRow = rendered.find((row) => row.includes('cache') && row.includes('hit'));
  expect(cacheRow).toBeDefined();

  // The subline is still rendered, with a 0% ratio and the full input counted as "other".
  for (const fragment of ['0% hit', '0 read', '3.0k other']) {
    expect(cacheRow).toContain(fragment);
  }
});

it('formats session, context, and managed usage sections', () => {
const lines = buildUsageReportLines({
colors: darkColors,
Expand Down Expand Up @@ -37,14 +92,67 @@ describe('UsagePanelComponent', () => {
}).map(strip);

expect(lines).toContain('Session usage');
expect(lines).toContain(' kimi input 2.0k output 250 total 2.3k');
expect(lines).toContain('Context window');
expect(lines.join('\n')).toContain('25.0%');
expect(lines).toContain('Plan usage');
expect(lines.join('\n')).toContain('20% used');
expect(lines.join('\n')).toContain('resets tomorrow');
});

it('shows separate cache lines for each model in multi-model sessions', () => {
  // Two models: one with substantial cache reads, one with none at all.
  const sessionUsage = {
    byModel: {
      'kimi-k2.5': {
        inputOther: 4000,
        inputCacheRead: 6000,
        inputCacheCreation: 200,
        output: 500,
      },
      'deepseek-v4': {
        inputOther: 2000,
        inputCacheRead: 0,
        inputCacheCreation: 0,
        output: 1000,
      },
    },
  } as never;

  const rendered = buildUsageReportLines({
    colors: darkColors,
    sessionUsage,
    contextUsage: 0,
    contextTokens: 0,
    maxContextTokens: 0,
  }).map(strip);

  // A per-model summary row is indented, names one of the two models, carries
  // the input/output/total labels, and is not a cache subline.
  const isModelRow = (row: string): boolean => {
    if (!row.startsWith(' ')) return false;
    if (!row.includes('kimi-k2.5') && !row.includes('deepseek-v4')) return false;
    if (!row.includes('input') || !row.includes('output') || !row.includes('total')) return false;
    return !row.includes('cache');
  };
  const modelRows = rendered.filter((row) => isModelRow(row));
  expect(modelRows).toHaveLength(2);

  const [kimiRow, deepseekRow] = modelRows;
  for (const fragment of ['kimi-k2.5', '10.2k', '500', '10.7k']) {
    expect(kimiRow).toContain(fragment);
  }
  for (const fragment of ['deepseek-v4', '2.0k', '1.0k', '3.0k']) {
    expect(deepseekRow).toContain(fragment);
  }

  // Each model gets its own cache subline, in the same order as the model rows.
  const cacheRows = rendered.filter((row) => row.includes('cache') && row.includes('hit'));
  expect(cacheRows).toHaveLength(2);
  const [kimiCache, deepseekCache] = cacheRows;
  // kimi-k2.5: 6000/10200 ≈ 58.8% hit
  expect(kimiCache).toContain('58.8% hit');
  expect(kimiCache).toContain('6.0k read');
  // deepseek-v4: 0% hit
  expect(deepseekCache).toContain('0% hit');
  expect(deepseekCache).toContain('2.0k other');

  // The aggregate "total" row must not carry any cache information itself.
  const totalRow = rendered.find((row) => row.startsWith(' total'));
  expect(totalRow).toBeDefined();
  expect(totalRow!).not.toContain('cache');
});

it('wraps preformatted usage lines in a bordered panel', () => {
const component = new UsagePanelComponent(['Session usage'], darkColors.primary);
const output = component.render(80).map(strip);
Expand Down
Loading