diff --git a/crates/forge_app/src/orch.rs b/crates/forge_app/src/orch.rs index e63ce75f1e..17aa81c0b1 100644 --- a/crates/forge_app/src/orch.rs +++ b/crates/forge_app/src/orch.rs @@ -387,6 +387,11 @@ impl> Orc self.services.update(self.conversation.clone()).await?; request_count += 1; + // Emit live token usage after context is fully updated (LLM + // response + tool results folded in) so the UI shows the + // current context size before the next LLM call. + self.emit_context_usage(&context).await?; + if !should_yield && let Some(max_request_allowed) = max_requests_per_turn { // Check if agent has reached the maximum request per turn limit if request_count >= max_request_allowed { @@ -454,4 +459,38 @@ impl> Orc fn get_model(&self) -> ModelId { self.agent.model.clone() } + + /// Emits a debug-style context usage line to the UI showing current + /// token count, cached tokens, and compaction threshold. + async fn emit_context_usage(&self, context: &Context) -> anyhow::Result<()> { + let token_count = context.token_count(); + let threshold_info = self + .agent + .compact + .token_threshold + .map(|t| format!(" / {}", Self::humanize(t))) + .unwrap_or_default(); + let prefix = match token_count { + TokenCount::Approx(_) => "~", + TokenCount::Actual(_) => "", + }; + self.send( + TitleFormat::debug(format!( + "Context {}{}{threshold_info}", + prefix, + Self::humanize(*token_count), + )) + .into(), + ) + .await + } + + /// Formats a token count into a human-readable string (e.g. 
/// Formats a token count into a compact human-readable string.
///
/// Counts below 1,000 are printed verbatim. Larger counts are scaled to
/// thousands (`k`) or millions (`M`) and shown with one decimal place,
/// e.g. `1.5k`, `2.3M`.
///
/// Counts just below one million whose thousands value would round up to
/// `1000.0` are promoted to the `M` unit, so the result reads `1.0M`
/// rather than `1000.0k`.
fn humanize(n: usize) -> String {
    const THOUSAND: usize = 1_000;
    const MILLION: usize = 1_000_000;

    if n < THOUSAND {
        return n.to_string();
    }

    if n < MILLION {
        let scaled = format!("{:.1}", n as f64 / THOUSAND as f64);
        // `{:.1}` rounds, so a value such as 999_999 would render as
        // "1000.0"; fall through to the millions branch in that case.
        if scaled != "1000.0" {
            return format!("{scaled}k");
        }
    }

    format!("{:.1}M", n as f64 / MILLION as f64)
}