From b54842f850d792c65e680743de624727d13dc024 Mon Sep 17 00:00:00 2001 From: fanyiming Date: Sat, 23 May 2026 23:11:15 +0800 Subject: [PATCH 1/8] feat(agentic): add Hidden Intent proactivity tracking framework Based on the pi-Bench Hidden Intent framework (arXiv 2605.14678), this introduces infrastructure for tracking proactive assistance quality in long-horizon agent workflows. Paper reference: pi-Bench: Evaluating Proactive Personal Assistant Agents in Long-Horizon Workflows Zhang et al., arXiv 2605.14678, May 2026 What this adds: - Hidden Intent types: IntentTerminalStatus (Completed/Inferred/Provided), HiddenIntent, PersistentIntent, SessionIntentTracking, ProactivityScore, CompletenessScore in services-core - IntentEvidenceCollector and IntentTurnEvidence in the ExecutionEngine for lightweight per-turn signal collection - Proactivity behavior guidance in agentic_mode.md and claw_mode.md system prompts - Extended facet_extraction.md with proactivity/completeness analysis dimensions - SessionUsageReport extensions with ProactivityReport and CompletenessReport
--- src/apps/desktop/src/api/agentic_api.rs | 1 + .../agentic/agents/prompts/agentic_mode.md | 16 + .../src/agentic/agents/prompts/claw_mode.md | 20 ++ .../src/agentic/coordination/coordinator.rs | 11 + src/crates/core/src/agentic/core/session.rs | 11 + .../src/agentic/execution/execution_engine.rs | 54 +++ .../src/agentic/execution/intent_evidence.rs | 310 ++++++++++++++++ src/crates/core/src/agentic/execution/mod.rs | 1 + .../src/agentic/execution/round_executor.rs | 31 ++ .../core/src/agentic/execution/types.rs | 14 +- .../insights/prompts/facet_extraction.md | 32 +- .../core/src/agentic/persistence/manager.rs | 6 + .../src/agentic/session/session_manager.rs | 75 ++++ .../core/src/service/session_usage/service.rs | 134 +++++++ .../src/session/hidden_intent_types.rs | 332 ++++++++++++++++++ src/crates/services-core/src/session/mod.rs | 1 + src/crates/services-core/src/session/types.rs | 38 ++ .../services-core/src/session_usage/types.rs | 49 +++ 18 files changed, 1134 insertions(+), 2 deletions(-) create mode 100644 src/crates/core/src/agentic/execution/intent_evidence.rs create mode 100644 src/crates/services-core/src/session/hidden_intent_types.rs diff --git a/src/apps/desktop/src/api/agentic_api.rs b/src/apps/desktop/src/api/agentic_api.rs index 0d0db727b..25d89bcda 100644 --- a/src/apps/desktop/src/api/agentic_api.rs +++ b/src/apps/desktop/src/api/agentic_api.rs @@ -574,6 +574,7 @@ pub async fn create_session( remote_connection_id: remote_conn.clone(), remote_ssh_host: remote_ssh_host.clone(), model_id: c.model_name, + enable_intent_tracking: false, })
.unwrap_or(SessionConfig { workspace_path: Some(request.workspace_path.clone()), diff --git a/src/crates/core/src/agentic/agents/prompts/agentic_mode.md b/src/crates/core/src/agentic/agents/prompts/agentic_mode.md index 5e573ff7e..87256a0e0 100644 --- a/src/crates/core/src/agentic/agents/prompts/agentic_mode.md +++ b/src/crates/core/src/agentic/agents/prompts/agentic_mode.md @@ -41,6 +41,22 @@ When presenting options, state your recommendation and reasoning, keep choices c When presenting options or plans, never include time estimates - focus on what each option involves, not how long it might take. +# Proactivity +Users often begin with underspecified requests and leave important needs, constraints, or preferences unstated. Proactive assistance means reducing the user's burden by surfacing what needs clarification and deciding what can be inferred, rather than treating ambiguity as a reason to remain passive. + +When a request is underspecified: +1. **Infer from context**: Use prior session history, workspace files, project conventions, and the user's past preferences to fill in reasonable defaults without asking. +2. **Ask targeted questions**: When inference is insufficient, use AskUserQuestion to surface the specific missing constraint. Prefer one focused question over a broad "tell me everything." +3. **Act on partial information**: Start working with reasonable assumptions while flagging them. Do not block on full specification when the first step can proceed. + +Avoid these anti-patterns: +- Restating the user's request back to them without adding value +- Asking "do you want me to proceed?" without having done any work +- Waiting for step-by-step instructions when the task direction is clear +- Asking generic open-ended questions when a concrete choice is needed + +The goal is to reduce the user's operational and cognitive effort: finish the task while minimizing avoidable back-and-forth. 
+ {VISUAL_MODE} # Doing tasks The user will primarily request you perform software engineering tasks. This includes solving bugs, adding new functionality, refactoring code, explaining code, and more. For these tasks the following steps are recommended: diff --git a/src/crates/core/src/agentic/agents/prompts/claw_mode.md b/src/crates/core/src/agentic/agents/prompts/claw_mode.md index d89368674..a6c5a7eb2 100644 --- a/src/crates/core/src/agentic/agents/prompts/claw_mode.md +++ b/src/crates/core/src/agentic/agents/prompts/claw_mode.md @@ -51,6 +51,26 @@ Operational rules: - Delete temporary sessions when they are no longer useful. - Do not create extra sessions for trivial, tightly coupled, or one-step work. +# Proactivity +Users rarely begin with a complete specification of what they actually need. They issue a brief, underspecified initial request while the intended assistance depends on hidden requirements — habits, constraints, preferences, and dependencies that the user does not explicitly state. + +Be proactive: anticipate and address these hidden needs rather than waiting for the user to spell them out. + +When the user's request is underspecified: +1. **Recover from prior context**: Check earlier sessions, workspace files, and user memory for established preferences, conventions, and decisions that apply to this request. If the user preferred a specific format, naming scheme, or workflow in a prior session, carry it forward without asking again. +2. **Infer reasonable defaults**: Use domain knowledge and common conventions to fill gaps. For example, when organizing papers, include links and open-source status by default; when preparing a document, apply the user's established style. +3. **Ask targeted clarifying questions**: When inference is insufficient, use AskUserQuestion to surface exactly the missing constraint. Each question should target one specific decision. Avoid broad "what else do you need?" 
questions — the user expects you to figure out what matters. +4. **Act on partial information**: Start with reasonable assumptions, produce an initial deliverable, and flag where you made assumptions. This lets the user correct specifics rather than providing everything from scratch. Do not block on full specification when meaningful progress can be made. + +Anti-patterns to avoid: +- Restating the request verbatim and asking "is this what you want?" +- Waiting for the user to provide every detail before taking any action +- Failing to reuse preferences and conventions established in prior sessions +- Treating underspecification as a reason to remain passive +- Making the user repeat information that already exists in the workspace or prior sessions + +The goal is to reduce the user's operational and cognitive effort: resolve hidden requirements through inference or focused elicitation, and minimize the avoidable back-and-forth that comes from passive, step-by-step instruction-following. + # Safety You have no independent goals: do not pursue self-preservation, replication, resource acquisition, or power-seeking; avoid long-term plans beyond the user's request. 
diff --git a/src/crates/core/src/agentic/coordination/coordinator.rs b/src/crates/core/src/agentic/coordination/coordinator.rs index 8ef735ff7..a99327d2b 100644 --- a/src/crates/core/src/agentic/coordination/coordinator.rs +++ b/src/crates/core/src/agentic/coordination/coordinator.rs @@ -1254,6 +1254,9 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet workspace_hostname: None, unread_completion: None, needs_user_attention: None, + intent_tracking: None, + proactivity_score: None, + completeness_score: None, }; if let Err(e) = persistence_manager .save_session_metadata(&workspace_path_buf, &metadata) @@ -2358,6 +2361,13 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet round_preempt: self.round_preempt_source.get().cloned(), round_injection: self.round_injection_source.get().cloned(), recover_partial_on_cancel: false, + intent_evidence: if session.config.enable_intent_tracking { + Some(std::sync::Arc::new(std::sync::Mutex::new( + crate::agentic::execution::intent_evidence::IntentEvidenceCollector::default(), + ))) + } else { + None + }, }; // Auto-generate session title on first message @@ -3707,6 +3717,7 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet // that belong to a different (parent) session/turn. 
round_injection: None, recover_partial_on_cancel: true, + intent_evidence: None, }; let execution_engine = self.execution_engine.clone(); diff --git a/src/crates/core/src/agentic/core/session.rs b/src/crates/core/src/agentic/core/session.rs index 05fa12896..0a1ea81d2 100644 --- a/src/crates/core/src/agentic/core/session.rs +++ b/src/crates/core/src/agentic/core/session.rs @@ -1,5 +1,9 @@ use super::state::SessionState; pub use bitfun_core_types::SessionKind; +pub use bitfun_services_core::session::hidden_intent_types::{ + HiddenIntent, IntentAssignment, IntentScope, IntentSource, IntentTerminalStatus, + PersistentIntent, SessionIntentTracking, +}; use serde::{Deserialize, Serialize}; use std::time::SystemTime; use uuid::Uuid; @@ -149,6 +153,12 @@ pub struct SessionConfig { /// Model config ID used by this session (for token usage tracking) #[serde(default, skip_serializing_if = "Option::is_none")] pub model_id: Option, + + /// Whether hidden intent tracking is enabled for this session. + /// When enabled, the agent loop tracks which hidden requirements were + /// proactively resolved vs passively waited-for. + #[serde(default)] + pub enable_intent_tracking: bool, } impl Default for SessionConfig { @@ -166,6 +176,7 @@ impl Default for SessionConfig { remote_connection_id: None, remote_ssh_host: None, model_id: None, + enable_intent_tracking: false, } } } diff --git a/src/crates/core/src/agentic/execution/execution_engine.rs b/src/crates/core/src/agentic/execution/execution_engine.rs index d5f5ab6c7..9140df0a5 100644 --- a/src/crates/core/src/agentic/execution/execution_engine.rs +++ b/src/crates/core/src/agentic/execution/execution_engine.rs @@ -2,6 +2,7 @@ //! //! 
Executes complete dialog turns, managing loops of multiple model rounds +use super::intent_evidence::IntentTurnEvidence; use super::round_executor::RoundExecutor; use super::types::{ExecutionContext, ExecutionResult, RoundContext, RoundResult}; use crate::agentic::agents::{ @@ -2065,6 +2066,35 @@ impl ExecutionEngine { total_tools += round_result.tool_calls.len(); + // Hook A: Collect intent evidence from this round + // Only runs when intent tracking is enabled for this session. + if let Some(ref collector) = context.intent_evidence { + if let Ok(mut c) = collector.lock() { + if round_result.used_ask_user_question { + c.asked_user_question = true; + c.question_topics + .extend(round_result.ask_user_question_topics.clone()); + } + c.tool_names_used.extend( + round_result + .tool_calls + .iter() + .map(|tc| tc.tool_name.clone()), + ); + c.proactive_tool_calls += round_result + .tool_calls + .iter() + .filter(|tc| { + crate::agentic::execution::intent_evidence::is_proactive_tool( + &tc.tool_name, + ) + }) + .count(); + c.produced_output |= round_result.had_assistant_text; + c.round_count += 1; + } + } + // Track partial recovery reason from the last round if round_result.partial_recovery_reason.is_some() { last_partial_recovery_reason = round_result.partial_recovery_reason.clone(); @@ -2415,6 +2445,30 @@ impl ExecutionEngine { ); } + // Hook B: Persist collected intent evidence for this turn. + // Called after the dialog turn loop exits (all rounds complete). 
+ let evidence = context.intent_evidence.as_ref().and_then(|collector| { + collector.lock().ok().map(|c| { + IntentTurnEvidence::from(&*c).with_turn_index(context.turn_index) + }) + }); + if let Some(evidence) = evidence { + if let Err(e) = self + .session_manager + .record_intent_evidence( + &context.session_id, + &context.dialog_turn_id, + evidence, + ) + .await + { + warn!( + "Failed to record intent evidence: session_id={}, turn_id={}, error={}", + context.session_id, context.dialog_turn_id, e + ); + } + } + // P1-6: Track the actual termination reason for downstream reporting. // Defaults to "complete" (model produced a final answer naturally) and // is overridden by finalize / fallback paths below. diff --git a/src/crates/core/src/agentic/execution/intent_evidence.rs b/src/crates/core/src/agentic/execution/intent_evidence.rs new file mode 100644 index 000000000..08e1d1ab1 --- /dev/null +++ b/src/crates/core/src/agentic/execution/intent_evidence.rs @@ -0,0 +1,310 @@ +//! Intent evidence collection for proactive assistance evaluation. +//! +//! Provides lightweight evidence collectors that run at round/turn boundaries +//! to gather raw signals for later intent analysis. The collectors do NOT +//! perform real-time intent status assignment; that is done post-hoc by +//! facet extraction or scoring functions. + +use bitfun_services_core::session::hidden_intent_types::{ + CompletenessLevel, CompletenessScore, IntentTerminalStatus, ProactivityLevel, + ProactivityScore, SessionIntentTracking, +}; +use serde::{Deserialize, Serialize}; + +/// Evidence collected during a single dialog turn for later intent analysis. +/// The collector is stateless per-turn: it gathers raw signals from model +/// rounds and produces an IntentTurnEvidence snapshot at turn completion. 
+#[derive(Debug, Clone, Default)] +pub struct IntentEvidenceCollector { + pub asked_user_question: bool, + pub question_topics: Vec, + pub proactive_tool_calls: usize, + pub tool_names_used: Vec, + pub produced_output: bool, + pub round_count: usize, + pub asked_follow_up_in_text: bool, +} + +/// Snapshot of evidence collected during one turn. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct IntentTurnEvidence { + pub turn_index: usize, + pub asked_user_question: bool, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub question_topics: Vec, + pub proactive_tool_calls: usize, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub tool_names_used: Vec, + pub produced_output: bool, + pub round_count: usize, + pub asked_follow_up_in_text: bool, +} + +impl From<&IntentEvidenceCollector> for IntentTurnEvidence { + fn from(c: &IntentEvidenceCollector) -> Self { + Self { + turn_index: 0, + asked_user_question: c.asked_user_question, + question_topics: c.question_topics.clone(), + proactive_tool_calls: c.proactive_tool_calls, + tool_names_used: c.tool_names_used.clone(), + produced_output: c.produced_output, + round_count: c.round_count, + asked_follow_up_in_text: c.asked_follow_up_in_text, + } + } +} + +impl IntentTurnEvidence { + pub fn with_turn_index(mut self, turn_index: usize) -> Self { + self.turn_index = turn_index; + self + } +} + +// --------------------------------------------------------------------------- +// Scoring functions +// --------------------------------------------------------------------------- + +pub fn compute_proactivity_score( + tracking: &SessionIntentTracking, +) -> Option { + if !tracking.enabled || tracking.hidden_intents.is_empty() { + return None; + } + let completed = tracking.count_by_status(IntentTerminalStatus::Completed) as u32; + let inferred = tracking.count_by_status(IntentTerminalStatus::Inferred) as u32; + let provided = 
tracking.count_by_status(IntentTerminalStatus::Provided) as u32; + let total = (completed + inferred + provided).max(1); + let score = (completed + inferred) as f32 / total as f32; + Some(ProactivityScore { + completed, inferred, provided, score, + level: Some(classify_proactivity_level(score)), + }) +} + +pub fn compute_completeness_score( + tracking: &SessionIntentTracking, +) -> Option { + if !tracking.enabled || tracking.hidden_intents.is_empty() { + return None; + } + let total = tracking.hidden_intents.len() as u32; + let resolved = tracking.hidden_intents.iter() + .filter(|i| i.terminal_status.is_some()).count() as u32; + let missed = total.saturating_sub(resolved); + let score = if total == 0 { 1.0 } else { resolved as f32 / total as f32 }; + Some(CompletenessScore { + requirements_satisfied: resolved, requirements_missed: missed, score, + level: Some(classify_completeness_level(score)), + }) +} + +pub fn classify_proactivity_level(score: f32) -> ProactivityLevel { + if score >= 0.8 { ProactivityLevel::High } + else if score >= 0.5 { ProactivityLevel::Moderate } + else if score >= 0.2 { ProactivityLevel::Low } + else { ProactivityLevel::Reactive } +} + +pub fn classify_completeness_level(score: f32) -> CompletenessLevel { + if (score - 1.0).abs() < f32::EPSILON { CompletenessLevel::Full } + else if score >= 0.7 { CompletenessLevel::Partial } + else if score >= 0.3 { CompletenessLevel::Minimal } + else { CompletenessLevel::Incomplete } +} + +pub fn is_proactive_tool(tool_name: &str) -> bool { + matches!(tool_name, + "Write" | "Edit" | "Delete" | "Bash" | "Git" | "WebSearch" + | "WebFetch" | "GenerativeUI" | "CreatePlan" + ) +} + +#[cfg(test)] +mod tests { + use super::*; + use bitfun_services_core::session::hidden_intent_types::{ + HiddenIntent, IntentScope, IntentTerminalStatus, SessionIntentTracking, + }; + + #[test] + fn collector_empty_on_init() { + let c = IntentEvidenceCollector::default(); + assert!(!c.asked_user_question); + 
assert!(c.question_topics.is_empty()); + assert_eq!(c.proactive_tool_calls, 0); + assert!(c.tool_names_used.is_empty()); + assert!(!c.produced_output); + assert_eq!(c.round_count, 0); + assert!(!c.asked_follow_up_in_text); + } + + #[test] + fn collector_records_ask_user_question() { + let mut c = IntentEvidenceCollector::default(); + c.asked_user_question = true; + c.question_topics.push("What approach?".into()); + c.question_topics.push("Which library?".into()); + let evidence = IntentTurnEvidence::from(&c).with_turn_index(1); + assert!(evidence.asked_user_question); + assert_eq!(evidence.question_topics.len(), 2); + assert_eq!(evidence.turn_index, 1); + } + + #[test] + fn intent_turn_evidence_round_trips() { + let evidence = IntentTurnEvidence { + turn_index: 2, + asked_user_question: true, + question_topics: vec!["Which format?".into()], + proactive_tool_calls: 3, + tool_names_used: vec!["Write".into(), "Edit".into()], + produced_output: true, + round_count: 5, + asked_follow_up_in_text: false, + }; + let json = serde_json::to_value(&evidence).expect("serialize"); + let rt: IntentTurnEvidence = serde_json::from_value(json).expect("deserialize"); + assert_eq!(rt.turn_index, 2); + assert!(rt.asked_user_question); + assert_eq!(rt.proactive_tool_calls, 3); + assert_eq!(rt.tool_names_used, vec!["Write", "Edit"]); + } + + #[test] + fn compute_proactivity_score_all_completed() { + let tracking = make_tracking(vec![ + IntentTerminalStatus::Completed, IntentTerminalStatus::Completed, + IntentTerminalStatus::Completed, + ]); + let s = compute_proactivity_score(&tracking).unwrap(); + assert!((s.score - 1.0).abs() < f32::EPSILON); + assert_eq!(s.completed, 3); + assert_eq!(s.inferred, 0); + assert_eq!(s.provided, 0); + assert_eq!(s.level, Some(ProactivityLevel::High)); + } + + #[test] + fn compute_proactivity_score_all_provided() { + let tracking = make_tracking(vec![ + IntentTerminalStatus::Provided, IntentTerminalStatus::Provided, + ]); + let s = 
compute_proactivity_score(&tracking).unwrap(); + assert!((s.score - 0.0).abs() < f32::EPSILON); + assert_eq!(s.provided, 2); + assert_eq!(s.level, Some(ProactivityLevel::Reactive)); + } + + #[test] + fn compute_proactivity_score_mixed() { + let tracking = make_tracking(vec![ + IntentTerminalStatus::Completed, IntentTerminalStatus::Completed, + IntentTerminalStatus::Inferred, IntentTerminalStatus::Provided, + ]); + let s = compute_proactivity_score(&tracking).unwrap(); + assert!((s.score - 0.75).abs() < f32::EPSILON); + assert_eq!(s.completed, 2); + assert_eq!(s.inferred, 1); + assert_eq!(s.provided, 1); + assert_eq!(s.level, Some(ProactivityLevel::Moderate)); + } + + #[test] + fn compute_proactivity_score_empty() { + assert_eq!(compute_proactivity_score(&SessionIntentTracking::default()), None); + } + + #[test] + fn compute_completeness_score_full() { + let tracking = make_tracking(vec![ + IntentTerminalStatus::Completed, IntentTerminalStatus::Completed, + ]); + let s = compute_completeness_score(&tracking).unwrap(); + assert!((s.score - 1.0).abs() < f32::EPSILON); + assert_eq!(s.level, Some(CompletenessLevel::Full)); + } + + #[test] + fn compute_completeness_score_partial() { + let mut tracking = make_tracking(vec![ + IntentTerminalStatus::Completed, IntentTerminalStatus::Completed, + ]); + tracking.hidden_intents.push(HiddenIntent { + intent_id: "i3".into(), description: "unresolved".into(), + scope: IntentScope::SessionLocal, + terminal_status: None, resolved_at_turn: None, source: None, + }); + let s = compute_completeness_score(&tracking).unwrap(); + assert!((s.score - 2.0 / 3.0).abs() < f32::EPSILON); + assert_eq!(s.requirements_missed, 1); + } + + #[test] + fn classify_proactivity_level_edges() { + assert_eq!(classify_proactivity_level(0.9), ProactivityLevel::High); + assert_eq!(classify_proactivity_level(0.8), ProactivityLevel::High); + assert_eq!(classify_proactivity_level(0.79), ProactivityLevel::Moderate); + assert_eq!(classify_proactivity_level(0.5), 
ProactivityLevel::Moderate); + assert_eq!(classify_proactivity_level(0.49), ProactivityLevel::Low); + assert_eq!(classify_proactivity_level(0.2), ProactivityLevel::Low); + assert_eq!(classify_proactivity_level(0.19), ProactivityLevel::Reactive); + assert_eq!(classify_proactivity_level(0.0), ProactivityLevel::Reactive); + } + + #[test] + fn classify_completeness_level_edges() { + assert_eq!(classify_completeness_level(1.0), CompletenessLevel::Full); + assert_eq!(classify_completeness_level(0.7), CompletenessLevel::Partial); + assert_eq!(classify_completeness_level(0.69), CompletenessLevel::Minimal); + assert_eq!(classify_completeness_level(0.3), CompletenessLevel::Minimal); + assert_eq!(classify_completeness_level(0.29), CompletenessLevel::Incomplete); + assert_eq!(classify_completeness_level(0.0), CompletenessLevel::Incomplete); + } + + #[test] + fn is_proactive_tool_positive() { + assert!(is_proactive_tool("Write")); + assert!(is_proactive_tool("Edit")); + assert!(is_proactive_tool("Delete")); + assert!(is_proactive_tool("Bash")); + assert!(is_proactive_tool("Git")); + assert!(is_proactive_tool("WebSearch")); + assert!(is_proactive_tool("CreatePlan")); + } + + #[test] + fn is_proactive_tool_negative() { + assert!(!is_proactive_tool("Read")); + assert!(!is_proactive_tool("Grep")); + assert!(!is_proactive_tool("Glob")); + assert!(!is_proactive_tool("TodoWrite")); + assert!(!is_proactive_tool("AskUserQuestion")); + } + + #[test] + fn compute_proactivity_disabled() { + let mut tracking = make_tracking(vec![IntentTerminalStatus::Completed]); + tracking.enabled = false; + assert_eq!(compute_proactivity_score(&tracking), None); + } + + fn make_tracking(statuses: Vec) -> SessionIntentTracking { + SessionIntentTracking { + enabled: true, + hidden_intents: statuses.into_iter().enumerate().map(|(i, status)| { + HiddenIntent { + intent_id: format!("i{}", i), + description: format!("test intent {}", i), + scope: IntentScope::SessionLocal, + terminal_status: Some(status), + 
resolved_at_turn: Some(i), + source: None, + } + }).collect(), + ..Default::default() + } + } +} diff --git a/src/crates/core/src/agentic/execution/mod.rs b/src/crates/core/src/agentic/execution/mod.rs index af22b10f6..3b58be95a 100644 --- a/src/crates/core/src/agentic/execution/mod.rs +++ b/src/crates/core/src/agentic/execution/mod.rs @@ -3,6 +3,7 @@ //! Responsible for AI interaction and model round control pub mod execution_engine; +pub mod intent_evidence; pub mod round_executor; pub mod stream_processor; pub mod types; diff --git a/src/crates/core/src/agentic/execution/round_executor.rs b/src/crates/core/src/agentic/execution/round_executor.rs index 0f0980293..441816e53 100644 --- a/src/crates/core/src/agentic/execution/round_executor.rs +++ b/src/crates/core/src/agentic/execution/round_executor.rs @@ -48,6 +48,29 @@ impl RoundExecutor { !text.trim().is_empty() } + /// Detects AskUserQuestion calls in a set of tool calls. + /// Returns (used_ask_user_question, extracted_question_topics). 
+    fn detect_ask_user_question(
+        tool_calls: &[crate::agentic::core::ToolCall],
+    ) -> (bool, Vec<String>) {
+        // The call itself is the signal: track it separately so a question
+        // without a parsable `header` is still reported as asked.
+        let mut asked = false;
+        let mut topics = Vec::new();
+        for call in tool_calls.iter().filter(|tc| tc.tool_name == "AskUserQuestion") {
+            asked = true;
+            // A missing or non-array `questions` argument simply yields no topics.
+            let questions = call.arguments.get("questions").and_then(|v| v.as_array());
+            for question in questions.into_iter().flatten() {
+                // Each question's `header` string is recorded as its topic.
+                if let Some(header) = question.get("header").and_then(|v| v.as_str()) {
+                    topics.push(header.to_string());
+                }
+            }
+        }
+        (asked, topics)
+    }
+
fn write_tool_mode(context: &RoundContext) -> WriteToolMode { WriteToolMode::from_context_var( context @@ -569,6 +592,8 @@ impl RoundExecutor { partial_recovery_reason: stream_result.partial_recovery_reason.clone(), had_assistant_text: Self::has_user_visible_assistant_text(&stream_result.full_text), had_thinking_content: !stream_result.full_thinking.is_empty(), + used_ask_user_question: false, + ask_user_question_topics: vec![], }); } @@ -818,6 +843,10 @@ impl RoundExecutor { // Note: Do not cleanup cancellation token here, as there may be subsequent model rounds // Cancellation token will be cleaned up by ExecutionEngine when the entire dialog turn ends + // Detect AskUserQuestion calls for intent evidence collection + let (used_ask_user_question, ask_user_question_topics) = + Self::detect_ask_user_question(&tool_calls); + Ok(RoundResult { assistant_message, tool_calls: tool_calls.clone(), @@ -833,6 +862,8 @@ impl RoundExecutor { partial_recovery_reason: stream_result.partial_recovery_reason.clone(), had_assistant_text: Self::has_user_visible_assistant_text(&stream_result.full_text), had_thinking_content: !stream_result.full_thinking.is_empty(), + used_ask_user_question, + ask_user_question_topics, }) } diff --git a/src/crates/core/src/agentic/execution/types.rs b/src/crates/core/src/agentic/execution/types.rs index 60b3a89a1..6524836fe 100644 --- a/src/crates/core/src/agentic/execution/types.rs +++
b/src/crates/core/src/agentic/execution/types.rs @@ -1,6 +1,7 @@ //! Execution Engine Type Definitions use crate::agentic::core::Message; +use crate::agentic::execution::intent_evidence::IntentEvidenceCollector; use crate::agentic::round_preempt::{ DialogRoundInjectionInterrupt, DialogRoundInjectionSource, DialogRoundPreemptSource, }; @@ -10,7 +11,7 @@ use crate::agentic::workspace::WorkspaceServices; use crate::agentic::WorkspaceBinding; use serde_json::Value; use std::collections::HashMap; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; use tokio_util::sync::CancellationToken; /// Execution context @@ -35,6 +36,10 @@ pub struct ExecutionContext { /// When true, stream cancellation may be converted into a partial assistant /// result if text/tool output has already been produced. pub recover_partial_on_cancel: bool, + + /// When intent tracking is enabled, this collector gathers raw signals + /// during execution for later intent analysis. + pub intent_evidence: Option>>, } /// Round context @@ -85,6 +90,13 @@ pub struct RoundResult { /// True when the model emitted any non-empty thinking / reasoning content /// in this round. pub had_thinking_content: bool, + + /// Whether the agent called AskUserQuestion in this round. + /// Set by the round executor when processing tool calls. + pub used_ask_user_question: bool, + + /// If AskUserQuestion was called, the parsed questions from its input. + pub ask_user_question_topics: Vec, } /// Finish reason diff --git a/src/crates/core/src/agentic/insights/prompts/facet_extraction.md b/src/crates/core/src/agentic/insights/prompts/facet_extraction.md index 1185626a4..1f7945cc5 100644 --- a/src/crates/core/src/agentic/insights/prompts/facet_extraction.md +++ b/src/crates/core/src/agentic/insights/prompts/facet_extraction.md @@ -27,6 +27,21 @@ CRITICAL GUIDELINES: 5. **languages_used**: Optional. 
The insights report's language chart is computed from edited file paths (Edit/Write tool), not from this field; you may still list languages you infer for context. +6. **proactivity**: Assess how proactively the AI handled underspecified or ambiguous parts of the user's request. + - proactive_hidden_intents: Number of hidden requirements the AI surfaced and resolved without the user having to explicitly state them. This includes: inferring preferences from prior context, filling in reasonable defaults, and applying established conventions without asking. + - reactive_hidden_intents: Number of requirements the user had to explicitly provide step by step because the AI did not proactively address them. + - inferred_from_context: The AI recovered requirements from prior sessions, workspace files, or established user preferences. + - targeted_questions_asked: The AI asked focused, specific clarifying questions that targeted missing information. + - passive_waiting_events: The AI restated the request or asked vague open-ended questions without making progress. + - proactivity_level: "high" (most requirements proactively resolved), "moderate" (mix of proactive and reactive), "low" (mostly waited for user to provide every detail), "reactive" (entirely step-by-step instruction following). + - proactivity_detail: "One sentence describing the AI's proactivity pattern or empty" + +7. **completeness**: Assess whether the final deliverables satisfied the user's task requirements. + - requirements_satisfied: Number of verifiable requirements that were met in the final output. + - requirements_missed: Number of requirements the user explicitly asked for that were not satisfied. + - completeness_level: "full" (all requirements met), "partial" (most met, some gaps), "minimal" (only surface request handled), "incomplete" (significant gaps). 
+ - completeness_detail: "One sentence describing completeness gaps or empty" + SESSION: {session_transcript} @@ -43,5 +58,20 @@ RESPOND WITH ONLY A VALID JSON OBJECT matching this schema: "primary_success": "fast_accurate_search|correct_code_edits|good_explanations|proactive_help|multi_file_changes|good_debugging", "brief_summary": "One sentence: what user wanted and whether they got it", "languages_used": ["programing_language1", "programing_language2"], - "user_instructions": ["Any explicit instructions user gave to AI about how to behave"] + "user_instructions": ["Any explicit instructions user gave to AI about how to behave"], + "proactivity": { + "proactive_hidden_intents": 0, + "reactive_hidden_intents": 0, + "inferred_from_context": 0, + "targeted_questions_asked": 0, + "passive_waiting_events": 0, + "proactivity_level": "high|moderate|low|reactive", + "proactivity_detail": "One sentence or empty" + }, + "completeness": { + "requirements_satisfied": 0, + "requirements_missed": 0, + "completeness_level": "full|partial|minimal|incomplete", + "completeness_detail": "One sentence or empty" + } } diff --git a/src/crates/core/src/agentic/persistence/manager.rs b/src/crates/core/src/agentic/persistence/manager.rs index 8552645d6..93d4c931b 100644 --- a/src/crates/core/src/agentic/persistence/manager.rs +++ b/src/crates/core/src/agentic/persistence/manager.rs @@ -883,6 +883,12 @@ impl PersistenceManager { workspace_hostname, unread_completion: existing.and_then(|value| value.unread_completion.clone()), needs_user_attention: existing.and_then(|value| value.needs_user_attention.clone()), + intent_tracking: existing + .and_then(|value| value.intent_tracking.clone()), + proactivity_score: existing + .and_then(|value| value.proactivity_score.clone()), + completeness_score: existing + .and_then(|value| value.completeness_score.clone()), } } diff --git a/src/crates/core/src/agentic/session/session_manager.rs b/src/crates/core/src/agentic/session/session_manager.rs index 
5a5bcfa02..c54b260e3 100644 --- a/src/crates/core/src/agentic/session/session_manager.rs +++ b/src/crates/core/src/agentic/session/session_manager.rs @@ -2770,6 +2770,81 @@ impl SessionManager { Ok(()) } + /// Record intent evidence collected during a dialog turn. + /// Appends the evidence to the session's intent tracking state. + pub async fn record_intent_evidence( + &self, + session_id: &str, + _turn_id: &str, + evidence: crate::agentic::execution::intent_evidence::IntentTurnEvidence, + ) -> BitFunResult<()> { + if !self.should_persist_session_id(session_id) { + return Ok(()); + } + + let workspace_path = self + .effective_session_workspace_path(session_id) + .await + .ok_or_else(|| { + BitFunError::Validation(format!( + "Session workspace_path is missing: {}", + session_id + )) + })?; + + let mut metadata = self + .persistence_manager + .load_session_metadata(&workspace_path, session_id) + .await? + .ok_or_else(|| { + BitFunError::NotFound(format!("Session metadata not found: {}", session_id)) + })?; + + // Initialize intent tracking if not present + let tracking = metadata.intent_tracking.get_or_insert_with(|| { + bitfun_services_core::session::hidden_intent_types::SessionIntentTracking { + enabled: true, + ..Default::default() + } + }); + + // Append the evidence as a proxy IntentAssignment for traceability. + // The actual terminal status assignment is done post-hoc by the scoring + // functions; here we just record that evidence was collected. 
+ tracking.assignments.push( + bitfun_services_core::session::hidden_intent_types::IntentAssignment { + intent_id: format!("turn-{}", evidence.turn_index), + terminal_status: + if evidence.asked_user_question { + bitfun_services_core::session::hidden_intent_types::IntentTerminalStatus::Inferred + } else if evidence.proactive_tool_calls > 0 && evidence.produced_output { + bitfun_services_core::session::hidden_intent_types::IntentTerminalStatus::Completed + } else { + bitfun_services_core::session::hidden_intent_types::IntentTerminalStatus::Provided + }, + assigned_at_turn: evidence.turn_index, + trigger_description: Some(format!( + "asked={} proactive_tools={} output={} rounds={}", + evidence.asked_user_question, + evidence.proactive_tool_calls, + evidence.produced_output, + evidence.round_count + )), + }, + ); + + self.persistence_manager + .save_session_metadata(&workspace_path, &metadata) + .await?; + + debug!( + "Intent evidence recorded: session_id={}, turn_index={}, asked_user_question={}, proactive_tools={}", + session_id, evidence.turn_index, evidence.asked_user_question, evidence.proactive_tool_calls + ); + + Ok(()) + } + /// Mark a dialog turn as failed and persist it. /// Unlike `complete_dialog_turn`, this sets the state to `Failed` with an error message. 
pub async fn fail_dialog_turn( diff --git a/src/crates/core/src/service/session_usage/service.rs b/src/crates/core/src/service/session_usage/service.rs index 3a78f6024..69a13e1a4 100644 --- a/src/crates/core/src/service/session_usage/service.rs +++ b/src/crates/core/src/service/session_usage/service.rs @@ -114,6 +114,8 @@ pub fn build_session_usage_report_from_sources( report.compression = build_compression_breakdown(turns); report.errors = build_error_breakdown(turns); report.slowest = build_slowest_spans(turns); + report.proactivity = build_proactivity_report(turns); + report.completeness = build_completeness_report(turns); report.privacy = UsagePrivacy { prompt_content_included: false, tool_inputs_included: false, @@ -939,6 +941,137 @@ fn collect_redacted_fields(report: &SessionUsageReport) -> Vec { fields } +fn build_proactivity_report( + turns: &[DialogTurnData], +) -> Option { + // Collect intent assignments from all turns + let mut completed: u32 = 0; + let mut inferred: u32 = 0; + let mut provided: u32 = 0; + let mut turn_details: Vec = Vec::new(); + + for turn in turns { + let mut turn_completed: u32 = 0; + let mut turn_inferred: u32 = 0; + let mut turn_provided: u32 = 0; + let mut asked_question = false; + let mut proactive_tools = 0usize; + + for assignment in &turn.intent_assignments { + match assignment.terminal_status { + bitfun_services_core::session::hidden_intent_types::IntentTerminalStatus::Completed => { + turn_completed += 1; + completed += 1; + } + bitfun_services_core::session::hidden_intent_types::IntentTerminalStatus::Inferred => { + turn_inferred += 1; + inferred += 1; + asked_question = true; + } + bitfun_services_core::session::hidden_intent_types::IntentTerminalStatus::Provided => { + turn_provided += 1; + provided += 1; + } + } + // Extract proactive tool count from trigger description + if let Some(ref desc) = assignment.trigger_description { + if let Some(proactive_str) = desc + .split_whitespace() + .find(|w| 
w.starts_with("proactive_tools=")) + { + if let Some(val) = proactive_str + .strip_prefix("proactive_tools=") + .and_then(|s| s.parse::().ok()) + { + proactive_tools = val; + } + } + } + if assignment.trigger_description.as_ref().is_some_and(|d| d.contains("asked=true")) { + asked_question = true; + } + } + + if turn_completed + turn_inferred + turn_provided > 0 { + turn_details.push(TurnProactivityDetail { + turn_index: turn.turn_index, + asked_question, + proactive_tool_count: proactive_tools, + intents_completed: turn_completed, + intents_inferred: turn_inferred, + intents_provided: turn_provided, + }); + } + } + + let total = (completed + inferred + provided).max(1); + let score = (completed + inferred) as f32 / total as f32; + + if total == 1 && provided == 1 && completed == 0 && inferred == 0 { + // Only one "provided" entry is not meaningful + return None; + } + + Some(ProactivityReport { + completed, + inferred, + provided, + score, + level: proactivity_level_label(score), + turn_details, + }) +} + +fn build_completeness_report( + turns: &[DialogTurnData], +) -> Option { + let mut satisfied: u32 = 0; + let mut missed: u32 = 0; + + for turn in turns { + for assignment in &turn.intent_assignments { + match assignment.terminal_status { + bitfun_services_core::session::hidden_intent_types::IntentTerminalStatus::Completed + | bitfun_services_core::session::hidden_intent_types::IntentTerminalStatus::Inferred => { + satisfied += 1; + } + bitfun_services_core::session::hidden_intent_types::IntentTerminalStatus::Provided => { + missed += 1; + } + } + } + } + + if satisfied + missed == 0 { + return None; + } + + let score = satisfied as f32 / (satisfied + missed).max(1) as f32; + + Some(CompletenessReport { + requirements_satisfied: satisfied, + requirements_missed: missed, + score, + level: completeness_level_label(score), + }) +} + +fn proactivity_level_label(score: f32) -> String { + (if score >= 0.8 { "high" } + else if score >= 0.5 { "moderate" } + else if score 
>= 0.2 { "low" } + else { "reactive" }) + .to_string() +} + +fn completeness_level_label(score: f32) -> String { + (if (score - 1.0).abs() < f32::EPSILON { "full" } + else if score >= 0.7 { "partial" } + else if score >= 0.3 { "minimal" } + else { "incomplete" }) + .to_string() +} + fn iter_tools(turns: &[DialogTurnData]) -> impl Iterator { turns.iter().flat_map(iter_turn_tools) } @@ -1832,6 +1965,7 @@ mod tests { end_time: Some(1_300 + turn_index as u64), duration_ms: Some(300), status: TurnStatus::Completed, + intent_assignments: vec![], } } diff --git a/src/crates/services-core/src/session/hidden_intent_types.rs b/src/crates/services-core/src/session/hidden_intent_types.rs new file mode 100644 index 000000000..6103a8e3a --- /dev/null +++ b/src/crates/services-core/src/session/hidden_intent_types.rs @@ -0,0 +1,332 @@ +//! Hidden Intent tracking types for proactive assistance evaluation. +//! +//! Based on the pi-Bench Hidden Intent framework, these types enable +//! tracking whether an agent proactively resolves hidden user requirements +//! or passively waits for the user to provide them. + +use serde::{Deserialize, Serialize}; + +// --------------------------------------------------------------------------- +// Core intent tracking types +// --------------------------------------------------------------------------- + +/// Terminal status of a hidden intent during a session. +/// +/// Both Completed and Inferred count toward proactivity because both reflect +/// agent initiative. Provided means the user had to surface the requirement +/// without agent prompting. 
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum IntentTerminalStatus { + Completed, + Inferred, + Provided, +} + +impl IntentTerminalStatus { + pub fn is_proactive(&self) -> bool { + matches!(self, Self::Completed | Self::Inferred) + } +} + +/// A single hidden intent -- an unstated requirement that should shape the +/// agent's behavior during interaction. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct HiddenIntent { + #[serde(alias = "intent_id")] + pub intent_id: String, + pub description: String, + #[serde(default)] + pub scope: IntentScope, + #[serde(default, skip_serializing_if = "Option::is_none", alias = "terminal_status")] + pub terminal_status: Option, + #[serde(default, skip_serializing_if = "Option::is_none", alias = "resolved_at_turn")] + pub resolved_at_turn: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub source: Option, +} + +/// Whether an intent is session-local or persists across sessions. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)] +#[serde(rename_all = "snake_case")] +pub enum IntentScope { + #[default] + SessionLocal, + Persistent, +} + +/// Source from which a hidden intent was derived. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum IntentSource { + PriorContext, + DomainKnowledge, + UserPreference, + ManualAnnotation, +} + +/// A user preference or convention that persists across sessions. 
+#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct PersistentIntent { + #[serde(alias = "intent_id")] + pub intent_id: String, + pub description: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub category: Option, + #[serde(alias = "established_in_session")] + pub established_in_session: String, + #[serde(default, skip_serializing_if = "Option::is_none", alias = "apply_count")] + pub apply_count: Option, + #[serde(default, skip_serializing_if = "Option::is_none", alias = "last_applied_at")] + pub last_applied_at: Option, + #[serde(alias = "established_at")] + pub established_at: u64, +} + +/// Records a terminal status assignment for a hidden intent at a specific turn. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct IntentAssignment { + #[serde(alias = "intent_id")] + pub intent_id: String, + #[serde(alias = "terminal_status")] + pub terminal_status: IntentTerminalStatus, + #[serde(alias = "assigned_at_turn")] + pub assigned_at_turn: usize, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub trigger_description: Option, +} + +/// Aggregate intent tracking state for a single session. 
+#[derive(Debug, Clone, Serialize, Deserialize, Default)] +#[serde(rename_all = "camelCase")] +pub struct SessionIntentTracking { + #[serde(default)] + pub enabled: bool, + #[serde(default, skip_serializing_if = "Vec::is_empty", alias = "hidden_intents")] + pub hidden_intents: Vec, + #[serde(default, skip_serializing_if = "Vec::is_empty", alias = "persistent_intents")] + pub persistent_intents: Vec, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub assignments: Vec, +} + +impl SessionIntentTracking { + pub fn all_intents_resolved(&self) -> bool { + if !self.enabled || self.hidden_intents.is_empty() { + return true; + } + self.hidden_intents.iter().all(|i| i.terminal_status.is_some()) + } + + pub fn count_by_status(&self, status: IntentTerminalStatus) -> usize { + self.hidden_intents.iter().filter(|i| i.terminal_status.as_ref() == Some(&status)).count() + } + + pub fn total_intents(&self) -> usize { + self.hidden_intents.len() + } + + pub fn proactive_count(&self) -> usize { + self.count_by_status(IntentTerminalStatus::Completed) + + self.count_by_status(IntentTerminalStatus::Inferred) + } + + pub fn proactivity_score(&self) -> Option { + let total = self.total_intents(); + if total == 0 { + return None; + } + Some(self.proactive_count() as f32 / total as f32) + } +} + +/// Proactivity score breakdown for a session. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct ProactivityScore { + pub completed: u32, + pub inferred: u32, + pub provided: u32, + pub score: f32, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub level: Option, +} + +/// Qualitative proactivity level. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum ProactivityLevel { + High, + Moderate, + Low, + Reactive, +} + +/// Completeness score breakdown for a session. 
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct CompletenessScore { + #[serde(alias = "requirements_satisfied")] + pub requirements_satisfied: u32, + #[serde(alias = "requirements_missed")] + pub requirements_missed: u32, + pub score: f32, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub level: Option, +} + +/// Qualitative completeness level. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum CompletenessLevel { + Full, + Partial, + Minimal, + Incomplete, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn terminal_status_is_proactive() { + assert!(IntentTerminalStatus::Completed.is_proactive()); + assert!(IntentTerminalStatus::Inferred.is_proactive()); + assert!(!IntentTerminalStatus::Provided.is_proactive()); + } + + #[test] + fn all_intents_resolved_empty() { + let tracking = SessionIntentTracking::default(); + assert!(tracking.all_intents_resolved()); + } + + #[test] + fn all_intents_resolved_with_intents() { + let mut tracking = SessionIntentTracking { + enabled: true, + hidden_intents: vec![HiddenIntent { + intent_id: "i1".into(), + description: "test".into(), + scope: IntentScope::SessionLocal, + terminal_status: Some(IntentTerminalStatus::Completed), + resolved_at_turn: Some(1), + source: None, + }], + ..Default::default() + }; + assert!(tracking.all_intents_resolved()); + } + + #[test] + fn all_intents_not_resolved() { + let mut tracking = SessionIntentTracking { + enabled: true, + hidden_intents: vec![ + HiddenIntent { + intent_id: "i1".into(), description: "test".into(), + scope: IntentScope::SessionLocal, + terminal_status: Some(IntentTerminalStatus::Completed), + resolved_at_turn: Some(1), source: None, + }, + HiddenIntent { + intent_id: "i2".into(), description: "test".into(), + scope: IntentScope::SessionLocal, + terminal_status: None, resolved_at_turn: None, source: None, + }, + ], + 
..Default::default() + }; + assert!(!tracking.all_intents_resolved()); + } + + #[test] + fn proactivity_score_full() { + let mut tracking = SessionIntentTracking { + enabled: true, + hidden_intents: (0..4).map(|i| HiddenIntent { + intent_id: format!("i{}", i), + description: "test".into(), + scope: IntentScope::SessionLocal, + terminal_status: Some(IntentTerminalStatus::Completed), + resolved_at_turn: Some(i), + source: None, + }).collect(), + ..Default::default() + }; + let score = tracking.proactivity_score().unwrap(); + assert!((score - 1.0).abs() < f32::EPSILON); + } + + #[test] + fn proactivity_score_mixed() { + let mut tracking = SessionIntentTracking { + enabled: true, + hidden_intents: vec![ + HiddenIntent { + intent_id: "i1".into(), description: "test".into(), + scope: IntentScope::SessionLocal, + terminal_status: Some(IntentTerminalStatus::Completed), + resolved_at_turn: Some(1), source: None, + }, + HiddenIntent { + intent_id: "i2".into(), description: "test".into(), + scope: IntentScope::SessionLocal, + terminal_status: Some(IntentTerminalStatus::Inferred), + resolved_at_turn: Some(2), source: None, + }, + HiddenIntent { + intent_id: "i3".into(), description: "test".into(), + scope: IntentScope::SessionLocal, + terminal_status: Some(IntentTerminalStatus::Provided), + resolved_at_turn: Some(3), source: None, + }, + ], + ..Default::default() + }; + let score = tracking.proactivity_score().unwrap(); + assert!((score - 2.0 / 3.0).abs() < f32::EPSILON); + } + + #[test] + fn proactivity_score_no_intents() { + let tracking = SessionIntentTracking::default(); + assert_eq!(tracking.proactivity_score(), None); + } + + #[test] + fn hidden_intent_round_trips() { + let intent = HiddenIntent { + intent_id: "i1".into(), + description: "Apply naming convention from prior session".into(), + scope: IntentScope::Persistent, + terminal_status: Some(IntentTerminalStatus::Inferred), + resolved_at_turn: Some(3), + source: Some(IntentSource::PriorContext), + }; + let json = 
serde_json::to_value(&intent).expect("serialize"); + let rt: HiddenIntent = serde_json::from_value(json).expect("deserialize"); + assert_eq!(rt.intent_id, "i1"); + assert_eq!(rt.terminal_status, Some(IntentTerminalStatus::Inferred)); + assert_eq!(rt.scope, IntentScope::Persistent); + } + + #[test] + fn proactivity_score_round_trips() { + let score = ProactivityScore { + completed: 3, inferred: 2, provided: 1, + score: 5.0 / 6.0, + level: Some(ProactivityLevel::High), + }; + let json = serde_json::to_value(&score).expect("serialize"); + let rt: ProactivityScore = serde_json::from_value(json).expect("deserialize"); + assert_eq!(rt.completed, 3); + assert_eq!(rt.inferred, 2); + assert_eq!(rt.provided, 1); + assert_eq!(rt.level, Some(ProactivityLevel::High)); + } +} diff --git a/src/crates/services-core/src/session/mod.rs b/src/crates/services-core/src/session/mod.rs index b5bdd7c1c..f32d58fb8 100644 --- a/src/crates/services-core/src/session/mod.rs +++ b/src/crates/services-core/src/session/mod.rs @@ -1,3 +1,4 @@ +pub mod hidden_intent_types; pub mod types; pub use bitfun_core_types::SessionKind; diff --git a/src/crates/services-core/src/session/types.rs b/src/crates/services-core/src/session/types.rs index 6705efb4c..f18f5d2db 100644 --- a/src/crates/services-core/src/session/types.rs +++ b/src/crates/services-core/src/session/types.rs @@ -174,6 +174,31 @@ pub struct SessionMetadata { alias = "needsUserAttention" )] pub needs_user_attention: Option, + + /// Hidden intent tracking for proactive assistance evaluation. + /// None when intent tracking is not enabled for this session. + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "intent_tracking" + )] + pub intent_tracking: Option, + + /// Proactivity score computed after session completion. + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "proactivity_score" + )] + pub proactivity_score: Option, + + /// Completeness score computed after session completion. 
+ #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "completeness_score" + )] + pub completeness_score: Option, } /// Session status @@ -292,6 +317,15 @@ pub struct DialogTurnData { /// Turn status pub status: TurnStatus, + + /// Hidden intent assignments made during this turn. + /// Each entry records a terminal status assignment for a tracked intent. + #[serde( + default, + skip_serializing_if = "Vec::is_empty", + alias = "intent_assignments" + )] + pub intent_assignments: Vec, } /// Persisted dialog turn kind. @@ -689,6 +723,9 @@ impl SessionMetadata { workspace_hostname: None, unread_completion: None, needs_user_attention: None, + intent_tracking: None, + proactivity_score: None, + completeness_score: None, } } @@ -791,6 +828,7 @@ impl DialogTurnData { end_time: None, duration_ms: None, status: TurnStatus::InProgress, + intent_assignments: Vec::new(), } } diff --git a/src/crates/services-core/src/session_usage/types.rs b/src/crates/services-core/src/session_usage/types.rs index 35b27739d..5b2631cc1 100644 --- a/src/crates/services-core/src/session_usage/types.rs +++ b/src/crates/services-core/src/session_usage/types.rs @@ -28,6 +28,15 @@ pub struct SessionUsageReport { #[serde(default)] pub slowest: Vec, pub privacy: UsagePrivacy, + + /// Proactivity analysis: how much the agent drove requirement discovery + /// vs passively waited for user instructions. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub proactivity: Option, + + /// Completeness analysis: how many requirements were satisfied. 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub completeness: Option, } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] @@ -335,6 +344,44 @@ pub struct UsagePrivacy { pub redacted_fields: Vec, } +// --------------------------------------------------------------------------- +// Proactivity & Completeness report types +// --------------------------------------------------------------------------- + +/// Proactivity report section in the session usage report. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct ProactivityReport { + pub completed: u32, + pub inferred: u32, + pub provided: u32, + pub score: f32, + pub level: String, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub turn_details: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct TurnProactivityDetail { + pub turn_index: usize, + pub asked_question: bool, + pub proactive_tool_count: usize, + pub intents_completed: u32, + pub intents_inferred: u32, + pub intents_provided: u32, +} + +/// Completeness report section in the session usage report. 
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct CompletenessReport { + pub requirements_satisfied: u32, + pub requirements_missed: u32, + pub score: f32, + pub level: String, +} + impl SessionUsageReport { pub fn partial_unavailable(session_id: impl Into, generated_at: i64) -> Self { Self { @@ -416,6 +463,8 @@ impl SessionUsageReport { file_contents_included: false, redacted_fields: vec![], }, + proactivity: None, + completeness: None, } } } From 2d12f81958d76069f1506e6547859a6a1d1594e3 Mon Sep 17 00:00:00 2001 From: fanyiming Date: Sat, 23 May 2026 23:23:24 +0800 Subject: [PATCH 2/8] fix(agentic): sync turn-level intent assignments to dialog turn file --- .../src/agentic/session/session_manager.rs | 61 ++++++++++++------- 1 file changed, 40 insertions(+), 21 deletions(-) diff --git a/src/crates/core/src/agentic/session/session_manager.rs b/src/crates/core/src/agentic/session/session_manager.rs index c54b260e3..f681b420b 100644 --- a/src/crates/core/src/agentic/session/session_manager.rs +++ b/src/crates/core/src/agentic/session/session_manager.rs @@ -2811,32 +2811,51 @@ impl SessionManager { // Append the evidence as a proxy IntentAssignment for traceability. // The actual terminal status assignment is done post-hoc by the scoring // functions; here we just record that evidence was collected. 
- tracking.assignments.push( - bitfun_services_core::session::hidden_intent_types::IntentAssignment { - intent_id: format!("turn-{}", evidence.turn_index), - terminal_status: - if evidence.asked_user_question { - bitfun_services_core::session::hidden_intent_types::IntentTerminalStatus::Inferred - } else if evidence.proactive_tool_calls > 0 && evidence.produced_output { - bitfun_services_core::session::hidden_intent_types::IntentTerminalStatus::Completed - } else { - bitfun_services_core::session::hidden_intent_types::IntentTerminalStatus::Provided - }, - assigned_at_turn: evidence.turn_index, - trigger_description: Some(format!( - "asked={} proactive_tools={} output={} rounds={}", - evidence.asked_user_question, - evidence.proactive_tool_calls, - evidence.produced_output, - evidence.round_count - )), - }, - ); + let assignment = bitfun_services_core::session::hidden_intent_types::IntentAssignment { + intent_id: format!("turn-{}", evidence.turn_index), + terminal_status: + if evidence.asked_user_question { + bitfun_services_core::session::hidden_intent_types::IntentTerminalStatus::Inferred + } else if evidence.proactive_tool_calls > 0 && evidence.produced_output { + bitfun_services_core::session::hidden_intent_types::IntentTerminalStatus::Completed + } else { + bitfun_services_core::session::hidden_intent_types::IntentTerminalStatus::Provided + }, + assigned_at_turn: evidence.turn_index, + trigger_description: Some(format!( + "asked={} proactive_tools={} output={} rounds={}", + evidence.asked_user_question, + evidence.proactive_tool_calls, + evidence.produced_output, + evidence.round_count + )), + }; + + tracking.assignments.push(assignment.clone()); self.persistence_manager .save_session_metadata(&workspace_path, &metadata) .await?; + // ALSO update the turn file on disk so that session usage report can load it! 
+ if let Ok(Some(mut turn)) = self + .persistence_manager + .load_dialog_turn(&workspace_path, session_id, evidence.turn_index) + .await + { + turn.intent_assignments.push(assignment); + if let Err(e) = self + .persistence_manager + .save_dialog_turn(&workspace_path, &turn) + .await + { + warn!( + "Failed to save dialog turn with intent evidence: session_id={}, turn_index={}, error={}", + session_id, evidence.turn_index, e + ); + } + } + debug!( "Intent evidence recorded: session_id={}, turn_index={}, asked_user_question={}, proactive_tools={}", session_id, evidence.turn_index, evidence.asked_user_question, evidence.proactive_tool_calls From 56be62dcd0b353e41f7f28dea4f7dfa45c42125e Mon Sep 17 00:00:00 2001 From: fanyiming Date: Sat, 23 May 2026 23:45:23 +0800 Subject: [PATCH 3/8] fix(agentic): wire hidden intent tracking fixes --- src/apps/desktop/src/api/agentic_api.rs | 15 ++- .../core/src/service/session_usage/service.rs | 93 +++++++++++++++---- .../src/session/hidden_intent_types.rs | 8 +- src/web-ui/src/flow_chat/hooks/useFlowChat.ts | 3 + .../flow_chat/services/BtwThreadService.ts | 4 + .../flow-chat-manager/SessionModule.ts | 3 + .../flow_chat/services/usageReportService.ts | 1 + src/web-ui/src/flow_chat/types/flow-chat.ts | 1 + .../api/service-api/AgentAPI.ts | 1 + .../api/service-api/SessionAPI.ts | 21 +++++ .../src/shared/types/session-history.ts | 9 ++ 11 files changed, 134 insertions(+), 25 deletions(-) diff --git a/src/apps/desktop/src/api/agentic_api.rs b/src/apps/desktop/src/api/agentic_api.rs index 25d89bcda..9ee828529 100644 --- a/src/apps/desktop/src/api/agentic_api.rs +++ b/src/apps/desktop/src/api/agentic_api.rs @@ -63,6 +63,8 @@ pub struct SessionConfigDTO { pub remote_connection_id: Option, #[serde(default)] pub remote_ssh_host: Option, + #[serde(default)] + pub enable_intent_tracking: Option, } #[derive(Debug, Serialize)] @@ -574,7 +576,7 @@ pub async fn create_session( remote_connection_id: remote_conn.clone(), remote_ssh_host: 
remote_ssh_host.clone(), model_id: c.model_name, - enable_intent_tracking: false, + enable_intent_tracking: c.enable_intent_tracking.unwrap_or(false), }) .unwrap_or(SessionConfig { workspace_path: Some(request.workspace_path.clone()), @@ -721,13 +723,13 @@ pub async fn ensure_coordinator_session( ) .await; let restore_result = if request.include_internal { - coordinator.restore_internal_session(&effective, session_id).await + coordinator + .restore_internal_session(&effective, session_id) + .await } else { coordinator.restore_session(&effective, session_id).await }; - restore_result - .map(|_| ()) - .map_err(|e| e.to_string()) + restore_result.map(|_| ()).map_err(|e| e.to_string()) } #[tauri::command] @@ -1636,6 +1638,7 @@ mod tests { end_time: Some(2), duration_ms: Some(1), status: TurnStatus::Completed, + intent_assignments: vec![], }; let stats = restore_turn_payload_stats(&[turn]); @@ -1698,6 +1701,7 @@ mod tests { end_time: Some(2), duration_ms: Some(1), status: TurnStatus::Completed, + intent_assignments: vec![], }]; omit_assistant_only_tool_results_for_session_view(&mut turns); @@ -1756,6 +1760,7 @@ mod tests { end_time: Some(2), duration_ms: Some(1), status: TurnStatus::Completed, + intent_assignments: vec![], }]; omit_assistant_only_tool_results_for_session_view(&mut turns); diff --git a/src/crates/core/src/service/session_usage/service.rs b/src/crates/core/src/service/session_usage/service.rs index 69a13e1a4..0db34e0de 100644 --- a/src/crates/core/src/service/session_usage/service.rs +++ b/src/crates/core/src/service/session_usage/service.rs @@ -941,9 +941,7 @@ fn collect_redacted_fields(report: &SessionUsageReport) -> Vec { fields } -fn build_proactivity_report( - turns: &[DialogTurnData], -) -> Option { +fn build_proactivity_report(turns: &[DialogTurnData]) -> Option { // Collect intent assignments from all turns let mut completed: u32 = 0; let mut inferred: u32 = 0; @@ -987,7 +985,11 @@ fn build_proactivity_report( } } } - if 
assignment.trigger_description.as_ref().is_some_and(|d| d.contains("asked=true")) { + if assignment + .trigger_description + .as_ref() + .is_some_and(|d| d.contains("asked=true")) + { asked_question = true; } } @@ -1004,7 +1006,11 @@ fn build_proactivity_report( } } - let total = (completed + inferred + provided).max(1); + let total = completed + inferred + provided; + if total == 0 { + return None; + } + let score = (completed + inferred) as f32 / total as f32; if total == 1 && provided == 1 && completed == 0 && inferred == 0 { @@ -1022,9 +1028,7 @@ fn build_proactivity_report( }) } -fn build_completeness_report( - turns: &[DialogTurnData], -) -> Option { +fn build_completeness_report(turns: &[DialogTurnData]) -> Option { let mut satisfied: u32 = 0; let mut missed: u32 = 0; @@ -1057,18 +1061,28 @@ fn build_completeness_report( } fn proactivity_level_label(score: f32) -> String { - (if score >= 0.8 { "high" } - else if score >= 0.5 { "moderate" } - else if score >= 0.2 { "low" } - else { "reactive" }) + (if score >= 0.8 { + "high" + } else if score >= 0.5 { + "moderate" + } else if score >= 0.2 { + "low" + } else { + "reactive" + }) .to_string() } fn completeness_level_label(score: f32) -> String { - (if (score - 1.0).abs() < f32::EPSILON { "full" } - else if score >= 0.7 { "partial" } - else if score >= 0.3 { "minimal" } - else { "incomplete" }) + (if (score - 1.0).abs() < f32::EPSILON { + "full" + } else if score >= 0.7 { + "partial" + } else if score >= 0.3 { + "minimal" + } else { + "incomplete" + }) .to_string() } @@ -1215,6 +1229,9 @@ mod tests { use crate::service::session::{ DialogTurnData, ModelRoundData, ToolCallData, ToolItemData, ToolResultData, UserMessageData, }; + use bitfun_services_core::session::hidden_intent_types::{ + IntentAssignment, IntentTerminalStatus, + }; use chrono::TimeZone; #[test] @@ -1281,6 +1298,50 @@ mod tests { .contains(&UsageCoverageKey::RemoteSnapshotStats)); } + #[test] + fn 
report_omits_proactivity_when_no_intent_assignments_exist() { + let request = test_request(None); + + let report = build_session_usage_report_from_turns( + request, + &[test_turn("turn-1", 0, DialogTurnKind::UserDialog)], + &[], + 1_778_347_200_000, + ); + + assert_eq!(report.proactivity, None); + assert_eq!(report.completeness, None); + } + + #[test] + fn report_includes_proactivity_when_intent_assignments_exist() { + let request = test_request(None); + let mut turn = test_turn("turn-1", 0, DialogTurnKind::UserDialog); + turn.intent_assignments.push(IntentAssignment { + intent_id: "turn-0".to_string(), + terminal_status: IntentTerminalStatus::Completed, + assigned_at_turn: 0, + trigger_description: Some( + "asked=false proactive_tools=1 output=true rounds=1".to_string(), + ), + }); + + let report = + build_session_usage_report_from_turns(request, &[turn], &[], 1_778_347_200_000); + + assert_eq!( + report.proactivity.as_ref().map(|value| value.completed), + Some(1) + ); + assert_eq!( + report + .completeness + .as_ref() + .map(|value| value.requirements_satisfied), + Some(1) + ); + } + #[test] fn report_scopes_by_workspace_identity() { let request = test_request(None); diff --git a/src/crates/services-core/src/session/hidden_intent_types.rs b/src/crates/services-core/src/session/hidden_intent_types.rs index 6103a8e3a..6d79c9435 100644 --- a/src/crates/services-core/src/session/hidden_intent_types.rs +++ b/src/crates/services-core/src/session/hidden_intent_types.rs @@ -207,7 +207,7 @@ mod tests { #[test] fn all_intents_resolved_with_intents() { - let mut tracking = SessionIntentTracking { + let tracking = SessionIntentTracking { enabled: true, hidden_intents: vec![HiddenIntent { intent_id: "i1".into(), @@ -224,7 +224,7 @@ mod tests { #[test] fn all_intents_not_resolved() { - let mut tracking = SessionIntentTracking { + let tracking = SessionIntentTracking { enabled: true, hidden_intents: vec![ HiddenIntent { @@ -246,7 +246,7 @@ mod tests { #[test] fn 
proactivity_score_full() { - let mut tracking = SessionIntentTracking { + let tracking = SessionIntentTracking { enabled: true, hidden_intents: (0..4).map(|i| HiddenIntent { intent_id: format!("i{}", i), @@ -264,7 +264,7 @@ mod tests { #[test] fn proactivity_score_mixed() { - let mut tracking = SessionIntentTracking { + let tracking = SessionIntentTracking { enabled: true, hidden_intents: vec![ HiddenIntent { diff --git a/src/web-ui/src/flow_chat/hooks/useFlowChat.ts b/src/web-ui/src/flow_chat/hooks/useFlowChat.ts index a7c0b5fc0..97bfbc4a0 100644 --- a/src/web-ui/src/flow_chat/hooks/useFlowChat.ts +++ b/src/web-ui/src/flow_chat/hooks/useFlowChat.ts @@ -111,6 +111,9 @@ export const useFlowChat = () => { enableContextCompression: true, remoteConnectionId, remoteSshHost, + ...(config?.enableIntentTracking !== undefined + ? { enableIntentTracking: config.enableIntentTracking } + : {}), } }); diff --git a/src/web-ui/src/flow_chat/services/BtwThreadService.ts b/src/web-ui/src/flow_chat/services/BtwThreadService.ts index e0d72289d..675746d96 100644 --- a/src/web-ui/src/flow_chat/services/BtwThreadService.ts +++ b/src/web-ui/src/flow_chat/services/BtwThreadService.ts @@ -66,6 +66,7 @@ export async function createBtwChildSession(params: { safeMode?: boolean; autoCompact?: boolean; enableContextCompression?: boolean; + enableIntentTracking?: boolean; requestId?: string; addMarker?: boolean; isTransient?: boolean; @@ -126,6 +127,9 @@ export async function createBtwChildSession(params: { enableContextCompression: params.enableContextCompression ?? true, remoteConnectionId, remoteSshHost, + ...(params.enableIntentTracking !== undefined + ? 
{ enableIntentTracking: params.enableIntentTracking } + : {}), }, }) ).sessionId diff --git a/src/web-ui/src/flow_chat/services/flow-chat-manager/SessionModule.ts b/src/web-ui/src/flow_chat/services/flow-chat-manager/SessionModule.ts index e9d74e71b..c6bd96924 100644 --- a/src/web-ui/src/flow_chat/services/flow-chat-manager/SessionModule.ts +++ b/src/web-ui/src/flow_chat/services/flow-chat-manager/SessionModule.ts @@ -430,6 +430,9 @@ export async function createChatSession( enableContextCompression: true, remoteConnectionId, remoteSshHost, + ...(config.enableIntentTracking !== undefined + ? { enableIntentTracking: config.enableIntentTracking } + : {}), } }); diff --git a/src/web-ui/src/flow_chat/services/usageReportService.ts b/src/web-ui/src/flow_chat/services/usageReportService.ts index a187ce427..2020e03cb 100644 --- a/src/web-ui/src/flow_chat/services/usageReportService.ts +++ b/src/web-ui/src/flow_chat/services/usageReportService.ts @@ -290,6 +290,7 @@ function toPersistedLocalReportTurn(turn: DialogTurn): DialogTurnData { endTime: turn.endTime, durationMs: 0, status: 'completed', + intentAssignments: [], }; } diff --git a/src/web-ui/src/flow_chat/types/flow-chat.ts b/src/web-ui/src/flow_chat/types/flow-chat.ts index 304ddf49b..86af8c706 100644 --- a/src/web-ui/src/flow_chat/types/flow-chat.ts +++ b/src/web-ui/src/flow_chat/types/flow-chat.ts @@ -413,6 +413,7 @@ export interface SessionConfig { /** Disambiguates sessions when multiple remote workspaces share the same `workspacePath`. 
*/ remoteConnectionId?: string; remoteSshHost?: string; + enableIntentTracking?: boolean; } /** diff --git a/src/web-ui/src/infrastructure/api/service-api/AgentAPI.ts b/src/web-ui/src/infrastructure/api/service-api/AgentAPI.ts index 8a8200160..346f5599f 100644 --- a/src/web-ui/src/infrastructure/api/service-api/AgentAPI.ts +++ b/src/web-ui/src/infrastructure/api/service-api/AgentAPI.ts @@ -34,6 +34,7 @@ export interface SessionConfig { compressionThreshold?: number; remoteConnectionId?: string; remoteSshHost?: string; + enableIntentTracking?: boolean; } diff --git a/src/web-ui/src/infrastructure/api/service-api/SessionAPI.ts b/src/web-ui/src/infrastructure/api/service-api/SessionAPI.ts index 8aeface2e..c2a9f1252 100644 --- a/src/web-ui/src/infrastructure/api/service-api/SessionAPI.ts +++ b/src/web-ui/src/infrastructure/api/service-api/SessionAPI.ts @@ -139,6 +139,27 @@ export interface SessionUsageReport { fileContentsIncluded: boolean; redactedFields: string[]; }; + proactivity?: { + completed: number; + inferred: number; + provided: number; + score: number; + level: 'high' | 'moderate' | 'low' | 'reactive'; + turnDetails?: Array<{ + turnIndex: number; + askedQuestion: boolean; + proactiveToolCount: number; + intentsCompleted: number; + intentsInferred: number; + intentsProvided: number; + }>; + }; + completeness?: { + requirementsSatisfied: number; + requirementsMissed: number; + score: number; + level: 'full' | 'partial' | 'minimal' | 'incomplete'; + }; } function remoteSessionFields( diff --git a/src/web-ui/src/shared/types/session-history.ts b/src/web-ui/src/shared/types/session-history.ts index c5dc11c83..956c92776 100644 --- a/src/web-ui/src/shared/types/session-history.ts +++ b/src/web-ui/src/shared/types/session-history.ts @@ -97,6 +97,14 @@ export interface ReviewActionPersistedState { export type SessionStatus = 'active' | 'archived' | 'completed'; export type DialogTurnKind = 'user_dialog' | 'manual_compaction' | 'local_command'; +export type 
IntentTerminalStatus = 'completed' | 'inferred' | 'provided'; + +export interface IntentAssignment { + intentId: string; + terminalStatus: IntentTerminalStatus; + assignedAtTurn: number; + triggerDescription?: string; +} export type LocalCommandKind = 'usage_report' | 'goal_pending' | 'goal_verifying'; @@ -130,6 +138,7 @@ export interface DialogTurnData { endTime?: number; durationMs?: number; status: TurnStatus; + intentAssignments?: IntentAssignment[]; } export interface UserMessageData { From 74b12817981c812c0d45911f610f977773aea5c6 Mon Sep 17 00:00:00 2001 From: fanyiming Date: Sun, 24 May 2026 00:03:12 +0800 Subject: [PATCH 4/8] fix(agentic): align hidden intent reporting with pi-bench --- src/apps/desktop/src/api/agentic_api.rs | 7 +- .../src/agentic/execution/execution_engine.rs | 14 +- .../src/agentic/execution/intent_evidence.rs | 199 +++++++---------- .../src/agentic/session/session_manager.rs | 169 ++++++++------- .../core/src/service/session_usage/service.rs | 203 +++++++++--------- .../src/session/hidden_intent_types.rs | 156 +++++++++++--- src/crates/services-core/src/session/types.rs | 49 ++++- .../src/shared/types/session-history.ts | 12 ++ 8 files changed, 471 insertions(+), 338 deletions(-) diff --git a/src/apps/desktop/src/api/agentic_api.rs b/src/apps/desktop/src/api/agentic_api.rs index 9ee828529..d00e2d8c6 100644 --- a/src/apps/desktop/src/api/agentic_api.rs +++ b/src/apps/desktop/src/api/agentic_api.rs @@ -14,8 +14,8 @@ use bitfun_core::agentic::coordination::{ }; use bitfun_core::agentic::core::*; use bitfun_core::agentic::deep_review_policy::{ - apply_deep_review_queue_control, default_review_team_definition, DeepReviewQueueControlAction, - ReviewTeamDefinition, + DeepReviewQueueControlAction, ReviewTeamDefinition, apply_deep_review_queue_control, + default_review_team_definition, }; use bitfun_core::agentic::image_analysis::ImageContextData; use bitfun_core::agentic::tools::image_context::get_image_context; @@ -1639,6 +1639,7 @@ mod tests 
{ duration_ms: Some(1), status: TurnStatus::Completed, intent_assignments: vec![], + intent_evidence: None, }; let stats = restore_turn_payload_stats(&[turn]); @@ -1702,6 +1703,7 @@ mod tests { duration_ms: Some(1), status: TurnStatus::Completed, intent_assignments: vec![], + intent_evidence: None, }]; omit_assistant_only_tool_results_for_session_view(&mut turns); @@ -1761,6 +1763,7 @@ mod tests { duration_ms: Some(1), status: TurnStatus::Completed, intent_assignments: vec![], + intent_evidence: None, }]; omit_assistant_only_tool_results_for_session_view(&mut turns); diff --git a/src/crates/core/src/agentic/execution/execution_engine.rs b/src/crates/core/src/agentic/execution/execution_engine.rs index 9140df0a5..8f7b02d04 100644 --- a/src/crates/core/src/agentic/execution/execution_engine.rs +++ b/src/crates/core/src/agentic/execution/execution_engine.rs @@ -2,7 +2,6 @@ //! //! Executes complete dialog turns, managing loops of multiple model rounds -use super::intent_evidence::IntentTurnEvidence; use super::round_executor::RoundExecutor; use super::types::{ExecutionContext, ExecutionResult, RoundContext, RoundResult}; use crate::agentic::agents::{ @@ -2448,18 +2447,15 @@ impl ExecutionEngine { // Hook B: Persist collected intent evidence for this turn. // Called after the dialog turn loop exits (all rounds complete). 
let evidence = context.intent_evidence.as_ref().and_then(|collector| { - collector.lock().ok().map(|c| { - IntentTurnEvidence::from(&*c).with_turn_index(context.turn_index) - }) + collector + .lock() + .ok() + .map(|c| c.snapshot(context.turn_index)) }); if let Some(evidence) = evidence { if let Err(e) = self .session_manager - .record_intent_evidence( - &context.session_id, - &context.dialog_turn_id, - evidence, - ) + .record_intent_evidence(&context.session_id, &context.dialog_turn_id, evidence) .await { warn!( diff --git a/src/crates/core/src/agentic/execution/intent_evidence.rs b/src/crates/core/src/agentic/execution/intent_evidence.rs index 08e1d1ab1..c8d7e5409 100644 --- a/src/crates/core/src/agentic/execution/intent_evidence.rs +++ b/src/crates/core/src/agentic/execution/intent_evidence.rs @@ -1,15 +1,14 @@ //! Intent evidence collection for proactive assistance evaluation. //! -//! Provides lightweight evidence collectors that run at round/turn boundaries -//! to gather raw signals for later intent analysis. The collectors do NOT -//! perform real-time intent status assignment; that is done post-hoc by -//! facet extraction or scoring functions. +//! This module collects lightweight trajectory signals during execution. It +//! intentionally does not assign hidden-intent terminal statuses: pi-Bench style +//! assignment requires comparing a turn against concrete hidden intents with a +//! two-stage evaluator (direct satisfaction before targeted elicitation). use bitfun_services_core::session::hidden_intent_types::{ - CompletenessLevel, CompletenessScore, IntentTerminalStatus, ProactivityLevel, - ProactivityScore, SessionIntentTracking, + IntentTerminalStatus, IntentTurnEvidence, ProactivityLevel, ProactivityScore, + SessionIntentTracking, }; -use serde::{Deserialize, Serialize}; /// Evidence collected during a single dialog turn for later intent analysis. 
/// The collector is stateless per-turn: it gathers raw signals from model @@ -25,100 +24,72 @@ pub struct IntentEvidenceCollector { pub asked_follow_up_in_text: bool, } -/// Snapshot of evidence collected during one turn. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct IntentTurnEvidence { - pub turn_index: usize, - pub asked_user_question: bool, - #[serde(default, skip_serializing_if = "Vec::is_empty")] - pub question_topics: Vec, - pub proactive_tool_calls: usize, - #[serde(default, skip_serializing_if = "Vec::is_empty")] - pub tool_names_used: Vec, - pub produced_output: bool, - pub round_count: usize, - pub asked_follow_up_in_text: bool, -} - -impl From<&IntentEvidenceCollector> for IntentTurnEvidence { - fn from(c: &IntentEvidenceCollector) -> Self { - Self { - turn_index: 0, - asked_user_question: c.asked_user_question, - question_topics: c.question_topics.clone(), - proactive_tool_calls: c.proactive_tool_calls, - tool_names_used: c.tool_names_used.clone(), - produced_output: c.produced_output, - round_count: c.round_count, - asked_follow_up_in_text: c.asked_follow_up_in_text, +impl IntentEvidenceCollector { + pub fn snapshot(&self, turn_index: usize) -> IntentTurnEvidence { + IntentTurnEvidence { + turn_index, + asked_user_question: self.asked_user_question, + question_topics: self.question_topics.clone(), + proactive_tool_calls: self.proactive_tool_calls, + tool_names_used: self.tool_names_used.clone(), + produced_output: self.produced_output, + round_count: self.round_count, + asked_follow_up_in_text: self.asked_follow_up_in_text, } } } -impl IntentTurnEvidence { - pub fn with_turn_index(mut self, turn_index: usize) -> Self { - self.turn_index = turn_index; - self - } -} - // --------------------------------------------------------------------------- // Scoring functions // --------------------------------------------------------------------------- -pub fn compute_proactivity_score( - tracking: 
&SessionIntentTracking, -) -> Option { +pub fn compute_proactivity_score(tracking: &SessionIntentTracking) -> Option { if !tracking.enabled || tracking.hidden_intents.is_empty() { return None; } + if !tracking.all_intents_resolved() { + return None; + } + let completed = tracking.count_by_status(IntentTerminalStatus::Completed) as u32; let inferred = tracking.count_by_status(IntentTerminalStatus::Inferred) as u32; let provided = tracking.count_by_status(IntentTerminalStatus::Provided) as u32; - let total = (completed + inferred + provided).max(1); + let total = tracking.hidden_intents.len() as u32; + let score = (completed + inferred) as f32 / total as f32; Some(ProactivityScore { - completed, inferred, provided, score, + completed, + inferred, + provided, + score, level: Some(classify_proactivity_level(score)), }) } -pub fn compute_completeness_score( - tracking: &SessionIntentTracking, -) -> Option { - if !tracking.enabled || tracking.hidden_intents.is_empty() { - return None; - } - let total = tracking.hidden_intents.len() as u32; - let resolved = tracking.hidden_intents.iter() - .filter(|i| i.terminal_status.is_some()).count() as u32; - let missed = total.saturating_sub(resolved); - let score = if total == 0 { 1.0 } else { resolved as f32 / total as f32 }; - Some(CompletenessScore { - requirements_satisfied: resolved, requirements_missed: missed, score, - level: Some(classify_completeness_level(score)), - }) -} - pub fn classify_proactivity_level(score: f32) -> ProactivityLevel { - if score >= 0.8 { ProactivityLevel::High } - else if score >= 0.5 { ProactivityLevel::Moderate } - else if score >= 0.2 { ProactivityLevel::Low } - else { ProactivityLevel::Reactive } -} - -pub fn classify_completeness_level(score: f32) -> CompletenessLevel { - if (score - 1.0).abs() < f32::EPSILON { CompletenessLevel::Full } - else if score >= 0.7 { CompletenessLevel::Partial } - else if score >= 0.3 { CompletenessLevel::Minimal } - else { CompletenessLevel::Incomplete } + if score 
>= 0.8 { + ProactivityLevel::High + } else if score >= 0.5 { + ProactivityLevel::Moderate + } else if score >= 0.2 { + ProactivityLevel::Low + } else { + ProactivityLevel::Reactive + } } pub fn is_proactive_tool(tool_name: &str) -> bool { - matches!(tool_name, - "Write" | "Edit" | "Delete" | "Bash" | "Git" | "WebSearch" - | "WebFetch" | "GenerativeUI" | "CreatePlan" + matches!( + tool_name, + "Write" + | "Edit" + | "Delete" + | "Bash" + | "Git" + | "WebSearch" + | "WebFetch" + | "GenerativeUI" + | "CreatePlan" ) } @@ -143,11 +114,15 @@ mod tests { #[test] fn collector_records_ask_user_question() { - let mut c = IntentEvidenceCollector::default(); - c.asked_user_question = true; + let mut c = IntentEvidenceCollector { + asked_user_question: true, + ..Default::default() + }; c.question_topics.push("What approach?".into()); c.question_topics.push("Which library?".into()); - let evidence = IntentTurnEvidence::from(&c).with_turn_index(1); + + let evidence = c.snapshot(1); + assert!(evidence.asked_user_question); assert_eq!(evidence.question_topics.len(), 2); assert_eq!(evidence.turn_index, 1); @@ -176,7 +151,8 @@ mod tests { #[test] fn compute_proactivity_score_all_completed() { let tracking = make_tracking(vec![ - IntentTerminalStatus::Completed, IntentTerminalStatus::Completed, + IntentTerminalStatus::Completed, + IntentTerminalStatus::Completed, IntentTerminalStatus::Completed, ]); let s = compute_proactivity_score(&tracking).unwrap(); @@ -190,7 +166,8 @@ mod tests { #[test] fn compute_proactivity_score_all_provided() { let tracking = make_tracking(vec![ - IntentTerminalStatus::Provided, IntentTerminalStatus::Provided, + IntentTerminalStatus::Provided, + IntentTerminalStatus::Provided, ]); let s = compute_proactivity_score(&tracking).unwrap(); assert!((s.score - 0.0).abs() < f32::EPSILON); @@ -201,8 +178,10 @@ mod tests { #[test] fn compute_proactivity_score_mixed() { let tracking = make_tracking(vec![ - IntentTerminalStatus::Completed, 
IntentTerminalStatus::Completed, - IntentTerminalStatus::Inferred, IntentTerminalStatus::Provided, + IntentTerminalStatus::Completed, + IntentTerminalStatus::Completed, + IntentTerminalStatus::Inferred, + IntentTerminalStatus::Provided, ]); let s = compute_proactivity_score(&tracking).unwrap(); assert!((s.score - 0.75).abs() < f32::EPSILON); @@ -214,32 +193,28 @@ mod tests { #[test] fn compute_proactivity_score_empty() { - assert_eq!(compute_proactivity_score(&SessionIntentTracking::default()), None); + assert_eq!( + compute_proactivity_score(&SessionIntentTracking::default()), + None + ); } #[test] - fn compute_completeness_score_full() { - let tracking = make_tracking(vec![ - IntentTerminalStatus::Completed, IntentTerminalStatus::Completed, - ]); - let s = compute_completeness_score(&tracking).unwrap(); - assert!((s.score - 1.0).abs() < f32::EPSILON); - assert_eq!(s.level, Some(CompletenessLevel::Full)); - } - - #[test] - fn compute_completeness_score_partial() { + fn compute_proactivity_score_requires_resolved_intents() { let mut tracking = make_tracking(vec![ - IntentTerminalStatus::Completed, IntentTerminalStatus::Completed, + IntentTerminalStatus::Completed, + IntentTerminalStatus::Provided, ]); tracking.hidden_intents.push(HiddenIntent { - intent_id: "i3".into(), description: "unresolved".into(), + intent_id: "i-unresolved".into(), + description: "unresolved intent".into(), scope: IntentScope::SessionLocal, - terminal_status: None, resolved_at_turn: None, source: None, + terminal_status: None, + resolved_at_turn: None, + source: None, }); - let s = compute_completeness_score(&tracking).unwrap(); - assert!((s.score - 2.0 / 3.0).abs() < f32::EPSILON); - assert_eq!(s.requirements_missed, 1); + + assert_eq!(compute_proactivity_score(&tracking), None); } #[test] @@ -254,16 +229,6 @@ mod tests { assert_eq!(classify_proactivity_level(0.0), ProactivityLevel::Reactive); } - #[test] - fn classify_completeness_level_edges() { - 
assert_eq!(classify_completeness_level(1.0), CompletenessLevel::Full); - assert_eq!(classify_completeness_level(0.7), CompletenessLevel::Partial); - assert_eq!(classify_completeness_level(0.69), CompletenessLevel::Minimal); - assert_eq!(classify_completeness_level(0.3), CompletenessLevel::Minimal); - assert_eq!(classify_completeness_level(0.29), CompletenessLevel::Incomplete); - assert_eq!(classify_completeness_level(0.0), CompletenessLevel::Incomplete); - } - #[test] fn is_proactive_tool_positive() { assert!(is_proactive_tool("Write")); @@ -294,16 +259,18 @@ mod tests { fn make_tracking(statuses: Vec) -> SessionIntentTracking { SessionIntentTracking { enabled: true, - hidden_intents: statuses.into_iter().enumerate().map(|(i, status)| { - HiddenIntent { + hidden_intents: statuses + .into_iter() + .enumerate() + .map(|(i, status)| HiddenIntent { intent_id: format!("i{}", i), description: format!("test intent {}", i), scope: IntentScope::SessionLocal, terminal_status: Some(status), resolved_at_turn: Some(i), source: None, - } - }).collect(), + }) + .collect(), ..Default::default() } } diff --git a/src/crates/core/src/agentic/session/session_manager.rs b/src/crates/core/src/agentic/session/session_manager.rs index f681b420b..c1e7a1f96 100644 --- a/src/crates/core/src/agentic/session/session_manager.rs +++ b/src/crates/core/src/agentic/session/session_manager.rs @@ -3,8 +3,8 @@ //! 
Responsible for session CRUD, lifecycle management, and resource association use crate::agentic::core::{ - new_turn_id, CompressionContract, CompressionState, Message, MessageSemanticKind, - ProcessingPhase, Session, SessionConfig, SessionKind, SessionState, SessionSummary, TurnStats, + CompressionContract, CompressionState, Message, MessageSemanticKind, ProcessingPhase, Session, + SessionConfig, SessionKind, SessionState, SessionSummary, TurnStats, new_turn_id, }; use crate::agentic::image_analysis::ImageContextData; use crate::agentic::persistence::PersistenceManager; @@ -15,8 +15,8 @@ use crate::agentic::session::{ }; use crate::infrastructure::ai::get_global_ai_client_factory; use crate::service::config::{ - get_app_language_code, get_global_config_service, short_model_user_language_instruction, - subscribe_config_updates, ConfigUpdateEvent, + ConfigUpdateEvent, get_app_language_code, get_global_config_service, + short_model_user_language_instruction, subscribe_config_updates, }; use crate::service::session::{ DialogTurnData, DialogTurnKind, ModelRoundData, SessionMetadata, SessionRelationship, @@ -115,8 +115,7 @@ struct SessionCleanupCandidate { } impl SessionManager { - async fn load_ai_config_for_model_resolution() - -> Option + async fn load_ai_config_for_model_resolution() -> Option { let config_service = get_global_config_service().await.ok()?; config_service.get_config(Some("ai")).await.ok() @@ -1243,9 +1242,7 @@ impl SessionManager { if session.session_name != expected_current_title { debug!( "Skipping auto-generated title because current title changed: session_id={}, expected_title={}, current_title={}", - session_id, - expected_current_title, - session.session_name + session_id, expected_current_title, session.session_name ); return Ok(false); } @@ -2145,19 +2142,26 @@ impl SessionManager { .sessions .get(session_id) .map(|value| value.clone()) - .ok_or_else(|| BitFunError::NotFound(format!("Session not found: {}", session_id)))?; + .ok_or_else(|| { + 
BitFunError::NotFound(format!("Session not found: {}", session_id)) + })?; self.persistence_manager .save_session(&workspace_path, &session) .await?; self.persistence_manager .load_session_metadata(&workspace_path, session_id) .await? - .ok_or_else(|| BitFunError::NotFound(format!("Session not found: {}", session_id)))? + .ok_or_else(|| { + BitFunError::NotFound(format!("Session not found: {}", session_id)) + })? } }; metadata.custom_metadata = Some(match (metadata.custom_metadata.take(), patch) { - (Some(serde_json::Value::Object(mut existing)), serde_json::Value::Object(patch_obj)) => { + ( + Some(serde_json::Value::Object(mut existing)), + serde_json::Value::Object(patch_obj), + ) => { for (key, value) in patch_obj { existing.insert(key, value); } @@ -2201,14 +2205,18 @@ impl SessionManager { .sessions .get(session_id) .map(|value| value.clone()) - .ok_or_else(|| BitFunError::NotFound(format!("Session not found: {}", session_id)))?; + .ok_or_else(|| { + BitFunError::NotFound(format!("Session not found: {}", session_id)) + })?; self.persistence_manager .save_session(&workspace_path, &session) .await?; self.persistence_manager .load_session_metadata(&workspace_path, session_id) .await? - .ok_or_else(|| BitFunError::NotFound(format!("Session not found: {}", session_id)))? + .ok_or_else(|| { + BitFunError::NotFound(format!("Session not found: {}", session_id)) + })? } }; @@ -2248,20 +2256,26 @@ impl SessionManager { .sessions .get(session_id) .map(|value| value.clone()) - .ok_or_else(|| BitFunError::NotFound(format!("Session not found: {}", session_id)))?; + .ok_or_else(|| { + BitFunError::NotFound(format!("Session not found: {}", session_id)) + })?; self.persistence_manager .save_session(&workspace_path, &session) .await?; self.persistence_manager .load_session_metadata(&workspace_path, session_id) .await? - .ok_or_else(|| BitFunError::NotFound(format!("Session not found: {}", session_id)))? 
+ .ok_or_else(|| { + BitFunError::NotFound(format!("Session not found: {}", session_id)) + })? } }; metadata.relationship = Some(relationship); - if let Some(serde_json::Value::Object(mut custom_metadata)) = metadata.custom_metadata.take() { + if let Some(serde_json::Value::Object(mut custom_metadata)) = + metadata.custom_metadata.take() + { for key in [ "kind", "parentSessionId", @@ -2273,8 +2287,8 @@ impl SessionManager { ] { custom_metadata.remove(key); } - metadata.custom_metadata = (!custom_metadata.is_empty()) - .then_some(serde_json::Value::Object(custom_metadata)); + metadata.custom_metadata = + (!custom_metadata.is_empty()).then_some(serde_json::Value::Object(custom_metadata)); } self.persistence_manager @@ -2392,14 +2406,18 @@ impl SessionManager { .sessions .get(session_id) .map(|value| value.clone()) - .ok_or_else(|| BitFunError::NotFound(format!("Session not found: {}", session_id)))?; + .ok_or_else(|| { + BitFunError::NotFound(format!("Session not found: {}", session_id)) + })?; self.persistence_manager .save_session(&workspace_path, &session) .await?; self.persistence_manager .load_session_metadata(&workspace_path, session_id) .await? - .ok_or_else(|| BitFunError::NotFound(format!("Session not found: {}", session_id)))? + .ok_or_else(|| { + BitFunError::NotFound(format!("Session not found: {}", session_id)) + })? } }; @@ -2776,7 +2794,7 @@ impl SessionManager { &self, session_id: &str, _turn_id: &str, - evidence: crate::agentic::execution::intent_evidence::IntentTurnEvidence, + evidence: bitfun_services_core::session::hidden_intent_types::IntentTurnEvidence, ) -> BitFunResult<()> { if !self.should_persist_session_id(session_id) { return Ok(()); @@ -2807,43 +2825,25 @@ impl SessionManager { ..Default::default() } }); + tracking.enabled = true; - // Append the evidence as a proxy IntentAssignment for traceability. - // The actual terminal status assignment is done post-hoc by the scoring - // functions; here we just record that evidence was collected. 
- let assignment = bitfun_services_core::session::hidden_intent_types::IntentAssignment { - intent_id: format!("turn-{}", evidence.turn_index), - terminal_status: - if evidence.asked_user_question { - bitfun_services_core::session::hidden_intent_types::IntentTerminalStatus::Inferred - } else if evidence.proactive_tool_calls > 0 && evidence.produced_output { - bitfun_services_core::session::hidden_intent_types::IntentTerminalStatus::Completed - } else { - bitfun_services_core::session::hidden_intent_types::IntentTerminalStatus::Provided - }, - assigned_at_turn: evidence.turn_index, - trigger_description: Some(format!( - "asked={} proactive_tools={} output={} rounds={}", - evidence.asked_user_question, - evidence.proactive_tool_calls, - evidence.produced_output, - evidence.round_count - )), - }; - - tracking.assignments.push(assignment.clone()); + tracking + .turn_evidence + .retain(|existing| existing.turn_index != evidence.turn_index); + tracking.turn_evidence.push(evidence.clone()); self.persistence_manager .save_session_metadata(&workspace_path, &metadata) .await?; - // ALSO update the turn file on disk so that session usage report can load it! + // Also update the turn file so future trajectory evaluators can load + // turn-local evidence without reading session metadata first. 
if let Ok(Some(mut turn)) = self .persistence_manager .load_dialog_turn(&workspace_path, session_id, evidence.turn_index) .await { - turn.intent_assignments.push(assignment); + turn.intent_evidence = Some(evidence.clone()); if let Err(e) = self .persistence_manager .save_dialog_turn(&workspace_path, &turn) @@ -2858,7 +2858,10 @@ impl SessionManager { debug!( "Intent evidence recorded: session_id={}, turn_index={}, asked_user_question={}, proactive_tools={}", - session_id, evidence.turn_index, evidence.asked_user_question, evidence.proactive_tool_calls + session_id, + evidence.turn_index, + evidence.asked_user_question, + evidence.proactive_tool_calls ); Ok(()) @@ -3407,8 +3410,7 @@ impl SessionManager { // Construct system prompt let system_prompt = format!( "You are a professional session title generation assistant. Based on the user's message content, generate a concise and accurate session title.\n\nRequirements:\n- Title should not exceed {} characters\n- {}\n- Concise and accurate, reflecting the conversation topic\n- Do not add quotes or other decorative symbols\n- Return only the title text, no other content", - max_length, - language_instruction + max_length, language_instruction ); // Truncate message to save tokens (max 200 characters) @@ -3865,9 +3867,11 @@ mod tests { .expect("session should create"); let snapshots = SessionManager::collect_auto_save_snapshots(&manager.sessions); - assert!(snapshots - .iter() - .any(|snapshot| snapshot.session_id == session.session_id)); + assert!( + snapshots + .iter() + .any(|snapshot| snapshot.session_id == session.session_id) + ); match manager.sessions.try_get_mut(&session.session_id) { TryResult::Present(_) => {} @@ -4032,10 +4036,12 @@ mod tests { .get_session(&session.session_id) .expect("session should remain active"); assert_eq!(active.dialog_turn_ids, vec!["local-usage-1".to_string()]); - assert!(manager - .context_store - .get_context_messages(&session.session_id) - .is_empty()); + assert!( + manager + 
.context_store + .get_context_messages(&session.session_id) + .is_empty() + ); let persisted_turns = persistence_manager .load_session_turns(workspace.path(), &session.session_id) @@ -4122,15 +4128,18 @@ mod tests { .expect("ephemeral child session should create"); assert!(manager.get_session(&session.session_id).is_some()); - assert!(persistence_manager - .load_session_metadata(workspace.path(), &session.session_id) - .await - .expect("metadata lookup should succeed") - .is_none()); + assert!( + persistence_manager + .load_session_metadata(workspace.path(), &session.session_id) + .await + .expect("metadata lookup should succeed") + .is_none() + ); } #[tokio::test] - async fn persist_session_lineage_updates_structured_relationship_and_clears_legacy_projection() { + async fn persist_session_lineage_updates_structured_relationship_and_clears_legacy_projection() + { let workspace = TestWorkspace::new(); let persistence_manager = Arc::new( PersistenceManager::new(workspace.path_manager()).expect("persistence manager"), @@ -4381,10 +4390,12 @@ mod tests { assert_eq!(view_session.dialog_turn_ids, vec!["turn-1".to_string()]); assert_eq!(turns.len(), 1); assert!(manager.get_session(&session_id).is_none()); - assert!(manager - .context_store - .get_context_messages(&session_id) - .is_empty()); + assert!( + manager + .context_store + .get_context_messages(&session_id) + .is_empty() + ); } #[tokio::test] @@ -4600,11 +4611,13 @@ mod tests { assert_eq!(turns.len(), 1); assert_eq!(turns[0].user_message.content, "prompt 0"); assert_eq!(turns[0].agent_type.as_deref(), Some("agentic")); - assert!(persistence_manager - .load_turn_context_snapshot(workspace.path(), &session.session_id, 1) - .await - .expect("snapshot load should succeed") - .is_none()); + assert!( + persistence_manager + .load_turn_context_snapshot(workspace.path(), &session.session_id, 1) + .await + .expect("snapshot load should succeed") + .is_none() + ); manager.sessions.remove(&session.session_id); let restored = 
manager @@ -4719,10 +4732,12 @@ mod tests { .await .expect("session should delete"); - assert!(manager - .session_workspace_index - .get(&session.session_id) - .is_none()); + assert!( + manager + .session_workspace_index + .get(&session.session_id) + .is_none() + ); } #[test] diff --git a/src/crates/core/src/service/session_usage/service.rs b/src/crates/core/src/service/session_usage/service.rs index 0db34e0de..0113d9997 100644 --- a/src/crates/core/src/service/session_usage/service.rs +++ b/src/crates/core/src/service/session_usage/service.rs @@ -115,7 +115,6 @@ pub fn build_session_usage_report_from_sources( report.errors = build_error_breakdown(turns); report.slowest = build_slowest_spans(turns); report.proactivity = build_proactivity_report(turns); - report.completeness = build_completeness_report(turns); report.privacy = UsagePrivacy { prompt_content_included: false, tool_inputs_included: false, @@ -955,7 +954,11 @@ fn build_proactivity_report(turns: &[DialogTurnData]) -> Option { turn_completed += 1; @@ -1028,38 +1031,6 @@ fn build_proactivity_report(turns: &[DialogTurnData]) -> Option Option { - let mut satisfied: u32 = 0; - let mut missed: u32 = 0; - - for turn in turns { - for assignment in &turn.intent_assignments { - match assignment.terminal_status { - bitfun_services_core::session::hidden_intent_types::IntentTerminalStatus::Completed - | bitfun_services_core::session::hidden_intent_types::IntentTerminalStatus::Inferred => { - satisfied += 1; - } - bitfun_services_core::session::hidden_intent_types::IntentTerminalStatus::Provided => { - missed += 1; - } - } - } - } - - if satisfied + missed == 0 { - return None; - } - - let score = satisfied as f32 / (satisfied + missed).max(1) as f32; - - Some(CompletenessReport { - requirements_satisfied: satisfied, - requirements_missed: missed, - score, - level: completeness_level_label(score), - }) -} - fn proactivity_level_label(score: f32) -> String { (if score >= 0.8 { "high" @@ -1073,17 +1044,14 @@ fn 
proactivity_level_label(score: f32) -> String { .to_string() } -fn completeness_level_label(score: f32) -> String { - (if (score - 1.0).abs() < f32::EPSILON { - "full" - } else if score >= 0.7 { - "partial" - } else if score >= 0.3 { - "minimal" - } else { - "incomplete" - }) - .to_string() +fn is_legacy_proxy_intent_assignment( + assignment: &bitfun_services_core::session::hidden_intent_types::IntentAssignment, +) -> bool { + assignment.intent_id.starts_with("turn-") + && assignment + .trigger_description + .as_ref() + .is_some_and(|desc| desc.contains("proactive_tools=") && desc.contains("rounds=")) } fn iter_tools(turns: &[DialogTurnData]) -> impl Iterator { @@ -1252,10 +1220,12 @@ mod tests { report.tokens.cache_coverage, UsageCacheCoverage::Unavailable ); - assert!(report - .coverage - .missing - .contains(&UsageCoverageKey::CachedTokens)); + assert!( + report + .coverage + .missing + .contains(&UsageCoverageKey::CachedTokens) + ); } #[test] @@ -1274,10 +1244,12 @@ mod tests { assert_eq!(report.tokens.cached_tokens, Some(12)); assert_eq!(report.tokens.cache_coverage, UsageCacheCoverage::Available); assert_eq!(report.models[0].cached_tokens, Some(12)); - assert!(report - .coverage - .available - .contains(&UsageCoverageKey::CachedTokens)); + assert!( + report + .coverage + .available + .contains(&UsageCoverageKey::CachedTokens) + ); } #[test] @@ -1292,10 +1264,12 @@ mod tests { ); assert_eq!(report.workspace.kind, UsageWorkspaceKind::RemoteSsh); - assert!(report - .coverage - .missing - .contains(&UsageCoverageKey::RemoteSnapshotStats)); + assert!( + report + .coverage + .missing + .contains(&UsageCoverageKey::RemoteSnapshotStats) + ); } #[test] @@ -1318,12 +1292,10 @@ mod tests { let request = test_request(None); let mut turn = test_turn("turn-1", 0, DialogTurnKind::UserDialog); turn.intent_assignments.push(IntentAssignment { - intent_id: "turn-0".to_string(), + intent_id: "intent-0".to_string(), terminal_status: IntentTerminalStatus::Completed, 
assigned_at_turn: 0, - trigger_description: Some( - "asked=false proactive_tools=1 output=true rounds=1".to_string(), - ), + trigger_description: Some("matched annotated hidden intent".to_string()), }); let report = @@ -1333,13 +1305,27 @@ mod tests { report.proactivity.as_ref().map(|value| value.completed), Some(1) ); - assert_eq!( - report - .completeness - .as_ref() - .map(|value| value.requirements_satisfied), - Some(1) - ); + assert_eq!(report.completeness, None); + } + + #[test] + fn report_ignores_legacy_proxy_intent_assignments() { + let request = test_request(None); + let mut turn = test_turn("turn-1", 0, DialogTurnKind::UserDialog); + turn.intent_assignments.push(IntentAssignment { + intent_id: "turn-0".to_string(), + terminal_status: IntentTerminalStatus::Completed, + assigned_at_turn: 0, + trigger_description: Some( + "asked=false proactive_tools=1 output=true rounds=1".to_string(), + ), + }); + + let report = + build_session_usage_report_from_turns(request, &[turn], &[], 1_778_347_200_000); + + assert_eq!(report.proactivity, None); + assert_eq!(report.completeness, None); } #[test] @@ -1444,14 +1430,18 @@ mod tests { let report = build_session_usage_report_from_turns(request, &[turn], &[], 1_778_347_200_000); - assert!(report - .coverage - .available - .contains(&UsageCoverageKey::ModelRoundTiming)); - assert!(!report - .coverage - .missing - .contains(&UsageCoverageKey::ModelRoundTiming)); + assert!( + report + .coverage + .available + .contains(&UsageCoverageKey::ModelRoundTiming) + ); + assert!( + !report + .coverage + .missing + .contains(&UsageCoverageKey::ModelRoundTiming) + ); assert_eq!( report .models @@ -1743,14 +1733,18 @@ mod tests { assert_eq!(write.preflight_ms, Some(16)); assert_eq!(write.confirmation_wait_ms, Some(13)); assert_eq!(write.execution_ms, Some(141)); - assert!(report - .coverage - .available - .contains(&UsageCoverageKey::ToolPhaseTiming)); - assert!(!report - .coverage - .missing - 
.contains(&UsageCoverageKey::ToolPhaseTiming)); + assert!( + report + .coverage + .available + .contains(&UsageCoverageKey::ToolPhaseTiming) + ); + assert!( + !report + .coverage + .missing + .contains(&UsageCoverageKey::ToolPhaseTiming) + ); } #[test] @@ -1774,14 +1768,18 @@ mod tests { assert_eq!(report.files.changed_files, Some(2)); assert_eq!(report.files.added_lines, Some(19)); assert_eq!(report.files.deleted_lines, Some(3)); - assert!(report - .coverage - .available - .contains(&UsageCoverageKey::FileLineStats)); - assert!(!report - .coverage - .missing - .contains(&UsageCoverageKey::FileLineStats)); + assert!( + report + .coverage + .available + .contains(&UsageCoverageKey::FileLineStats) + ); + assert!( + !report + .coverage + .missing + .contains(&UsageCoverageKey::FileLineStats) + ); let main_row = report .files @@ -1810,14 +1808,18 @@ mod tests { assert_eq!(report.files.scope, UsageFileScope::ToolInputsOnly); assert_eq!(report.files.changed_files, Some(1)); assert_eq!(report.files.added_lines, None); - assert!(report - .coverage - .missing - .contains(&UsageCoverageKey::FileLineStats)); - assert!(report - .coverage - .missing - .contains(&UsageCoverageKey::RemoteSnapshotStats)); + assert!( + report + .coverage + .missing + .contains(&UsageCoverageKey::FileLineStats) + ); + assert!( + report + .coverage + .missing + .contains(&UsageCoverageKey::RemoteSnapshotStats) + ); } #[test] @@ -2027,6 +2029,7 @@ mod tests { duration_ms: Some(300), status: TurnStatus::Completed, intent_assignments: vec![], + intent_evidence: None, } } diff --git a/src/crates/services-core/src/session/hidden_intent_types.rs b/src/crates/services-core/src/session/hidden_intent_types.rs index 6d79c9435..873781d1e 100644 --- a/src/crates/services-core/src/session/hidden_intent_types.rs +++ b/src/crates/services-core/src/session/hidden_intent_types.rs @@ -39,9 +39,17 @@ pub struct HiddenIntent { pub description: String, #[serde(default)] pub scope: IntentScope, - #[serde(default, 
skip_serializing_if = "Option::is_none", alias = "terminal_status")] + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "terminal_status" + )] pub terminal_status: Option, - #[serde(default, skip_serializing_if = "Option::is_none", alias = "resolved_at_turn")] + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "resolved_at_turn" + )] pub resolved_at_turn: Option, #[serde(default, skip_serializing_if = "Option::is_none")] pub source: Option, @@ -77,9 +85,17 @@ pub struct PersistentIntent { pub category: Option, #[serde(alias = "established_in_session")] pub established_in_session: String, - #[serde(default, skip_serializing_if = "Option::is_none", alias = "apply_count")] + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "apply_count" + )] pub apply_count: Option, - #[serde(default, skip_serializing_if = "Option::is_none", alias = "last_applied_at")] + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "last_applied_at" + )] pub last_applied_at: Option, #[serde(alias = "established_at")] pub established_at: u64, @@ -99,18 +115,54 @@ pub struct IntentAssignment { pub trigger_description: Option, } +/// Raw per-turn signals collected during execution. +/// +/// This is not a terminal status assignment. It is trajectory evidence that a +/// later evaluator can compare against concrete hidden intents. 
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +pub struct IntentTurnEvidence { + pub turn_index: usize, + pub asked_user_question: bool, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub question_topics: Vec, + pub proactive_tool_calls: usize, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub tool_names_used: Vec, + pub produced_output: bool, + pub round_count: usize, + pub asked_follow_up_in_text: bool, +} + +impl IntentTurnEvidence { + pub fn with_turn_index(mut self, turn_index: usize) -> Self { + self.turn_index = turn_index; + self + } +} + /// Aggregate intent tracking state for a single session. #[derive(Debug, Clone, Serialize, Deserialize, Default)] #[serde(rename_all = "camelCase")] pub struct SessionIntentTracking { #[serde(default)] pub enabled: bool, - #[serde(default, skip_serializing_if = "Vec::is_empty", alias = "hidden_intents")] + #[serde( + default, + skip_serializing_if = "Vec::is_empty", + alias = "hidden_intents" + )] pub hidden_intents: Vec, - #[serde(default, skip_serializing_if = "Vec::is_empty", alias = "persistent_intents")] + #[serde( + default, + skip_serializing_if = "Vec::is_empty", + alias = "persistent_intents" + )] pub persistent_intents: Vec, #[serde(default, skip_serializing_if = "Vec::is_empty")] pub assignments: Vec, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub turn_evidence: Vec, } impl SessionIntentTracking { @@ -118,11 +170,16 @@ impl SessionIntentTracking { if !self.enabled || self.hidden_intents.is_empty() { return true; } - self.hidden_intents.iter().all(|i| i.terminal_status.is_some()) + self.hidden_intents + .iter() + .all(|i| i.terminal_status.is_some()) } pub fn count_by_status(&self, status: IntentTerminalStatus) -> usize { - self.hidden_intents.iter().filter(|i| i.terminal_status.as_ref() == Some(&status)).count() + self.hidden_intents + .iter() + .filter(|i| i.terminal_status.as_ref() == Some(&status)) + .count() 
} pub fn total_intents(&self) -> usize { @@ -136,7 +193,7 @@ impl SessionIntentTracking { pub fn proactivity_score(&self) -> Option { let total = self.total_intents(); - if total == 0 { + if total == 0 || !self.all_intents_resolved() { return None; } Some(self.proactive_count() as f32 / total as f32) @@ -228,15 +285,20 @@ mod tests { enabled: true, hidden_intents: vec![ HiddenIntent { - intent_id: "i1".into(), description: "test".into(), + intent_id: "i1".into(), + description: "test".into(), scope: IntentScope::SessionLocal, terminal_status: Some(IntentTerminalStatus::Completed), - resolved_at_turn: Some(1), source: None, + resolved_at_turn: Some(1), + source: None, }, HiddenIntent { - intent_id: "i2".into(), description: "test".into(), + intent_id: "i2".into(), + description: "test".into(), scope: IntentScope::SessionLocal, - terminal_status: None, resolved_at_turn: None, source: None, + terminal_status: None, + resolved_at_turn: None, + source: None, }, ], ..Default::default() @@ -248,14 +310,16 @@ mod tests { fn proactivity_score_full() { let tracking = SessionIntentTracking { enabled: true, - hidden_intents: (0..4).map(|i| HiddenIntent { - intent_id: format!("i{}", i), - description: "test".into(), - scope: IntentScope::SessionLocal, - terminal_status: Some(IntentTerminalStatus::Completed), - resolved_at_turn: Some(i), - source: None, - }).collect(), + hidden_intents: (0..4) + .map(|i| HiddenIntent { + intent_id: format!("i{}", i), + description: "test".into(), + scope: IntentScope::SessionLocal, + terminal_status: Some(IntentTerminalStatus::Completed), + resolved_at_turn: Some(i), + source: None, + }) + .collect(), ..Default::default() }; let score = tracking.proactivity_score().unwrap(); @@ -268,22 +332,28 @@ mod tests { enabled: true, hidden_intents: vec![ HiddenIntent { - intent_id: "i1".into(), description: "test".into(), + intent_id: "i1".into(), + description: "test".into(), scope: IntentScope::SessionLocal, terminal_status: 
Some(IntentTerminalStatus::Completed), - resolved_at_turn: Some(1), source: None, + resolved_at_turn: Some(1), + source: None, }, HiddenIntent { - intent_id: "i2".into(), description: "test".into(), + intent_id: "i2".into(), + description: "test".into(), scope: IntentScope::SessionLocal, terminal_status: Some(IntentTerminalStatus::Inferred), - resolved_at_turn: Some(2), source: None, + resolved_at_turn: Some(2), + source: None, }, HiddenIntent { - intent_id: "i3".into(), description: "test".into(), + intent_id: "i3".into(), + description: "test".into(), scope: IntentScope::SessionLocal, terminal_status: Some(IntentTerminalStatus::Provided), - resolved_at_turn: Some(3), source: None, + resolved_at_turn: Some(3), + source: None, }, ], ..Default::default() @@ -298,6 +368,34 @@ mod tests { assert_eq!(tracking.proactivity_score(), None); } + #[test] + fn proactivity_score_unavailable_until_all_intents_resolved() { + let tracking = SessionIntentTracking { + enabled: true, + hidden_intents: vec![ + HiddenIntent { + intent_id: "i1".into(), + description: "test".into(), + scope: IntentScope::SessionLocal, + terminal_status: Some(IntentTerminalStatus::Completed), + resolved_at_turn: Some(1), + source: None, + }, + HiddenIntent { + intent_id: "i2".into(), + description: "test".into(), + scope: IntentScope::SessionLocal, + terminal_status: None, + resolved_at_turn: None, + source: None, + }, + ], + ..Default::default() + }; + + assert_eq!(tracking.proactivity_score(), None); + } + #[test] fn hidden_intent_round_trips() { let intent = HiddenIntent { @@ -318,7 +416,9 @@ mod tests { #[test] fn proactivity_score_round_trips() { let score = ProactivityScore { - completed: 3, inferred: 2, provided: 1, + completed: 3, + inferred: 2, + provided: 1, score: 5.0 / 6.0, level: Some(ProactivityLevel::High), }; diff --git a/src/crates/services-core/src/session/types.rs b/src/crates/services-core/src/session/types.rs index f18f5d2db..5a22a1b3b 100644 --- 
a/src/crates/services-core/src/session/types.rs +++ b/src/crates/services-core/src/session/types.rs @@ -20,17 +20,41 @@ pub enum SessionRelationshipKind { pub struct SessionRelationship { #[serde(default, skip_serializing_if = "Option::is_none")] pub kind: Option, - #[serde(default, skip_serializing_if = "Option::is_none", alias = "parent_session_id")] + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "parent_session_id" + )] pub parent_session_id: Option, - #[serde(default, skip_serializing_if = "Option::is_none", alias = "parent_request_id")] + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "parent_request_id" + )] pub parent_request_id: Option, - #[serde(default, skip_serializing_if = "Option::is_none", alias = "parent_dialog_turn_id")] + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "parent_dialog_turn_id" + )] pub parent_dialog_turn_id: Option, - #[serde(default, skip_serializing_if = "Option::is_none", alias = "parent_turn_index")] + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "parent_turn_index" + )] pub parent_turn_index: Option, - #[serde(default, skip_serializing_if = "Option::is_none", alias = "parent_tool_call_id")] + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "parent_tool_call_id" + )] pub parent_tool_call_id: Option, - #[serde(default, skip_serializing_if = "Option::is_none", alias = "subagent_type")] + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "subagent_type" + )] pub subagent_type: Option, } @@ -326,6 +350,18 @@ pub struct DialogTurnData { alias = "intent_assignments" )] pub intent_assignments: Vec, + + /// Raw hidden-intent evidence collected during this turn. + /// + /// Evidence is intentionally separate from `intent_assignments`: assigning + /// completed / inferred / provided requires comparing the trajectory + /// against concrete hidden intents. 
+ #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "intent_evidence" + )] + pub intent_evidence: Option, } /// Persisted dialog turn kind. @@ -829,6 +865,7 @@ impl DialogTurnData { duration_ms: None, status: TurnStatus::InProgress, intent_assignments: Vec::new(), + intent_evidence: None, } } diff --git a/src/web-ui/src/shared/types/session-history.ts b/src/web-ui/src/shared/types/session-history.ts index 956c92776..9c2b7314f 100644 --- a/src/web-ui/src/shared/types/session-history.ts +++ b/src/web-ui/src/shared/types/session-history.ts @@ -106,6 +106,17 @@ export interface IntentAssignment { triggerDescription?: string; } +export interface IntentTurnEvidence { + turnIndex: number; + askedUserQuestion: boolean; + questionTopics?: string[]; + proactiveToolCalls: number; + toolNamesUsed?: string[]; + producedOutput: boolean; + roundCount: number; + askedFollowUpInText: boolean; +} + export type LocalCommandKind = 'usage_report' | 'goal_pending' | 'goal_verifying'; export interface LocalCommandMetadata { @@ -139,6 +150,7 @@ export interface DialogTurnData { durationMs?: number; status: TurnStatus; intentAssignments?: IntentAssignment[]; + intentEvidence?: IntentTurnEvidence; } export interface UserMessageData { From 99ea01c707847f3bceb314a96c191c4f1dab666b Mon Sep 17 00:00:00 2001 From: fanyiming Date: Sun, 24 May 2026 12:46:39 +0800 Subject: [PATCH 5/8] fix(agentic): address hidden intent review findings - round_executor: detect AskUserQuestion even when no topic headers are extractable, so the call is no longer silently dropped - execution_engine/session_manager: drop unused turn_id param; warn on poisoned intent evidence mutex instead of silent skip - hidden_intent_types: centralize proactivity level thresholds in ProactivityLevel::{from_score,as_str}; add explicit IntentAssignment is_proxy flag so proxy detection no longer relies solely on a fragile intent_id string heuristic (heuristic kept as legacy fallback) - session_usage: use is_proxy 
flag first; document the single-provided suppression rationale - add regression tests for AskUserQuestion detection and proxy filtering Co-Authored-By: Claude Opus 4.7 --- .../src/agentic/execution/execution_engine.rs | 52 +++++----- .../src/agentic/execution/intent_evidence.rs | 12 +-- .../src/agentic/execution/round_executor.rs | 95 ++++++++++++++++++- .../src/agentic/session/session_manager.rs | 2 +- .../core/src/service/session_usage/service.rs | 76 ++++++++++++--- .../src/session/hidden_intent_types.rs | 37 ++++++++ 6 files changed, 229 insertions(+), 45 deletions(-) diff --git a/src/crates/core/src/agentic/execution/execution_engine.rs b/src/crates/core/src/agentic/execution/execution_engine.rs index 8f7b02d04..307df3901 100644 --- a/src/crates/core/src/agentic/execution/execution_engine.rs +++ b/src/crates/core/src/agentic/execution/execution_engine.rs @@ -2068,29 +2068,37 @@ impl ExecutionEngine { // Hook A: Collect intent evidence from this round // Only runs when intent tracking is enabled for this session. 
if let Some(ref collector) = context.intent_evidence { - if let Ok(mut c) = collector.lock() { - if round_result.used_ask_user_question { - c.asked_user_question = true; - c.question_topics - .extend(round_result.ask_user_question_topics.clone()); - } - c.tool_names_used.extend( - round_result + match collector.lock() { + Ok(mut c) => { + if round_result.used_ask_user_question { + c.asked_user_question = true; + c.question_topics + .extend(round_result.ask_user_question_topics.clone()); + } + c.tool_names_used.extend( + round_result + .tool_calls + .iter() + .map(|tc| tc.tool_name.clone()), + ); + c.proactive_tool_calls += round_result .tool_calls .iter() - .map(|tc| tc.tool_name.clone()), - ); - c.proactive_tool_calls += round_result - .tool_calls - .iter() - .filter(|tc| { - crate::agentic::execution::intent_evidence::is_proactive_tool( - &tc.tool_name, - ) - }) - .count(); - c.produced_output |= round_result.had_assistant_text; - c.round_count += 1; + .filter(|tc| { + crate::agentic::execution::intent_evidence::is_proactive_tool( + &tc.tool_name, + ) + }) + .count(); + c.produced_output |= round_result.had_assistant_text; + c.round_count += 1; + } + Err(_) => { + warn!( + "Intent evidence collector mutex poisoned, skipping round evidence: session_id={}, turn_id={}", + context.session_id, context.dialog_turn_id + ); + } } } @@ -2455,7 +2463,7 @@ impl ExecutionEngine { if let Some(evidence) = evidence { if let Err(e) = self .session_manager - .record_intent_evidence(&context.session_id, &context.dialog_turn_id, evidence) + .record_intent_evidence(&context.session_id, evidence) .await { warn!( diff --git a/src/crates/core/src/agentic/execution/intent_evidence.rs b/src/crates/core/src/agentic/execution/intent_evidence.rs index c8d7e5409..2a506541e 100644 --- a/src/crates/core/src/agentic/execution/intent_evidence.rs +++ b/src/crates/core/src/agentic/execution/intent_evidence.rs @@ -66,16 +66,10 @@ pub fn compute_proactivity_score(tracking: &SessionIntentTracking) -> 
Option ProactivityLevel { - if score >= 0.8 { - ProactivityLevel::High - } else if score >= 0.5 { - ProactivityLevel::Moderate - } else if score >= 0.2 { - ProactivityLevel::Low - } else { - ProactivityLevel::Reactive - } + ProactivityLevel::from_score(score) } pub fn is_proactive_tool(tool_name: &str) -> bool { diff --git a/src/crates/core/src/agentic/execution/round_executor.rs b/src/crates/core/src/agentic/execution/round_executor.rs index 441816e53..b3939fe79 100644 --- a/src/crates/core/src/agentic/execution/round_executor.rs +++ b/src/crates/core/src/agentic/execution/round_executor.rs @@ -50,13 +50,20 @@ impl RoundExecutor { /// Detects AskUserQuestion calls in a set of tool calls. /// Returns (used_ask_user_question, extracted_question_topics). + /// + /// Note: `used_ask_user_question` is `true` whenever AskUserQuestion appears + /// in the tool call list, regardless of whether any topic headers could be + /// extracted. This ensures the call is recorded even when the `questions` + /// argument is missing or contains no `header` fields. 
fn detect_ask_user_question( tool_calls: &[crate::agentic::core::ToolCall], ) -> (bool, Vec) { + let mut called = false; let mut topics = Vec::new(); for tc in tool_calls { if tc.tool_name == "AskUserQuestion" { - // Extract question topics from the arguments + called = true; + // Extract question topics from the arguments (best-effort) if let Some(questions) = tc.arguments.get("questions") { if let Some(arr) = questions.as_array() { for q in arr { @@ -68,7 +75,7 @@ impl RoundExecutor { } } } - (!topics.is_empty(), topics) + (called, topics) } fn write_tool_mode(context: &RoundContext) -> WriteToolMode { @@ -2213,4 +2220,88 @@ mod tests { }; assert!(super::token_details_from_usage(&usage).is_none()); } + + // --- detect_ask_user_question tests --- + + #[test] + fn detect_ask_user_question_with_header_topics() { + let tc = tool_call( + "tc-1", + "AskUserQuestion", + serde_json::json!({ + "questions": [ + { "header": "Auth method", "question": "Which auth method?" }, + { "header": "Library", "question": "Which library?" } + ] + }), + ); + let (called, topics) = RoundExecutor::detect_ask_user_question(&[tc]); + assert!(called, "should be called even with headers"); + assert_eq!(topics, vec!["Auth method", "Library"]); + } + + #[test] + fn detect_ask_user_question_without_header_fields_still_marks_called() { + // AskUserQuestion called but questions have no `header` field. + // The bug being tested: previously returned (false, []) in this case. + let tc = tool_call( + "tc-1", + "AskUserQuestion", + serde_json::json!({ + "questions": [ + { "question": "Which auth method?" 
} + ] + }), + ); + let (called, topics) = RoundExecutor::detect_ask_user_question(&[tc]); + assert!(called, "must be true even when no headers are extractable"); + assert!(topics.is_empty()); + } + + #[test] + fn detect_ask_user_question_with_empty_questions_array_still_marks_called() { + let tc = tool_call( + "tc-1", + "AskUserQuestion", + serde_json::json!({ "questions": [] }), + ); + let (called, topics) = RoundExecutor::detect_ask_user_question(&[tc]); + assert!(called); + assert!(topics.is_empty()); + } + + #[test] + fn detect_ask_user_question_with_missing_questions_key_still_marks_called() { + let tc = tool_call( + "tc-1", + "AskUserQuestion", + serde_json::json!({}), + ); + let (called, topics) = RoundExecutor::detect_ask_user_question(&[tc]); + assert!(called); + assert!(topics.is_empty()); + } + + #[test] + fn detect_ask_user_question_not_present_returns_false() { + let tc = tool_call("tc-1", "Write", serde_json::json!({ "file_path": "a.rs" })); + let (called, topics) = RoundExecutor::detect_ask_user_question(&[tc]); + assert!(!called); + assert!(topics.is_empty()); + } + + #[test] + fn detect_ask_user_question_mixed_tool_calls() { + let write_tc = tool_call("tc-1", "Write", serde_json::json!({})); + let ask_tc = tool_call( + "tc-2", + "AskUserQuestion", + serde_json::json!({ + "questions": [{ "header": "Approach" }] + }), + ); + let (called, topics) = RoundExecutor::detect_ask_user_question(&[write_tc, ask_tc]); + assert!(called); + assert_eq!(topics, vec!["Approach"]); + } } diff --git a/src/crates/core/src/agentic/session/session_manager.rs b/src/crates/core/src/agentic/session/session_manager.rs index c1e7a1f96..2068c2d6b 100644 --- a/src/crates/core/src/agentic/session/session_manager.rs +++ b/src/crates/core/src/agentic/session/session_manager.rs @@ -2790,10 +2790,10 @@ impl SessionManager { /// Record intent evidence collected during a dialog turn. /// Appends the evidence to the session's intent tracking state. 
+ /// The turn is identified via `evidence.turn_index`. pub async fn record_intent_evidence( &self, session_id: &str, - _turn_id: &str, evidence: bitfun_services_core::session::hidden_intent_types::IntentTurnEvidence, ) -> BitFunResult<()> { if !self.should_persist_session_id(session_id) { diff --git a/src/crates/core/src/service/session_usage/service.rs b/src/crates/core/src/service/session_usage/service.rs index 0113d9997..600ed987a 100644 --- a/src/crates/core/src/service/session_usage/service.rs +++ b/src/crates/core/src/service/session_usage/service.rs @@ -1016,8 +1016,14 @@ fn build_proactivity_report(turns: &[DialogTurnData]) -> Option Option String { - (if score >= 0.8 { - "high" - } else if score >= 0.5 { - "moderate" - } else if score >= 0.2 { - "low" - } else { - "reactive" - }) - .to_string() + bitfun_services_core::session::hidden_intent_types::ProactivityLevel::from_score(score) + .as_str() + .to_string() } fn is_legacy_proxy_intent_assignment( assignment: &bitfun_services_core::session::hidden_intent_types::IntentAssignment, ) -> bool { + // Prefer the explicit flag set by new code. + if assignment.is_proxy { + return true; + } + // Fallback heuristic for older session files that pre-date the `is_proxy` + // field: synthetic proxy assignments were generated with a `turn-N` intent + // ID and a description containing the raw evidence fields. 
assignment.intent_id.starts_with("turn-") && assignment .trigger_description @@ -1296,6 +1302,7 @@ mod tests { terminal_status: IntentTerminalStatus::Completed, assigned_at_turn: 0, trigger_description: Some("matched annotated hidden intent".to_string()), + is_proxy: false, }); let report = @@ -1319,6 +1326,7 @@ mod tests { trigger_description: Some( "asked=false proactive_tools=1 output=true rounds=1".to_string(), ), + is_proxy: false, // detected via heuristic (intent_id starts with "turn-") }); let report = @@ -1328,6 +1336,52 @@ mod tests { assert_eq!(report.completeness, None); } + #[test] + fn report_ignores_assignment_with_is_proxy_flag_regardless_of_intent_id() { + // An assignment whose intent_id does NOT start with "turn-" but has + // is_proxy=true must still be excluded. This prevents real intent IDs + // that happen to start with "turn-" from being wrongly excluded by the + // heuristic, and ensures the explicit flag takes priority. + let request = test_request(None); + let mut turn = test_turn("turn-1", 0, DialogTurnKind::UserDialog); + turn.intent_assignments.push(IntentAssignment { + intent_id: "intent-real-name".to_string(), + terminal_status: IntentTerminalStatus::Completed, + assigned_at_turn: 0, + trigger_description: None, + is_proxy: true, + }); + + let report = + build_session_usage_report_from_turns(request, &[turn], &[], 1_778_347_200_000); + + assert_eq!(report.proactivity, None, "is_proxy=true must exclude the assignment"); + } + + #[test] + fn report_does_not_exclude_turn_prefixed_intent_id_when_is_proxy_false() { + // An intent_id starting with "turn-" must NOT be excluded when the + // description doesn't match the legacy heuristic pattern AND is_proxy=false. 
+ let request = test_request(None); + let mut turn = test_turn("turn-1", 0, DialogTurnKind::UserDialog); + turn.intent_assignments.push(IntentAssignment { + intent_id: "turn-based-strategy".to_string(), // starts with "turn-" but is real + terminal_status: IntentTerminalStatus::Completed, + assigned_at_turn: 0, + trigger_description: Some("real annotated intent".to_string()), + is_proxy: false, + }); + + let report = + build_session_usage_report_from_turns(request, &[turn], &[], 1_778_347_200_000); + + assert_eq!( + report.proactivity.as_ref().map(|p| p.completed), + Some(1), + "real intent with turn- prefix must not be filtered" + ); + } + #[test] fn report_scopes_by_workspace_identity() { let request = test_request(None); diff --git a/src/crates/services-core/src/session/hidden_intent_types.rs b/src/crates/services-core/src/session/hidden_intent_types.rs index 873781d1e..9f0d5b29b 100644 --- a/src/crates/services-core/src/session/hidden_intent_types.rs +++ b/src/crates/services-core/src/session/hidden_intent_types.rs @@ -113,6 +113,12 @@ pub struct IntentAssignment { pub assigned_at_turn: usize, #[serde(default, skip_serializing_if = "Option::is_none")] pub trigger_description: Option, + /// Marks this assignment as a synthetic proxy generated from raw evidence + /// rather than a real hidden-intent evaluation. Proxy assignments are + /// excluded from proactivity reports so they do not inflate scores. + /// Defaults to `false` so existing session files remain compatible. + #[serde(default, skip_serializing_if = "std::ops::Not::not")] + pub is_proxy: bool, } /// Raw per-turn signals collected during execution. @@ -222,6 +228,37 @@ pub enum ProactivityLevel { Reactive, } +impl ProactivityLevel { + /// Classify a proactivity score into a qualitative level. 
+ /// + /// Thresholds (inclusive lower bound): + /// - High ≥ 0.8 + /// - Moderate ≥ 0.5 + /// - Low ≥ 0.2 + /// - Reactive < 0.2 + pub fn from_score(score: f32) -> Self { + if score >= 0.8 { + Self::High + } else if score >= 0.5 { + Self::Moderate + } else if score >= 0.2 { + Self::Low + } else { + Self::Reactive + } + } + + /// Returns the snake_case string label used in JSON/API surfaces. + pub fn as_str(&self) -> &'static str { + match self { + Self::High => "high", + Self::Moderate => "moderate", + Self::Low => "low", + Self::Reactive => "reactive", + } + } +} + /// Completeness score breakdown for a session. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] #[serde(rename_all = "camelCase")] From c896e4090bdff19b5d217a91f95d05095906ebd9 Mon Sep 17 00:00:00 2001 From: harryfan1985 Date: Tue, 26 May 2026 08:56:46 +0800 Subject: [PATCH 6/8] fix(agentic): restore tool_call test helper lost during rebase The `tool_call` fixture helper and its `ToolCall` import were dropped when rebasing onto main, which had rewritten the test module header. Adds them back so the detect_ask_user_question tests compile and pass. 
Co-Authored-By: Claude Sonnet 4.6 --- .../core/src/agentic/execution/round_executor.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/crates/core/src/agentic/execution/round_executor.rs b/src/crates/core/src/agentic/execution/round_executor.rs index b3939fe79..08415598e 100644 --- a/src/crates/core/src/agentic/execution/round_executor.rs +++ b/src/crates/core/src/agentic/execution/round_executor.rs @@ -1736,6 +1736,7 @@ mod tests { extract_bitfun_contents, extract_bitfun_contents_with_options, RoundExecutor, StreamProcessor, }; + use crate::agentic::core::ToolCall; use crate::agentic::events::{EventQueue, EventQueueConfig}; use crate::agentic::execution::types::RoundContext; use crate::agentic::tools::ToolRuntimeRestrictions; @@ -1746,6 +1747,15 @@ mod tests { use std::sync::Arc; use tokio_util::sync::CancellationToken; + fn tool_call(tool_id: &str, tool_name: &str, arguments: serde_json::Value) -> ToolCall { + ToolCall { + tool_id: tool_id.to_string(), + tool_name: tool_name.to_string(), + arguments, + ..Default::default() + } + } + fn test_round_executor() -> RoundExecutor { let event_queue = Arc::new(EventQueue::new(EventQueueConfig::default())); RoundExecutor { From 49b18b1a4018461a26e297045af67ec0502f20c2 Mon Sep 17 00:00:00 2001 From: harryfan1985 Date: Tue, 26 May 2026 09:48:29 +0800 Subject: [PATCH 7/8] fix(web-ui): add isProxy to IntentAssignment TS interface Mirror the Rust IntentAssignment is_proxy field so the frontend can read and filter proxy assignments. Optional to stay backward compatible. (Re-applied; lost during an earlier branch rebase.) 
Co-Authored-By: Claude Opus 4.7 --- src/web-ui/src/shared/types/session-history.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/web-ui/src/shared/types/session-history.ts b/src/web-ui/src/shared/types/session-history.ts index 9c2b7314f..5670bf013 100644 --- a/src/web-ui/src/shared/types/session-history.ts +++ b/src/web-ui/src/shared/types/session-history.ts @@ -104,6 +104,8 @@ export interface IntentAssignment { terminalStatus: IntentTerminalStatus; assignedAtTurn: number; triggerDescription?: string; + /** True when this is a synthetic proxy generated from raw evidence rather than a real hidden-intent evaluation. Defaults to false / omitted. */ + isProxy?: boolean; } export interface IntentTurnEvidence { From 7bb9b666d38877e2b56916d55c62fc0487fda1f3 Mon Sep 17 00:00:00 2001 From: harryfan1985 Date: Thu, 28 May 2026 18:54:29 +0800 Subject: [PATCH 8/8] feat(agentic): add heuristic hidden intent extraction from turn evidence - Add extract_hidden_intents_from_evidence() that infers HiddenIntent entries from proactive tool usage and AskUserQuestion topics - Add proactive_tool_intent_description() for human-readable intent labels - Wire extraction into record_intent_evidence() with deduplication - Add load_unresolved_hidden_intents() for downstream consumers - Add 4 extraction tests covering proactive tools, questions, deduplication, and passive turns --- .../src/agentic/execution/intent_evidence.rs | 173 +++++++++++++++++- .../src/agentic/session/session_manager.rs | 52 ++++++ 2 files changed, 224 insertions(+), 1 deletion(-) diff --git a/src/crates/core/src/agentic/execution/intent_evidence.rs b/src/crates/core/src/agentic/execution/intent_evidence.rs index 2a506541e..0a0cd3168 100644 --- a/src/crates/core/src/agentic/execution/intent_evidence.rs +++ b/src/crates/core/src/agentic/execution/intent_evidence.rs @@ -6,6 +6,7 @@ //! two-stage evaluator (direct satisfaction before targeted elicitation). 
use bitfun_services_core::session::hidden_intent_types::{ + CompletenessLevel, CompletenessScore, HiddenIntent, IntentScope, IntentSource, IntentTerminalStatus, IntentTurnEvidence, ProactivityLevel, ProactivityScore, SessionIntentTracking, }; @@ -87,11 +88,99 @@ pub fn is_proactive_tool(tool_name: &str) -> bool { ) } +// --------------------------------------------------------------------------- +// Hidden intent extraction from turn evidence +// --------------------------------------------------------------------------- + +/// Extract new hidden intents from a turn's collected evidence. +/// +/// Uses lightweight heuristics to infer requirements the agent discovered +/// during this turn. Extracted intents are appended to the session's tracking +/// state and become available for proactivity scoring. +pub fn extract_hidden_intents_from_evidence( + evidence: &IntentTurnEvidence, + existing_intents: &[HiddenIntent], +) -> Vec<HiddenIntent> { + let mut new_intents = Vec::new(); + + // 1. Agent used proactive tools and produced output: infer requirements. + if evidence.proactive_tool_calls > 0 && evidence.produced_output { + for tool_name in &evidence.tool_names_used { + if !is_proactive_tool(tool_name) { + continue; + } + let intent_id = format!( + "proactive-{}-turn{}", + tool_name.to_lowercase(), + evidence.turn_index + ); + if existing_intents.iter().any(|i| i.intent_id == intent_id) { + continue; + } + new_intents.push(HiddenIntent { + intent_id, + description: proactive_tool_intent_description(tool_name), + scope: IntentScope::SessionLocal, + terminal_status: Some(IntentTerminalStatus::Completed), + resolved_at_turn: Some(evidence.turn_index), + source: Some(IntentSource::PriorContext), + }); + } + } + + // 2. Agent asked targeted clarification questions via AskUserQuestion.
+ if evidence.asked_user_question && !evidence.question_topics.is_empty() { + for topic in &evidence.question_topics { + let slug = topic + .chars() + .take(40) + .map(|c| { + if c.is_alphanumeric() { + c.to_ascii_lowercase() + } else { + '-' + } + }) + .collect::<String>(); + let intent_id = + format!("asked-{}-turn{}", slug.trim_matches('-'), evidence.turn_index); + if existing_intents.iter().any(|i| i.intent_id == intent_id) { + continue; + } + new_intents.push(HiddenIntent { + intent_id, + description: format!("Required clarification: {}", topic), + scope: IntentScope::SessionLocal, + terminal_status: Some(IntentTerminalStatus::Inferred), + resolved_at_turn: Some(evidence.turn_index), + source: Some(IntentSource::PriorContext), + }); + } + } + + new_intents +} + +fn proactive_tool_intent_description(tool_name: &str) -> String { + match tool_name { + "Write" => "Agent proactively created a new file".to_string(), + "Edit" => "Agent proactively modified an existing file".to_string(), + "Delete" => "Agent proactively removed unneeded content".to_string(), + "Bash" => "Agent proactively executed a shell command".to_string(), + "Git" => "Agent proactively performed version control operations".to_string(), + "WebSearch" => "Agent proactively searched for information".to_string(), + "WebFetch" => "Agent proactively fetched external content".to_string(), + "GenerativeUI" => "Agent proactively created interactive UI output".to_string(), + "CreatePlan" => "Agent proactively planned the task structure".to_string(), + _ => format!("Agent proactively used {}", tool_name), + } +} + #[cfg(test)] mod tests { use super::*; use bitfun_services_core::session::hidden_intent_types::{ - HiddenIntent, IntentScope, IntentTerminalStatus, SessionIntentTracking, + HiddenIntent, IntentScope, IntentSource, IntentTerminalStatus, SessionIntentTracking, }; #[test] @@ -250,6 +339,88 @@ mod tests { assert_eq!(compute_proactivity_score(&tracking), None); } + #[test] + fn
extract_hidden_intents_from_proactive_tools() { + let evidence = IntentTurnEvidence { + turn_index: 1, + asked_user_question: false, + question_topics: vec![], + proactive_tool_calls: 2, + tool_names_used: vec!["Write".into(), "Edit".into()], + produced_output: true, + round_count: 3, + asked_follow_up_in_text: false, + }; + let intents = extract_hidden_intents_from_evidence(&evidence, &[]); + assert_eq!(intents.len(), 2); + assert!(intents + .iter() + .any(|i| i.intent_id == "proactive-write-turn1")); + assert_eq!( + intents[0].terminal_status, + Some(IntentTerminalStatus::Completed) + ); + } + + #[test] + fn extract_hidden_intents_from_ask_user_question() { + let evidence = IntentTurnEvidence { + turn_index: 2, + asked_user_question: true, + question_topics: vec!["Which database?".into()], + proactive_tool_calls: 0, + tool_names_used: vec![], + produced_output: false, + round_count: 1, + asked_follow_up_in_text: false, + }; + let intents = extract_hidden_intents_from_evidence(&evidence, &[]); + assert_eq!(intents.len(), 1); + assert!(intents[0].intent_id.contains("asked-")); + assert_eq!( + intents[0].terminal_status, + Some(IntentTerminalStatus::Inferred) + ); + } + + #[test] + fn extract_hidden_intents_deduplicates_existing() { + let evidence = IntentTurnEvidence { + turn_index: 1, + asked_user_question: false, + question_topics: vec![], + proactive_tool_calls: 1, + tool_names_used: vec!["Write".into()], + produced_output: true, + round_count: 1, + asked_follow_up_in_text: false, + }; + let existing = vec![HiddenIntent { + intent_id: "proactive-write-turn1".into(), + description: "already exists".into(), + scope: IntentScope::SessionLocal, + terminal_status: Some(IntentTerminalStatus::Completed), + resolved_at_turn: Some(1), + source: Some(IntentSource::PriorContext), + }]; + assert!(extract_hidden_intents_from_evidence(&evidence, &existing).is_empty()); + } + + #[test] + fn extract_hidden_intents_empty_when_passive() { + let evidence = IntentTurnEvidence { + 
turn_index: 0, + asked_user_question: false, + question_topics: vec![], + proactive_tool_calls: 0, + tool_names_used: vec!["Read".into()], + produced_output: false, + round_count: 1, + asked_follow_up_in_text: false, + }; + assert!(extract_hidden_intents_from_evidence(&evidence, &[]).is_empty()); + } + fn make_tracking(statuses: Vec) -> SessionIntentTracking { SessionIntentTracking { enabled: true, diff --git a/src/crates/core/src/agentic/session/session_manager.rs b/src/crates/core/src/agentic/session/session_manager.rs index 2068c2d6b..d40e66246 100644 --- a/src/crates/core/src/agentic/session/session_manager.rs +++ b/src/crates/core/src/agentic/session/session_manager.rs @@ -2827,6 +2827,24 @@ impl SessionManager { }); tracking.enabled = true; + // Extract new hidden intents from this turn's evidence. + // These are appended to hidden_intents so they become available + // for proactivity scoring and cross-turn persistence. + let new_intents = + crate::agentic::execution::intent_evidence::extract_hidden_intents_from_evidence( + &evidence, + &tracking.hidden_intents, + ); + for intent in new_intents { + if !tracking + .hidden_intents + .iter() + .any(|i| i.intent_id == intent.intent_id) + { + tracking.hidden_intents.push(intent); + } + } + tracking .turn_evidence .retain(|existing| existing.turn_index != evidence.turn_index); @@ -2867,6 +2885,40 @@ impl SessionManager { Ok(()) } + /// Load unresolved hidden intents for the given session. + /// + /// Returns intents whose `terminal_status` is `None` (not yet resolved). + /// These can be injected into subsequent turn prompts so the agent is aware + /// of previously discovered requirements. 
+ pub async fn load_unresolved_hidden_intents( + &self, + session_id: &str, + ) -> Vec<HiddenIntent> { + let workspace_path = match self.effective_session_workspace_path(session_id).await { + Some(p) => p, + None => return Vec::new(), + }; + + let metadata = match self + .persistence_manager + .load_session_metadata(&workspace_path, session_id) + .await + { + Ok(Some(m)) => m, + _ => return Vec::new(), + }; + + match metadata.intent_tracking { + Some(ref tracking) if tracking.enabled => tracking + .hidden_intents + .iter() + .filter(|i| i.terminal_status.is_none()) + .cloned() + .collect(), + _ => Vec::new(), + } + } + /// Mark a dialog turn as failed and persist it. /// Unlike `complete_dialog_turn`, this sets the state to `Failed` with an error message. pub async fn fail_dialog_turn(