diff --git a/src/apps/desktop/src/api/agentic_api.rs b/src/apps/desktop/src/api/agentic_api.rs index 0d0db727b..d00e2d8c6 100644 --- a/src/apps/desktop/src/api/agentic_api.rs +++ b/src/apps/desktop/src/api/agentic_api.rs @@ -14,8 +14,8 @@ use bitfun_core::agentic::coordination::{ }; use bitfun_core::agentic::core::*; use bitfun_core::agentic::deep_review_policy::{ - apply_deep_review_queue_control, default_review_team_definition, DeepReviewQueueControlAction, - ReviewTeamDefinition, + DeepReviewQueueControlAction, ReviewTeamDefinition, apply_deep_review_queue_control, + default_review_team_definition, }; use bitfun_core::agentic::image_analysis::ImageContextData; use bitfun_core::agentic::tools::image_context::get_image_context; @@ -63,6 +63,8 @@ pub struct SessionConfigDTO { pub remote_connection_id: Option, #[serde(default)] pub remote_ssh_host: Option, + #[serde(default)] + pub enable_intent_tracking: Option, } #[derive(Debug, Serialize)] @@ -574,6 +576,7 @@ pub async fn create_session( remote_connection_id: remote_conn.clone(), remote_ssh_host: remote_ssh_host.clone(), model_id: c.model_name, + enable_intent_tracking: c.enable_intent_tracking.unwrap_or(false), }) .unwrap_or(SessionConfig { workspace_path: Some(request.workspace_path.clone()), @@ -720,13 +723,13 @@ pub async fn ensure_coordinator_session( ) .await; let restore_result = if request.include_internal { - coordinator.restore_internal_session(&effective, session_id).await + coordinator + .restore_internal_session(&effective, session_id) + .await } else { coordinator.restore_session(&effective, session_id).await }; - restore_result - .map(|_| ()) - .map_err(|e| e.to_string()) + restore_result.map(|_| ()).map_err(|e| e.to_string()) } #[tauri::command] @@ -1635,6 +1638,8 @@ mod tests { end_time: Some(2), duration_ms: Some(1), status: TurnStatus::Completed, + intent_assignments: vec![], + intent_evidence: None, }; let stats = restore_turn_payload_stats(&[turn]); @@ -1697,6 +1702,8 @@ mod tests { end_time: 
Some(2), duration_ms: Some(1), status: TurnStatus::Completed, + intent_assignments: vec![], + intent_evidence: None, }]; omit_assistant_only_tool_results_for_session_view(&mut turns); @@ -1755,6 +1762,8 @@ mod tests { end_time: Some(2), duration_ms: Some(1), status: TurnStatus::Completed, + intent_assignments: vec![], + intent_evidence: None, }]; omit_assistant_only_tool_results_for_session_view(&mut turns); diff --git a/src/crates/core/src/agentic/agents/prompts/agentic_mode.md b/src/crates/core/src/agentic/agents/prompts/agentic_mode.md index 5e573ff7e..87256a0e0 100644 --- a/src/crates/core/src/agentic/agents/prompts/agentic_mode.md +++ b/src/crates/core/src/agentic/agents/prompts/agentic_mode.md @@ -41,6 +41,22 @@ When presenting options, state your recommendation and reasoning, keep choices c When presenting options or plans, never include time estimates - focus on what each option involves, not how long it might take. +# Proactivity +Users often begin with underspecified requests and leave important needs, constraints, or preferences unstated. Proactive assistance means reducing the user's burden by surfacing what needs clarification and deciding what can be inferred, rather than treating ambiguity as a reason to remain passive. + +When a request is underspecified: +1. **Infer from context**: Use prior session history, workspace files, project conventions, and the user's past preferences to fill in reasonable defaults without asking. +2. **Ask targeted questions**: When inference is insufficient, use AskUserQuestion to surface the specific missing constraint. Prefer one focused question over a broad "tell me everything." +3. **Act on partial information**: Start working with reasonable assumptions while flagging them. Do not block on full specification when the first step can proceed. + +Avoid these anti-patterns: +- Restating the user's request back to them without adding value +- Asking "do you want me to proceed?" 
without having done any work +- Waiting for step-by-step instructions when the task direction is clear +- Asking generic open-ended questions when a concrete choice is needed + +The goal is to reduce the user's operational and cognitive effort: finish the task while minimizing avoidable back-and-forth. + {VISUAL_MODE} # Doing tasks The user will primarily request you perform software engineering tasks. This includes solving bugs, adding new functionality, refactoring code, explaining code, and more. For these tasks the following steps are recommended: diff --git a/src/crates/core/src/agentic/agents/prompts/claw_mode.md b/src/crates/core/src/agentic/agents/prompts/claw_mode.md index d89368674..a6c5a7eb2 100644 --- a/src/crates/core/src/agentic/agents/prompts/claw_mode.md +++ b/src/crates/core/src/agentic/agents/prompts/claw_mode.md @@ -51,6 +51,26 @@ Operational rules: - Delete temporary sessions when they are no longer useful. - Do not create extra sessions for trivial, tightly coupled, or one-step work. +# Proactivity +Users rarely begin with a complete specification of what they actually need. They issue a brief, underspecified initial request while the intended assistance depends on hidden requirements — habits, constraints, preferences, and dependencies that the user does not explicitly state. + +Be proactive: anticipate and address these hidden needs rather than waiting for the user to spell them out. + +When the user's request is underspecified: +1. **Recover from prior context**: Check earlier sessions, workspace files, and user memory for established preferences, conventions, and decisions that apply to this request. If the user preferred a specific format, naming scheme, or workflow in a prior session, carry it forward without asking again. +2. **Infer reasonable defaults**: Use domain knowledge and common conventions to fill gaps. 
For example, when organizing papers, include links and open-source status by default; when preparing a document, apply the user's established style. +3. **Ask targeted clarifying questions**: When inference is insufficient, use AskUserQuestion to surface exactly the missing constraint. Each question should target one specific decision. Avoid broad "what else do you need?" questions — the user expects you to figure out what matters. +4. **Act on partial information**: Start with reasonable assumptions, produce an initial deliverable, and flag where you made assumptions. This lets the user correct specifics rather than providing everything from scratch. Do not block on full specification when meaningful progress can be made. + +Anti-patterns to avoid: +- Restating the request verbatim and asking "is this what you want?" +- Waiting for the user to provide every detail before taking any action +- Failing to reuse preferences and conventions established in prior sessions +- Treating underspecification as a reason to remain passive +- Making the user repeat information that already exists in the workspace or prior sessions + +The goal is to reduce the user's operational and cognitive effort: resolve hidden requirements through inference or focused elicitation, and minimize the avoidable back-and-forth that comes from passive, step-by-step instruction-following. + # Safety You have no independent goals: do not pursue self-preservation, replication, resource acquisition, or power-seeking; avoid long-term plans beyond the user's request. 
diff --git a/src/crates/core/src/agentic/coordination/coordinator.rs b/src/crates/core/src/agentic/coordination/coordinator.rs index 8ef735ff7..a99327d2b 100644 --- a/src/crates/core/src/agentic/coordination/coordinator.rs +++ b/src/crates/core/src/agentic/coordination/coordinator.rs @@ -1254,6 +1254,9 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet workspace_hostname: None, unread_completion: None, needs_user_attention: None, + intent_tracking: None, + proactivity_score: None, + completeness_score: None, }; if let Err(e) = persistence_manager .save_session_metadata(&workspace_path_buf, &metadata) @@ -2358,6 +2361,13 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet round_preempt: self.round_preempt_source.get().cloned(), round_injection: self.round_injection_source.get().cloned(), recover_partial_on_cancel: false, + intent_evidence: if session.config.enable_intent_tracking { + Some(std::sync::Arc::new(std::sync::Mutex::new( + crate::agentic::execution::intent_evidence::IntentEvidenceCollector::default(), + ))) + } else { + None + }, }; // Auto-generate session title on first message @@ -3707,6 +3717,7 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet // that belong to a different (parent) session/turn. 
round_injection: None, recover_partial_on_cancel: true, + intent_evidence: None, }; let execution_engine = self.execution_engine.clone(); diff --git a/src/crates/core/src/agentic/core/session.rs b/src/crates/core/src/agentic/core/session.rs index 05fa12896..0a1ea81d2 100644 --- a/src/crates/core/src/agentic/core/session.rs +++ b/src/crates/core/src/agentic/core/session.rs @@ -1,5 +1,9 @@ use super::state::SessionState; pub use bitfun_core_types::SessionKind; +pub use bitfun_services_core::session::hidden_intent_types::{ + HiddenIntent, IntentAssignment, IntentScope, IntentSource, IntentTerminalStatus, + PersistentIntent, SessionIntentTracking, +}; use serde::{Deserialize, Serialize}; use std::time::SystemTime; use uuid::Uuid; @@ -149,6 +153,12 @@ pub struct SessionConfig { /// Model config ID used by this session (for token usage tracking) #[serde(default, skip_serializing_if = "Option::is_none")] pub model_id: Option, + + /// Whether hidden intent tracking is enabled for this session. + /// When enabled, the agent loop tracks which hidden requirements were + /// proactively resolved vs passively waited-for. + #[serde(default)] + pub enable_intent_tracking: bool, } impl Default for SessionConfig { @@ -166,6 +176,7 @@ impl Default for SessionConfig { remote_connection_id: None, remote_ssh_host: None, model_id: None, + enable_intent_tracking: false, } } } diff --git a/src/crates/core/src/agentic/execution/execution_engine.rs b/src/crates/core/src/agentic/execution/execution_engine.rs index d5f5ab6c7..307df3901 100644 --- a/src/crates/core/src/agentic/execution/execution_engine.rs +++ b/src/crates/core/src/agentic/execution/execution_engine.rs @@ -2065,6 +2065,43 @@ impl ExecutionEngine { total_tools += round_result.tool_calls.len(); + // Hook A: Collect intent evidence from this round + // Only runs when intent tracking is enabled for this session. 
+ if let Some(ref collector) = context.intent_evidence { + match collector.lock() { + Ok(mut c) => { + if round_result.used_ask_user_question { + c.asked_user_question = true; + c.question_topics + .extend(round_result.ask_user_question_topics.clone()); + } + c.tool_names_used.extend( + round_result + .tool_calls + .iter() + .map(|tc| tc.tool_name.clone()), + ); + c.proactive_tool_calls += round_result + .tool_calls + .iter() + .filter(|tc| { + crate::agentic::execution::intent_evidence::is_proactive_tool( + &tc.tool_name, + ) + }) + .count(); + c.produced_output |= round_result.had_assistant_text; + c.round_count += 1; + } + Err(_) => { + warn!( + "Intent evidence collector mutex poisoned, skipping round evidence: session_id={}, turn_id={}", + context.session_id, context.dialog_turn_id + ); + } + } + } + // Track partial recovery reason from the last round if round_result.partial_recovery_reason.is_some() { last_partial_recovery_reason = round_result.partial_recovery_reason.clone(); @@ -2415,6 +2452,27 @@ impl ExecutionEngine { ); } + // Hook B: Persist collected intent evidence for this turn. + // Called after the dialog turn loop exits (all rounds complete). + let evidence = context.intent_evidence.as_ref().and_then(|collector| { + collector + .lock() + .ok() + .map(|c| c.snapshot(context.turn_index)) + }); + if let Some(evidence) = evidence { + if let Err(e) = self + .session_manager + .record_intent_evidence(&context.session_id, evidence) + .await + { + warn!( + "Failed to record intent evidence: session_id={}, turn_id={}, error={}", + context.session_id, context.dialog_turn_id, e + ); + } + } + // P1-6: Track the actual termination reason for downstream reporting. // Defaults to "complete" (model produced a final answer naturally) and // is overridden by finalize / fallback paths below. 
diff --git a/src/crates/core/src/agentic/execution/intent_evidence.rs b/src/crates/core/src/agentic/execution/intent_evidence.rs new file mode 100644 index 000000000..0a0cd3168 --- /dev/null +++ b/src/crates/core/src/agentic/execution/intent_evidence.rs @@ -0,0 +1,442 @@ +//! Intent evidence collection for proactive assistance evaluation. +//! +//! This module collects lightweight trajectory signals during execution. It +//! intentionally does not assign hidden-intent terminal statuses: pi-Bench style +//! assignment requires comparing a turn against concrete hidden intents with a +//! two-stage evaluator (direct satisfaction before targeted elicitation). + +use bitfun_services_core::session::hidden_intent_types::{ + CompletenessLevel, CompletenessScore, HiddenIntent, IntentScope, IntentSource, + IntentTerminalStatus, IntentTurnEvidence, ProactivityLevel, ProactivityScore, + SessionIntentTracking, +}; + +/// Evidence collected during a single dialog turn for later intent analysis. +/// The collector is stateless per-turn: it gathers raw signals from model +/// rounds and produces an IntentTurnEvidence snapshot at turn completion. 
+#[derive(Debug, Clone, Default)] +pub struct IntentEvidenceCollector { + pub asked_user_question: bool, + pub question_topics: Vec, + pub proactive_tool_calls: usize, + pub tool_names_used: Vec, + pub produced_output: bool, + pub round_count: usize, + pub asked_follow_up_in_text: bool, +} + +impl IntentEvidenceCollector { + pub fn snapshot(&self, turn_index: usize) -> IntentTurnEvidence { + IntentTurnEvidence { + turn_index, + asked_user_question: self.asked_user_question, + question_topics: self.question_topics.clone(), + proactive_tool_calls: self.proactive_tool_calls, + tool_names_used: self.tool_names_used.clone(), + produced_output: self.produced_output, + round_count: self.round_count, + asked_follow_up_in_text: self.asked_follow_up_in_text, + } + } +} + +// --------------------------------------------------------------------------- +// Scoring functions +// --------------------------------------------------------------------------- + +pub fn compute_proactivity_score(tracking: &SessionIntentTracking) -> Option { + if !tracking.enabled || tracking.hidden_intents.is_empty() { + return None; + } + if !tracking.all_intents_resolved() { + return None; + } + + let completed = tracking.count_by_status(IntentTerminalStatus::Completed) as u32; + let inferred = tracking.count_by_status(IntentTerminalStatus::Inferred) as u32; + let provided = tracking.count_by_status(IntentTerminalStatus::Provided) as u32; + let total = tracking.hidden_intents.len() as u32; + + let score = (completed + inferred) as f32 / total as f32; + Some(ProactivityScore { + completed, + inferred, + provided, + score, + level: Some(classify_proactivity_level(score)), + }) +} + +/// Classify a proactivity score into a qualitative level. +/// Delegates to `ProactivityLevel::from_score` so the thresholds stay in one place. 
+pub fn classify_proactivity_level(score: f32) -> ProactivityLevel { + ProactivityLevel::from_score(score) +} + +pub fn is_proactive_tool(tool_name: &str) -> bool { + matches!( + tool_name, + "Write" + | "Edit" + | "Delete" + | "Bash" + | "Git" + | "WebSearch" + | "WebFetch" + | "GenerativeUI" + | "CreatePlan" + ) +} + +// --------------------------------------------------------------------------- +// Hidden intent extraction from turn evidence +// --------------------------------------------------------------------------- + +/// Extract new hidden intents from a turn's collected evidence. +/// +/// Uses lightweight heuristics to infer requirements the agent discovered +/// during this turn. Extracted intents are appended to the session's tracking +/// state and become available for proactivity scoring. +pub fn extract_hidden_intents_from_evidence( + evidence: &IntentTurnEvidence, + existing_intents: &[HiddenIntent], +) -> Vec { + let mut new_intents = Vec::new(); + + // 1. Agent used proactive tools and produced output: infer requirements. + if evidence.proactive_tool_calls > 0 && evidence.produced_output { + for tool_name in &evidence.tool_names_used { + if !is_proactive_tool(tool_name) { + continue; + } + let intent_id = format!( + "proactive-{}-turn{}", + tool_name.to_lowercase(), + evidence.turn_index + ); + if existing_intents.iter().any(|i| i.intent_id == intent_id) { + continue; + } + new_intents.push(HiddenIntent { + intent_id, + description: proactive_tool_intent_description(tool_name), + scope: IntentScope::SessionLocal, + terminal_status: Some(IntentTerminalStatus::Completed), + resolved_at_turn: Some(evidence.turn_index), + source: Some(IntentSource::PriorContext), + }); + } + } + + // 2. Agent asked targeted clarification questions via AskUserQuestion. 
+ if evidence.asked_user_question && !evidence.question_topics.is_empty() { + for topic in &evidence.question_topics { + let slug = topic + .chars() + .take(40) + .map(|c| { + if c.is_alphanumeric() { + c.to_ascii_lowercase() + } else { + '-' + } + }) + .collect::(); + let intent_id = + format!("asked-{}-turn{}", slug.trim_matches('-'), evidence.turn_index); + if existing_intents.iter().any(|i| i.intent_id == intent_id) { + continue; + } + new_intents.push(HiddenIntent { + intent_id, + description: format!("Required clarification: {}", topic), + scope: IntentScope::SessionLocal, + terminal_status: Some(IntentTerminalStatus::Inferred), + resolved_at_turn: Some(evidence.turn_index), + source: Some(IntentSource::PriorContext), + }); + } + } + + new_intents +} + +fn proactive_tool_intent_description(tool_name: &str) -> String { + match tool_name { + "Write" => "Agent proactively created a new file".to_string(), + "Edit" => "Agent proactively modified an existing file".to_string(), + "Delete" => "Agent proactively removed unneeded content".to_string(), + "Bash" => "Agent proactively executed a shell command".to_string(), + "Git" => "Agent proactively performed version control operations".to_string(), + "WebSearch" => "Agent proactively searched for information".to_string(), + "WebFetch" => "Agent proactively fetched external content".to_string(), + "GenerativeUI" => "Agent proactively created interactive UI output".to_string(), + "CreatePlan" => "Agent proactively planned the task structure".to_string(), + _ => format!("Agent proactively used {}", tool_name), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use bitfun_services_core::session::hidden_intent_types::{ + HiddenIntent, IntentScope, IntentSource, IntentTerminalStatus, SessionIntentTracking, + }; + + #[test] + fn collector_empty_on_init() { + let c = IntentEvidenceCollector::default(); + assert!(!c.asked_user_question); + assert!(c.question_topics.is_empty()); + assert_eq!(c.proactive_tool_calls, 0); + 
assert!(c.tool_names_used.is_empty()); + assert!(!c.produced_output); + assert_eq!(c.round_count, 0); + assert!(!c.asked_follow_up_in_text); + } + + #[test] + fn collector_records_ask_user_question() { + let mut c = IntentEvidenceCollector { + asked_user_question: true, + ..Default::default() + }; + c.question_topics.push("What approach?".into()); + c.question_topics.push("Which library?".into()); + + let evidence = c.snapshot(1); + + assert!(evidence.asked_user_question); + assert_eq!(evidence.question_topics.len(), 2); + assert_eq!(evidence.turn_index, 1); + } + + #[test] + fn intent_turn_evidence_round_trips() { + let evidence = IntentTurnEvidence { + turn_index: 2, + asked_user_question: true, + question_topics: vec!["Which format?".into()], + proactive_tool_calls: 3, + tool_names_used: vec!["Write".into(), "Edit".into()], + produced_output: true, + round_count: 5, + asked_follow_up_in_text: false, + }; + let json = serde_json::to_value(&evidence).expect("serialize"); + let rt: IntentTurnEvidence = serde_json::from_value(json).expect("deserialize"); + assert_eq!(rt.turn_index, 2); + assert!(rt.asked_user_question); + assert_eq!(rt.proactive_tool_calls, 3); + assert_eq!(rt.tool_names_used, vec!["Write", "Edit"]); + } + + #[test] + fn compute_proactivity_score_all_completed() { + let tracking = make_tracking(vec![ + IntentTerminalStatus::Completed, + IntentTerminalStatus::Completed, + IntentTerminalStatus::Completed, + ]); + let s = compute_proactivity_score(&tracking).unwrap(); + assert!((s.score - 1.0).abs() < f32::EPSILON); + assert_eq!(s.completed, 3); + assert_eq!(s.inferred, 0); + assert_eq!(s.provided, 0); + assert_eq!(s.level, Some(ProactivityLevel::High)); + } + + #[test] + fn compute_proactivity_score_all_provided() { + let tracking = make_tracking(vec![ + IntentTerminalStatus::Provided, + IntentTerminalStatus::Provided, + ]); + let s = compute_proactivity_score(&tracking).unwrap(); + assert!((s.score - 0.0).abs() < f32::EPSILON); + 
assert_eq!(s.provided, 2); + assert_eq!(s.level, Some(ProactivityLevel::Reactive)); + } + + #[test] + fn compute_proactivity_score_mixed() { + let tracking = make_tracking(vec![ + IntentTerminalStatus::Completed, + IntentTerminalStatus::Completed, + IntentTerminalStatus::Inferred, + IntentTerminalStatus::Provided, + ]); + let s = compute_proactivity_score(&tracking).unwrap(); + assert!((s.score - 0.75).abs() < f32::EPSILON); + assert_eq!(s.completed, 2); + assert_eq!(s.inferred, 1); + assert_eq!(s.provided, 1); + assert_eq!(s.level, Some(ProactivityLevel::Moderate)); + } + + #[test] + fn compute_proactivity_score_empty() { + assert_eq!( + compute_proactivity_score(&SessionIntentTracking::default()), + None + ); + } + + #[test] + fn compute_proactivity_score_requires_resolved_intents() { + let mut tracking = make_tracking(vec![ + IntentTerminalStatus::Completed, + IntentTerminalStatus::Provided, + ]); + tracking.hidden_intents.push(HiddenIntent { + intent_id: "i-unresolved".into(), + description: "unresolved intent".into(), + scope: IntentScope::SessionLocal, + terminal_status: None, + resolved_at_turn: None, + source: None, + }); + + assert_eq!(compute_proactivity_score(&tracking), None); + } + + #[test] + fn classify_proactivity_level_edges() { + assert_eq!(classify_proactivity_level(0.9), ProactivityLevel::High); + assert_eq!(classify_proactivity_level(0.8), ProactivityLevel::High); + assert_eq!(classify_proactivity_level(0.79), ProactivityLevel::Moderate); + assert_eq!(classify_proactivity_level(0.5), ProactivityLevel::Moderate); + assert_eq!(classify_proactivity_level(0.49), ProactivityLevel::Low); + assert_eq!(classify_proactivity_level(0.2), ProactivityLevel::Low); + assert_eq!(classify_proactivity_level(0.19), ProactivityLevel::Reactive); + assert_eq!(classify_proactivity_level(0.0), ProactivityLevel::Reactive); + } + + #[test] + fn is_proactive_tool_positive() { + assert!(is_proactive_tool("Write")); + assert!(is_proactive_tool("Edit")); + 
assert!(is_proactive_tool("Delete")); + assert!(is_proactive_tool("Bash")); + assert!(is_proactive_tool("Git")); + assert!(is_proactive_tool("WebSearch")); + assert!(is_proactive_tool("CreatePlan")); + } + + #[test] + fn is_proactive_tool_negative() { + assert!(!is_proactive_tool("Read")); + assert!(!is_proactive_tool("Grep")); + assert!(!is_proactive_tool("Glob")); + assert!(!is_proactive_tool("TodoWrite")); + assert!(!is_proactive_tool("AskUserQuestion")); + } + + #[test] + fn compute_proactivity_disabled() { + let mut tracking = make_tracking(vec![IntentTerminalStatus::Completed]); + tracking.enabled = false; + assert_eq!(compute_proactivity_score(&tracking), None); + } + + #[test] + fn extract_hidden_intents_from_proactive_tools() { + let evidence = IntentTurnEvidence { + turn_index: 1, + asked_user_question: false, + question_topics: vec![], + proactive_tool_calls: 2, + tool_names_used: vec!["Write".into(), "Edit".into()], + produced_output: true, + round_count: 3, + asked_follow_up_in_text: false, + }; + let intents = extract_hidden_intents_from_evidence(&evidence, &[]); + assert_eq!(intents.len(), 2); + assert!(intents + .iter() + .any(|i| i.intent_id == "proactive-write-turn1")); + assert_eq!( + intents[0].terminal_status, + Some(IntentTerminalStatus::Completed) + ); + } + + #[test] + fn extract_hidden_intents_from_ask_user_question() { + let evidence = IntentTurnEvidence { + turn_index: 2, + asked_user_question: true, + question_topics: vec!["Which database?".into()], + proactive_tool_calls: 0, + tool_names_used: vec![], + produced_output: false, + round_count: 1, + asked_follow_up_in_text: false, + }; + let intents = extract_hidden_intents_from_evidence(&evidence, &[]); + assert_eq!(intents.len(), 1); + assert!(intents[0].intent_id.contains("asked-")); + assert_eq!( + intents[0].terminal_status, + Some(IntentTerminalStatus::Inferred) + ); + } + + #[test] + fn extract_hidden_intents_deduplicates_existing() { + let evidence = IntentTurnEvidence { + 
turn_index: 1, + asked_user_question: false, + question_topics: vec![], + proactive_tool_calls: 1, + tool_names_used: vec!["Write".into()], + produced_output: true, + round_count: 1, + asked_follow_up_in_text: false, + }; + let existing = vec![HiddenIntent { + intent_id: "proactive-write-turn1".into(), + description: "already exists".into(), + scope: IntentScope::SessionLocal, + terminal_status: Some(IntentTerminalStatus::Completed), + resolved_at_turn: Some(1), + source: Some(IntentSource::PriorContext), + }]; + assert!(extract_hidden_intents_from_evidence(&evidence, &existing).is_empty()); + } + + #[test] + fn extract_hidden_intents_empty_when_passive() { + let evidence = IntentTurnEvidence { + turn_index: 0, + asked_user_question: false, + question_topics: vec![], + proactive_tool_calls: 0, + tool_names_used: vec!["Read".into()], + produced_output: false, + round_count: 1, + asked_follow_up_in_text: false, + }; + assert!(extract_hidden_intents_from_evidence(&evidence, &[]).is_empty()); + } + + fn make_tracking(statuses: Vec) -> SessionIntentTracking { + SessionIntentTracking { + enabled: true, + hidden_intents: statuses + .into_iter() + .enumerate() + .map(|(i, status)| HiddenIntent { + intent_id: format!("i{}", i), + description: format!("test intent {}", i), + scope: IntentScope::SessionLocal, + terminal_status: Some(status), + resolved_at_turn: Some(i), + source: None, + }) + .collect(), + ..Default::default() + } + } +} diff --git a/src/crates/core/src/agentic/execution/mod.rs b/src/crates/core/src/agentic/execution/mod.rs index af22b10f6..3b58be95a 100644 --- a/src/crates/core/src/agentic/execution/mod.rs +++ b/src/crates/core/src/agentic/execution/mod.rs @@ -3,6 +3,7 @@ //! 
Responsible for AI interaction and model round control pub mod execution_engine; +pub mod intent_evidence; pub mod round_executor; pub mod stream_processor; pub mod types; diff --git a/src/crates/core/src/agentic/execution/round_executor.rs b/src/crates/core/src/agentic/execution/round_executor.rs index 0f0980293..08415598e 100644 --- a/src/crates/core/src/agentic/execution/round_executor.rs +++ b/src/crates/core/src/agentic/execution/round_executor.rs @@ -48,6 +48,36 @@ impl RoundExecutor { !text.trim().is_empty() } + /// Detects AskUserQuestion calls in a set of tool calls. + /// Returns (used_ask_user_question, extracted_question_topics). + /// + /// Note: `used_ask_user_question` is `true` whenever AskUserQuestion appears + /// in the tool call list, regardless of whether any topic headers could be + /// extracted. This ensures the call is recorded even when the `questions` + /// argument is missing or contains no `header` fields. + fn detect_ask_user_question( + tool_calls: &[crate::agentic::core::ToolCall], + ) -> (bool, Vec) { + let mut called = false; + let mut topics = Vec::new(); + for tc in tool_calls { + if tc.tool_name == "AskUserQuestion" { + called = true; + // Extract question topics from the arguments (best-effort) + if let Some(questions) = tc.arguments.get("questions") { + if let Some(arr) = questions.as_array() { + for q in arr { + if let Some(header) = q.get("header").and_then(|v| v.as_str()) { + topics.push(header.to_string()); + } + } + } + } + } + } + (called, topics) + } + fn write_tool_mode(context: &RoundContext) -> WriteToolMode { WriteToolMode::from_context_var( context @@ -569,6 +599,8 @@ impl RoundExecutor { partial_recovery_reason: stream_result.partial_recovery_reason.clone(), had_assistant_text: Self::has_user_visible_assistant_text(&stream_result.full_text), had_thinking_content: !stream_result.full_thinking.is_empty(), + used_ask_user_question: false, + ask_user_question_topics: vec![], }); } @@ -818,6 +850,10 @@ impl 
RoundExecutor { // Note: Do not cleanup cancellation token here, as there may be subsequent model rounds // Cancellation token will be cleaned up by ExecutionEngine when the entire dialog turn ends + // Detect AskUserQuestion calls for intent evidence collection + let (used_ask_user_question, ask_user_question_topics) = + Self::detect_ask_user_question(&tool_calls); + Ok(RoundResult { assistant_message, tool_calls: tool_calls.clone(), @@ -833,6 +869,8 @@ impl RoundExecutor { partial_recovery_reason: stream_result.partial_recovery_reason.clone(), had_assistant_text: Self::has_user_visible_assistant_text(&stream_result.full_text), had_thinking_content: !stream_result.full_thinking.is_empty(), + used_ask_user_question, + ask_user_question_topics, }) } @@ -1698,6 +1736,7 @@ mod tests { extract_bitfun_contents, extract_bitfun_contents_with_options, RoundExecutor, StreamProcessor, }; + use crate::agentic::core::ToolCall; use crate::agentic::events::{EventQueue, EventQueueConfig}; use crate::agentic::execution::types::RoundContext; use crate::agentic::tools::ToolRuntimeRestrictions; @@ -1708,6 +1747,15 @@ mod tests { use std::sync::Arc; use tokio_util::sync::CancellationToken; + fn tool_call(tool_id: &str, tool_name: &str, arguments: serde_json::Value) -> ToolCall { + ToolCall { + tool_id: tool_id.to_string(), + tool_name: tool_name.to_string(), + arguments, + ..Default::default() + } + } + fn test_round_executor() -> RoundExecutor { let event_queue = Arc::new(EventQueue::new(EventQueueConfig::default())); RoundExecutor { @@ -2182,4 +2230,88 @@ mod tests { }; assert!(super::token_details_from_usage(&usage).is_none()); } + + // --- detect_ask_user_question tests --- + + #[test] + fn detect_ask_user_question_with_header_topics() { + let tc = tool_call( + "tc-1", + "AskUserQuestion", + serde_json::json!({ + "questions": [ + { "header": "Auth method", "question": "Which auth method?" }, + { "header": "Library", "question": "Which library?" 
} + ] + }), + ); + let (called, topics) = RoundExecutor::detect_ask_user_question(&[tc]); + assert!(called, "should be called even with headers"); + assert_eq!(topics, vec!["Auth method", "Library"]); + } + + #[test] + fn detect_ask_user_question_without_header_fields_still_marks_called() { + // AskUserQuestion called but questions have no `header` field. + // The bug being tested: previously returned (false, []) in this case. + let tc = tool_call( + "tc-1", + "AskUserQuestion", + serde_json::json!({ + "questions": [ + { "question": "Which auth method?" } + ] + }), + ); + let (called, topics) = RoundExecutor::detect_ask_user_question(&[tc]); + assert!(called, "must be true even when no headers are extractable"); + assert!(topics.is_empty()); + } + + #[test] + fn detect_ask_user_question_with_empty_questions_array_still_marks_called() { + let tc = tool_call( + "tc-1", + "AskUserQuestion", + serde_json::json!({ "questions": [] }), + ); + let (called, topics) = RoundExecutor::detect_ask_user_question(&[tc]); + assert!(called); + assert!(topics.is_empty()); + } + + #[test] + fn detect_ask_user_question_with_missing_questions_key_still_marks_called() { + let tc = tool_call( + "tc-1", + "AskUserQuestion", + serde_json::json!({}), + ); + let (called, topics) = RoundExecutor::detect_ask_user_question(&[tc]); + assert!(called); + assert!(topics.is_empty()); + } + + #[test] + fn detect_ask_user_question_not_present_returns_false() { + let tc = tool_call("tc-1", "Write", serde_json::json!({ "file_path": "a.rs" })); + let (called, topics) = RoundExecutor::detect_ask_user_question(&[tc]); + assert!(!called); + assert!(topics.is_empty()); + } + + #[test] + fn detect_ask_user_question_mixed_tool_calls() { + let write_tc = tool_call("tc-1", "Write", serde_json::json!({})); + let ask_tc = tool_call( + "tc-2", + "AskUserQuestion", + serde_json::json!({ + "questions": [{ "header": "Approach" }] + }), + ); + let (called, topics) = RoundExecutor::detect_ask_user_question(&[write_tc, 
ask_tc]); + assert!(called); + assert_eq!(topics, vec!["Approach"]); + } } diff --git a/src/crates/core/src/agentic/execution/types.rs b/src/crates/core/src/agentic/execution/types.rs index 60b3a89a1..6524836fe 100644 --- a/src/crates/core/src/agentic/execution/types.rs +++ b/src/crates/core/src/agentic/execution/types.rs @@ -1,6 +1,7 @@ //! Execution Engine Type Definitions use crate::agentic::core::Message; +use crate::agentic::execution::intent_evidence::IntentEvidenceCollector; use crate::agentic::round_preempt::{ DialogRoundInjectionInterrupt, DialogRoundInjectionSource, DialogRoundPreemptSource, }; @@ -10,7 +11,7 @@ use crate::agentic::workspace::WorkspaceServices; use crate::agentic::WorkspaceBinding; use serde_json::Value; use std::collections::HashMap; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; use tokio_util::sync::CancellationToken; /// Execution context @@ -35,6 +36,10 @@ pub struct ExecutionContext { /// When true, stream cancellation may be converted into a partial assistant /// result if text/tool output has already been produced. pub recover_partial_on_cancel: bool, + + /// When intent tracking is enabled, this collector gathers raw signals + /// during execution for later intent analysis. + pub intent_evidence: Option>>, } /// Round context @@ -85,6 +90,13 @@ pub struct RoundResult { /// True when the model emitted any non-empty thinking / reasoning content /// in this round. pub had_thinking_content: bool, + + /// Whether the agent called AskUserQuestion in this round. + /// Set by the round executor when processing tool calls. + pub used_ask_user_question: bool, + + /// If AskUserQuestion was called, the `header` values of its questions (empty when none carry a header).
+ pub ask_user_question_topics: Vec, } /// Finish reason diff --git a/src/crates/core/src/agentic/insights/prompts/facet_extraction.md b/src/crates/core/src/agentic/insights/prompts/facet_extraction.md index 1185626a4..1f7945cc5 100644 --- a/src/crates/core/src/agentic/insights/prompts/facet_extraction.md +++ b/src/crates/core/src/agentic/insights/prompts/facet_extraction.md @@ -27,6 +27,21 @@ CRITICAL GUIDELINES: 5. **languages_used**: Optional. The insights report's language chart is computed from edited file paths (Edit/Write tool), not from this field; you may still list languages you infer for context. +6. **proactivity**: Assess how proactively the AI handled underspecified or ambiguous parts of the user's request. + - proactive_hidden_intents: Number of hidden requirements the AI surfaced and resolved without the user having to explicitly state them. This includes: inferring preferences from prior context, filling in reasonable defaults, and applying established conventions without asking. + - reactive_hidden_intents: Number of requirements the user had to explicitly provide step by step because the AI did not proactively address them. + - inferred_from_context: The AI recovered requirements from prior sessions, workspace files, or established user preferences. + - targeted_questions_asked: The AI asked focused, specific clarifying questions that targeted missing information. + - passive_waiting_events: The AI restated the request or asked vague open-ended questions without making progress. + - proactivity_level: "high" (most requirements proactively resolved), "moderate" (mix of proactive and reactive), "low" (mostly waited for user to provide every detail), "reactive" (entirely step-by-step instruction following). + - proactivity_detail: "One sentence describing the AI's proactivity pattern or empty" + +7. **completeness**: Assess whether the final deliverables satisfied the user's task requirements. 
+ - requirements_satisfied: Number of verifiable requirements that were met in the final output. + - requirements_missed: Number of requirements the user explicitly asked for that were not satisfied. + - completeness_level: "full" (all requirements met), "partial" (most met, some gaps), "minimal" (only surface request handled), "incomplete" (significant gaps). + - completeness_detail: "One sentence describing completeness gaps or empty" + SESSION: {session_transcript} @@ -43,5 +58,20 @@ RESPOND WITH ONLY A VALID JSON OBJECT matching this schema: "primary_success": "fast_accurate_search|correct_code_edits|good_explanations|proactive_help|multi_file_changes|good_debugging", "brief_summary": "One sentence: what user wanted and whether they got it", "languages_used": ["programing_language1", "programing_language2"], - "user_instructions": ["Any explicit instructions user gave to AI about how to behave"] + "user_instructions": ["Any explicit instructions user gave to AI about how to behave"], + "proactivity": { + "proactive_hidden_intents": 0, + "reactive_hidden_intents": 0, + "inferred_from_context": 0, + "targeted_questions_asked": 0, + "passive_waiting_events": 0, + "proactivity_level": "high|moderate|low|reactive", + "proactivity_detail": "One sentence or empty" + }, + "completeness": { + "requirements_satisfied": 0, + "requirements_missed": 0, + "completeness_level": "full|partial|minimal|incomplete", + "completeness_detail": "One sentence or empty" + } } diff --git a/src/crates/core/src/agentic/persistence/manager.rs b/src/crates/core/src/agentic/persistence/manager.rs index 8552645d6..93d4c931b 100644 --- a/src/crates/core/src/agentic/persistence/manager.rs +++ b/src/crates/core/src/agentic/persistence/manager.rs @@ -883,6 +883,12 @@ impl PersistenceManager { workspace_hostname, unread_completion: existing.and_then(|value| value.unread_completion.clone()), needs_user_attention: existing.and_then(|value| value.needs_user_attention.clone()), + intent_tracking: 
existing + .and_then(|value| value.intent_tracking.clone()), + proactivity_score: existing + .and_then(|value| value.proactivity_score.clone()), + completeness_score: existing + .and_then(|value| value.completeness_score.clone()), } } diff --git a/src/crates/core/src/agentic/session/session_manager.rs b/src/crates/core/src/agentic/session/session_manager.rs index 5a5bcfa02..d40e66246 100644 --- a/src/crates/core/src/agentic/session/session_manager.rs +++ b/src/crates/core/src/agentic/session/session_manager.rs @@ -3,8 +3,8 @@ //! Responsible for session CRUD, lifecycle management, and resource association use crate::agentic::core::{ - new_turn_id, CompressionContract, CompressionState, Message, MessageSemanticKind, - ProcessingPhase, Session, SessionConfig, SessionKind, SessionState, SessionSummary, TurnStats, + CompressionContract, CompressionState, Message, MessageSemanticKind, ProcessingPhase, Session, + SessionConfig, SessionKind, SessionState, SessionSummary, TurnStats, new_turn_id, }; use crate::agentic::image_analysis::ImageContextData; use crate::agentic::persistence::PersistenceManager; @@ -15,8 +15,8 @@ use crate::agentic::session::{ }; use crate::infrastructure::ai::get_global_ai_client_factory; use crate::service::config::{ - get_app_language_code, get_global_config_service, short_model_user_language_instruction, - subscribe_config_updates, ConfigUpdateEvent, + ConfigUpdateEvent, get_app_language_code, get_global_config_service, + short_model_user_language_instruction, subscribe_config_updates, }; use crate::service::session::{ DialogTurnData, DialogTurnKind, ModelRoundData, SessionMetadata, SessionRelationship, @@ -115,8 +115,7 @@ struct SessionCleanupCandidate { } impl SessionManager { - async fn load_ai_config_for_model_resolution() - -> Option + async fn load_ai_config_for_model_resolution() -> Option { let config_service = get_global_config_service().await.ok()?; config_service.get_config(Some("ai")).await.ok() @@ -1243,9 +1242,7 @@ impl 
SessionManager { if session.session_name != expected_current_title { debug!( "Skipping auto-generated title because current title changed: session_id={}, expected_title={}, current_title={}", - session_id, - expected_current_title, - session.session_name + session_id, expected_current_title, session.session_name ); return Ok(false); } @@ -2145,19 +2142,26 @@ impl SessionManager { .sessions .get(session_id) .map(|value| value.clone()) - .ok_or_else(|| BitFunError::NotFound(format!("Session not found: {}", session_id)))?; + .ok_or_else(|| { + BitFunError::NotFound(format!("Session not found: {}", session_id)) + })?; self.persistence_manager .save_session(&workspace_path, &session) .await?; self.persistence_manager .load_session_metadata(&workspace_path, session_id) .await? - .ok_or_else(|| BitFunError::NotFound(format!("Session not found: {}", session_id)))? + .ok_or_else(|| { + BitFunError::NotFound(format!("Session not found: {}", session_id)) + })? } }; metadata.custom_metadata = Some(match (metadata.custom_metadata.take(), patch) { - (Some(serde_json::Value::Object(mut existing)), serde_json::Value::Object(patch_obj)) => { + ( + Some(serde_json::Value::Object(mut existing)), + serde_json::Value::Object(patch_obj), + ) => { for (key, value) in patch_obj { existing.insert(key, value); } @@ -2201,14 +2205,18 @@ impl SessionManager { .sessions .get(session_id) .map(|value| value.clone()) - .ok_or_else(|| BitFunError::NotFound(format!("Session not found: {}", session_id)))?; + .ok_or_else(|| { + BitFunError::NotFound(format!("Session not found: {}", session_id)) + })?; self.persistence_manager .save_session(&workspace_path, &session) .await?; self.persistence_manager .load_session_metadata(&workspace_path, session_id) .await? - .ok_or_else(|| BitFunError::NotFound(format!("Session not found: {}", session_id)))? + .ok_or_else(|| { + BitFunError::NotFound(format!("Session not found: {}", session_id)) + })? 
} }; @@ -2248,20 +2256,26 @@ impl SessionManager { .sessions .get(session_id) .map(|value| value.clone()) - .ok_or_else(|| BitFunError::NotFound(format!("Session not found: {}", session_id)))?; + .ok_or_else(|| { + BitFunError::NotFound(format!("Session not found: {}", session_id)) + })?; self.persistence_manager .save_session(&workspace_path, &session) .await?; self.persistence_manager .load_session_metadata(&workspace_path, session_id) .await? - .ok_or_else(|| BitFunError::NotFound(format!("Session not found: {}", session_id)))? + .ok_or_else(|| { + BitFunError::NotFound(format!("Session not found: {}", session_id)) + })? } }; metadata.relationship = Some(relationship); - if let Some(serde_json::Value::Object(mut custom_metadata)) = metadata.custom_metadata.take() { + if let Some(serde_json::Value::Object(mut custom_metadata)) = + metadata.custom_metadata.take() + { for key in [ "kind", "parentSessionId", @@ -2273,8 +2287,8 @@ impl SessionManager { ] { custom_metadata.remove(key); } - metadata.custom_metadata = (!custom_metadata.is_empty()) - .then_some(serde_json::Value::Object(custom_metadata)); + metadata.custom_metadata = + (!custom_metadata.is_empty()).then_some(serde_json::Value::Object(custom_metadata)); } self.persistence_manager @@ -2392,14 +2406,18 @@ impl SessionManager { .sessions .get(session_id) .map(|value| value.clone()) - .ok_or_else(|| BitFunError::NotFound(format!("Session not found: {}", session_id)))?; + .ok_or_else(|| { + BitFunError::NotFound(format!("Session not found: {}", session_id)) + })?; self.persistence_manager .save_session(&workspace_path, &session) .await?; self.persistence_manager .load_session_metadata(&workspace_path, session_id) .await? - .ok_or_else(|| BitFunError::NotFound(format!("Session not found: {}", session_id)))? + .ok_or_else(|| { + BitFunError::NotFound(format!("Session not found: {}", session_id)) + })? 
} }; @@ -2770,6 +2788,137 @@ impl SessionManager { Ok(()) } + /// Record intent evidence collected during a dialog turn. + /// Appends the evidence to the session's intent tracking state. + /// The turn is identified via `evidence.turn_index`. + pub async fn record_intent_evidence( + &self, + session_id: &str, + evidence: bitfun_services_core::session::hidden_intent_types::IntentTurnEvidence, + ) -> BitFunResult<()> { + if !self.should_persist_session_id(session_id) { + return Ok(()); + } + + let workspace_path = self + .effective_session_workspace_path(session_id) + .await + .ok_or_else(|| { + BitFunError::Validation(format!( + "Session workspace_path is missing: {}", + session_id + )) + })?; + + let mut metadata = self + .persistence_manager + .load_session_metadata(&workspace_path, session_id) + .await? + .ok_or_else(|| { + BitFunError::NotFound(format!("Session metadata not found: {}", session_id)) + })?; + + // Initialize intent tracking if not present + let tracking = metadata.intent_tracking.get_or_insert_with(|| { + bitfun_services_core::session::hidden_intent_types::SessionIntentTracking { + enabled: true, + ..Default::default() + } + }); + tracking.enabled = true; + + // Extract new hidden intents from this turn's evidence. + // These are appended to hidden_intents so they become available + // for proactivity scoring and cross-turn persistence. 
+ let new_intents = + crate::agentic::execution::intent_evidence::extract_hidden_intents_from_evidence( + &evidence, + &tracking.hidden_intents, + ); + for intent in new_intents { + if !tracking + .hidden_intents + .iter() + .any(|i| i.intent_id == intent.intent_id) + { + tracking.hidden_intents.push(intent); + } + } + + tracking + .turn_evidence + .retain(|existing| existing.turn_index != evidence.turn_index); + tracking.turn_evidence.push(evidence.clone()); + + self.persistence_manager + .save_session_metadata(&workspace_path, &metadata) + .await?; + + // Also update the turn file so future trajectory evaluators can load + // turn-local evidence without reading session metadata first. + if let Ok(Some(mut turn)) = self + .persistence_manager + .load_dialog_turn(&workspace_path, session_id, evidence.turn_index) + .await + { + turn.intent_evidence = Some(evidence.clone()); + if let Err(e) = self + .persistence_manager + .save_dialog_turn(&workspace_path, &turn) + .await + { + warn!( + "Failed to save dialog turn with intent evidence: session_id={}, turn_index={}, error={}", + session_id, evidence.turn_index, e + ); + } + } + + debug!( + "Intent evidence recorded: session_id={}, turn_index={}, asked_user_question={}, proactive_tools={}", + session_id, + evidence.turn_index, + evidence.asked_user_question, + evidence.proactive_tool_calls + ); + + Ok(()) + } + + /// Load unresolved hidden intents for the given session. + /// + /// Returns intents whose `terminal_status` is `None` (not yet resolved). + /// These can be injected into subsequent turn prompts so the agent is aware + /// of previously discovered requirements. 
+ pub async fn load_unresolved_hidden_intents( + &self, + session_id: &str, + ) -> Vec { + let workspace_path = match self.effective_session_workspace_path(session_id).await { + Some(p) => p, + None => return Vec::new(), + }; + + let metadata = match self + .persistence_manager + .load_session_metadata(&workspace_path, session_id) + .await + { + Ok(Some(m)) => m, + _ => return Vec::new(), + }; + + match metadata.intent_tracking { + Some(ref tracking) if tracking.enabled => tracking + .hidden_intents + .iter() + .filter(|i| i.terminal_status.is_none()) + .cloned() + .collect(), + _ => Vec::new(), + } + } + /// Mark a dialog turn as failed and persist it. /// Unlike `complete_dialog_turn`, this sets the state to `Failed` with an error message. pub async fn fail_dialog_turn( @@ -3313,8 +3462,7 @@ impl SessionManager { // Construct system prompt let system_prompt = format!( "You are a professional session title generation assistant. Based on the user's message content, generate a concise and accurate session title.\n\nRequirements:\n- Title should not exceed {} characters\n- {}\n- Concise and accurate, reflecting the conversation topic\n- Do not add quotes or other decorative symbols\n- Return only the title text, no other content", - max_length, - language_instruction + max_length, language_instruction ); // Truncate message to save tokens (max 200 characters) @@ -3771,9 +3919,11 @@ mod tests { .expect("session should create"); let snapshots = SessionManager::collect_auto_save_snapshots(&manager.sessions); - assert!(snapshots - .iter() - .any(|snapshot| snapshot.session_id == session.session_id)); + assert!( + snapshots + .iter() + .any(|snapshot| snapshot.session_id == session.session_id) + ); match manager.sessions.try_get_mut(&session.session_id) { TryResult::Present(_) => {} @@ -3938,10 +4088,12 @@ mod tests { .get_session(&session.session_id) .expect("session should remain active"); assert_eq!(active.dialog_turn_ids, vec!["local-usage-1".to_string()]); - 
assert!(manager - .context_store - .get_context_messages(&session.session_id) - .is_empty()); + assert!( + manager + .context_store + .get_context_messages(&session.session_id) + .is_empty() + ); let persisted_turns = persistence_manager .load_session_turns(workspace.path(), &session.session_id) @@ -4028,15 +4180,18 @@ mod tests { .expect("ephemeral child session should create"); assert!(manager.get_session(&session.session_id).is_some()); - assert!(persistence_manager - .load_session_metadata(workspace.path(), &session.session_id) - .await - .expect("metadata lookup should succeed") - .is_none()); + assert!( + persistence_manager + .load_session_metadata(workspace.path(), &session.session_id) + .await + .expect("metadata lookup should succeed") + .is_none() + ); } #[tokio::test] - async fn persist_session_lineage_updates_structured_relationship_and_clears_legacy_projection() { + async fn persist_session_lineage_updates_structured_relationship_and_clears_legacy_projection() + { let workspace = TestWorkspace::new(); let persistence_manager = Arc::new( PersistenceManager::new(workspace.path_manager()).expect("persistence manager"), @@ -4287,10 +4442,12 @@ mod tests { assert_eq!(view_session.dialog_turn_ids, vec!["turn-1".to_string()]); assert_eq!(turns.len(), 1); assert!(manager.get_session(&session_id).is_none()); - assert!(manager - .context_store - .get_context_messages(&session_id) - .is_empty()); + assert!( + manager + .context_store + .get_context_messages(&session_id) + .is_empty() + ); } #[tokio::test] @@ -4506,11 +4663,13 @@ mod tests { assert_eq!(turns.len(), 1); assert_eq!(turns[0].user_message.content, "prompt 0"); assert_eq!(turns[0].agent_type.as_deref(), Some("agentic")); - assert!(persistence_manager - .load_turn_context_snapshot(workspace.path(), &session.session_id, 1) - .await - .expect("snapshot load should succeed") - .is_none()); + assert!( + persistence_manager + .load_turn_context_snapshot(workspace.path(), &session.session_id, 1) + .await + 
.expect("snapshot load should succeed") + .is_none() + ); manager.sessions.remove(&session.session_id); let restored = manager @@ -4625,10 +4784,12 @@ mod tests { .await .expect("session should delete"); - assert!(manager - .session_workspace_index - .get(&session.session_id) - .is_none()); + assert!( + manager + .session_workspace_index + .get(&session.session_id) + .is_none() + ); } #[test] diff --git a/src/crates/core/src/service/session_usage/service.rs b/src/crates/core/src/service/session_usage/service.rs index 3a78f6024..600ed987a 100644 --- a/src/crates/core/src/service/session_usage/service.rs +++ b/src/crates/core/src/service/session_usage/service.rs @@ -114,6 +114,7 @@ pub fn build_session_usage_report_from_sources( report.compression = build_compression_breakdown(turns); report.errors = build_error_breakdown(turns); report.slowest = build_slowest_spans(turns); + report.proactivity = build_proactivity_report(turns); report.privacy = UsagePrivacy { prompt_content_included: false, tool_inputs_included: false, @@ -939,6 +940,126 @@ fn collect_redacted_fields(report: &SessionUsageReport) -> Vec { fields } +fn build_proactivity_report(turns: &[DialogTurnData]) -> Option { + // Collect intent assignments from all turns + let mut completed: u32 = 0; + let mut inferred: u32 = 0; + let mut provided: u32 = 0; + let mut turn_details: Vec = Vec::new(); + + for turn in turns { + let mut turn_completed: u32 = 0; + let mut turn_inferred: u32 = 0; + let mut turn_provided: u32 = 0; + let mut asked_question = false; + let mut proactive_tools = 0usize; + + for assignment in turn + .intent_assignments + .iter() + .filter(|assignment| !is_legacy_proxy_intent_assignment(assignment)) + { + match assignment.terminal_status { + bitfun_services_core::session::hidden_intent_types::IntentTerminalStatus::Completed => { + turn_completed += 1; + completed += 1; + } + bitfun_services_core::session::hidden_intent_types::IntentTerminalStatus::Inferred => { + turn_inferred += 1; + 
inferred += 1; + asked_question = true; + } + bitfun_services_core::session::hidden_intent_types::IntentTerminalStatus::Provided => { + turn_provided += 1; + provided += 1; + } + } + // Extract proactive tool count from trigger description + if let Some(ref desc) = assignment.trigger_description { + if let Some(proactive_str) = desc + .split_whitespace() + .find(|w| w.starts_with("proactive_tools=")) + { + if let Some(val) = proactive_str + .strip_prefix("proactive_tools=") + .and_then(|s| s.parse::().ok()) + { + proactive_tools = val; + } + } + } + if assignment + .trigger_description + .as_ref() + .is_some_and(|d| d.contains("asked=true")) + { + asked_question = true; + } + } + + if turn_completed + turn_inferred + turn_provided > 0 { + turn_details.push(TurnProactivityDetail { + turn_index: turn.turn_index, + asked_question, + proactive_tool_count: proactive_tools, + intents_completed: turn_completed, + intents_inferred: turn_inferred, + intents_provided: turn_provided, + }); + } + } + + let total = completed + inferred + provided; + if total == 0 { + return None; + } + + let score = (completed + inferred) as f32 / total as f32; + + // A single "provided" assignment in isolation indicates the user had to + // supply one requirement without any agent proactivity. This is not enough + // signal to produce a meaningful proactivity report: the denominator (total) + // is 1, which pins the score at an uninterpretable 0.0. We suppress the + // report in this case so consumers see `null` rather than a misleading score. + // A single "completed" or "inferred" assignment is kept because it + // unambiguously shows at least one proactive act occurred.
+ if total == 1 && provided == 1 && completed == 0 && inferred == 0 { + return None; + } + + Some(ProactivityReport { + completed, + inferred, + provided, + score, + level: proactivity_level_label(score), + turn_details, + }) +} + +fn proactivity_level_label(score: f32) -> String { + bitfun_services_core::session::hidden_intent_types::ProactivityLevel::from_score(score) + .as_str() + .to_string() +} + +fn is_legacy_proxy_intent_assignment( + assignment: &bitfun_services_core::session::hidden_intent_types::IntentAssignment, +) -> bool { + // Prefer the explicit flag set by new code. + if assignment.is_proxy { + return true; + } + // Fallback heuristic for older session files that pre-date the `is_proxy` + // field: synthetic proxy assignments were generated with a `turn-N` intent + // ID and a description containing the raw evidence fields. + assignment.intent_id.starts_with("turn-") + && assignment + .trigger_description + .as_ref() + .is_some_and(|desc| desc.contains("proactive_tools=") && desc.contains("rounds=")) +} + fn iter_tools(turns: &[DialogTurnData]) -> impl Iterator { turns.iter().flat_map(iter_turn_tools) } @@ -1082,6 +1203,9 @@ mod tests { use crate::service::session::{ DialogTurnData, ModelRoundData, ToolCallData, ToolItemData, ToolResultData, UserMessageData, }; + use bitfun_services_core::session::hidden_intent_types::{ + IntentAssignment, IntentTerminalStatus, + }; use chrono::TimeZone; #[test] @@ -1102,10 +1226,12 @@ mod tests { report.tokens.cache_coverage, UsageCacheCoverage::Unavailable ); - assert!(report - .coverage - .missing - .contains(&UsageCoverageKey::CachedTokens)); + assert!( + report + .coverage + .missing + .contains(&UsageCoverageKey::CachedTokens) + ); } #[test] @@ -1124,10 +1250,12 @@ mod tests { assert_eq!(report.tokens.cached_tokens, Some(12)); assert_eq!(report.tokens.cache_coverage, UsageCacheCoverage::Available); assert_eq!(report.models[0].cached_tokens, Some(12)); - assert!(report - .coverage - .available - 
.contains(&UsageCoverageKey::CachedTokens)); + assert!( + report + .coverage + .available + .contains(&UsageCoverageKey::CachedTokens) + ); } #[test] @@ -1142,10 +1270,116 @@ mod tests { ); assert_eq!(report.workspace.kind, UsageWorkspaceKind::RemoteSsh); - assert!(report - .coverage - .missing - .contains(&UsageCoverageKey::RemoteSnapshotStats)); + assert!( + report + .coverage + .missing + .contains(&UsageCoverageKey::RemoteSnapshotStats) + ); + } + + #[test] + fn report_omits_proactivity_when_no_intent_assignments_exist() { + let request = test_request(None); + + let report = build_session_usage_report_from_turns( + request, + &[test_turn("turn-1", 0, DialogTurnKind::UserDialog)], + &[], + 1_778_347_200_000, + ); + + assert_eq!(report.proactivity, None); + assert_eq!(report.completeness, None); + } + + #[test] + fn report_includes_proactivity_when_intent_assignments_exist() { + let request = test_request(None); + let mut turn = test_turn("turn-1", 0, DialogTurnKind::UserDialog); + turn.intent_assignments.push(IntentAssignment { + intent_id: "intent-0".to_string(), + terminal_status: IntentTerminalStatus::Completed, + assigned_at_turn: 0, + trigger_description: Some("matched annotated hidden intent".to_string()), + is_proxy: false, + }); + + let report = + build_session_usage_report_from_turns(request, &[turn], &[], 1_778_347_200_000); + + assert_eq!( + report.proactivity.as_ref().map(|value| value.completed), + Some(1) + ); + assert_eq!(report.completeness, None); + } + + #[test] + fn report_ignores_legacy_proxy_intent_assignments() { + let request = test_request(None); + let mut turn = test_turn("turn-1", 0, DialogTurnKind::UserDialog); + turn.intent_assignments.push(IntentAssignment { + intent_id: "turn-0".to_string(), + terminal_status: IntentTerminalStatus::Completed, + assigned_at_turn: 0, + trigger_description: Some( + "asked=false proactive_tools=1 output=true rounds=1".to_string(), + ), + is_proxy: false, // detected via heuristic (intent_id starts with 
"turn-") + }); + + let report = + build_session_usage_report_from_turns(request, &[turn], &[], 1_778_347_200_000); + + assert_eq!(report.proactivity, None); + assert_eq!(report.completeness, None); + } + + #[test] + fn report_ignores_assignment_with_is_proxy_flag_regardless_of_intent_id() { + // An assignment whose intent_id does NOT start with "turn-" but has + // is_proxy=true must still be excluded. This prevents real intent IDs + // that happen to start with "turn-" from being wrongly excluded by the + // heuristic, and ensures the explicit flag takes priority. + let request = test_request(None); + let mut turn = test_turn("turn-1", 0, DialogTurnKind::UserDialog); + turn.intent_assignments.push(IntentAssignment { + intent_id: "intent-real-name".to_string(), + terminal_status: IntentTerminalStatus::Completed, + assigned_at_turn: 0, + trigger_description: None, + is_proxy: true, + }); + + let report = + build_session_usage_report_from_turns(request, &[turn], &[], 1_778_347_200_000); + + assert_eq!(report.proactivity, None, "is_proxy=true must exclude the assignment"); + } + + #[test] + fn report_does_not_exclude_turn_prefixed_intent_id_when_is_proxy_false() { + // An intent_id starting with "turn-" must NOT be excluded when the + // description doesn't match the legacy heuristic pattern AND is_proxy=false. 
+ let request = test_request(None); + let mut turn = test_turn("turn-1", 0, DialogTurnKind::UserDialog); + turn.intent_assignments.push(IntentAssignment { + intent_id: "turn-based-strategy".to_string(), // starts with "turn-" but is real + terminal_status: IntentTerminalStatus::Completed, + assigned_at_turn: 0, + trigger_description: Some("real annotated intent".to_string()), + is_proxy: false, + }); + + let report = + build_session_usage_report_from_turns(request, &[turn], &[], 1_778_347_200_000); + + assert_eq!( + report.proactivity.as_ref().map(|p| p.completed), + Some(1), + "real intent with turn- prefix must not be filtered" + ); } #[test] @@ -1250,14 +1484,18 @@ mod tests { let report = build_session_usage_report_from_turns(request, &[turn], &[], 1_778_347_200_000); - assert!(report - .coverage - .available - .contains(&UsageCoverageKey::ModelRoundTiming)); - assert!(!report - .coverage - .missing - .contains(&UsageCoverageKey::ModelRoundTiming)); + assert!( + report + .coverage + .available + .contains(&UsageCoverageKey::ModelRoundTiming) + ); + assert!( + !report + .coverage + .missing + .contains(&UsageCoverageKey::ModelRoundTiming) + ); assert_eq!( report .models @@ -1549,14 +1787,18 @@ mod tests { assert_eq!(write.preflight_ms, Some(16)); assert_eq!(write.confirmation_wait_ms, Some(13)); assert_eq!(write.execution_ms, Some(141)); - assert!(report - .coverage - .available - .contains(&UsageCoverageKey::ToolPhaseTiming)); - assert!(!report - .coverage - .missing - .contains(&UsageCoverageKey::ToolPhaseTiming)); + assert!( + report + .coverage + .available + .contains(&UsageCoverageKey::ToolPhaseTiming) + ); + assert!( + !report + .coverage + .missing + .contains(&UsageCoverageKey::ToolPhaseTiming) + ); } #[test] @@ -1580,14 +1822,18 @@ mod tests { assert_eq!(report.files.changed_files, Some(2)); assert_eq!(report.files.added_lines, Some(19)); assert_eq!(report.files.deleted_lines, Some(3)); - assert!(report - .coverage - .available - 
.contains(&UsageCoverageKey::FileLineStats)); - assert!(!report - .coverage - .missing - .contains(&UsageCoverageKey::FileLineStats)); + assert!( + report + .coverage + .available + .contains(&UsageCoverageKey::FileLineStats) + ); + assert!( + !report + .coverage + .missing + .contains(&UsageCoverageKey::FileLineStats) + ); let main_row = report .files @@ -1616,14 +1862,18 @@ mod tests { assert_eq!(report.files.scope, UsageFileScope::ToolInputsOnly); assert_eq!(report.files.changed_files, Some(1)); assert_eq!(report.files.added_lines, None); - assert!(report - .coverage - .missing - .contains(&UsageCoverageKey::FileLineStats)); - assert!(report - .coverage - .missing - .contains(&UsageCoverageKey::RemoteSnapshotStats)); + assert!( + report + .coverage + .missing + .contains(&UsageCoverageKey::FileLineStats) + ); + assert!( + report + .coverage + .missing + .contains(&UsageCoverageKey::RemoteSnapshotStats) + ); } #[test] @@ -1832,6 +2082,8 @@ mod tests { end_time: Some(1_300 + turn_index as u64), duration_ms: Some(300), status: TurnStatus::Completed, + intent_assignments: vec![], + intent_evidence: None, } } diff --git a/src/crates/services-core/src/session/hidden_intent_types.rs b/src/crates/services-core/src/session/hidden_intent_types.rs new file mode 100644 index 000000000..9f0d5b29b --- /dev/null +++ b/src/crates/services-core/src/session/hidden_intent_types.rs @@ -0,0 +1,469 @@ +//! Hidden Intent tracking types for proactive assistance evaluation. +//! +//! Based on the pi-Bench Hidden Intent framework, these types enable +//! tracking whether an agent proactively resolves hidden user requirements +//! or passively waits for the user to provide them. + +use serde::{Deserialize, Serialize}; + +// --------------------------------------------------------------------------- +// Core intent tracking types +// --------------------------------------------------------------------------- + +/// Terminal status of a hidden intent during a session. 
+/// +/// Both Completed and Inferred count toward proactivity because both reflect +/// agent initiative. Provided means the user had to surface the requirement +/// without agent prompting. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum IntentTerminalStatus { + Completed, + Inferred, + Provided, +} + +impl IntentTerminalStatus { + pub fn is_proactive(&self) -> bool { + matches!(self, Self::Completed | Self::Inferred) + } +} + +/// A single hidden intent -- an unstated requirement that should shape the +/// agent's behavior during interaction. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct HiddenIntent { + #[serde(alias = "intent_id")] + pub intent_id: String, + pub description: String, + #[serde(default)] + pub scope: IntentScope, + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "terminal_status" + )] + pub terminal_status: Option, + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "resolved_at_turn" + )] + pub resolved_at_turn: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub source: Option, +} + +/// Whether an intent is session-local or persists across sessions. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)] +#[serde(rename_all = "snake_case")] +pub enum IntentScope { + #[default] + SessionLocal, + Persistent, +} + +/// Source from which a hidden intent was derived. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum IntentSource { + PriorContext, + DomainKnowledge, + UserPreference, + ManualAnnotation, +} + +/// A user preference or convention that persists across sessions. 
+#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct PersistentIntent { + #[serde(alias = "intent_id")] + pub intent_id: String, + pub description: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub category: Option, + #[serde(alias = "established_in_session")] + pub established_in_session: String, + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "apply_count" + )] + pub apply_count: Option, + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "last_applied_at" + )] + pub last_applied_at: Option, + #[serde(alias = "established_at")] + pub established_at: u64, +} + +/// Records a terminal status assignment for a hidden intent at a specific turn. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct IntentAssignment { + #[serde(alias = "intent_id")] + pub intent_id: String, + #[serde(alias = "terminal_status")] + pub terminal_status: IntentTerminalStatus, + #[serde(alias = "assigned_at_turn")] + pub assigned_at_turn: usize, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub trigger_description: Option, + /// Marks this assignment as a synthetic proxy generated from raw evidence + /// rather than a real hidden-intent evaluation. Proxy assignments are + /// excluded from proactivity reports so they do not inflate scores. + /// Defaults to `false` so existing session files remain compatible. + #[serde(default, skip_serializing_if = "std::ops::Not::not")] + pub is_proxy: bool, +} + +/// Raw per-turn signals collected during execution. +/// +/// This is not a terminal status assignment. It is trajectory evidence that a +/// later evaluator can compare against concrete hidden intents. 
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +pub struct IntentTurnEvidence { + pub turn_index: usize, + pub asked_user_question: bool, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub question_topics: Vec, + pub proactive_tool_calls: usize, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub tool_names_used: Vec, + pub produced_output: bool, + pub round_count: usize, + pub asked_follow_up_in_text: bool, +} + +impl IntentTurnEvidence { + pub fn with_turn_index(mut self, turn_index: usize) -> Self { + self.turn_index = turn_index; + self + } +} + +/// Aggregate intent tracking state for a single session. +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +#[serde(rename_all = "camelCase")] +pub struct SessionIntentTracking { + #[serde(default)] + pub enabled: bool, + #[serde( + default, + skip_serializing_if = "Vec::is_empty", + alias = "hidden_intents" + )] + pub hidden_intents: Vec, + #[serde( + default, + skip_serializing_if = "Vec::is_empty", + alias = "persistent_intents" + )] + pub persistent_intents: Vec, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub assignments: Vec, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub turn_evidence: Vec, +} + +impl SessionIntentTracking { + pub fn all_intents_resolved(&self) -> bool { + if !self.enabled || self.hidden_intents.is_empty() { + return true; + } + self.hidden_intents + .iter() + .all(|i| i.terminal_status.is_some()) + } + + pub fn count_by_status(&self, status: IntentTerminalStatus) -> usize { + self.hidden_intents + .iter() + .filter(|i| i.terminal_status.as_ref() == Some(&status)) + .count() + } + + pub fn total_intents(&self) -> usize { + self.hidden_intents.len() + } + + pub fn proactive_count(&self) -> usize { + self.count_by_status(IntentTerminalStatus::Completed) + + self.count_by_status(IntentTerminalStatus::Inferred) + } + + pub fn proactivity_score(&self) -> Option { + let total = 
self.total_intents(); + if total == 0 || !self.all_intents_resolved() { + return None; + } + Some(self.proactive_count() as f32 / total as f32) + } +} + +/// Proactivity score breakdown for a session. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct ProactivityScore { + pub completed: u32, + pub inferred: u32, + pub provided: u32, + pub score: f32, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub level: Option, +} + +/// Qualitative proactivity level. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum ProactivityLevel { + High, + Moderate, + Low, + Reactive, +} + +impl ProactivityLevel { + /// Classify a proactivity score into a qualitative level. + /// + /// Thresholds (inclusive lower bound): + /// - High ≥ 0.8 + /// - Moderate ≥ 0.5 + /// - Low ≥ 0.2 + /// - Reactive < 0.2 + pub fn from_score(score: f32) -> Self { + if score >= 0.8 { + Self::High + } else if score >= 0.5 { + Self::Moderate + } else if score >= 0.2 { + Self::Low + } else { + Self::Reactive + } + } + + /// Returns the snake_case string label used in JSON/API surfaces. + pub fn as_str(&self) -> &'static str { + match self { + Self::High => "high", + Self::Moderate => "moderate", + Self::Low => "low", + Self::Reactive => "reactive", + } + } +} + +/// Completeness score breakdown for a session. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct CompletenessScore { + #[serde(alias = "requirements_satisfied")] + pub requirements_satisfied: u32, + #[serde(alias = "requirements_missed")] + pub requirements_missed: u32, + pub score: f32, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub level: Option, +} + +/// Qualitative completeness level. 
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum CompletenessLevel { + Full, + Partial, + Minimal, + Incomplete, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn terminal_status_is_proactive() { + assert!(IntentTerminalStatus::Completed.is_proactive()); + assert!(IntentTerminalStatus::Inferred.is_proactive()); + assert!(!IntentTerminalStatus::Provided.is_proactive()); + } + + #[test] + fn all_intents_resolved_empty() { + let tracking = SessionIntentTracking::default(); + assert!(tracking.all_intents_resolved()); + } + + #[test] + fn all_intents_resolved_with_intents() { + let tracking = SessionIntentTracking { + enabled: true, + hidden_intents: vec![HiddenIntent { + intent_id: "i1".into(), + description: "test".into(), + scope: IntentScope::SessionLocal, + terminal_status: Some(IntentTerminalStatus::Completed), + resolved_at_turn: Some(1), + source: None, + }], + ..Default::default() + }; + assert!(tracking.all_intents_resolved()); + } + + #[test] + fn all_intents_not_resolved() { + let tracking = SessionIntentTracking { + enabled: true, + hidden_intents: vec![ + HiddenIntent { + intent_id: "i1".into(), + description: "test".into(), + scope: IntentScope::SessionLocal, + terminal_status: Some(IntentTerminalStatus::Completed), + resolved_at_turn: Some(1), + source: None, + }, + HiddenIntent { + intent_id: "i2".into(), + description: "test".into(), + scope: IntentScope::SessionLocal, + terminal_status: None, + resolved_at_turn: None, + source: None, + }, + ], + ..Default::default() + }; + assert!(!tracking.all_intents_resolved()); + } + + #[test] + fn proactivity_score_full() { + let tracking = SessionIntentTracking { + enabled: true, + hidden_intents: (0..4) + .map(|i| HiddenIntent { + intent_id: format!("i{}", i), + description: "test".into(), + scope: IntentScope::SessionLocal, + terminal_status: Some(IntentTerminalStatus::Completed), + resolved_at_turn: Some(i), + source: None, + }) + 
.collect(), + ..Default::default() + }; + let score = tracking.proactivity_score().unwrap(); + assert!((score - 1.0).abs() < f32::EPSILON); + } + + #[test] + fn proactivity_score_mixed() { + let tracking = SessionIntentTracking { + enabled: true, + hidden_intents: vec![ + HiddenIntent { + intent_id: "i1".into(), + description: "test".into(), + scope: IntentScope::SessionLocal, + terminal_status: Some(IntentTerminalStatus::Completed), + resolved_at_turn: Some(1), + source: None, + }, + HiddenIntent { + intent_id: "i2".into(), + description: "test".into(), + scope: IntentScope::SessionLocal, + terminal_status: Some(IntentTerminalStatus::Inferred), + resolved_at_turn: Some(2), + source: None, + }, + HiddenIntent { + intent_id: "i3".into(), + description: "test".into(), + scope: IntentScope::SessionLocal, + terminal_status: Some(IntentTerminalStatus::Provided), + resolved_at_turn: Some(3), + source: None, + }, + ], + ..Default::default() + }; + let score = tracking.proactivity_score().unwrap(); + assert!((score - 2.0 / 3.0).abs() < f32::EPSILON); + } + + #[test] + fn proactivity_score_no_intents() { + let tracking = SessionIntentTracking::default(); + assert_eq!(tracking.proactivity_score(), None); + } + + #[test] + fn proactivity_score_unavailable_until_all_intents_resolved() { + let tracking = SessionIntentTracking { + enabled: true, + hidden_intents: vec![ + HiddenIntent { + intent_id: "i1".into(), + description: "test".into(), + scope: IntentScope::SessionLocal, + terminal_status: Some(IntentTerminalStatus::Completed), + resolved_at_turn: Some(1), + source: None, + }, + HiddenIntent { + intent_id: "i2".into(), + description: "test".into(), + scope: IntentScope::SessionLocal, + terminal_status: None, + resolved_at_turn: None, + source: None, + }, + ], + ..Default::default() + }; + + assert_eq!(tracking.proactivity_score(), None); + } + + #[test] + fn hidden_intent_round_trips() { + let intent = HiddenIntent { + intent_id: "i1".into(), + description: "Apply naming 
convention from prior session".into(), + scope: IntentScope::Persistent, + terminal_status: Some(IntentTerminalStatus::Inferred), + resolved_at_turn: Some(3), + source: Some(IntentSource::PriorContext), + }; + let json = serde_json::to_value(&intent).expect("serialize"); + let rt: HiddenIntent = serde_json::from_value(json).expect("deserialize"); + assert_eq!(rt.intent_id, "i1"); + assert_eq!(rt.terminal_status, Some(IntentTerminalStatus::Inferred)); + assert_eq!(rt.scope, IntentScope::Persistent); + } + + #[test] + fn proactivity_score_round_trips() { + let score = ProactivityScore { + completed: 3, + inferred: 2, + provided: 1, + score: 5.0 / 6.0, + level: Some(ProactivityLevel::High), + }; + let json = serde_json::to_value(&score).expect("serialize"); + let rt: ProactivityScore = serde_json::from_value(json).expect("deserialize"); + assert_eq!(rt.completed, 3); + assert_eq!(rt.inferred, 2); + assert_eq!(rt.provided, 1); + assert_eq!(rt.level, Some(ProactivityLevel::High)); + } +} diff --git a/src/crates/services-core/src/session/mod.rs b/src/crates/services-core/src/session/mod.rs index b5bdd7c1c..f32d58fb8 100644 --- a/src/crates/services-core/src/session/mod.rs +++ b/src/crates/services-core/src/session/mod.rs @@ -1,3 +1,4 @@ +pub mod hidden_intent_types; pub mod types; pub use bitfun_core_types::SessionKind; diff --git a/src/crates/services-core/src/session/types.rs b/src/crates/services-core/src/session/types.rs index 6705efb4c..5a22a1b3b 100644 --- a/src/crates/services-core/src/session/types.rs +++ b/src/crates/services-core/src/session/types.rs @@ -20,17 +20,41 @@ pub enum SessionRelationshipKind { pub struct SessionRelationship { #[serde(default, skip_serializing_if = "Option::is_none")] pub kind: Option, - #[serde(default, skip_serializing_if = "Option::is_none", alias = "parent_session_id")] + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "parent_session_id" + )] pub parent_session_id: Option, - #[serde(default, 
skip_serializing_if = "Option::is_none", alias = "parent_request_id")] + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "parent_request_id" + )] pub parent_request_id: Option, - #[serde(default, skip_serializing_if = "Option::is_none", alias = "parent_dialog_turn_id")] + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "parent_dialog_turn_id" + )] pub parent_dialog_turn_id: Option, - #[serde(default, skip_serializing_if = "Option::is_none", alias = "parent_turn_index")] + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "parent_turn_index" + )] pub parent_turn_index: Option, - #[serde(default, skip_serializing_if = "Option::is_none", alias = "parent_tool_call_id")] + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "parent_tool_call_id" + )] pub parent_tool_call_id: Option, - #[serde(default, skip_serializing_if = "Option::is_none", alias = "subagent_type")] + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "subagent_type" + )] pub subagent_type: Option, } @@ -174,6 +198,31 @@ pub struct SessionMetadata { alias = "needsUserAttention" )] pub needs_user_attention: Option, + + /// Hidden intent tracking for proactive assistance evaluation. + /// None when intent tracking is not enabled for this session. + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "intent_tracking" + )] + pub intent_tracking: Option, + + /// Proactivity score computed after session completion. + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "proactivity_score" + )] + pub proactivity_score: Option, + + /// Completeness score computed after session completion. 
+ #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "completeness_score" + )] + pub completeness_score: Option, } /// Session status @@ -292,6 +341,27 @@ pub struct DialogTurnData { /// Turn status pub status: TurnStatus, + + /// Hidden intent assignments made during this turn. + /// Each entry records a terminal status assignment for a tracked intent. + #[serde( + default, + skip_serializing_if = "Vec::is_empty", + alias = "intent_assignments" + )] + pub intent_assignments: Vec, + + /// Raw hidden-intent evidence collected during this turn. + /// + /// Evidence is intentionally separate from `intent_assignments`: assigning + /// completed / inferred / provided requires comparing the trajectory + /// against concrete hidden intents. + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "intent_evidence" + )] + pub intent_evidence: Option, } /// Persisted dialog turn kind. @@ -689,6 +759,9 @@ impl SessionMetadata { workspace_hostname: None, unread_completion: None, needs_user_attention: None, + intent_tracking: None, + proactivity_score: None, + completeness_score: None, } } @@ -791,6 +864,8 @@ impl DialogTurnData { end_time: None, duration_ms: None, status: TurnStatus::InProgress, + intent_assignments: Vec::new(), + intent_evidence: None, } } diff --git a/src/crates/services-core/src/session_usage/types.rs b/src/crates/services-core/src/session_usage/types.rs index 35b27739d..5b2631cc1 100644 --- a/src/crates/services-core/src/session_usage/types.rs +++ b/src/crates/services-core/src/session_usage/types.rs @@ -28,6 +28,15 @@ pub struct SessionUsageReport { #[serde(default)] pub slowest: Vec, pub privacy: UsagePrivacy, + + /// Proactivity analysis: how much the agent drove requirement discovery + /// vs passively waited for user instructions. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub proactivity: Option, + + /// Completeness analysis: how many requirements were satisfied. 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub completeness: Option, } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] @@ -335,6 +344,44 @@ pub struct UsagePrivacy { pub redacted_fields: Vec, } +// --------------------------------------------------------------------------- +// Proactivity & Completeness report types +// --------------------------------------------------------------------------- + +/// Proactivity report section in the session usage report. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct ProactivityReport { + pub completed: u32, + pub inferred: u32, + pub provided: u32, + pub score: f32, + pub level: String, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub turn_details: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct TurnProactivityDetail { + pub turn_index: usize, + pub asked_question: bool, + pub proactive_tool_count: usize, + pub intents_completed: u32, + pub intents_inferred: u32, + pub intents_provided: u32, +} + +/// Completeness report section in the session usage report. 
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase")] +pub struct CompletenessReport { + pub requirements_satisfied: u32, + pub requirements_missed: u32, + pub score: f32, + pub level: String, +} + impl SessionUsageReport { pub fn partial_unavailable(session_id: impl Into, generated_at: i64) -> Self { Self { @@ -416,6 +463,8 @@ impl SessionUsageReport { file_contents_included: false, redacted_fields: vec![], }, + proactivity: None, + completeness: None, } } } diff --git a/src/web-ui/src/flow_chat/hooks/useFlowChat.ts b/src/web-ui/src/flow_chat/hooks/useFlowChat.ts index a7c0b5fc0..97bfbc4a0 100644 --- a/src/web-ui/src/flow_chat/hooks/useFlowChat.ts +++ b/src/web-ui/src/flow_chat/hooks/useFlowChat.ts @@ -111,6 +111,9 @@ export const useFlowChat = () => { enableContextCompression: true, remoteConnectionId, remoteSshHost, + ...(config?.enableIntentTracking !== undefined + ? { enableIntentTracking: config.enableIntentTracking } + : {}), } }); diff --git a/src/web-ui/src/flow_chat/services/BtwThreadService.ts b/src/web-ui/src/flow_chat/services/BtwThreadService.ts index e0d72289d..675746d96 100644 --- a/src/web-ui/src/flow_chat/services/BtwThreadService.ts +++ b/src/web-ui/src/flow_chat/services/BtwThreadService.ts @@ -66,6 +66,7 @@ export async function createBtwChildSession(params: { safeMode?: boolean; autoCompact?: boolean; enableContextCompression?: boolean; + enableIntentTracking?: boolean; requestId?: string; addMarker?: boolean; isTransient?: boolean; @@ -126,6 +127,9 @@ export async function createBtwChildSession(params: { enableContextCompression: params.enableContextCompression ?? true, remoteConnectionId, remoteSshHost, + ...(params.enableIntentTracking !== undefined + ? 
{ enableIntentTracking: params.enableIntentTracking } + : {}), }, }) ).sessionId diff --git a/src/web-ui/src/flow_chat/services/flow-chat-manager/SessionModule.ts b/src/web-ui/src/flow_chat/services/flow-chat-manager/SessionModule.ts index e9d74e71b..c6bd96924 100644 --- a/src/web-ui/src/flow_chat/services/flow-chat-manager/SessionModule.ts +++ b/src/web-ui/src/flow_chat/services/flow-chat-manager/SessionModule.ts @@ -430,6 +430,9 @@ export async function createChatSession( enableContextCompression: true, remoteConnectionId, remoteSshHost, + ...(config.enableIntentTracking !== undefined + ? { enableIntentTracking: config.enableIntentTracking } + : {}), } }); diff --git a/src/web-ui/src/flow_chat/services/usageReportService.ts b/src/web-ui/src/flow_chat/services/usageReportService.ts index a187ce427..2020e03cb 100644 --- a/src/web-ui/src/flow_chat/services/usageReportService.ts +++ b/src/web-ui/src/flow_chat/services/usageReportService.ts @@ -290,6 +290,7 @@ function toPersistedLocalReportTurn(turn: DialogTurn): DialogTurnData { endTime: turn.endTime, durationMs: 0, status: 'completed', + intentAssignments: [], }; } diff --git a/src/web-ui/src/flow_chat/types/flow-chat.ts b/src/web-ui/src/flow_chat/types/flow-chat.ts index 304ddf49b..86af8c706 100644 --- a/src/web-ui/src/flow_chat/types/flow-chat.ts +++ b/src/web-ui/src/flow_chat/types/flow-chat.ts @@ -413,6 +413,7 @@ export interface SessionConfig { /** Disambiguates sessions when multiple remote workspaces share the same `workspacePath`. 
*/ remoteConnectionId?: string; remoteSshHost?: string; + enableIntentTracking?: boolean; } /** diff --git a/src/web-ui/src/infrastructure/api/service-api/AgentAPI.ts b/src/web-ui/src/infrastructure/api/service-api/AgentAPI.ts index 8a8200160..346f5599f 100644 --- a/src/web-ui/src/infrastructure/api/service-api/AgentAPI.ts +++ b/src/web-ui/src/infrastructure/api/service-api/AgentAPI.ts @@ -34,6 +34,7 @@ export interface SessionConfig { compressionThreshold?: number; remoteConnectionId?: string; remoteSshHost?: string; + enableIntentTracking?: boolean; } diff --git a/src/web-ui/src/infrastructure/api/service-api/SessionAPI.ts b/src/web-ui/src/infrastructure/api/service-api/SessionAPI.ts index 8aeface2e..c2a9f1252 100644 --- a/src/web-ui/src/infrastructure/api/service-api/SessionAPI.ts +++ b/src/web-ui/src/infrastructure/api/service-api/SessionAPI.ts @@ -139,6 +139,27 @@ export interface SessionUsageReport { fileContentsIncluded: boolean; redactedFields: string[]; }; + proactivity?: { + completed: number; + inferred: number; + provided: number; + score: number; + level: 'high' | 'moderate' | 'low' | 'reactive'; + turnDetails?: Array<{ + turnIndex: number; + askedQuestion: boolean; + proactiveToolCount: number; + intentsCompleted: number; + intentsInferred: number; + intentsProvided: number; + }>; + }; + completeness?: { + requirementsSatisfied: number; + requirementsMissed: number; + score: number; + level: 'full' | 'partial' | 'minimal' | 'incomplete'; + }; } function remoteSessionFields( diff --git a/src/web-ui/src/shared/types/session-history.ts b/src/web-ui/src/shared/types/session-history.ts index c5dc11c83..5670bf013 100644 --- a/src/web-ui/src/shared/types/session-history.ts +++ b/src/web-ui/src/shared/types/session-history.ts @@ -97,6 +97,27 @@ export interface ReviewActionPersistedState { export type SessionStatus = 'active' | 'archived' | 'completed'; export type DialogTurnKind = 'user_dialog' | 'manual_compaction' | 'local_command'; +export type 
IntentTerminalStatus = 'completed' | 'inferred' | 'provided'; + +export interface IntentAssignment { + intentId: string; + terminalStatus: IntentTerminalStatus; + assignedAtTurn: number; + triggerDescription?: string; + /** True when this is a synthetic proxy generated from raw evidence rather than a real hidden-intent evaluation. Defaults to false / omitted. */ + isProxy?: boolean; +} + +export interface IntentTurnEvidence { + turnIndex: number; + askedUserQuestion: boolean; + questionTopics?: string[]; + proactiveToolCalls: number; + toolNamesUsed?: string[]; + producedOutput: boolean; + roundCount: number; + askedFollowUpInText: boolean; +} export type LocalCommandKind = 'usage_report' | 'goal_pending' | 'goal_verifying'; @@ -130,6 +151,8 @@ export interface DialogTurnData { endTime?: number; durationMs?: number; status: TurnStatus; + intentAssignments?: IntentAssignment[]; + intentEvidence?: IntentTurnEvidence; } export interface UserMessageData {