diff --git a/crates/tracevault-cli/src/commands/check.rs b/crates/tracevault-cli/src/commands/check.rs index 68aec742..5f3ea1a2 100644 --- a/crates/tracevault-cli/src/commands/check.rs +++ b/crates/tracevault-cli/src/commands/check.rs @@ -135,31 +135,38 @@ fn collect_session_data(session_dir: &Path) -> Option { pub async fn check_policies(project_root: &Path) -> Result<(), Box> { let (server_url, token) = resolve_credentials(project_root); - let server_url = match server_url { - Some(url) => url, - None => { - return Err("No server URL configured. Run 'tracevault login' first.".into()); - } - }; + let server_url = server_url + .ok_or("No server URL configured. Run `tracevault login --server-url=` to set one.")?; if token.is_none() { - return Err("Not logged in. Run 'tracevault login' to check policies.".into()); + return Err( + "Not logged in. Run `tracevault login --server-url=` to authenticate." + .into(), + ); } let org_slug = TracevaultConfig::load(project_root) .and_then(|c| c.org_slug) - .ok_or("No org_slug in config. Run 'tracevault init' first.")?; + .ok_or("No org_slug in config. Run `tracevault init` first.")?; let client = ApiClient::new(&server_url, token.as_deref()); - // Resolve repo_id by name + // Resolve repo_id by name. + // + // Connectivity errors here (auth expired, server down, network + // unreachable) propagate so the pre-push hook exits non-zero — if a + // repo is opted into TraceVault, every push must be evaluated, full + // stop. Letting pushes slip when TV is unreachable would defeat the + // point of enforcement. We attach an actionable next step to each + // error so the user (or agent) knows the recovery command without + // guessing — see `connectivity_message` below. let repo_name = git_repo_name(project_root); - let repos = client.list_repos(&org_slug).await?; + let repos = client + .list_repos(&org_slug) + .await + .map_err(|e| connectivity_message(&e.to_string()))?; let repo = repos.iter().find(|r| r.name == repo_name).ok_or_else(|| { - format!( - "Repo '{}' not found on server. Run 'tracevault sync' first.", - repo_name - ) + format!("Repo '{repo_name}' not found on server. Run `tracevault sync` first.") })?; // Collect session data from unpushed sessions @@ -200,7 +207,8 @@ pub async fn check_policies(project_root: &Path) -> Result<(), Box Result<(), Box String { + let lower = raw.to_ascii_lowercase(); + + // 401 — token rejected. An expired session is the most common cause of + // a surprise blocked push, so point straight at the refresh command. + if lower.contains("401") || lower.contains("unauthorized") { + return with_action( + raw, + "Session token may be expired. Run `tracevault login --server-url=` to refresh.", + ); + } + // 403 — authenticated but not allowed. Re-login won't help; this is an + // org-membership problem. + if lower.contains("403") || lower.contains("forbidden") { + return with_action( + raw, + "Your token lacks access to this repo's policies. Check your org membership and rerun `tracevault login`.", + ); + } + // 5xx — server-side fault. Checked before the transport keywords below + // so a `504 Gateway Timeout` reads as a server issue, not a local one. + if is_server_error(&lower) { + return with_action( + raw, + "TraceVault server returned an error. The team has likely been paged; retry shortly.", + ); + } + // Transport-level failure with no HTTP status — DNS, refused, timeout. + // ("connect" also covers "connection refused"/"connection reset".) + if lower.contains("dns") + || lower.contains("connect") + || lower.contains("timed out") + || lower.contains("timeout") + { + return with_action( + raw, + "Could not reach the TraceVault server. Check network and `server_url` in .tracevault/config.toml.", + ); + } + // Unrecognized — surface the raw error verbatim, no invented advice. + format!("Policy check could not run: {raw}.") +} + +/// Format the standard "could not run" line with an actionable next step. +fn with_action(raw: &str, action: &str) -> String { + format!("Policy check could not run: {raw}.\n → {action}") +} + +/// Heuristic for a 5xx server-side failure. Matches the textual status +/// (`internal`, `server error`) and the concrete 5xx codes so a bare +/// `503 Service Unavailable` — which carries neither phrase — is still +/// recognized. +fn is_server_error(lower: &str) -> bool { + lower.contains("internal") + || lower.contains("server error") + || ["500", "501", "502", "503", "504"] + .iter() + .any(|code| lower.contains(code)) +} + +#[cfg(test)] +mod tests { + use super::connectivity_message; + + #[test] + fn connectivity_message_suggests_login_on_401() { + let m = connectivity_message("Stream failed (401 Unauthorized): bad token"); + assert!( + m.contains("tracevault login"), + "401 errors must surface the login hint; got: {m}" + ); + } + + #[test] + fn connectivity_message_suggests_login_on_unauthorized_text() { + let m = connectivity_message("Server returned: unauthorized request"); + assert!(m.contains("tracevault login")); + } + + #[test] + fn connectivity_message_suggests_network_check_on_dns_error() { + let m = connectivity_message("error sending request for url: dns error"); + assert!( + m.to_lowercase().contains("network") || m.to_lowercase().contains("server_url"), + "DNS errors must surface a network hint; got: {m}" + ); + } + + #[test] + fn connectivity_message_suggests_network_check_on_connection_refused() { + let m = connectivity_message("connection refused"); + assert!(m.to_lowercase().contains("network")); + } + + #[test] + fn connectivity_message_falls_back_when_unrecognized() { + let m = connectivity_message("some weird new error shape we have not seen before"); + // No suggestion — but the raw error must still be in the message so + // the user can debug. + assert!(m.contains("some weird new error shape")); + assert!(!m.contains("→")); // no action arrow + } + + #[test] + fn connectivity_message_does_not_collide_on_403() { + let m = connectivity_message("403 Forbidden"); + // 403 should suggest org membership, NOT a re-login. + assert!( + m.to_lowercase().contains("membership"), + "403 should mention membership; got: {m}" + ); + } + + #[test] + fn connectivity_message_flags_500_as_server_error() { + let m = connectivity_message("Server returned 500 Internal Server Error: oops"); + assert!( + m.to_lowercase().contains("server returned an error"), + "500 should surface the server-error hint; got: {m}" + ); + } + + #[test] + fn connectivity_message_flags_503_without_internal_text() { + // `503 Service Unavailable` carries neither "internal" nor + // "server error" — the bare code must still be recognized. + let m = connectivity_message("Server returned 503 Service Unavailable: "); + assert!( + m.to_lowercase().contains("server returned an error"), + "503 should surface the server-error hint; got: {m}" + ); + } + + #[test] + fn connectivity_message_treats_504_as_server_not_network() { + // A gateway timeout is a server-side fault even though it contains + // the word "timeout"; it must not be misreported as a local network + // problem. + let m = connectivity_message("Server returned 504 Gateway Timeout: "); + assert!( + m.to_lowercase().contains("server returned an error"), + "504 should be treated as a server error; got: {m}" + ); + } +} diff --git a/crates/tracevault-server/src/auth.rs b/crates/tracevault-server/src/auth.rs index e261ee62..69ab75a1 100644 --- a/crates/tracevault-server/src/auth.rs +++ b/crates/tracevault-server/src/auth.rs @@ -68,6 +68,42 @@ pub fn sha256_hex(input: &str) -> String { hex::encode(hasher.finalize()) } +/// Single-statement sliding-session auth check, shared by both request +/// extractors (`AuthUser`, `OrgAuth`) so the behavior is defined once. +/// +/// Binds `$1` = `token_hash` and returns the matching `user_id`, or no row +/// when the token is missing or expired (the `expires_at > NOW()` filter +/// rejects expired sessions — they are never revived). +/// +/// Sliding window: as long as a user authenticates at least once per ~30 +/// days the token never expires; genuinely-dormant tokens still hit the +/// cliff and must re-login. +/// +/// Why a CTE instead of a plain `UPDATE ... RETURNING`: an unconditional +/// UPDATE would take a row lock and write a new tuple on *every* +/// authenticated request. TraceVault streams an event on every agent hook, +/// so the same token authenticates many times in quick succession (often +/// concurrently) — an always-write check serializes those requests on one +/// row lock and amplifies WAL/IO. Here the `session` CTE always reads the +/// `user_id` for the auth decision, while the `slide` CTE only writes when +/// the expiry has drifted more than a day from the 30-day target. The +/// common "already-fresh token" path therefore performs no write and takes +/// no row lock. A data-modifying CTE always runs to completion even though +/// nothing selects from it, so the slide still happens when due. +pub const SLIDING_SESSION_AUTH_SQL: &str = "\ +WITH session AS ( + SELECT user_id FROM auth_sessions + WHERE token_hash = $1 AND expires_at > NOW() +), +slide AS ( + UPDATE auth_sessions + SET expires_at = NOW() + INTERVAL '30 days' + WHERE token_hash = $1 + AND expires_at > NOW() + AND expires_at < NOW() + INTERVAL '29 days' +) +SELECT user_id FROM session"; + #[cfg(test)] mod tests { use super::*; diff --git a/crates/tracevault-server/src/extractors.rs b/crates/tracevault-server/src/extractors.rs index 4b924943..c960bdc1 100644 --- a/crates/tracevault-server/src/extractors.rs +++ b/crates/tracevault-server/src/extractors.rs @@ -32,14 +32,13 @@ impl FromRequestParts for AuthUser { let token_hash = sha256_hex(header); - // Try auth_sessions first - let session_row = sqlx::query_as::<_, (Uuid,)>( - "SELECT user_id FROM auth_sessions WHERE token_hash = $1 AND expires_at > NOW()", - ) - .bind(&token_hash) - .fetch_optional(&state.pool) - .await - .map_err(|_| (StatusCode::INTERNAL_SERVER_ERROR, "Database error"))?; + // Sliding session window — validate the token and slide its expiry + // forward when due. See `SLIDING_SESSION_AUTH_SQL` for the why. + let session_row = sqlx::query_as::<_, (Uuid,)>(crate::auth::SLIDING_SESSION_AUTH_SQL) + .bind(&token_hash) + .fetch_optional(&state.pool) + .await + .map_err(|_| (StatusCode::INTERNAL_SERVER_ERROR, "Database error"))?; if let Some((user_id,)) = session_row { return Ok(AuthUser { user_id }); @@ -120,14 +119,13 @@ impl FromRequestParts for OrgAuth { let org_id = org_row.0; - // Try session auth - let session_row = sqlx::query_as::<_, (Uuid,)>( - "SELECT user_id FROM auth_sessions WHERE token_hash = $1 AND expires_at > NOW()", - ) - .bind(&token_hash) - .fetch_optional(&state.pool) - .await - .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?; + // Try session auth — sliding window, same statement as `AuthUser`. + // See `SLIDING_SESSION_AUTH_SQL` for the why. + let session_row = sqlx::query_as::<_, (Uuid,)>(crate::auth::SLIDING_SESSION_AUTH_SQL) + .bind(&token_hash) + .fetch_optional(&state.pool) + .await + .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?; if let Some((user_id,)) = session_row { // Check membership diff --git a/crates/tracevault-server/tests/sliding_session_test.rs b/crates/tracevault-server/tests/sliding_session_test.rs new file mode 100644 index 00000000..90360ecd --- /dev/null +++ b/crates/tracevault-server/tests/sliding_session_test.rs @@ -0,0 +1,136 @@ +//! Verifies the sliding-session-window behavior used by the auth +//! extractors. On a successful auth the matching `auth_sessions` row has +//! its `expires_at` slid forward to NOW() + 30 days — but only when the +//! expiry has drifted more than a day, so a fresh token is a pure read. +//! +//! The extractors are wired into HTTP handlers and not trivially callable +//! in isolation, so these tests run the *exact* statement the extractors +//! use — `tracevault_server::auth::SLIDING_SESSION_AUTH_SQL` — against a +//! real Postgres pool. Importing the shared const (rather than copying the +//! SQL here) means the test breaks if the production query ever diverges. + +mod common; + +use chrono::{Duration, Utc}; +use tracevault_server::auth::{sha256_hex, SLIDING_SESSION_AUTH_SQL}; +use uuid::Uuid; + +async fn seed_session_with_expiry( + pool: &sqlx::PgPool, + user_id: Uuid, + token_hash: &str, + expires_at: chrono::DateTime, +) -> Uuid { + sqlx::query_scalar::<_, Uuid>( + "INSERT INTO auth_sessions (user_id, token_hash, expires_at) \ + VALUES ($1, $2, $3) RETURNING id", + ) + .bind(user_id) + .bind(token_hash) + .bind(expires_at) + .fetch_one(pool) + .await + .unwrap() +} + +/// Run the production auth statement and return the matched `user_id` +/// (or None if the token was expired / not found). +async fn run_auth(pool: &sqlx::PgPool, token_hash: &str) -> Option { + sqlx::query_as::<_, (Uuid,)>(SLIDING_SESSION_AUTH_SQL) + .bind(token_hash) + .fetch_optional(pool) + .await + .unwrap() + .map(|(id,)| id) +} + +/// Read the current `expires_at` for a token's session row. +async fn current_expiry(pool: &sqlx::PgPool, token_hash: &str) -> chrono::DateTime { + sqlx::query_scalar::<_, chrono::DateTime>( + "SELECT expires_at FROM auth_sessions WHERE token_hash = $1", + ) + .bind(token_hash) + .fetch_one(pool) + .await + .unwrap() +} + +#[sqlx::test(migrations = "./migrations")] +async fn auth_extends_expires_at_for_a_stale_session(pool: sqlx::PgPool) { + let user_id = common::seed_user(&pool).await; + let token = format!("tok_{}", Uuid::new_v4()); + let token_hash = sha256_hex(&token); + + // Seed an "almost expired" session: 2 minutes left. This is well past + // the 1-day slide threshold, so auth should both succeed and bump it. + let initial_expires = Utc::now() + Duration::minutes(2); + seed_session_with_expiry(&pool, user_id, &token_hash, initial_expires).await; + + let returned = run_auth(&pool, &token_hash) + .await + .expect("valid (not-yet-expired) session should authenticate"); + assert_eq!(returned, user_id, "auth must return the owning user_id"); + + // After the slide, expires_at should be ~30 days out — at minimum more + // than 29 days from now. We don't pin it exactly because NOW() is + // evaluated server-side and clock skew between the test host and the + // Postgres container is real. + let min_expected = Utc::now() + Duration::days(29); + assert!( + current_expiry(&pool, &token_hash).await > min_expected, + "sliding window should extend a stale session's expiry to ~30 days" + ); +} + +#[sqlx::test(migrations = "./migrations")] +async fn auth_does_not_rewrite_a_fresh_session(pool: sqlx::PgPool) { + let user_id = common::seed_user(&pool).await; + let token = format!("tok_{}", Uuid::new_v4()); + let token_hash = sha256_hex(&token); + + // Seed a freshly-bumped session: expires in 30 days. It is within the + // 1-day slide threshold, so auth must still succeed but leave the row + // untouched (the common hot-path that avoids a write + row lock). + let fresh_expires = Utc::now() + Duration::days(30); + seed_session_with_expiry(&pool, user_id, &token_hash, fresh_expires).await; + let before = current_expiry(&pool, &token_hash).await; + + let returned = run_auth(&pool, &token_hash) + .await + .expect("a fresh session should still authenticate"); + assert_eq!(returned, user_id); + + let after = current_expiry(&pool, &token_hash).await; + assert_eq!( + before, after, + "a fresh session must not be rewritten by auth (no needless write)" + ); +} + +#[sqlx::test(migrations = "./migrations")] +async fn auth_does_not_revive_an_already_expired_session(pool: sqlx::PgPool) { + let user_id = common::seed_user(&pool).await; + let token = format!("tok_{}", Uuid::new_v4()); + let token_hash = sha256_hex(&token); + + // Seed a session that expired an hour ago. + let expired_at = Utc::now() - Duration::hours(1); + seed_session_with_expiry(&pool, user_id, &token_hash, expired_at).await; + + assert!( + run_auth(&pool, &token_hash).await.is_none(), + "an already-expired session must NOT authenticate or be revived" + ); + + // Belt-and-braces: the row's expires_at must remain in the past. + assert!( + current_expiry(&pool, &token_hash).await < Utc::now(), + "row's expires_at should remain in the past after a no-op auth" + ); +} + +#[sqlx::test(migrations = "./migrations")] +async fn auth_is_a_noop_for_unknown_tokens(pool: sqlx::PgPool) { + let unknown_hash = sha256_hex("definitely-not-a-real-token"); + assert!(run_auth(&pool, &unknown_hash).await.is_none()); +}