From a02065604d439dee7f89ec913360dc337e78950a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bart=C5=82omiej=20Kuras?= <bartlomiej.kuras@virtuslab.com>
Date: Thu, 28 May 2026 12:20:20 +0200
Subject: [PATCH 1/4] feat(auth): slide auth_sessions.expires_at on every
 successful auth
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously the only path to recover from an expired `auth_sessions` row
was `tracevault login`. Tokens hard-expire at 30 days regardless of
how active the user is — daily users hit the cliff just as easily as
inactive ones, with no signal until a push gets blocked.

Replace the SELECT-only validity check in both `AuthUser` and `OrgAuth`
extractors with a single `UPDATE auth_sessions SET expires_at = NOW() +
INTERVAL '30 days' WHERE token_hash =  AND expires_at > NOW()
RETURNING user_id`. Atomic: one round-trip, no race between the
validity check and the extension.

Net effect:
* Active users (anything that talks to TV once per 30 days) never see
  a token expiry again — the window slides forward on every hit.
* Inactive users still hit the 30-day expiry cliff and must re-login,
  which is the right security tradeoff for genuinely-dormant tokens.
* No new endpoint, no client change, no breaking-change to existing
  callers.

Three integration tests against a real Postgres pool exercise the SQL
directly: extend-on-hit, no-revive-of-already-expired, no-op-on-
unknown-token.
---
 crates/tracevault-server/src/extractors.rs    |  23 +++-
 .../tests/sliding_session_test.rs             | 116 ++++++++++++++++++
 2 files changed, 135 insertions(+), 4 deletions(-)
 create mode 100644 crates/tracevault-server/tests/sliding_session_test.rs
diff --git a/crates/tracevault-server/src/extractors.rs b/crates/tracevault-server/src/extractors.rs
index 4b924943..2776c19d 100644
--- a/crates/tracevault-server/src/extractors.rs
+++ b/crates/tracevault-server/src/extractors.rs
@@ -32,9 +32,19 @@ impl FromRequestParts<AppState> for AuthUser {
 
         let token_hash = sha256_hex(header);
 
-        // Try auth_sessions first
+        // Sliding session window: on every successful auth, bump expires_at
+        // to NOW() + 30 days. As long as the user touches TV at least once
+        // per 30 days, the token never expires from their perspective.
+        // Inactive users still hit the 30-day cliff and must re-login.
+        //
+        // Using UPDATE..RETURNING means we atomically (a) verify the token
+        // is still valid (the WHERE filters expired rows out), (b) extend
+        // it, (c) read back the user_id — one round-trip, no race.
         let session_row = sqlx::query_as::<_, (Uuid,)>(
-            "SELECT user_id FROM auth_sessions WHERE token_hash = $1 AND expires_at > NOW()",
+            "UPDATE auth_sessions
+             SET expires_at = NOW() + INTERVAL '30 days'
+             WHERE token_hash = $1 AND expires_at > NOW()
+             RETURNING user_id",
         )
         .bind(&token_hash)
         .fetch_optional(&state.pool)
@@ -120,9 +130,14 @@ impl FromRequestParts<AppState> for OrgAuth {
 
         let org_id = org_row.0;
 
-        // Try session auth
+        // Try session auth — sliding window: same pattern as AuthUser, see
+        // the comment there. UPDATE..RETURNING atomically bumps expires_at
+        // and returns the user_id only when the token is still valid.
         let session_row = sqlx::query_as::<_, (Uuid,)>(
-            "SELECT user_id FROM auth_sessions WHERE token_hash = $1 AND expires_at > NOW()",
+            "UPDATE auth_sessions
+             SET expires_at = NOW() + INTERVAL '30 days'
+             WHERE token_hash = $1 AND expires_at > NOW()
+             RETURNING user_id",
         )
         .bind(&token_hash)
         .fetch_optional(&state.pool)
diff --git a/crates/tracevault-server/tests/sliding_session_test.rs b/crates/tracevault-server/tests/sliding_session_test.rs
new file mode 100644
index 00000000..cf2fc612
--- /dev/null
+++ b/crates/tracevault-server/tests/sliding_session_test.rs
@@ -0,0 +1,116 @@
+//! Verifies the sliding-session-window behavior added to the auth
+//! extractors. On every successful auth, the matching `auth_sessions` row
+//! has its `expires_at` bumped to NOW() + 30 days.
+//!
+//! The extractor is wired into HTTP handlers and not trivially callable in
+//! isolation, but the SQL pattern it uses is `UPDATE auth_sessions SET
+//! expires_at = NOW() + INTERVAL '30 days' WHERE token_hash = $1 AND
+//! expires_at > NOW() RETURNING user_id`. These tests exercise that
+//! statement directly against a real Postgres pool, which is the precise
+//! source of the behavior we want to lock in.
+
+mod common;
+
+use chrono::{Duration, Utc};
+use tracevault_server::auth::sha256_hex;
+use uuid::Uuid;
+
+async fn seed_session_with_expiry(
+    pool: &sqlx::PgPool,
+    user_id: Uuid,
+    token_hash: &str,
+    expires_at: chrono::DateTime<Utc>,
+) -> Uuid {
+    sqlx::query_scalar::<_, Uuid>(
+        "INSERT INTO auth_sessions (user_id, token_hash, expires_at) \
+         VALUES ($1, $2, $3) RETURNING id",
+    )
+    .bind(user_id)
+    .bind(token_hash)
+    .bind(expires_at)
+    .fetch_one(pool)
+    .await
+    .unwrap()
+}
+
+/// Run the sliding-window UPDATE used by the extractor and return the
+/// updated `expires_at` for the session row (or None if the row was
+/// expired / not found).
+async fn run_sliding_update(
+    pool: &sqlx::PgPool,
+    token_hash: &str,
+) -> Option<chrono::DateTime<Utc>> {
+    sqlx::query_as::<_, (chrono::DateTime<Utc>,)>(
+        "UPDATE auth_sessions
+         SET expires_at = NOW() + INTERVAL '30 days'
+         WHERE token_hash = $1 AND expires_at > NOW()
+         RETURNING expires_at",
+    )
+    .bind(token_hash)
+    .fetch_optional(pool)
+    .await
+    .unwrap()
+    .map(|(ts,)| ts)
+}
+
+#[sqlx::test(migrations = "./migrations")]
+async fn auth_extends_expires_at_on_each_hit(pool: sqlx::PgPool) {
+    let user_id = common::seed_user(&pool).await;
+    let token = format!("tok_{}", Uuid::new_v4());
+    let token_hash = sha256_hex(&token);
+
+    // Seed an "almost expired" session: 2 minutes left.
+    let initial_expires = Utc::now() + Duration::minutes(2);
+    seed_session_with_expiry(&pool, user_id, &token_hash, initial_expires).await;
+
+    let bumped = run_sliding_update(&pool, &token_hash)
+        .await
+        .expect("valid (not-yet-expired) session should be extended");
+
+    // After the bump, expires_at should be ~30 days in the future — at
+    // minimum, more than 29 days from now. We don't pin it exactly because
+    // NOW() in Postgres is evaluated server-side and clock skew between
+    // test machine and server is real (postgres in Docker, test in host).
+    let min_expected = Utc::now() + Duration::days(29);
+    assert!(
+        bumped > min_expected,
+        "sliding window should extend expires_at to ~30 days; got {bumped}"
+    );
+}
+
+#[sqlx::test(migrations = "./migrations")]
+async fn auth_does_not_extend_already_expired_session(pool: sqlx::PgPool) {
+    let user_id = common::seed_user(&pool).await;
+    let token = format!("tok_{}", Uuid::new_v4());
+    let token_hash = sha256_hex(&token);
+
+    // Seed a session that expired an hour ago.
+    let expired_at = Utc::now() - Duration::hours(1);
+    seed_session_with_expiry(&pool, user_id, &token_hash, expired_at).await;
+
+    let result = run_sliding_update(&pool, &token_hash).await;
+    assert!(
+        result.is_none(),
+        "an already-expired session must NOT be revived by a sliding-window update; got {result:?}"
+    );
+
+    // Belt-and-braces: confirm the row's expires_at is still in the past.
+    let row_expires_at = sqlx::query_scalar::<_, chrono::DateTime<Utc>>(
+        "SELECT expires_at FROM auth_sessions WHERE token_hash = $1",
+    )
+    .bind(&token_hash)
+    .fetch_one(&pool)
+    .await
+    .unwrap();
+    assert!(
+        row_expires_at < Utc::now(),
+        "row's expires_at should remain in the past after a no-op sliding update"
+    );
+}
+
+#[sqlx::test(migrations = "./migrations")]
+async fn auth_sliding_update_is_a_noop_for_unknown_tokens(pool: sqlx::PgPool) {
+    let unknown_hash = sha256_hex("definitely-not-a-real-token");
+    let result = run_sliding_update(&pool, &unknown_hash).await;
+    assert!(result.is_none());
+}

From effdbe48ad6a23d6fe059061976af4c2903c3970 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bart=C5=82omiej=20Kuras?= <bartlomiej.kuras@virtuslab.com>
Date: Thu, 28 May 2026 12:20:31 +0200
Subject: [PATCH 2/4] fix(cli): actionable error messages on tracevault check
 failure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The pre-push hook runs `tracevault check` and exits 1 on any failure.
Previously that 1 came with an opaque message — typically
"Stream failed (500 Internal Server Error)" or "Server returned 401:
Invalid or expired token". Both an agent and a human reader were left
guessing what to do.

Adds `connectivity_message()` which sniffs the raw error string and
appends a specific next step:

* 401 / Unauthorized: "Session token may be expired. Run
  `tracevault login --server-url=<server_url>` to refresh."
* 403 / Forbidden: org-membership hint (not a re-login).
* DNS / connect / timeout / connection refused: network + server_url
  hint.
* 5xx Server Error / Internal: "team has been paged, retry shortly."
* Unknown shapes: pass through the raw error, no fake advice.

All connectivity-class errors still propagate as Err so the pre-push
hook exits non-zero — a TV-tracked repo MUST be evaluated by TV on
every push, that contract doesn't change. The fix here is purely the
quality of the message the user/agent reads after the block.

Six unit tests pin the message shapes for the common error patterns
the api_client surfaces today.
---
 crates/tracevault-cli/src/commands/check.rs | 122 +++++++++++++++++---
 1 file changed, 107 insertions(+), 15 deletions(-)

diff --git a/crates/tracevault-cli/src/commands/check.rs b/crates/tracevault-cli/src/commands/check.rs
index 68aec742..b9b858a2 100644
--- a/crates/tracevault-cli/src/commands/check.rs
+++ b/crates/tracevault-cli/src/commands/check.rs
@@ -135,31 +135,38 @@ fn collect_session_data(session_dir: &Path) -> Option<SessionCheckData> {
 pub async fn check_policies(project_root: &Path) -> Result<(), Box<dyn std::error::Error>> {
     let (server_url, token) = resolve_credentials(project_root);
 
-    let server_url = match server_url {
-        Some(url) => url,
-        None => {
-            return Err("No server URL configured. Run 'tracevault login' first.".into());
-        }
-    };
+    let server_url = server_url
+        .ok_or("No server URL configured. Run `tracevault login --server-url=<url>` to set one.")?;
 
     if token.is_none() {
-        return Err("Not logged in. Run 'tracevault login' to check policies.".into());
+        return Err(
+            "Not logged in. Run `tracevault login --server-url=<server_url>` to authenticate."
+                .into(),
+        );
     }
 
     let org_slug = TracevaultConfig::load(project_root)
         .and_then(|c| c.org_slug)
-        .ok_or("No org_slug in config. Run 'tracevault init' first.")?;
+        .ok_or("No org_slug in config. Run `tracevault init` first.")?;
 
     let client = ApiClient::new(&server_url, token.as_deref());
 
-    // Resolve repo_id by name
+    // Resolve repo_id by name.
+    //
+    // Connectivity errors here (auth expired, server down, network
+    // unreachable) propagate so the pre-push hook exits non-zero — if a
+    // repo is opted into TraceVault, every push must be evaluated, full
+    // stop. Letting pushes slip when TV is unreachable would defeat the
+    // point of enforcement. We attach an actionable next step to each
+    // error so the user (or agent) knows the recovery command without
+    // guessing — see `connectivity_message` below.
     let repo_name = git_repo_name(project_root);
-    let repos = client.list_repos(&org_slug).await?;
+    let repos = client
+        .list_repos(&org_slug)
+        .await
+        .map_err(|e| connectivity_message(&e.to_string()))?;
     let repo = repos.iter().find(|r| r.name == repo_name).ok_or_else(|| {
-        format!(
-            "Repo '{}' not found on server. Run 'tracevault sync' first.",
-            repo_name
-        )
+        format!("Repo '{repo_name}' not found on server. Run `tracevault sync` first.")
     })?;
 
     // Collect session data from unpushed sessions
@@ -200,7 +207,8 @@ pub async fn check_policies(project_root: &Path) -> Result<(), Box<dyn std::erro
                 commit_sha,
             },
         )
-        .await?;
+        .await
+        .map_err(|e| connectivity_message(&e.to_string()))?;
 
     // Print results
     for r in &result.results {
@@ -227,3 +235,87 @@ pub async fn check_policies(project_root: &Path) -> Result<(), Box<dyn std::erro
 
     Ok(())
 }
+
+/// Wrap an opaque API-client error string with the most useful next step.
+/// Today the api_client surfaces errors as `"Stream failed (401 ...)"` or
+/// `"Server returned 500: ..."` strings; we sniff for the common shapes
+/// and surface a one-line action.
+fn connectivity_message(raw: &str) -> String {
+    let lower = raw.to_ascii_lowercase();
+    let action = if lower.contains("401") || lower.contains("unauthorized") {
+        Some("Session token may be expired. Run `tracevault login --server-url=<server_url>` to refresh.")
+    } else if lower.contains("403") || lower.contains("forbidden") {
+        Some("Your token lacks access to this repo's policies. Check your org membership and rerun `tracevault login`.")
+    } else if lower.contains("dns")
+        || lower.contains("connect")
+        || lower.contains("timed out")
+        || lower.contains("timeout")
+        || lower.contains("connection refused")
+    {
+        Some("Could not reach the TraceVault server. Check network and `server_url` in .tracevault/config.toml.")
+    } else if lower.contains("5") && (lower.contains("server error") || lower.contains("internal"))
+    {
+        Some("TraceVault server returned an error. The team has likely been paged; retry shortly.")
+    } else {
+        None
+    };
+
+    match action {
+        Some(a) => format!("Policy check could not run: {raw}.\n  → {a}"),
+        None => format!("Policy check could not run: {raw}."),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::connectivity_message;
+
+    #[test]
+    fn connectivity_message_suggests_login_on_401() {
+        let m = connectivity_message("Stream failed (401 Unauthorized): bad token");
+        assert!(
+            m.contains("tracevault login"),
+            "401 errors must surface the login hint; got: {m}"
+        );
+    }
+
+    #[test]
+    fn connectivity_message_suggests_login_on_unauthorized_text() {
+        let m = connectivity_message("Server returned: unauthorized request");
+        assert!(m.contains("tracevault login"));
+    }
+
+    #[test]
+    fn connectivity_message_suggests_network_check_on_dns_error() {
+        let m = connectivity_message("error sending request for url: dns error");
+        assert!(
+            m.to_lowercase().contains("network") || m.to_lowercase().contains("server_url"),
+            "DNS errors must surface a network hint; got: {m}"
+        );
+    }
+
+    #[test]
+    fn connectivity_message_suggests_network_check_on_connection_refused() {
+        let m = connectivity_message("connection refused");
+        assert!(m.to_lowercase().contains("network"));
+    }
+
+    #[test]
+    fn connectivity_message_falls_back_when_unrecognized() {
+        let m = connectivity_message("some weird new error shape we have not seen before");
+        // No suggestion — but the raw error must still be in the message so
+        // the user can debug.
+        assert!(m.contains("some weird new error shape"));
+        assert!(!m.contains("→")); // no action arrow
+    }
+
+    #[test]
+    fn connectivity_message_does_not_collide_on_403() {
+        let m = connectivity_message("403 Forbidden");
+        // 403 should suggest org membership, NOT a re-login.
+        assert!(
+            m.to_lowercase().contains("membership"),
+            "403 should mention membership; got: {m}"
+        );
+    }
+}

From 9955798f40c41159ab5bedc9bf43c9749bff7253 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bart=C5=82omiej=20Kuras?= <bartlomiej.kuras@virtuslab.com>
Date: Mon, 1 Jun 2026 07:51:00 +0200
Subject: [PATCH 3/4] refactor(auth): slide session expiry only when stale,
 dedupe the query
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The sliding-session check ran an unconditional `UPDATE ... RETURNING` on
every authenticated request. TraceVault streams an event on every agent
hook, so the same token authenticates many times in quick succession
(often concurrently) — an always-write check serializes those requests on
a single row lock and amplifies WAL/IO. This is the cost the Aikido review
flagged on extractors.rs.

Replace it with a CTE that always reads user_id for the auth decision but
only slides expires_at forward once it has drifted more than a day from
the 30-day target. The common already-fresh-token path now performs no
write and takes no row lock; user-facing behavior is unchanged (active
tokens still never expire, expired ones are never revived).

Extract the statement to `auth::SLIDING_SESSION_AUTH_SQL` so both
extractors share one definition and the integration test exercises the
real query instead of a hand-copied duplicate. Add a test pinning the
no-rewrite-when-fresh path.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 crates/tracevault-server/src/auth.rs          |  36 ++++++
 crates/tracevault-server/src/extractors.rs    |  45 ++-----
 .../tests/sliding_session_test.rs             | 122 ++++++++++--------
 3 files changed, 121 insertions(+), 82 deletions(-)

diff --git a/crates/tracevault-server/src/auth.rs b/crates/tracevault-server/src/auth.rs
index e261ee62..69ab75a1 100644
--- a/crates/tracevault-server/src/auth.rs
+++ b/crates/tracevault-server/src/auth.rs
@@ -68,6 +68,42 @@ pub fn sha256_hex(input: &str) -> String {
     hex::encode(hasher.finalize())
 }
 
+/// Single-statement sliding-session auth check, shared by both request
+/// extractors (`AuthUser`, `OrgAuth`) so the behavior is defined once.
+///
+/// Binds `$1` = `token_hash` and returns the matching `user_id`, or no row
+/// when the token is missing or expired (the `expires_at > NOW()` filter
+/// rejects expired sessions — they are never revived).
+///
+/// Sliding window: as long as a user authenticates at least once per ~30
+/// days the token never expires; genuinely-dormant tokens still hit the
+/// cliff and must re-login.
+///
+/// Why a CTE instead of a plain `UPDATE ... RETURNING`: an unconditional
+/// UPDATE would take a row lock and write a new tuple on *every*
+/// authenticated request. TraceVault streams an event on every agent hook,
+/// so the same token authenticates many times in quick succession (often
+/// concurrently) — an always-write check serializes those requests on one
+/// row lock and amplifies WAL/IO. Here the `session` CTE always reads the
+/// `user_id` for the auth decision, while the `slide` CTE only writes when
+/// the expiry has drifted more than a day from the 30-day target. The
+/// common "already-fresh token" path therefore performs no write and takes
+/// no row lock. A data-modifying CTE always runs to completion even though
+/// nothing selects from it, so the slide still happens when due.
+pub const SLIDING_SESSION_AUTH_SQL: &str = "\
+WITH session AS (
+    SELECT user_id FROM auth_sessions
+    WHERE token_hash = $1 AND expires_at > NOW()
+),
+slide AS (
+    UPDATE auth_sessions
+    SET expires_at = NOW() + INTERVAL '30 days'
+    WHERE token_hash = $1
+      AND expires_at > NOW()
+      AND expires_at < NOW() + INTERVAL '29 days'
+)
+SELECT user_id FROM session";
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/crates/tracevault-server/src/extractors.rs b/crates/tracevault-server/src/extractors.rs
index 2776c19d..c960bdc1 100644
--- a/crates/tracevault-server/src/extractors.rs
+++ b/crates/tracevault-server/src/extractors.rs
@@ -32,24 +32,13 @@ impl FromRequestParts<AppState> for AuthUser {
 
         let token_hash = sha256_hex(header);
 
-        // Sliding session window: on every successful auth, bump expires_at
-        // to NOW() + 30 days. As long as the user touches TV at least once
-        // per 30 days, the token never expires from their perspective.
-        // Inactive users still hit the 30-day cliff and must re-login.
-        //
-        // Using UPDATE..RETURNING means we atomically (a) verify the token
-        // is still valid (the WHERE filters expired rows out), (b) extend
-        // it, (c) read back the user_id — one round-trip, no race.
-        let session_row = sqlx::query_as::<_, (Uuid,)>(
-            "UPDATE auth_sessions
-             SET expires_at = NOW() + INTERVAL '30 days'
-             WHERE token_hash = $1 AND expires_at > NOW()
-             RETURNING user_id",
-        )
-        .bind(&token_hash)
-        .fetch_optional(&state.pool)
-        .await
-        .map_err(|_| (StatusCode::INTERNAL_SERVER_ERROR, "Database error"))?;
+        // Sliding session window — validate the token and slide its expiry
+        // forward when due. See `SLIDING_SESSION_AUTH_SQL` for the why.
+        let session_row = sqlx::query_as::<_, (Uuid,)>(crate::auth::SLIDING_SESSION_AUTH_SQL)
+            .bind(&token_hash)
+            .fetch_optional(&state.pool)
+            .await
+            .map_err(|_| (StatusCode::INTERNAL_SERVER_ERROR, "Database error"))?;
 
         if let Some((user_id,)) = session_row {
             return Ok(AuthUser { user_id });
@@ -130,19 +119,13 @@ impl FromRequestParts<AppState> for OrgAuth {
 
         let org_id = org_row.0;
 
-        // Try session auth — sliding window: same pattern as AuthUser, see
-        // the comment there. UPDATE..RETURNING atomically bumps expires_at
-        // and returns the user_id only when the token is still valid.
-        let session_row = sqlx::query_as::<_, (Uuid,)>(
-            "UPDATE auth_sessions
-             SET expires_at = NOW() + INTERVAL '30 days'
-             WHERE token_hash = $1 AND expires_at > NOW()
-             RETURNING user_id",
-        )
-        .bind(&token_hash)
-        .fetch_optional(&state.pool)
-        .await
-        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
+        // Try session auth — sliding window, same statement as `AuthUser`.
+        // See `SLIDING_SESSION_AUTH_SQL` for the why.
+        let session_row = sqlx::query_as::<_, (Uuid,)>(crate::auth::SLIDING_SESSION_AUTH_SQL)
+            .bind(&token_hash)
+            .fetch_optional(&state.pool)
+            .await
+            .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
 
         if let Some((user_id,)) = session_row {
             // Check membership
diff --git a/crates/tracevault-server/tests/sliding_session_test.rs b/crates/tracevault-server/tests/sliding_session_test.rs
index cf2fc612..90360ecd 100644
--- a/crates/tracevault-server/tests/sliding_session_test.rs
+++ b/crates/tracevault-server/tests/sliding_session_test.rs
@@ -1,18 +1,18 @@
-//! Verifies the sliding-session-window behavior added to the auth
-//! extractors. On every successful auth, the matching `auth_sessions` row
-//! has its `expires_at` bumped to NOW() + 30 days.
+//! Verifies the sliding-session-window behavior used by the auth
+//! extractors. On a successful auth the matching `auth_sessions` row has
+//! its `expires_at` slid forward to NOW() + 30 days — but only when the
+//! expiry has drifted more than a day, so a fresh token is a pure read.
 //!
-//! The extractor is wired into HTTP handlers and not trivially callable in
-//! isolation, but the SQL pattern it uses is `UPDATE auth_sessions SET
-//! expires_at = NOW() + INTERVAL '30 days' WHERE token_hash = $1 AND
-//! expires_at > NOW() RETURNING user_id`. These tests exercise that
-//! statement directly against a real Postgres pool, which is the precise
-//! source of the behavior we want to lock in.
+//! The extractors are wired into HTTP handlers and not trivially callable
+//! in isolation, so these tests run the *exact* statement the extractors
+//! use — `tracevault_server::auth::SLIDING_SESSION_AUTH_SQL` — against a
+//! real Postgres pool. Importing the shared const (rather than copying the
+//! SQL here) means the test breaks if the production query ever diverges.
 
 mod common;
 
 use chrono::{Duration, Utc};
-use tracevault_server::auth::sha256_hex;
+use tracevault_server::auth::{sha256_hex, SLIDING_SESSION_AUTH_SQL};
 use uuid::Uuid;
 
 async fn seed_session_with_expiry(
@@ -33,53 +33,82 @@ async fn seed_session_with_expiry(
     .unwrap()
 }
 
-/// Run the sliding-window UPDATE used by the extractor and return the
-/// updated `expires_at` for the session row (or None if the row was
-/// expired / not found).
-async fn run_sliding_update(
-    pool: &sqlx::PgPool,
-    token_hash: &str,
-) -> Option<chrono::DateTime<Utc>> {
-    sqlx::query_as::<_, (chrono::DateTime<Utc>,)>(
-        "UPDATE auth_sessions
-         SET expires_at = NOW() + INTERVAL '30 days'
-         WHERE token_hash = $1 AND expires_at > NOW()
-         RETURNING expires_at",
+/// Run the production auth statement and return the matched `user_id`
+/// (or None if the token was expired / not found).
+async fn run_auth(pool: &sqlx::PgPool, token_hash: &str) -> Option<Uuid> {
+    sqlx::query_as::<_, (Uuid,)>(SLIDING_SESSION_AUTH_SQL)
+        .bind(token_hash)
+        .fetch_optional(pool)
+        .await
+        .unwrap()
+        .map(|(id,)| id)
+}
+
+/// Read the current `expires_at` for a token's session row.
+async fn current_expiry(pool: &sqlx::PgPool, token_hash: &str) -> chrono::DateTime<Utc> {
+    sqlx::query_scalar::<_, chrono::DateTime<Utc>>(
+        "SELECT expires_at FROM auth_sessions WHERE token_hash = $1",
     )
     .bind(token_hash)
-    .fetch_optional(pool)
+    .fetch_one(pool)
     .await
     .unwrap()
-    .map(|(ts,)| ts)
 }
 
 #[sqlx::test(migrations = "./migrations")]
-async fn auth_extends_expires_at_on_each_hit(pool: sqlx::PgPool) {
+async fn auth_extends_expires_at_for_a_stale_session(pool: sqlx::PgPool) {
     let user_id = common::seed_user(&pool).await;
     let token = format!("tok_{}", Uuid::new_v4());
     let token_hash = sha256_hex(&token);
 
-    // Seed an "almost expired" session: 2 minutes left.
+    // Seed an "almost expired" session: 2 minutes left. This is well past
+    // the 1-day slide threshold, so auth should both succeed and bump it.
     let initial_expires = Utc::now() + Duration::minutes(2);
     seed_session_with_expiry(&pool, user_id, &token_hash, initial_expires).await;
 
-    let bumped = run_sliding_update(&pool, &token_hash)
+    let returned = run_auth(&pool, &token_hash)
         .await
-        .expect("valid (not-yet-expired) session should be extended");
+        .expect("valid (not-yet-expired) session should authenticate");
+    assert_eq!(returned, user_id, "auth must return the owning user_id");
 
-    // After the bump, expires_at should be ~30 days in the future — at
-    // minimum, more than 29 days from now. We don't pin it exactly because
-    // NOW() in Postgres is evaluated server-side and clock skew between
-    // test machine and server is real (postgres in Docker, test in host).
+    // After the slide, expires_at should be ~30 days out — at minimum more
+    // than 29 days from now. We don't pin it exactly because NOW() is
+    // evaluated server-side and clock skew between the test host and the
+    // Postgres container is real.
     let min_expected = Utc::now() + Duration::days(29);
     assert!(
-        bumped > min_expected,
-        "sliding window should extend expires_at to ~30 days; got {bumped}"
+        current_expiry(&pool, &token_hash).await > min_expected,
+        "sliding window should extend a stale session's expiry to ~30 days"
     );
 }
 
 #[sqlx::test(migrations = "./migrations")]
-async fn auth_does_not_extend_already_expired_session(pool: sqlx::PgPool) {
+async fn auth_does_not_rewrite_a_fresh_session(pool: sqlx::PgPool) {
+    let user_id = common::seed_user(&pool).await;
+    let token = format!("tok_{}", Uuid::new_v4());
+    let token_hash = sha256_hex(&token);
+
+    // Seed a freshly-bumped session: expires in 30 days. It is within the
+    // 1-day slide threshold, so auth must still succeed but leave the row
+    // untouched (the common hot-path that avoids a write + row lock).
+    let fresh_expires = Utc::now() + Duration::days(30);
+    seed_session_with_expiry(&pool, user_id, &token_hash, fresh_expires).await;
+    let before = current_expiry(&pool, &token_hash).await;
+
+    let returned = run_auth(&pool, &token_hash)
+        .await
+        .expect("a fresh session should still authenticate");
+    assert_eq!(returned, user_id);
+
+    let after = current_expiry(&pool, &token_hash).await;
+    assert_eq!(
+        before, after,
+        "a fresh session must not be rewritten by auth (no needless write)"
+    );
+}
+
+#[sqlx::test(migrations = "./migrations")]
+async fn auth_does_not_revive_an_already_expired_session(pool: sqlx::PgPool) {
     let user_id = common::seed_user(&pool).await;
     let token = format!("tok_{}", Uuid::new_v4());
     let token_hash = sha256_hex(&token);
@@ -88,29 +117,20 @@ async fn auth_does_not_extend_already_expired_session(pool: sqlx::PgPool) {
     let expired_at = Utc::now() - Duration::hours(1);
     seed_session_with_expiry(&pool, user_id, &token_hash, expired_at).await;
 
-    let result = run_sliding_update(&pool, &token_hash).await;
     assert!(
-        result.is_none(),
-        "an already-expired session must NOT be revived by a sliding-window update; got {result:?}"
+        run_auth(&pool, &token_hash).await.is_none(),
+        "an already-expired session must NOT authenticate or be revived"
     );
 
-    // Belt-and-braces: confirm the row's expires_at is still in the past.
-    let row_expires_at = sqlx::query_scalar::<_, chrono::DateTime<Utc>>(
-        "SELECT expires_at FROM auth_sessions WHERE token_hash = $1",
-    )
-    .bind(&token_hash)
-    .fetch_one(&pool)
-    .await
-    .unwrap();
+    // Belt-and-braces: the row's expires_at must remain in the past.
     assert!(
-        row_expires_at < Utc::now(),
-        "row's expires_at should remain in the past after a no-op sliding update"
+        current_expiry(&pool, &token_hash).await < Utc::now(),
+        "row's expires_at should remain in the past after a no-op auth"
     );
 }
 
 #[sqlx::test(migrations = "./migrations")]
-async fn auth_sliding_update_is_a_noop_for_unknown_tokens(pool: sqlx::PgPool) {
+async fn auth_is_a_noop_for_unknown_tokens(pool: sqlx::PgPool) {
     let unknown_hash = sha256_hex("definitely-not-a-real-token");
-    let result = run_sliding_update(&pool, &unknown_hash).await;
-    assert!(result.is_none());
+    assert!(run_auth(&pool, &unknown_hash).await.is_none());
 }

From be9dea1f9c39759016587731ab885f2783a533f1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bart=C5=82omiej=20Kuras?= <bartlomiej.kuras@virtuslab.com>
Date: Mon, 1 Jun 2026 07:51:00 +0200
Subject: [PATCH 4/4] refactor(cli): flatten connectivity_message, recognize
 bare 5xx codes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use early returns instead of an intermediate Option + match (the second
Aikido note). Replace the near-meaningless `contains("5")` heuristic with
explicit 5xx status-code matching so `503 Service Unavailable` and `504
Gateway Timeout` — which carry neither "internal" nor "server error" text
— are classified as server faults instead of falling through with no
advice. Check 5xx before the transport keywords so a gateway timeout is
not misreported as a local network problem.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 crates/tracevault-cli/src/commands/check.rs | 104 ++++++++++++++++----
 1 file changed, 85 insertions(+), 19 deletions(-)

diff --git a/crates/tracevault-cli/src/commands/check.rs b/crates/tracevault-cli/src/commands/check.rs
index b9b858a2..5f3ea1a2 100644
--- a/crates/tracevault-cli/src/commands/check.rs
+++ b/crates/tracevault-cli/src/commands/check.rs
@@ -238,32 +238,66 @@ pub async fn check_policies(project_root: &Path) -> Result<(), Box<dyn std::erro
 
 /// Wrap an opaque API-client error string with the most useful next step.
 /// Today the api_client surfaces errors as `"Stream failed (401 ...)"` or
-/// `"Server returned 500: ..."` strings; we sniff for the common shapes
-/// and surface a one-line action.
+/// `"Server returned 500 Internal Server Error: ..."` strings; we sniff for
+/// the common shapes and surface a one-line action.
 fn connectivity_message(raw: &str) -> String {
     let lower = raw.to_ascii_lowercase();
-    let action = if lower.contains("401") || lower.contains("unauthorized") {
-        Some("Session token may be expired. Run `tracevault login --server-url=<server_url>` to refresh.")
-    } else if lower.contains("403") || lower.contains("forbidden") {
-        Some("Your token lacks access to this repo's policies. Check your org membership and rerun `tracevault login`.")
-    } else if lower.contains("dns")
+
+    // 401 — token rejected. An expired session is the most common cause of
+    // a surprise blocked push, so point straight at the refresh command.
+    if lower.contains("401") || lower.contains("unauthorized") {
+        return with_action(
+            raw,
+            "Session token may be expired. Run `tracevault login --server-url=<server_url>` to refresh.",
+        );
+    }
+    // 403 — authenticated but not allowed. Re-login won't help; this is an
+    // org-membership problem.
+    if lower.contains("403") || lower.contains("forbidden") {
+        return with_action(
+            raw,
+            "Your token lacks access to this repo's policies. Check your org membership and rerun `tracevault login`.",
+        );
+    }
+    // 5xx — server-side fault. Checked before the transport keywords below
+    // so a `504 Gateway Timeout` reads as a server issue, not a local one.
+    if is_server_error(&lower) {
+        return with_action(
+            raw,
+            "TraceVault server returned an error. The team has likely been paged; retry shortly.",
+        );
+    }
+    // Transport-level failure with no HTTP status — DNS, refused, timeout.
+    // ("connect" also covers "connection refused"/"connection reset".)
+    if lower.contains("dns")
         || lower.contains("connect")
         || lower.contains("timed out")
         || lower.contains("timeout")
-        || lower.contains("connection refused")
-    {
-        Some("Could not reach the TraceVault server. Check network and `server_url` in .tracevault/config.toml.")
-    } else if lower.contains("5") && (lower.contains("server error") || lower.contains("internal"))
     {
-        Some("TraceVault server returned an error. The team has likely been paged; retry shortly.")
-    } else {
-        None
-    };
-
-    match action {
-        Some(a) => format!("Policy check could not run: {raw}.\n  → {a}"),
-        None => format!("Policy check could not run: {raw}."),
+        return with_action(
+            raw,
+            "Could not reach the TraceVault server. Check network and `server_url` in .tracevault/config.toml.",
+        );
     }
+    // Unrecognized — surface the raw error verbatim, no invented advice.
+    format!("Policy check could not run: {raw}.")
+}
+
+/// Format the standard "could not run" line with an actionable next step.
+fn with_action(raw: &str, action: &str) -> String {
+    format!("Policy check could not run: {raw}.\n  → {action}")
+}
+
+/// Heuristic for a 5xx server-side failure. Matches the textual status
+/// (`internal`, `server error`) and the concrete 5xx codes so a bare
+/// `503 Service Unavailable` — which carries neither phrase — is still
+/// recognized.
+fn is_server_error(lower: &str) -> bool {
+    lower.contains("internal")
+        || lower.contains("server error")
+        || ["500", "501", "502", "503", "504"]
+            .iter()
+            .any(|code| lower.contains(code))
 }
 
 #[cfg(test)]
@@ -318,4 +352,36 @@ mod tests {
             "403 should mention membership; got: {m}"
         );
     }
+
+    #[test]
+    fn connectivity_message_flags_500_as_server_error() {
+        let m = connectivity_message("Server returned 500 Internal Server Error: oops");
+        assert!(
+            m.to_lowercase().contains("server returned an error"),
+            "500 should surface the server-error hint; got: {m}"
+        );
+    }
+
+    #[test]
+    fn connectivity_message_flags_503_without_internal_text() {
+        // `503 Service Unavailable` carries neither "internal" nor
+        // "server error" — the bare code must still be recognized.
+        let m = connectivity_message("Server returned 503 Service Unavailable: ");
+        assert!(
+            m.to_lowercase().contains("server returned an error"),
+            "503 should surface the server-error hint; got: {m}"
+        );
+    }
+
+    #[test]
+    fn connectivity_message_treats_504_as_server_not_network() {
+        // A gateway timeout is a server-side fault even though it contains
+        // the word "timeout"; it must not be misreported as a local network
+        // problem.
+        let m = connectivity_message("Server returned 504 Gateway Timeout: ");
+        assert!(
+            m.to_lowercase().contains("server returned an error"),
+            "504 should be treated as a server error; got: {m}"
+        );
+    }
 }