From 1b6bcd1418b7b63fe3382010e0d3c4f62977ac7f Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Fri, 8 May 2026 15:43:20 -0500 Subject: [PATCH 01/19] ci(pr): classify PR test failures as new vs. known and post sticky comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cross-reference each PR test failure against the target branch's nightly failure history and post a single sticky comment that splits failures into NEW (introduced by this PR) and KNOWN (recurring on nightly, known flaky on nightly, or flaked in this run via pytest-rerunfailures). - Add `--mode pr` to nightly_report.py: read history without writing back, annotate each failure with `pr_classification`. - Branch nightly_report_helper.sh on RAPIDS_BUILD_TYPE so PR runs read the target branch's history and write per-matrix summaries to a PR-scoped S3 prefix (`ci_test_reports/pr/run-{run_id}/`). - Extract shared aggregator helpers into aggregate_common.py (download/load/aggregate/escape) so the PR aggregator can reuse them without behavioural drift on nightly. - Add aggregate_pr.py to build the Markdown comment body and ci/pr_summary.sh to post or update the sticky comment via the GitHub API. - Wire a new `pr-test-summary` job into pr.yaml: gated on `if: always()`, permissions `pull-requests: write`, runs after every PR test job. Not added to pr-builder needs — the comment is informational and must not block merging. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/pr.yaml | 54 ++++ ci/pr_summary.sh | 145 +++++++++++ ci/utils/aggregate_common.py | 197 ++++++++++++++ ci/utils/aggregate_nightly.py | 179 +------------ ci/utils/aggregate_pr.py | 416 ++++++++++++++++++++++++++++++ ci/utils/nightly_report.py | 177 +++++++++++-- ci/utils/nightly_report_helper.sh | 54 +++- 7 files changed, 1021 insertions(+), 201 deletions(-) create mode 100755 ci/pr_summary.sh create mode 100644 ci/utils/aggregate_common.py create mode 100644 ci/utils/aggregate_pr.py diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 4a1f29319b..18893b741e 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -530,3 +530,57 @@ jobs: with: build_type: pull-request script: ci/test_self_hosted_service.sh + pr-test-summary: + # Read each PR test job's per-matrix summary from S3, classify every + # failure as NEW vs. KNOWN against the target branch's nightly history, + # and post (or update) a single sticky comment on the PR. Purely + # informational — never gates the PR (not in pr-builder's needs). + needs: + - conda-cpp-tests + - conda-python-tests + - wheel-tests-cuopt + - wheel-tests-cuopt-server + - test-self-hosted-server + if: always() + runs-on: linux-amd64-cpu4 + container: + image: python:3.14-slim + permissions: + pull-requests: write + steps: + - uses: actions/checkout@v6 + - name: Install dependencies + run: | + apt-get update && apt-get install -y --no-install-recommends curl + pip install awscli + - name: Aggregate per-matrix summaries and post sticky comment + env: + CUOPT_AWS_ACCESS_KEY_ID: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} + CUOPT_AWS_SECRET_ACCESS_KEY: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} + CUOPT_S3_URI: ${{ secrets.CUOPT_S3_URI }} + GITHUB_REPOSITORY: ${{ github.repository }} + GITHUB_RUN_ID: ${{ github.run_id }} + GITHUB_SERVER_URL: ${{ github.server_url }} + GITHUB_SHA: ${{ github.sha }} + GITHUB_TOKEN: ${{ github.token }} + run: | + set -euo pipefail + # Resolve PR number from the pull-request/{N} branch ref. 
+ PR_NUMBER=$(echo "${{ github.ref }}" | sed 's|refs/heads/pull-request/||') + if ! [[ "${PR_NUMBER}" =~ ^[0-9]+$ ]]; then + echo "ERROR: could not parse PR number from ${{ github.ref }}" >&2 + exit 1 + fi + export PR_NUMBER + + # Look up the PR's target branch via the API (the push event + # doesn't expose it directly). + GITHUB_BASE_REF=$(curl -s -L --max-time 30 \ + -H "Authorization: Bearer ${GITHUB_TOKEN}" \ + -H "Accept: application/vnd.github+json" \ + "https://api.github.com/repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}" \ + | python3 -c "import json,sys; d=json.load(sys.stdin); print(d.get('base',{}).get('ref','main'))") + export GITHUB_BASE_REF + echo "PR #${PR_NUMBER} → target branch: ${GITHUB_BASE_REF}" + + bash ci/pr_summary.sh diff --git a/ci/pr_summary.sh b/ci/pr_summary.sh new file mode 100755 index 0000000000..d338b7b06f --- /dev/null +++ b/ci/pr_summary.sh @@ -0,0 +1,145 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Aggregate per-matrix PR test summaries and post (or update) a single +# sticky PR comment that classifies every failure as NEW (introduced by +# this PR) or KNOWN (recurring on nightly, known flaky, or flaked in this +# run only). +# +# Runs as a post-test job after every PR test job finishes. +# +# Required environment: +# PR_NUMBER - PR number to comment on +# GITHUB_TOKEN - token with pull-requests:write +# GITHUB_REPOSITORY - owner/repo (e.g., NVIDIA/cuopt) +# GITHUB_RUN_ID - workflow run that produced summaries +# CUOPT_S3_URI, CUOPT_AWS_* - S3 bucket root + credentials +# +# Optional: +# GITHUB_BASE_REF - PR target branch (default: main) +# GITHUB_SERVER_URL - default: https://github.com + +set -euo pipefail + +SCRIPT_DIR="$(dirname "$(realpath "${BASH_SOURCE[0]}")")" +OUTPUT_DIR="${PWD}/pr-aggregate-output" +mkdir -p "${OUTPUT_DIR}" + +COMMENT_MARKER="" + +if [ -z "${PR_NUMBER:-}" ]; then + echo "ERROR: PR_NUMBER is not set; cannot post comment." >&2 + exit 1 +fi +if [ -z "${GITHUB_REPOSITORY:-}" ]; then + echo "ERROR: GITHUB_REPOSITORY is not set." >&2 + exit 1 +fi + +TARGET_BRANCH="${GITHUB_BASE_REF:-main}" +RUN_DATE=$(date +%F) +GITHUB_RUN_URL="${GITHUB_SERVER_URL:-https://github.com}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID:-}" + +# Map CUOPT_AWS_* to standard AWS env vars for the aws CLI used by aggregate_pr.py +export AWS_ACCESS_KEY_ID="${CUOPT_AWS_ACCESS_KEY_ID:-${AWS_ACCESS_KEY_ID:-}}" +export AWS_SECRET_ACCESS_KEY="${CUOPT_AWS_SECRET_ACCESS_KEY:-${AWS_SECRET_ACCESS_KEY:-}}" +unset AWS_SESSION_TOKEN + +if [ -z "${CUOPT_S3_URI:-}" ]; then + echo "WARNING: CUOPT_S3_URI is not set; nothing to aggregate." >&2 + exit 0 +fi +if [ -z "${GITHUB_RUN_ID:-}" ]; then + echo "WARNING: GITHUB_RUN_ID is not set; cannot locate per-matrix summaries." >&2 + exit 0 +fi + +S3_PR_SUMMARIES_PREFIX="${CUOPT_S3_URI}ci_test_reports/pr/run-${GITHUB_RUN_ID}/" + +echo "Aggregating PR per-matrix summaries from ${S3_PR_SUMMARIES_PREFIX}" + +python3 "${SCRIPT_DIR}/utils/aggregate_pr.py" \ + --s3-pr-summaries-prefix "${S3_PR_SUMMARIES_PREFIX}" \ + --output-dir "${OUTPUT_DIR}" \ + --target-branch "${TARGET_BRANCH}" \ + --sha "${GITHUB_SHA:-}" \ + --github-run-url "${GITHUB_RUN_URL}" \ + --run-date "${RUN_DATE}" + +COMMENT_FILE="${OUTPUT_DIR}/pr_comment.md" +if [ ! -s "${COMMENT_FILE}" ]; then + echo "No failures or flakes; not posting a PR comment." 
+ exit 0 +fi + +if [ -z "${GITHUB_TOKEN:-}" ]; then + echo "ERROR: GITHUB_TOKEN is not set; cannot post PR comment." >&2 + echo "--- comment body that would have been posted ---" + cat "${COMMENT_FILE}" + exit 1 +fi + +# --- Find an existing classification comment --- +EXISTING_ID=$( + curl -s -L --max-time 30 \ + -H "Authorization: Bearer ${GITHUB_TOKEN}" \ + -H "Accept: application/vnd.github+json" \ + "https://api.github.com/repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments?per_page=100" \ + | python3 -c " +import json, sys +marker = ${COMMENT_MARKER@Q} +try: + comments = json.load(sys.stdin) +except json.JSONDecodeError: + sys.exit(0) +for c in comments: + body = c.get('body') or '' + if body.lstrip().startswith(marker): + print(c['id']) + break +" +) + +# --- Build the JSON payload from the file (handles newlines/quotes safely) --- +PAYLOAD=$(python3 -c " +import json, sys +body = open('${COMMENT_FILE}').read() +print(json.dumps({'body': body})) +") + +if [ -n "${EXISTING_ID}" ]; then + echo "Updating existing PR comment ${EXISTING_ID}" + curl -s -L --max-time 30 -X PATCH \ + -H "Authorization: Bearer ${GITHUB_TOKEN}" \ + -H "Accept: application/vnd.github+json" \ + -H "Content-Type: application/json" \ + -d "${PAYLOAD}" \ + "https://api.github.com/repos/${GITHUB_REPOSITORY}/issues/comments/${EXISTING_ID}" \ + > "${OUTPUT_DIR}/comment_response.json" +else + echo "Creating new PR comment" + curl -s -L --max-time 30 -X POST \ + -H "Authorization: Bearer ${GITHUB_TOKEN}" \ + -H "Accept: application/vnd.github+json" \ + -H "Content-Type: application/json" \ + -d "${PAYLOAD}" \ + "https://api.github.com/repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" \ + > "${OUTPUT_DIR}/comment_response.json" +fi + +# Report the comment URL for the workflow log +python3 -c " +import json +try: + d = json.load(open('${OUTPUT_DIR}/comment_response.json')) +except (json.JSONDecodeError, FileNotFoundError): + print('WARNING: no comment response captured') +else: + if 'html_url' in d: + print(f\"PR comment posted: {d['html_url']}\") + elif 'message' in d: + import sys + print(f\"ERROR: GitHub API returned {d.get('message')}\", file=sys.stderr) + sys.exit(1) +" diff --git a/ci/utils/aggregate_common.py b/ci/utils/aggregate_common.py new file mode 100644 index 0000000000..efe8da82b3 --- /dev/null +++ b/ci/utils/aggregate_common.py @@ -0,0 +1,197 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Shared helpers for the nightly and PR aggregators. + +Both aggregators consume per-matrix summary JSONs produced by +``nightly_report.py`` and merge them into a single view. The merge logic, +S3 listing, and HTML escaping are identical in both cases and live here. + +Renderers (HTML dashboard for nightly; Markdown comment for PRs) stay in the +respective aggregator scripts since their output formats diverge. 
+""" + +import json +import os +import sys +from pathlib import Path + +# Ensure ci/utils is importable when invoked from a sibling script +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +from s3_helpers import s3_download, s3_list # noqa: E402 + + +# --------------------------------------------------------------------------- +# Download and load summaries +# --------------------------------------------------------------------------- + + +def download_summaries(s3_prefix, local_dir, s3_fallback_prefix=""): + """Download all JSON summaries from S3 prefix into local_dir. + + If s3_fallback_prefix is set and no summaries are found at s3_prefix, + retries with the fallback (used when the run-scoped path is empty + because uploads landed under the branch-scoped path). + + Returns a list of loaded summary dicts. + """ + local_dir = Path(local_dir) + local_dir.mkdir(parents=True, exist_ok=True) + + uris = s3_list(s3_prefix) + json_uris = [ + u + for u in uris + if u.endswith(".json") and not u.endswith("/consolidated.json") + ] + + if ( + not json_uris + and s3_fallback_prefix + and s3_fallback_prefix != s3_prefix + ): + print( + f"No summaries at {s3_prefix}, trying fallback: {s3_fallback_prefix}" + ) + uris = s3_list(s3_fallback_prefix) + json_uris = [ + u + for u in uris + if u.endswith(".json") and not u.endswith("/consolidated.json") + ] + if json_uris: + s3_prefix = s3_fallback_prefix + + print(f"Found {len(json_uris)} summary file(s) at {s3_prefix}") + + summaries = [] + for uri in json_uris: + filename = uri.rsplit("/", 1)[-1] + local_path = str(local_dir / filename) + if s3_download(uri, local_path): + try: + with open(local_path) as f: + summaries.append(json.load(f)) + except (json.JSONDecodeError, OSError) as exc: + print( + f"WARNING: Failed to parse {local_path}: {exc}", + file=sys.stderr, + ) + return summaries + + +def load_local_summaries(local_dir): + """Load summaries from a local directory (for testing without S3).""" + local_dir = Path(local_dir) + summaries = [] + for json_file in sorted(local_dir.glob("*.json")): + try: + with open(json_file) as f: + summaries.append(json.load(f)) + except (json.JSONDecodeError, OSError) as exc: + print( + f"WARNING: Failed to parse {json_file}: {exc}", file=sys.stderr + ) + return summaries + + +# --------------------------------------------------------------------------- +# Aggregation +# --------------------------------------------------------------------------- + + +def aggregate_summaries(summaries): + """Merge per-matrix summaries into a consolidated view. 
+ + Returns a dict with: + - matrix_grid: list of {test_type, matrix_label, status, counts, ...} + - totals: aggregate counts + - all_new_failures, all_recurring_failures, all_flaky_tests, + all_resolved_tests: merged lists with matrix context added + """ + grid = [] + totals = { + "total": 0, + "passed": 0, + "failed": 0, + "flaky": 0, + "skipped": 0, + "resolved": 0, + } + all_new_failures = [] + all_recurring_failures = [] + all_flaky_tests = [] + all_resolved_tests = [] + any_new_flaky = False + + for s in summaries: + test_type = s.get("test_type", "unknown") + matrix_label = s.get("matrix_label", "unknown") + counts = s.get("counts", {}) + + failed = counts.get("failed", 0) + flaky = counts.get("flaky", 0) + has_new = s.get("has_new_failures", False) + if s.get("has_new_flaky", False): + any_new_flaky = True + + if failed > 0: + status = "failed-new" if has_new else "failed-recurring" + elif flaky > 0: + status = "flaky" + elif counts.get("total", 0) == 0: + status = "no-results" + else: + status = "passed" + + grid.append( + { + "test_type": test_type, + "matrix_label": matrix_label, + "status": status, + "counts": counts, + "sha": s.get("sha", ""), + } + ) + + for key in totals: + totals[key] += counts.get(key, 0) + + ctx = {"test_type": test_type, "matrix_label": matrix_label} + for entry in s.get("new_failures", []): + all_new_failures.append({**entry, **ctx}) + for entry in s.get("recurring_failures", []): + all_recurring_failures.append({**entry, **ctx}) + for entry in s.get("flaky_tests", []): + all_flaky_tests.append({**entry, **ctx}) + for entry in s.get("resolved_tests", []): + all_resolved_tests.append({**entry, **ctx}) + + grid.sort(key=lambda g: (g["test_type"], g["matrix_label"])) + + return { + "matrix_grid": grid, + "totals": totals, + "all_new_failures": all_new_failures, + "all_recurring_failures": all_recurring_failures, + "all_flaky_tests": all_flaky_tests, + "all_resolved_tests": all_resolved_tests, + "has_new_flaky": any_new_flaky, + } + + +# --------------------------------------------------------------------------- +# HTML escaping +# --------------------------------------------------------------------------- + + +def html_escape(text): + return ( + str(text) + .replace("&", "&") + .replace("<", "<") + .replace(">", ">") + .replace('"', """) + ) diff --git a/ci/utils/aggregate_nightly.py b/ci/utils/aggregate_nightly.py index 4901fab7c3..8b62a5ff99 100644 --- a/ci/utils/aggregate_nightly.py +++ b/ci/utils/aggregate_nightly.py @@ -30,168 +30,13 @@ # Ensure ci/utils is importable when invoked as a script sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) -from s3_helpers import s3_download, s3_upload, s3_list # noqa: E402 - - -# --------------------------------------------------------------------------- -# Download and merge summaries -# --------------------------------------------------------------------------- - - -def download_summaries(s3_prefix, local_dir, s3_fallback_prefix=""): - """Download all JSON summaries from S3 prefix into local_dir. - If s3_fallback_prefix is set and no summaries found at s3_prefix, - retries with the fallback (used when RAPIDS_BRANCH in rapidsai - containers doesn't match the branch input). 
- Returns list of loaded summary dicts.""" - local_dir = Path(local_dir) - local_dir.mkdir(parents=True, exist_ok=True) - - uris = s3_list(s3_prefix) - json_uris = [ - u - for u in uris - if u.endswith(".json") and not u.endswith("/consolidated.json") - ] - - # Fallback: search the parent date prefix if branch-specific path is empty - if ( - not json_uris - and s3_fallback_prefix - and s3_fallback_prefix != s3_prefix - ): - print( - f"No summaries at {s3_prefix}, trying fallback: {s3_fallback_prefix}" - ) - uris = s3_list(s3_fallback_prefix) - json_uris = [ - u - for u in uris - if u.endswith(".json") and not u.endswith("/consolidated.json") - ] - if json_uris: - s3_prefix = s3_fallback_prefix - - print(f"Found {len(json_uris)} summary file(s) at {s3_prefix}") - - summaries = [] - for uri in json_uris: - filename = uri.rsplit("/", 1)[-1] - local_path = str(local_dir / filename) - if s3_download(uri, local_path): - try: - with open(local_path) as f: - summaries.append(json.load(f)) - except (json.JSONDecodeError, OSError) as exc: - print( - f"WARNING: Failed to parse {local_path}: {exc}", - file=sys.stderr, - ) - return summaries - - -def load_local_summaries(local_dir): - """Load summaries from a local directory (for testing without S3).""" - local_dir = Path(local_dir) - summaries = [] - for json_file in sorted(local_dir.glob("*.json")): - try: - with open(json_file) as f: - summaries.append(json.load(f)) - except (json.JSONDecodeError, OSError) as exc: - print( - f"WARNING: Failed to parse {json_file}: {exc}", file=sys.stderr - ) - return summaries - - -# --------------------------------------------------------------------------- -# Aggregation -# --------------------------------------------------------------------------- - - -def aggregate_summaries(summaries): - """Merge per-matrix summaries into a consolidated view. 
- - Returns a dict with: - - matrix_grid: list of {test_type, matrix_label, status, counts, ...} - - totals: aggregate counts - - all_new_failures, all_recurring_failures, all_flaky_tests, - all_resolved_tests: merged lists with matrix context added - """ - grid = [] - totals = { - "total": 0, - "passed": 0, - "failed": 0, - "flaky": 0, - "skipped": 0, - "resolved": 0, - } - all_new_failures = [] - all_recurring_failures = [] - all_flaky_tests = [] - all_resolved_tests = [] - any_new_flaky = False - - for s in summaries: - test_type = s.get("test_type", "unknown") - matrix_label = s.get("matrix_label", "unknown") - counts = s.get("counts", {}) - - # Determine job status - failed = counts.get("failed", 0) - flaky = counts.get("flaky", 0) - has_new = s.get("has_new_failures", False) - if s.get("has_new_flaky", False): - any_new_flaky = True - - if failed > 0: - status = "failed-new" if has_new else "failed-recurring" - elif flaky > 0: - status = "flaky" - elif counts.get("total", 0) == 0: - status = "no-results" - else: - status = "passed" - - grid.append( - { - "test_type": test_type, - "matrix_label": matrix_label, - "status": status, - "counts": counts, - "sha": s.get("sha", ""), - } - ) - - # Accumulate totals - for key in totals: - totals[key] += counts.get(key, 0) - - # Merge failure lists with matrix context - ctx = {"test_type": test_type, "matrix_label": matrix_label} - for entry in s.get("new_failures", []): - all_new_failures.append({**entry, **ctx}) - for entry in s.get("recurring_failures", []): - all_recurring_failures.append({**entry, **ctx}) - for entry in s.get("flaky_tests", []): - all_flaky_tests.append({**entry, **ctx}) - for entry in s.get("resolved_tests", []): - all_resolved_tests.append({**entry, **ctx}) - - # Sort grid for consistent display - grid.sort(key=lambda g: (g["test_type"], g["matrix_label"])) - - return { - "matrix_grid": grid, - "totals": totals, - "all_new_failures": all_new_failures, - "all_recurring_failures": all_recurring_failures, - "all_flaky_tests": all_flaky_tests, - "all_resolved_tests": all_resolved_tests, - "has_new_flaky": any_new_flaky, - } +from aggregate_common import ( # noqa: E402 + aggregate_summaries, + download_summaries, + html_escape as _html_escape, + load_local_summaries, +) +from s3_helpers import s3_download, s3_upload # noqa: E402 # --------------------------------------------------------------------------- @@ -298,16 +143,6 @@ def generate_consolidated_json( # --------------------------------------------------------------------------- -def _html_escape(text): - return ( - str(text) - .replace("&", "&") - .replace("<", "<") - .replace(">", ">") - .replace('"', """) - ) - - def _status_badge(status): """Return an HTML badge for a matrix cell status.""" colors = { diff --git a/ci/utils/aggregate_pr.py b/ci/utils/aggregate_pr.py new file mode 100644 index 0000000000..547391e266 --- /dev/null +++ b/ci/utils/aggregate_pr.py @@ -0,0 +1,416 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Aggregate per-matrix PR test summaries into a Markdown body for the PR +classification comment. 
+ +Each PR test job runs ``nightly_report.py --mode pr`` which writes a +per-matrix summary JSON to:: + + s3://bucket/ci_test_reports/pr/run-${GITHUB_RUN_ID}/{test_type}-{matrix}.json + +This script downloads them, merges with the shared aggregator helpers, and +emits two Markdown sections: + + - **NEW failures** — failures introduced by this PR (not in nightly + history, or only present as resolved-and-not-flaky). + - **KNOWN issues** — pre-existing breakage (active on nightly) or known + flakes (flagged on nightly, or flaked in this PR run). + +The output Markdown is prefixed with a hidden marker comment so the +comment poster (``ci/pr_summary.sh``) can find and update an existing +comment in place. + +If nothing failed or flaked across the run, this script writes an empty +file and the poster skips commenting. +""" + +import argparse +import json +import os +import sys +from datetime import datetime, timezone +from pathlib import Path + +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +from aggregate_common import ( # noqa: E402 + aggregate_summaries, + download_summaries, + load_local_summaries, +) + +# Hidden marker — must match the value used by ci/pr_summary.sh. +COMMENT_MARKER = "" + +# Maximum total comment body size we are willing to post. GitHub allows +# ~65k characters per comment, but we cap earlier and truncate the failure +# tables so the comment stays readable. +MAX_BODY_CHARS = 60000 +MAX_ROWS_PER_BUCKET = 80 +MAX_ERROR_SNIPPET = 600 + + +def _short_msg(msg, limit=140): + """Single-line summary of an error message for table cells.""" + if not msg: + return "" + lines = [ln for ln in msg.splitlines() if ln.strip()] + summary = lines[-1] if lines else "" + if len(summary) > limit: + summary = summary[: limit - 1] + "…" + return summary.replace("|", "\\|") + + +def _details_block(msg): + """Render an error message as a collapsible
<details> block."""
+    if not msg:
+        return ""
+    snippet = msg.strip()
+    if len(snippet) > MAX_ERROR_SNIPPET:
+        snippet = snippet[:MAX_ERROR_SNIPPET] + "\n…[truncated]"
+    return (
+        f"<details><summary>error</summary>\n\n```\n{snippet}\n```\n</details>
" + ) + + +def _classify_known_subgroups(recurring, flaky): + """Split the KNOWN bucket into the three sub-groups for the comment. + + Returns ``(broken_on_nightly, known_flaky_nightly, flaked_in_pr_run)``. + Each entry retains its full per-matrix context. + """ + broken_on_nightly = [] + known_flaky_nightly = [] + flaked_in_pr_run = [] + + for entry in recurring: + cls = entry.get("pr_classification", "") + if cls == "known_recurring": + broken_on_nightly.append(entry) + elif cls == "known_flaky_nightly": + known_flaky_nightly.append(entry) + else: + broken_on_nightly.append(entry) + + for entry in flaky: + cls = entry.get("pr_classification", "") + if cls == "known_flaky_nightly": + known_flaky_nightly.append(entry) + elif cls == "known_recurring": + broken_on_nightly.append(entry) + else: + flaked_in_pr_run.append(entry) + + return broken_on_nightly, known_flaky_nightly, flaked_in_pr_run + + +def _matrix_grid_table(grid): + if not grid: + return "" + lines = [ + "| Test type | Matrix | Status | Passed | Failed | Flaky | Skipped |", + "|-----------|--------|--------|--------|--------|-------|---------|", + ] + badge_for = { + "passed": "PASS", + "failed-new": "NEW FAIL", + "failed-recurring": "RECURRING", + "flaky": "FLAKY", + "no-results": "NO DATA", + } + for g in grid: + c = g.get("counts", {}) + lines.append( + f"| {g['test_type']} | `{g['matrix_label']}` | " + f"{badge_for.get(g['status'], g['status'])} | " + f"{c.get('passed', 0)} | {c.get('failed', 0)} | " + f"{c.get('flaky', 0)} | {c.get('skipped', 0)} |" + ) + return "\n".join(lines) + + +def _failure_table(entries, columns, row_fn, cap=MAX_ROWS_PER_BUCKET): + if not entries: + return "" + lines = ["| " + " | ".join(columns) + " |"] + lines.append("|" + "|".join(["---"] * len(columns)) + "|") + for entry in entries[:cap]: + lines.append(row_fn(entry)) + if len(entries) > cap: + lines.append(f"\n_…and {len(entries) - cap} more not shown._") + return "\n".join(lines) + + +def build_comment_body( + agg, target_branch, github_run_url, sha="", run_date="" +): + """Build the Markdown body for the sticky PR comment. + + Returns an empty string if there is nothing worth commenting on. + """ + new_failures = agg["all_new_failures"] + recurring = agg["all_recurring_failures"] + flaky = agg["all_flaky_tests"] + + if not new_failures and not recurring and not flaky: + return "" + + broken_on_nightly, known_flaky_nightly, flaked_in_pr_run = ( + _classify_known_subgroups(recurring, flaky) + ) + + parts = [COMMENT_MARKER] + parts.append("## PR Test Classification") + parts.append("") + + headline = [] + if new_failures: + headline.append(f"**{len(new_failures)} NEW** failure(s)") + known_total = ( + len(broken_on_nightly) + + len(known_flaky_nightly) + + len(flaked_in_pr_run) + ) + if known_total: + headline.append(f"**{known_total} KNOWN** issue(s)") + if headline: + parts.append(" • ".join(headline)) + parts.append("") + + meta = [] + if target_branch: + meta.append(f"Compared against nightly history for `{target_branch}`") + if sha: + meta.append(f"PR head: `{sha[:12]}`") + if run_date: + meta.append(f"Run date: {run_date}") + if github_run_url: + meta.append(f"[Workflow run]({github_run_url})") + if meta: + parts.append(" · ".join(meta)) + parts.append("") + + grid_md = _matrix_grid_table(agg["matrix_grid"]) + if grid_md: + parts.append("
<details><summary>Per-matrix status</summary>\n")
+        parts.append(grid_md)
+        parts.append("\n</details>
") + parts.append("") + + # --- NEW failures --- + if new_failures: + parts.append("### NEW failures (likely introduced by this PR)") + parts.append("") + parts.append( + _failure_table( + new_failures, + ["Test type", "Matrix", "Suite", "Test", "Error"], + lambda e: ( + f"| {e['test_type']} | `{e['matrix_label']}` | " + f"{e['suite']} | `{e['name']}` | " + f"{_short_msg(e.get('message', ''))} |" + ), + ) + ) + parts.append("") + + # --- KNOWN issues --- + if known_total: + parts.append("### KNOWN issues (pre-existing, not caused by this PR)") + parts.append("") + + if broken_on_nightly: + parts.append("**Already broken on nightly** (recurring)") + parts.append("") + parts.append( + _failure_table( + broken_on_nightly, + [ + "Test type", + "Matrix", + "Suite", + "Test", + "First seen", + "Failure count", + "Error", + ], + lambda e: ( + f"| {e['test_type']} | `{e['matrix_label']}` | " + f"{e['suite']} | `{e['name']}` | " + f"{e.get('first_seen', 'unknown')} | " + f"{e.get('failure_count', '?')} | " + f"{_short_msg(e.get('message', ''))} |" + ), + ) + ) + parts.append("") + + if known_flaky_nightly: + parts.append("**Known flaky on nightly**") + parts.append("") + parts.append( + _failure_table( + known_flaky_nightly, + [ + "Test type", + "Matrix", + "Suite", + "Test", + "First seen", + "Error", + ], + lambda e: ( + f"| {e['test_type']} | `{e['matrix_label']}` | " + f"{e['suite']} | `{e['name']}` | " + f"{e.get('first_seen', 'unknown')} | " + f"{_short_msg(e.get('message', ''))} |" + ), + ) + ) + parts.append("") + + if flaked_in_pr_run: + parts.append( + "**Flaked in this PR run** (passed on retry; not previously known to flake)" + ) + parts.append("") + parts.append( + _failure_table( + flaked_in_pr_run, + [ + "Test type", + "Matrix", + "Suite", + "Test", + "Retries", + "Error", + ], + lambda e: ( + f"| {e['test_type']} | `{e['matrix_label']}` | " + f"{e['suite']} | `{e['name']}` | " + f"{e.get('retry_count', '?')} | " + f"{_short_msg(e.get('message', ''))} |" + ), + ) + ) + parts.append("") + + parts.append( + "_Classification compares each failure against the most recent " + "nightly history for the target branch. Tests passed on retry " + "via `pytest-rerunfailures` are reported as flaky._" + ) + + body = "\n".join(parts) + if len(body) > MAX_BODY_CHARS: + body = body[: MAX_BODY_CHARS - 200] + ( + "\n\n…_comment truncated; see workflow run for full details._" + ) + return body + + +def main(): + parser = argparse.ArgumentParser( + description="Aggregate per-matrix PR test summaries into a Markdown PR comment." + ) + parser.add_argument( + "--s3-pr-summaries-prefix", + default="", + help=( + "S3 prefix where ``nightly_report.py --mode pr`` uploaded " + "per-matrix summaries for this run. 
Example: " + "s3://bucket/ci_test_reports/pr/run-12345/" + ), + ) + parser.add_argument( + "--local-summaries-dir", + default="", + help="Local directory of summaries (for testing without S3).", + ) + parser.add_argument( + "--output-dir", + default="aggregate-output", + help="Directory to write pr_comment.md and consolidated.json into.", + ) + parser.add_argument( + "--target-branch", + default=os.environ.get("GITHUB_BASE_REF", "main"), + help="PR target branch — surfaced in the comment for context.", + ) + parser.add_argument( + "--sha", + default=os.environ.get("GITHUB_SHA", ""), + help="PR head SHA — surfaced in the comment for context.", + ) + parser.add_argument( + "--github-run-url", + default="", + help="Workflow run URL — linked from the comment footer.", + ) + parser.add_argument( + "--run-date", + default=datetime.now(timezone.utc).strftime("%Y-%m-%d"), + help="Date the run started (YYYY-MM-DD).", + ) + args = parser.parse_args() + + output_dir = Path(args.output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + if args.local_summaries_dir: + summaries = load_local_summaries(args.local_summaries_dir) + elif args.s3_pr_summaries_prefix: + summaries = download_summaries( + args.s3_pr_summaries_prefix, output_dir / "summaries" + ) + else: + print( + "ERROR: provide --s3-pr-summaries-prefix or --local-summaries-dir.", + file=sys.stderr, + ) + return 2 + + if not summaries: + print("No PR per-matrix summaries found; nothing to comment on.") + (output_dir / "pr_comment.md").write_text("") + return 0 + + agg = aggregate_summaries(summaries) + body = build_comment_body( + agg, + target_branch=args.target_branch, + github_run_url=args.github_run_url, + sha=args.sha, + run_date=args.run_date, + ) + + (output_dir / "pr_comment.md").write_text(body) + consolidated = { + "timestamp": datetime.now(timezone.utc).isoformat(), + "target_branch": args.target_branch, + "sha": args.sha, + "run_date": args.run_date, + "totals": agg["totals"], + "matrix_grid": agg["matrix_grid"], + "new_failures": agg["all_new_failures"], + "recurring_failures": agg["all_recurring_failures"], + "flaky_tests": agg["all_flaky_tests"], + } + (output_dir / "pr_consolidated.json").write_text( + json.dumps(consolidated, indent=2) + "\n" + ) + + if not body: + print("All tests passed (no failures or flakes); skipping PR comment.") + else: + print( + f"PR comment body written to {output_dir / 'pr_comment.md'} " + f"({len(body)} chars)." + ) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/ci/utils/nightly_report.py b/ci/utils/nightly_report.py index 6742458582..782748e742 100755 --- a/ci/utils/nightly_report.py +++ b/ci/utils/nightly_report.py @@ -368,6 +368,89 @@ def update_history(history, classified, sha, date_str): ) +# --------------------------------------------------------------------------- +# PR-mode classification (read-only against nightly history) +# --------------------------------------------------------------------------- + + +def classify_pr_against_history(classified, history): + """Classify PR run results by comparing against the nightly history. + + Read-only: never mutates ``history``. Each failure is annotated with a + ``pr_classification`` field used by the PR comment renderer. + + Routing into the existing summary lists so ``aggregate_summaries`` + consumes PR summaries without changes: + + - ``new_failures``: hard failures the PR introduced. ``pr_classification=new``. + - ``recurring_failures``: hard failures known to nightly. 
+ ``pr_classification`` is ``known_recurring`` (active on nightly) or + ``known_flaky_nightly`` (history flagged the test flaky). + - ``flaky_tests``: tests that passed on retry within the PR run. + ``pr_classification`` is ``known_flaky_nightly`` (already known + flaky), ``known_recurring`` (hard-failing on nightly but flaked + here), or ``known_flaky_pr`` (only flaked in this PR run). + + Returns ``(new_failures, recurring_failures, flaky_tests)``. + """ + tests_history = history.get("tests", {}) + + new_failures = [] + recurring_failures = [] + flaky_tests = [] + + def _key(entry): + return f"{entry['suite']}::{entry['classname']}::{entry['name']}" + + # Hard failures: failed/errored in PR run, did NOT pass on retry. + for entry in classified["failed"] + classified["error"]: + rec = tests_history.get(_key(entry)) + if rec and rec.get("status") == "active": + recurring_failures.append( + { + **entry, + "first_seen": rec.get("first_seen_date", "unknown"), + "failure_count": rec.get("failure_count", 0), + "pr_classification": "known_recurring", + } + ) + elif rec and rec.get("is_flaky"): + recurring_failures.append( + { + **entry, + "first_seen": rec.get("first_seen_date", "unknown"), + "failure_count": rec.get("failure_count", 0), + "pr_classification": "known_flaky_nightly", + } + ) + else: + # Not in history, or history says resolved-and-not-flaky: + # this PR is the cause. + new_failures.append({**entry, "pr_classification": "new"}) + + # Flaky in PR run: passed on retry within the same run. + for entry in classified["flaky"]: + rec = tests_history.get(_key(entry)) + if rec and rec.get("is_flaky"): + classification = "known_flaky_nightly" + first_seen = rec.get("first_seen_date", "unknown") + elif rec and rec.get("status") == "active": + classification = "known_recurring" + first_seen = rec.get("first_seen_date", "unknown") + else: + classification = "known_flaky_pr" + first_seen = "unknown" + flaky_tests.append( + { + **entry, + "first_seen": first_seen, + "pr_classification": classification, + } + ) + + return new_failures, recurring_failures, flaky_tests + + def save_history(history, history_path): """Write history to a local JSON file.""" with open(history_path, "w") as f: @@ -542,19 +625,35 @@ def generate_json_summary( recurring_failures, resolved_tests, new_flaky_tests=None, + flaky_tests=None, test_type="", matrix_label="", sha="", date_str="", + mode="nightly", ): - """Generate a JSON summary for downstream tools (Slack notifier, dashboard).""" + """Generate a JSON summary for downstream tools (Slack notifier, dashboard, PR comment). + + ``flaky_tests`` lets PR mode pass its own annotated list (with + ``pr_classification`` and ``first_seen``). In nightly mode it defaults + to ``classified["flaky"]`` to preserve existing behavior. 
+ """ if new_flaky_tests is None: new_flaky_tests = [] + if flaky_tests is None: + flaky_tests = classified["flaky"] new_flaky_keys = { f"{e['classname']}::{e['name']}" for e in new_flaky_tests } + + def _opt(d, key): + if key in d: + return {key: d[key]} + return {} + return { "timestamp": datetime.now(timezone.utc).isoformat(), + "mode": mode, "test_type": test_type, "matrix_label": matrix_label, "sha": sha, @@ -575,6 +674,7 @@ def generate_json_summary( "name": e["name"], "classname": e["classname"], "message": e.get("message", ""), + **_opt(e, "pr_classification"), } for e in new_failures ], @@ -585,6 +685,8 @@ def generate_json_summary( "classname": e["classname"], "first_seen": e.get("first_seen", "unknown"), "message": e.get("message", ""), + **_opt(e, "pr_classification"), + **_opt(e, "failure_count"), } for e in recurring_failures ], @@ -596,8 +698,10 @@ def generate_json_summary( "retry_count": e.get("retry_count", 0), "message": e.get("message", ""), "is_new": f"{e['classname']}::{e['name']}" in new_flaky_keys, + **_opt(e, "pr_classification"), + **_opt(e, "first_seen"), } - for e in classified["flaky"] + for e in flaky_tests ], "resolved_tests": [ { @@ -945,12 +1049,24 @@ def main(): default=os.environ.get("GITHUB_STEP_SUMMARY", ""), help="Path to write GitHub Actions step summary", ) + parser.add_argument( + "--mode", + choices=["nightly", "pr"], + default="nightly", + help=( + "nightly: update and upload history; classify against and " + "evolve the long-term failure state. " + "pr: read history but never write it; classify each PR failure " + "as new vs. known (recurring or flaky) for the PR comment." + ), + ) args = parser.parse_args() output_dir = Path(args.output_dir) output_dir.mkdir(parents=True, exist_ok=True) local_history_path = str(output_dir / "test_failure_history.json") + pr_mode = args.mode == "pr" # ---- Step 1: Download history from S3 ---- if args.s3_history_uri: @@ -982,31 +1098,45 @@ def main(): f"{len(classified['skipped'])} skipped" ) - # ---- Step 3: Update history ---- + # ---- Step 3: Classify against history ---- history = load_history(local_history_path) - ( - history, - new_failures, - recurring_failures, - resolved_tests, - new_flaky_tests, - ) = update_history(history, classified, args.sha, args.date) + pr_flaky_tests = None # populated only in PR mode - if new_flaky_tests: - print( - f"NEW FLAKY: {len(new_flaky_tests)} test(s) flaky for the first time" + if pr_mode: + new_failures, recurring_failures, pr_flaky_tests = ( + classify_pr_against_history(classified, history) ) - if resolved_tests: + resolved_tests = [] + new_flaky_tests = [] print( - f"Stabilized: {len(resolved_tests)} previously-failing test(s) now pass" + f"PR classification: {len(new_failures)} new, " + f"{len(recurring_failures)} known recurring/flaky-on-nightly, " + f"{len(pr_flaky_tests)} flaky in run" ) + else: + ( + history, + new_failures, + recurring_failures, + resolved_tests, + new_flaky_tests, + ) = update_history(history, classified, args.sha, args.date) + + if new_flaky_tests: + print( + f"NEW FLAKY: {len(new_flaky_tests)} test(s) flaky for the first time" + ) + if resolved_tests: + print( + f"Stabilized: {len(resolved_tests)} previously-failing test(s) now pass" + ) - save_history(history, local_history_path) - print(f"Updated local history at {local_history_path}") + save_history(history, local_history_path) + print(f"Updated local history at {local_history_path}") - # ---- Step 4: Upload history back to S3 ---- - if args.s3_history_uri: - 
s3_upload(local_history_path, args.s3_history_uri) + # ---- Step 4: Upload history back to S3 ---- + if args.s3_history_uri: + s3_upload(local_history_path, args.s3_history_uri) # ---- Step 5: Generate reports ---- report_kwargs = dict( @@ -1046,6 +1176,8 @@ def main(): recurring_failures, resolved_tests, new_flaky_tests, + flaky_tests=pr_flaky_tests, + mode=args.mode, **report_kwargs, ) json_path = output_dir / "nightly_summary.json" @@ -1097,7 +1229,10 @@ def main(): print( f"\nFAILED: {genuine_failures} genuine test failure(s) detected." ) - return 1 + # PR mode is reporting-only; the underlying test job already conveys + # pass/fail. Returning 0 keeps post-test summary jobs uncoupled from + # this script's exit code. + return 0 if pr_mode else 1 if classified["flaky"]: print( f"\nWARNING: All tests passed but {len(classified['flaky'])} flaky test(s) detected." diff --git a/ci/utils/nightly_report_helper.sh b/ci/utils/nightly_report_helper.sh index c65fc22f0e..2fb99c8052 100755 --- a/ci/utils/nightly_report_helper.sh +++ b/ci/utils/nightly_report_helper.sh @@ -2,7 +2,8 @@ # SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -# Shared helper for generating nightly test reports with matrix-aware S3 paths. +# Shared helper for generating nightly and PR test reports with +# matrix-aware S3 paths. # # Usage (source from any test script): # @@ -15,6 +16,16 @@ # # For wheel tests: # generate_nightly_report "wheel-python" --with-python-version # +# Behavior depends on RAPIDS_BUILD_TYPE: +# - "nightly": update and upload the long-term failure history, +# upload a per-matrix summary under summaries/, plus HTML. +# - "pull-request" (PR mode): read the target branch's nightly history +# (no writes), classify each PR failure as new vs. known +# (recurring or flaky), and upload only a run-scoped +# per-matrix summary under ci_test_reports/pr/run-${GITHUB_RUN_ID}/ +# for the PR comment aggregator to consume. +# - other: local report only; no S3 reads or writes. +# # Prerequisites (set before calling): # RAPIDS_TESTS_DIR - directory containing JUnit XML test results # @@ -22,10 +33,11 @@ # RAPIDS_CUDA_VERSION - CUDA version (e.g., "12.9") # RAPIDS_PY_VERSION - Python version (e.g., "3.12"), used with --with-python-version # RAPIDS_BRANCH - branch name (e.g., "main") -# RAPIDS_BUILD_TYPE - build type; S3 history/summary/HTML uploads are -# only enabled when this equals "nightly" -# CUOPT_S3_URI - S3 bucket root (e.g., s3://cuopt-datasets/); -# only consulted when RAPIDS_BUILD_TYPE=nightly +# RAPIDS_BUILD_TYPE - build type ("nightly", "pull-request", ...) +# GITHUB_BASE_REF - PR target branch; in PR mode the helper reads +# the nightly history from this branch. Falls +# back to RAPIDS_BRANCH or "main". +# CUOPT_S3_URI - S3 bucket root (e.g., s3://cuopt-datasets/) # GITHUB_SHA - commit SHA # GITHUB_RUN_ID - GitHub Actions run ID (scopes summaries to this run) # GITHUB_STEP_SUMMARY - path for GitHub Actions step summary @@ -78,10 +90,8 @@ generate_nightly_report() { local s3_summary_uri="" local s3_summary_branch_uri="" local s3_html_uri="" + local mode="nightly" - # Only upload to S3 for nightly runs. For PRs and other build types we - # still generate the local report and GitHub Step Summary, but skip S3 - # so PR runs don't pollute the nightly history/summary/report buckets. 
if [ "${RAPIDS_BUILD_TYPE:-}" = "nightly" ] && [ -n "${CUOPT_S3_URI:-}" ]; then local s3_base="${CUOPT_S3_URI}ci_test_reports/nightly" s3_history_uri="${s3_base}/history/${branch_slug}/${test_type}-${matrix_label}.json" @@ -102,10 +112,38 @@ generate_nightly_report() { fi s3_summary_branch_uri="${s3_base}/summaries/${run_date}/${branch_slug}/${summary_filename}" s3_html_uri="${s3_base}/reports/${run_date}/${branch_slug}/${test_type}-${matrix_label}.html" + elif [ "${RAPIDS_BUILD_TYPE:-}" = "pull-request" ] && [ -n "${CUOPT_S3_URI:-}" ]; then + # PR mode: read the target branch's nightly history (never write + # back), and write a run-scoped per-matrix summary that the PR + # comment aggregator picks up. + mode="pr" + + local target_branch="${GITHUB_BASE_REF:-${RAPIDS_BRANCH:-main}}" + local target_branch_slug + target_branch_slug=$(echo "${target_branch}" | tr '/' '-') + + local s3_nightly_base="${CUOPT_S3_URI}ci_test_reports/nightly" + s3_history_uri="${s3_nightly_base}/history/${target_branch_slug}/${test_type}-${matrix_label}.json" + # Fall back to main's history when the target branch has no history yet + # (e.g. PRs into a fresh release branch). + if [ "${target_branch_slug}" != "main" ]; then + s3_history_seed_uri="${s3_nightly_base}/history/main/${test_type}-${matrix_label}.json" + fi + + # PR summaries live under a separate prefix so they never mix with + # nightly data. Scoped by GITHUB_RUN_ID so each workflow run is + # isolated; cleaned up via bucket lifecycle policy. + local pr_base="${CUOPT_S3_URI}ci_test_reports/pr" + if [ -n "${GITHUB_RUN_ID:-}" ]; then + s3_summary_uri="${pr_base}/run-${GITHUB_RUN_ID}/${test_type}-${matrix_label}.json" + else + echo "WARNING: GITHUB_RUN_ID unset; skipping PR summary upload" >&2 + fi fi # --- Run nightly report --- python3 "${_HELPER_DIR}/nightly_report.py" \ + --mode "${mode}" \ --results-dir "${RAPIDS_TESTS_DIR}" \ --output-dir "${report_output_dir}" \ --sha "${GITHUB_SHA:-unknown}" \ From 9961e224a5052de3d918d8cf394f482bfb48424f Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Fri, 8 May 2026 15:47:53 -0500 Subject: [PATCH 02/19] ci(pr): extract pr-test-summary into a reusable workflow Mirror the nightly-summary pattern: keep job-level wiring (needs/if) in pr.yaml and move the implementation into its own reusable workflow. Use explicit secret pass-through (matching how test.yaml calls nightly-summary.yaml) instead of `secrets: inherit`. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/pr.yaml | 51 ++------------------ .github/workflows/pr_test_summary.yaml | 67 ++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 46 deletions(-) create mode 100644 .github/workflows/pr_test_summary.yaml diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 18893b741e..a6056b1e67 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -531,10 +531,6 @@ jobs: build_type: pull-request script: ci/test_self_hosted_service.sh pr-test-summary: - # Read each PR test job's per-matrix summary from S3, classify every - # failure as NEW vs. KNOWN against the target branch's nightly history, - # and post (or update) a single sticky comment on the PR. Purely - # informational — never gates the PR (not in pr-builder's needs). 
needs: - conda-cpp-tests - conda-python-tests @@ -542,45 +538,8 @@ jobs: - wheel-tests-cuopt-server - test-self-hosted-server if: always() - runs-on: linux-amd64-cpu4 - container: - image: python:3.14-slim - permissions: - pull-requests: write - steps: - - uses: actions/checkout@v6 - - name: Install dependencies - run: | - apt-get update && apt-get install -y --no-install-recommends curl - pip install awscli - - name: Aggregate per-matrix summaries and post sticky comment - env: - CUOPT_AWS_ACCESS_KEY_ID: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} - CUOPT_AWS_SECRET_ACCESS_KEY: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} - CUOPT_S3_URI: ${{ secrets.CUOPT_S3_URI }} - GITHUB_REPOSITORY: ${{ github.repository }} - GITHUB_RUN_ID: ${{ github.run_id }} - GITHUB_SERVER_URL: ${{ github.server_url }} - GITHUB_SHA: ${{ github.sha }} - GITHUB_TOKEN: ${{ github.token }} - run: | - set -euo pipefail - # Resolve PR number from the pull-request/{N} branch ref. - PR_NUMBER=$(echo "${{ github.ref }}" | sed 's|refs/heads/pull-request/||') - if ! [[ "${PR_NUMBER}" =~ ^[0-9]+$ ]]; then - echo "ERROR: could not parse PR number from ${{ github.ref }}" >&2 - exit 1 - fi - export PR_NUMBER - - # Look up the PR's target branch via the API (the push event - # doesn't expose it directly). - GITHUB_BASE_REF=$(curl -s -L --max-time 30 \ - -H "Authorization: Bearer ${GITHUB_TOKEN}" \ - -H "Accept: application/vnd.github+json" \ - "https://api.github.com/repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}" \ - | python3 -c "import json,sys; d=json.load(sys.stdin); print(d.get('base',{}).get('ref','main'))") - export GITHUB_BASE_REF - echo "PR #${PR_NUMBER} → target branch: ${GITHUB_BASE_REF}" - - bash ci/pr_summary.sh + uses: ./.github/workflows/pr_test_summary.yaml + secrets: + CUOPT_AWS_ACCESS_KEY_ID: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} + CUOPT_AWS_SECRET_ACCESS_KEY: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} + CUOPT_S3_URI: ${{ secrets.CUOPT_S3_URI }} diff --git a/.github/workflows/pr_test_summary.yaml b/.github/workflows/pr_test_summary.yaml new file mode 100644 index 0000000000..e8d5fcbb41 --- /dev/null +++ b/.github/workflows/pr_test_summary.yaml @@ -0,0 +1,67 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Reusable workflow: aggregate per-matrix PR test summaries from S3, +# classify every failure as NEW (introduced by this PR) vs. KNOWN +# (recurring on nightly, known flaky on nightly, or flaked in this run), +# and post (or update) a single sticky comment on the PR. +# +# Called from pr.yaml after the PR test jobs finish. Purely informational +# — never gates the PR. 
+ +name: pr-test-summary + +on: + workflow_call: + secrets: + CUOPT_AWS_ACCESS_KEY_ID: + required: true + CUOPT_AWS_SECRET_ACCESS_KEY: + required: true + CUOPT_S3_URI: + required: true + +jobs: + pr-test-summary: + runs-on: linux-amd64-cpu4 + container: + image: python:3.14-slim + permissions: + pull-requests: write + steps: + - uses: actions/checkout@v6 + - name: Install dependencies + run: | + apt-get update && apt-get install -y --no-install-recommends curl + pip install awscli + - name: Aggregate per-matrix summaries and post sticky comment + env: + CUOPT_AWS_ACCESS_KEY_ID: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} + CUOPT_AWS_SECRET_ACCESS_KEY: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} + CUOPT_S3_URI: ${{ secrets.CUOPT_S3_URI }} + GITHUB_REPOSITORY: ${{ github.repository }} + GITHUB_RUN_ID: ${{ github.run_id }} + GITHUB_SERVER_URL: ${{ github.server_url }} + GITHUB_SHA: ${{ github.sha }} + GITHUB_TOKEN: ${{ github.token }} + run: | + set -euo pipefail + # Resolve PR number from the pull-request/{N} branch ref. + PR_NUMBER=$(echo "${{ github.ref }}" | sed 's|refs/heads/pull-request/||') + if ! [[ "${PR_NUMBER}" =~ ^[0-9]+$ ]]; then + echo "ERROR: could not parse PR number from ${{ github.ref }}" >&2 + exit 1 + fi + export PR_NUMBER + + # Look up the PR's target branch via the API (the push event + # doesn't expose it directly). + GITHUB_BASE_REF=$(curl -s -L --max-time 30 \ + -H "Authorization: Bearer ${GITHUB_TOKEN}" \ + -H "Accept: application/vnd.github+json" \ + "https://api.github.com/repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}" \ + | python3 -c "import json,sys; d=json.load(sys.stdin); print(d.get('base',{}).get('ref','main'))") + export GITHUB_BASE_REF + echo "PR #${PR_NUMBER} → target branch: ${GITHUB_BASE_REF}" + + bash ci/pr_summary.sh From c4876d24e086383a720eb495ab8522d61d5679ac Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Fri, 8 May 2026 16:05:21 -0500 Subject: [PATCH 03/19] ci(pr): extract PR comment GitHub-API logic into a Python helper The shell script had three blocks of inline Python plus a bash-specific ${VAR@Q} quoting trick. Move all GitHub API interactions into ci/utils/pr_comment_helper.py with two CLI subcommands: - base-ref: resolve the PR's target branch - post: post or update the sticky comment by hidden marker Stdlib only (urllib + json) so it runs in slim CI containers. The hidden marker now lives in pr_comment_helper.COMMENT_MARKER as the single source of truth, imported by aggregate_pr.py. ci/pr_summary.sh shrinks from ~120 lines of mixed shell+Python to ~70 lines of straight orchestration. pr_test_summary.yaml's base-ref lookup goes from a curl+inline-Python pipe to a one-liner. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/pr_test_summary.yaml | 10 +- ci/pr_summary.sh | 132 ++++-------------- ci/utils/aggregate_pr.py | 4 +- ci/utils/pr_comment_helper.py | 182 +++++++++++++++++++++++++ 4 files changed, 215 insertions(+), 113 deletions(-) create mode 100644 ci/utils/pr_comment_helper.py diff --git a/.github/workflows/pr_test_summary.yaml b/.github/workflows/pr_test_summary.yaml index e8d5fcbb41..b57c7f8089 100644 --- a/.github/workflows/pr_test_summary.yaml +++ b/.github/workflows/pr_test_summary.yaml @@ -54,13 +54,9 @@ jobs: fi export PR_NUMBER - # Look up the PR's target branch via the API (the push event - # doesn't expose it directly). 
- GITHUB_BASE_REF=$(curl -s -L --max-time 30 \ - -H "Authorization: Bearer ${GITHUB_TOKEN}" \ - -H "Accept: application/vnd.github+json" \ - "https://api.github.com/repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}" \ - | python3 -c "import json,sys; d=json.load(sys.stdin); print(d.get('base',{}).get('ref','main'))") + # Push events don't expose the PR target branch; ask the API. + GITHUB_BASE_REF=$(python3 ci/utils/pr_comment_helper.py base-ref \ + --repo "${GITHUB_REPOSITORY}" --pr "${PR_NUMBER}") export GITHUB_BASE_REF echo "PR #${PR_NUMBER} → target branch: ${GITHUB_BASE_REF}" diff --git a/ci/pr_summary.sh b/ci/pr_summary.sh index d338b7b06f..65f1c914e2 100755 --- a/ci/pr_summary.sh +++ b/ci/pr_summary.sh @@ -2,23 +2,20 @@ # SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # -# Aggregate per-matrix PR test summaries and post (or update) a single -# sticky PR comment that classifies every failure as NEW (introduced by -# this PR) or KNOWN (recurring on nightly, known flaky, or flaked in this -# run only). +# Aggregate per-matrix PR test summaries from S3 and post (or update) a +# sticky comment on the PR that classifies every failure as NEW +# (introduced by this PR) or KNOWN (recurring on nightly, known flaky on +# nightly, or flaked in this run via pytest-rerunfailures). # -# Runs as a post-test job after every PR test job finishes. +# Runs as a post-test job after every PR test job finishes. See +# ci/utils/aggregate_pr.py for content generation and +# ci/utils/pr_comment_helper.py for GitHub API interactions. # -# Required environment: -# PR_NUMBER - PR number to comment on -# GITHUB_TOKEN - token with pull-requests:write -# GITHUB_REPOSITORY - owner/repo (e.g., NVIDIA/cuopt) -# GITHUB_RUN_ID - workflow run that produced summaries -# CUOPT_S3_URI, CUOPT_AWS_* - S3 bucket root + credentials -# -# Optional: -# GITHUB_BASE_REF - PR target branch (default: main) -# GITHUB_SERVER_URL - default: https://github.com +# Required env: +# PR_NUMBER, GITHUB_TOKEN, GITHUB_REPOSITORY, GITHUB_RUN_ID, +# CUOPT_S3_URI, CUOPT_AWS_ACCESS_KEY_ID, CUOPT_AWS_SECRET_ACCESS_KEY +# Optional env: +# GITHUB_BASE_REF (default: main), GITHUB_SERVER_URL, GITHUB_SHA set -euo pipefail @@ -26,25 +23,8 @@ SCRIPT_DIR="$(dirname "$(realpath "${BASH_SOURCE[0]}")")" OUTPUT_DIR="${PWD}/pr-aggregate-output" mkdir -p "${OUTPUT_DIR}" -COMMENT_MARKER="" - -if [ -z "${PR_NUMBER:-}" ]; then - echo "ERROR: PR_NUMBER is not set; cannot post comment." >&2 - exit 1 -fi -if [ -z "${GITHUB_REPOSITORY:-}" ]; then - echo "ERROR: GITHUB_REPOSITORY is not set." >&2 - exit 1 -fi - -TARGET_BRANCH="${GITHUB_BASE_REF:-main}" -RUN_DATE=$(date +%F) -GITHUB_RUN_URL="${GITHUB_SERVER_URL:-https://github.com}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID:-}" - -# Map CUOPT_AWS_* to standard AWS env vars for the aws CLI used by aggregate_pr.py -export AWS_ACCESS_KEY_ID="${CUOPT_AWS_ACCESS_KEY_ID:-${AWS_ACCESS_KEY_ID:-}}" -export AWS_SECRET_ACCESS_KEY="${CUOPT_AWS_SECRET_ACCESS_KEY:-${AWS_SECRET_ACCESS_KEY:-}}" -unset AWS_SESSION_TOKEN +: "${PR_NUMBER:?PR_NUMBER is required}" +: "${GITHUB_REPOSITORY:?GITHUB_REPOSITORY is required}" if [ -z "${CUOPT_S3_URI:-}" ]; then echo "WARNING: CUOPT_S3_URI is not set; nothing to aggregate." >&2 @@ -55,19 +35,24 @@ if [ -z "${GITHUB_RUN_ID:-}" ]; then exit 0 fi +# aws CLI uses the standard AWS_* env vars; map the cuOpt-prefixed secrets onto them. 
+export AWS_ACCESS_KEY_ID="${CUOPT_AWS_ACCESS_KEY_ID:-${AWS_ACCESS_KEY_ID:-}}" +export AWS_SECRET_ACCESS_KEY="${CUOPT_AWS_SECRET_ACCESS_KEY:-${AWS_SECRET_ACCESS_KEY:-}}" +unset AWS_SESSION_TOKEN + +GITHUB_RUN_URL="${GITHUB_SERVER_URL:-https://github.com}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" S3_PR_SUMMARIES_PREFIX="${CUOPT_S3_URI}ci_test_reports/pr/run-${GITHUB_RUN_ID}/" +COMMENT_FILE="${OUTPUT_DIR}/pr_comment.md" echo "Aggregating PR per-matrix summaries from ${S3_PR_SUMMARIES_PREFIX}" - python3 "${SCRIPT_DIR}/utils/aggregate_pr.py" \ --s3-pr-summaries-prefix "${S3_PR_SUMMARIES_PREFIX}" \ --output-dir "${OUTPUT_DIR}" \ - --target-branch "${TARGET_BRANCH}" \ + --target-branch "${GITHUB_BASE_REF:-main}" \ --sha "${GITHUB_SHA:-}" \ --github-run-url "${GITHUB_RUN_URL}" \ - --run-date "${RUN_DATE}" + --run-date "$(date +%F)" -COMMENT_FILE="${OUTPUT_DIR}/pr_comment.md" if [ ! -s "${COMMENT_FILE}" ]; then echo "No failures or flakes; not posting a PR comment." exit 0 @@ -75,71 +60,12 @@ fi if [ -z "${GITHUB_TOKEN:-}" ]; then echo "ERROR: GITHUB_TOKEN is not set; cannot post PR comment." >&2 - echo "--- comment body that would have been posted ---" - cat "${COMMENT_FILE}" + echo "--- comment body that would have been posted ---" >&2 + cat "${COMMENT_FILE}" >&2 exit 1 fi -# --- Find an existing classification comment --- -EXISTING_ID=$( - curl -s -L --max-time 30 \ - -H "Authorization: Bearer ${GITHUB_TOKEN}" \ - -H "Accept: application/vnd.github+json" \ - "https://api.github.com/repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments?per_page=100" \ - | python3 -c " -import json, sys -marker = ${COMMENT_MARKER@Q} -try: - comments = json.load(sys.stdin) -except json.JSONDecodeError: - sys.exit(0) -for c in comments: - body = c.get('body') or '' - if body.lstrip().startswith(marker): - print(c['id']) - break -" -) - -# --- Build the JSON payload from the file (handles newlines/quotes safely) --- -PAYLOAD=$(python3 -c " -import json, sys -body = open('${COMMENT_FILE}').read() -print(json.dumps({'body': body})) -") - -if [ -n "${EXISTING_ID}" ]; then - echo "Updating existing PR comment ${EXISTING_ID}" - curl -s -L --max-time 30 -X PATCH \ - -H "Authorization: Bearer ${GITHUB_TOKEN}" \ - -H "Accept: application/vnd.github+json" \ - -H "Content-Type: application/json" \ - -d "${PAYLOAD}" \ - "https://api.github.com/repos/${GITHUB_REPOSITORY}/issues/comments/${EXISTING_ID}" \ - > "${OUTPUT_DIR}/comment_response.json" -else - echo "Creating new PR comment" - curl -s -L --max-time 30 -X POST \ - -H "Authorization: Bearer ${GITHUB_TOKEN}" \ - -H "Accept: application/vnd.github+json" \ - -H "Content-Type: application/json" \ - -d "${PAYLOAD}" \ - "https://api.github.com/repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" \ - > "${OUTPUT_DIR}/comment_response.json" -fi - -# Report the comment URL for the workflow log -python3 -c " -import json -try: - d = json.load(open('${OUTPUT_DIR}/comment_response.json')) -except (json.JSONDecodeError, FileNotFoundError): - print('WARNING: no comment response captured') -else: - if 'html_url' in d: - print(f\"PR comment posted: {d['html_url']}\") - elif 'message' in d: - import sys - print(f\"ERROR: GitHub API returned {d.get('message')}\", file=sys.stderr) - sys.exit(1) -" +python3 "${SCRIPT_DIR}/utils/pr_comment_helper.py" post \ + --repo "${GITHUB_REPOSITORY}" \ + --pr "${PR_NUMBER}" \ + --body-file "${COMMENT_FILE}" diff --git a/ci/utils/aggregate_pr.py b/ci/utils/aggregate_pr.py index 547391e266..deae3bee22 100644 --- a/ci/utils/aggregate_pr.py 
+++ b/ci/utils/aggregate_pr.py @@ -40,9 +40,7 @@ download_summaries, load_local_summaries, ) - -# Hidden marker — must match the value used by ci/pr_summary.sh. -COMMENT_MARKER = "" +from pr_comment_helper import COMMENT_MARKER # noqa: E402 # Maximum total comment body size we are willing to post. GitHub allows # ~65k characters per comment, but we cap earlier and truncate the failure diff --git a/ci/utils/pr_comment_helper.py b/ci/utils/pr_comment_helper.py new file mode 100644 index 0000000000..36c7075bce --- /dev/null +++ b/ci/utils/pr_comment_helper.py @@ -0,0 +1,182 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""GitHub PR helpers for the PR test-summary workflow. + +Two subcommands: + + base-ref Print the PR's target branch (e.g., ``main``). + post Post (or update) a single sticky comment identified by a + hidden HTML-comment marker. + +Stdlib only (urllib + json) so this runs in slim CI containers without +extra installs. Both ``ci/pr_summary.sh`` and ``pr_test_summary.yaml`` +dispatch into this module rather than embedding inline Python. + +The hidden marker is defined here as the single source of truth and +re-used by ``aggregate_pr.py`` when it builds the comment body. +""" + +import argparse +import json +import os +import sys +from urllib import error, request + +GITHUB_API = "https://api.github.com" + +# Single source of truth for the sticky-comment marker. ``aggregate_pr.py`` +# imports this so the body it writes always matches the marker the poster +# searches for. +COMMENT_MARKER = "" + + +def _gh_request(method, url, token, payload=None, timeout=30): + """Issue a GitHub API request and return parsed JSON (or ``None``).""" + headers = { + "Authorization": f"Bearer {token}", + "Accept": "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28", + } + data = None + if payload is not None: + data = json.dumps(payload).encode() + headers["Content-Type"] = "application/json" + + req = request.Request(url, data=data, headers=headers, method=method) + try: + with request.urlopen(req, timeout=timeout) as resp: + body = resp.read().decode() + except error.HTTPError as exc: + detail = exc.read().decode()[:300] + raise RuntimeError( + f"GitHub API {method} {url} failed: {exc.code} {detail}" + ) from exc + except error.URLError as exc: + raise RuntimeError(f"GitHub API {method} {url} failed: {exc}") from exc + + if not body: + return None + try: + return json.loads(body) + except json.JSONDecodeError: + return None + + +def resolve_base_ref(repo, pr_number, token): + """Return the PR's target branch (e.g. ``main``).""" + data = _gh_request( + "GET", f"{GITHUB_API}/repos/{repo}/pulls/{pr_number}", token + ) + return ((data or {}).get("base") or {}).get("ref", "main") + + +def find_existing_comment_id(repo, pr_number, token, marker): + """Find a comment whose body starts with ``marker``. Returns id or None.""" + page = 1 + while True: + url = ( + f"{GITHUB_API}/repos/{repo}/issues/{pr_number}/comments" + f"?per_page=100&page={page}" + ) + comments = _gh_request("GET", url, token) or [] + for c in comments: + body = (c.get("body") or "").lstrip() + if body.startswith(marker): + return c["id"] + if len(comments) < 100: + return None + page += 1 + + +def post_or_update_comment(repo, pr_number, token, body, marker): + """Update the existing sticky comment if present; otherwise create one. + + Returns the comment URL (``html_url``) or ``""``. 
+ """ + existing_id = find_existing_comment_id(repo, pr_number, token, marker) + payload = {"body": body} + if existing_id is not None: + resp = _gh_request( + "PATCH", + f"{GITHUB_API}/repos/{repo}/issues/comments/{existing_id}", + token, + payload=payload, + ) + action = "Updated" + else: + resp = _gh_request( + "POST", + f"{GITHUB_API}/repos/{repo}/issues/{pr_number}/comments", + token, + payload=payload, + ) + action = "Created" + url = (resp or {}).get("html_url", "") + print(f"{action} PR comment: {url}") + return url + + +def _cmd_base_ref(args): + print(resolve_base_ref(args.repo, args.pr, args.token)) + return 0 + + +def _cmd_post(args): + with open(args.body_file) as f: + body = f.read() + if not body.strip(): + print("Empty body; nothing to post.") + return 0 + post_or_update_comment(args.repo, args.pr, args.token, body, args.marker) + return 0 + + +def _add_common_args(sp): + sp.add_argument("--repo", required=True, help="owner/name") + sp.add_argument("--pr", required=True, type=int, help="PR number") + sp.add_argument( + "--token", + default=os.environ.get("GITHUB_TOKEN", ""), + help="GitHub token (defaults to $GITHUB_TOKEN).", + ) + + +def main(): + p = argparse.ArgumentParser(description=__doc__) + sub = p.add_subparsers(dest="cmd", required=True) + + sp_base = sub.add_parser("base-ref", help="Print the PR's target branch.") + _add_common_args(sp_base) + sp_base.set_defaults(func=_cmd_base_ref) + + sp_post = sub.add_parser( + "post", help="Post or update a sticky PR comment." + ) + _add_common_args(sp_post) + sp_post.add_argument( + "--body-file", + required=True, + help="File whose contents are the comment body.", + ) + sp_post.add_argument( + "--marker", + default=COMMENT_MARKER, + help="Hidden HTML-comment marker that identifies the sticky comment.", + ) + sp_post.set_defaults(func=_cmd_post) + + args = p.parse_args() + if not args.token: + print("ERROR: --token or $GITHUB_TOKEN must be set.", file=sys.stderr) + return 2 + try: + return args.func(args) + except RuntimeError as exc: + print(f"ERROR: {exc}", file=sys.stderr) + return 1 + + +if __name__ == "__main__": + sys.exit(main()) From 3ed781889a384fb464530d124e1e5f05388c1d84 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Mon, 11 May 2026 10:42:12 -0500 Subject: [PATCH 04/19] ci(pr): tell checks workflow to ignore pr-test-summary rapids-check-pr-job-dependencies (run by the shared checks workflow) fails any PR job not listed in pr-builder.needs. pr-test-summary is informational and intentionally kept out of pr-builder's needs, so add it to ignored_pr_jobs. Without this the dependency check fails, the subsequent changed-files step is skipped, and the next step throws "Error reading JToken from JsonReader" trying to fromJSON the empty output. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/pr.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index a6056b1e67..25d3d627fe 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -359,6 +359,10 @@ jobs: uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@main with: enable_check_generated_files: false + # pr-test-summary is informational (posts the failure-classification + # comment) and intentionally not in pr-builder's needs, so the + # dependency checker must skip it. + ignored_pr_jobs: pr-test-summary conda-cpp-build: needs: [checks, compute-matrix-filters, changed-files] # Consumed by conda-cpp-tests, conda-python-build, and (transitively) docs-build. 
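
Illustration of the failure chain described above: GitHub's fromJSON on
the empty output of a skipped step fails like strict JSON parsing of an
empty string. A rough Python analogue (illustrative only; not code from
this repo or from GitHub Actions):

    import json

    changed_files_output = ""  # a skipped step leaves its output empty
    json.loads(changed_files_output)  # raises json.JSONDecodeError: Expecting value
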
From 5b025810801207e6e6276c4371c55f639f097f13 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Mon, 11 May 2026 10:49:31 -0500 Subject: [PATCH 05/19] ci(pr): force bash shell in pr-test-summary workflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit python:3.14-slim ships with dash as /bin/sh and GitHub Actions picks it for `run:` steps in this container. Dash doesn't understand `[[ ... ]]` or `set -o pipefail`, so the inline script failed with `/__w/_temp/X.sh: 4: [[: not found`. Add `defaults.run.shell: bash` at the workflow level, matching the existing convention in self_hosted_service_test.yaml and build_test_publish_images.yaml. Bash is already available in the container — only the shell selection needed forcing. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/pr_test_summary.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/pr_test_summary.yaml b/.github/workflows/pr_test_summary.yaml index b57c7f8089..1b8233b707 100644 --- a/.github/workflows/pr_test_summary.yaml +++ b/.github/workflows/pr_test_summary.yaml @@ -21,6 +21,13 @@ on: CUOPT_S3_URI: required: true +# The python:3.14-slim container's default /bin/sh is dash, which doesn't +# understand `[[ ... ]]` or `set -o pipefail`. Force bash for all run +# steps in this workflow (matches self_hosted_service_test.yaml convention). +defaults: + run: + shell: bash + jobs: pr-test-summary: runs-on: linux-amd64-cpu4 From 49da483cb2721f6331e6adfa2749f7aabd32b152 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Mon, 11 May 2026 13:32:31 -0500 Subject: [PATCH 06/19] test(temp): add always-failing test to verify PR comment classifier DO NOT MERGE. Exists only to confirm that pr-test-summary picks up a new failure end to end and posts it under "NEW failures" in the sticky PR comment. Remove this file before merging. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../cuopt/cuopt/tests/test_pr_comment_smoke.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 python/cuopt/cuopt/tests/test_pr_comment_smoke.py diff --git a/python/cuopt/cuopt/tests/test_pr_comment_smoke.py b/python/cuopt/cuopt/tests/test_pr_comment_smoke.py new file mode 100644 index 0000000000..d92204fe12 --- /dev/null +++ b/python/cuopt/cuopt/tests/test_pr_comment_smoke.py @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# TEMPORARY — DO NOT MERGE. +# +# This file exists solely to verify the PR test-classification comment +# end to end: it forces one always-failing test so the PR-mode classifier +# emits a `pr_classification=new` entry that lands under "NEW failures" +# in the sticky PR comment. Remove this file before merging PR #1194. + + +def test_pr_comment_smoke_always_fails(): + """Intentionally fails to exercise the pr-test-summary workflow.""" + assert False, ( + "intentional failure to verify the PR comment classifier — " + "this test should not exist on main; remove with PR #1194" + ) From 062e063e00319f4d64a72299c12f1d0e088983bf Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Tue, 12 May 2026 11:53:39 -0500 Subject: [PATCH 07/19] ci(pr): grant pull-requests:write to pr-test-summary caller A reusable workflow can only request permissions the caller already has. The repo's default GITHUB_TOKEN permissions don't include pull-requests:write, so the caller in pr.yaml must grant it explicitly for the comment poster. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/pr.yaml | 8 ++++++++ .github/workflows/pr_test_summary.yaml | 12 +++++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 217ff102d7..e9a0102c9a 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -639,6 +639,14 @@ jobs: - wheel-tests-cuopt-server - test-self-hosted-server if: always() + # The reusable workflow's job needs `contents: read` (for + # actions/checkout) and `pull-requests: write` (to post the sticky + # comment). Reusable workflows inherit the caller's GITHUB_TOKEN + # permissions, so the caller must grant at least the same set — + # the workflow-level default is `permissions: {}`. + permissions: + contents: read + pull-requests: write uses: ./.github/workflows/pr_test_summary.yaml secrets: CUOPT_AWS_ACCESS_KEY_ID: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} diff --git a/.github/workflows/pr_test_summary.yaml b/.github/workflows/pr_test_summary.yaml index 1b8233b707..c4712859fc 100644 --- a/.github/workflows/pr_test_summary.yaml +++ b/.github/workflows/pr_test_summary.yaml @@ -34,9 +34,12 @@ jobs: container: image: python:3.14-slim permissions: + contents: read pull-requests: write steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false - name: Install dependencies run: | apt-get update && apt-get install -y --no-install-recommends curl @@ -54,9 +57,12 @@ jobs: run: | set -euo pipefail # Resolve PR number from the pull-request/{N} branch ref. - PR_NUMBER=$(echo "${{ github.ref }}" | sed 's|refs/heads/pull-request/||') + # Read $GITHUB_REF via env, not a workflow expression — zizmor + # flags inlining github.ref into a shell as a code-injection + # vector. + PR_NUMBER=$(echo "$GITHUB_REF" | sed 's|refs/heads/pull-request/||') if ! [[ "${PR_NUMBER}" =~ ^[0-9]+$ ]]; then - echo "ERROR: could not parse PR number from ${{ github.ref }}" >&2 + echo "ERROR: could not parse PR number from $GITHUB_REF" >&2 exit 1 fi export PR_NUMBER From c4dfdc8c326b363d0391696678299ab919950f67 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Tue, 12 May 2026 13:26:39 -0500 Subject: [PATCH 08/19] test(temp): add segfault smoke test for PR comment crash path The assert-failure test already verifies the basic NEW-failure flow. Add a second test that sends SIGSEGV to pytest mid-run so the crash-marker path (write_pytest_crash_marker, added in #1191) writes a PROCESS_CRASH entry that the classifier surfaces under NEW failures with a SIGSEGV message. Named test_zz_* so it runs after the assert test in definition order and the assertion failure still lands in the partial JUnit XML before pytest is killed. DO NOT MERGE. Remove both smoke tests before merging PR #1194. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../cuopt/tests/test_pr_comment_smoke.py | 29 ++++++++++++++++--- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/python/cuopt/cuopt/tests/test_pr_comment_smoke.py b/python/cuopt/cuopt/tests/test_pr_comment_smoke.py index d92204fe12..5ab34461b8 100644 --- a/python/cuopt/cuopt/tests/test_pr_comment_smoke.py +++ b/python/cuopt/cuopt/tests/test_pr_comment_smoke.py @@ -3,10 +3,24 @@ # TEMPORARY — DO NOT MERGE. 
# -# This file exists solely to verify the PR test-classification comment -# end to end: it forces one always-failing test so the PR-mode classifier -# emits a `pr_classification=new` entry that lands under "NEW failures" -# in the sticky PR comment. Remove this file before merging PR #1194. +# This file verifies the PR test-classification comment end to end: +# +# 1. `test_pr_comment_smoke_always_fails` — a plain assertion failure. +# Confirms the classifier emits a normal `pr_classification=new` +# entry under "NEW failures". +# +# 2. `test_zz_pr_comment_smoke_segfault` — sends SIGSEGV to the pytest +# process to confirm PR #1191's crash-marker path: pytest dies +# mid-run, `write_pytest_crash_marker` writes -crash.xml, and +# the classifier surfaces a PROCESS_CRASH entry under NEW failures +# with a message containing "SIGSEGV". Named with a `zz_` prefix +# so it runs after the assert test in pytest's definition order +# and the assert failure still makes it into the partial JUnit XML. +# +# Remove this file before merging PR #1194. + +import os +import signal def test_pr_comment_smoke_always_fails(): @@ -15,3 +29,10 @@ def test_pr_comment_smoke_always_fails(): "intentional failure to verify the PR comment classifier — " "this test should not exist on main; remove with PR #1194" ) + + +def test_zz_pr_comment_smoke_segfault(): + """Intentionally crashes the pytest process to exercise the + crash-marker path added in PR #1191. Should produce a + PROCESS_CRASH entry in the PR comment with a SIGSEGV message.""" + os.kill(os.getpid(), signal.SIGSEGV) From 6984bc14267d0c4da48f77766d0ec9a4de4f1ec0 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Tue, 12 May 2026 15:41:41 -0500 Subject: [PATCH 09/19] ci(pr): surface crashes in a CAUTION callout above NEW failures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Crashes (PROCESS_CRASH entries written by PR #1191's write_pytest_crash_marker, or any failure whose message matches "crashed with SIG*") now render in a dedicated GitHub `[!CAUTION]` alert block at the top of the comment. The full crash diagnostic goes in a fenced code block — no more half-sentence truncation when the message gets cut at 140 chars. Bumps the table-cell short_msg cap from 140 to 300 chars so ordinary failure messages also stop ending mid-sentence. Crashes are split out of the NEW and KNOWN buckets before those sections render, so the CAUTION block is the single source of truth for crash entries (no duplication). Co-Authored-By: Claude Opus 4.7 (1M context) --- ci/utils/aggregate_pr.py | 69 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/ci/utils/aggregate_pr.py b/ci/utils/aggregate_pr.py index deae3bee22..42e6d83702 100644 --- a/ci/utils/aggregate_pr.py +++ b/ci/utils/aggregate_pr.py @@ -30,6 +30,7 @@ import argparse import json import os +import re import sys from datetime import datetime, timezone from pathlib import Path @@ -48,9 +49,33 @@ MAX_BODY_CHARS = 60000 MAX_ROWS_PER_BUCKET = 80 MAX_ERROR_SNIPPET = 600 +# Crash entries get their full message in a code block, capped only at a +# generous limit since the diagnostic line is the whole point of the entry. +MAX_CRASH_MESSAGE_CHARS = 2000 +# Crashes write a JUnit case named "PROCESS_CRASH" with a message +# containing "crashed with SIG..." (see ci/utils/crash_helpers.sh from +# PR #1191). Match either fingerprint defensively. 
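+# Illustrative classifications (hypothetical entries; shapes follow the
+# per-matrix summaries):
+#   _is_crash({"name": "PROCESS_CRASH"})                                -> True
+#   _is_crash({"name": "t", "message": "worker crashed with SIGSEGV"})  -> True
+#   _is_crash({"name": "t", "message": "AssertionError: 1 != 2"})       -> False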
+_CRASH_NAME = "PROCESS_CRASH" +_CRASH_MESSAGE_RE = re.compile(r"crashed with SIG[A-Z]+", re.IGNORECASE) -def _short_msg(msg, limit=140): + +def _is_crash(entry): + if entry.get("name") == _CRASH_NAME: + return True + return bool(_CRASH_MESSAGE_RE.search(entry.get("message", "") or "")) + + +def _split_crashes(failures): + """Partition a failures list into ``(crashes, non_crash)``.""" + crashes = [] + non_crash = [] + for entry in failures: + (crashes if _is_crash(entry) else non_crash).append(entry) + return crashes, non_crash + + +def _short_msg(msg, limit=300): """Single-line summary of an error message for table cells.""" if not msg: return "" @@ -155,6 +180,13 @@ def build_comment_body( if not new_failures and not recurring and not flaky: return "" + # Pull crashes out so they render in a dedicated CAUTION block above + # the normal NEW/KNOWN tables and don't get drowned out by ordinary + # assertion failures. + new_crashes, new_failures = _split_crashes(new_failures) + recurring_crashes, recurring = _split_crashes(recurring) + all_crashes = new_crashes + recurring_crashes + broken_on_nightly, known_flaky_nightly, flaked_in_pr_run = ( _classify_known_subgroups(recurring, flaky) ) @@ -164,6 +196,8 @@ def build_comment_body( parts.append("") headline = [] + if all_crashes: + headline.append(f"**{len(all_crashes)} CRASH(es)**") if new_failures: headline.append(f"**{len(new_failures)} NEW** failure(s)") known_total = ( @@ -197,6 +231,39 @@ def build_comment_body( parts.append("\n
") parts.append("") + # --- CRASHES (top of comment, GitHub red-alert callout) --- + if all_crashes: + parts.append("> [!CAUTION]") + parts.append( + "> **CRASHES detected — a test process was terminated by a signal mid-run.**" + ) + parts.append( + "> These need urgent investigation. The JUnit XML was not " + "finalized, so the specific test that triggered the crash " + "may not be identified; check the workflow run log for the " + "last test invoked before the signal." + ) + parts.append("") + for entry in all_crashes: + heading_tag = ( + "NEW" if entry.get("pr_classification") == "new" else "KNOWN" + ) + parts.append( + f"#### `{entry.get('suite', '?')}` — " + f"`{entry.get('name', 'PROCESS_CRASH')}` " + f"_[{entry['test_type']} / {entry['matrix_label']}]_ " + f"— {heading_tag}" + ) + msg = (entry.get("message") or "").strip() + if msg: + if len(msg) > MAX_CRASH_MESSAGE_CHARS: + msg = msg[:MAX_CRASH_MESSAGE_CHARS] + "\n…[truncated]" + parts.append("") + parts.append("```") + parts.append(msg) + parts.append("```") + parts.append("") + # --- NEW failures --- if new_failures: parts.append("### NEW failures (likely introduced by this PR)") From d0e0957693047861ea941d519e1a37f216fe560b Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Tue, 12 May 2026 15:44:22 -0500 Subject: [PATCH 10/19] test(temp): fix D209 docstring closing in segfault smoke test Multi-line docstring closing quotes must be on their own line. Co-Authored-By: Claude Opus 4.7 (1M context) --- python/cuopt/cuopt/tests/test_pr_comment_smoke.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/cuopt/cuopt/tests/test_pr_comment_smoke.py b/python/cuopt/cuopt/tests/test_pr_comment_smoke.py index 5ab34461b8..6f940db4f2 100644 --- a/python/cuopt/cuopt/tests/test_pr_comment_smoke.py +++ b/python/cuopt/cuopt/tests/test_pr_comment_smoke.py @@ -34,5 +34,6 @@ def test_pr_comment_smoke_always_fails(): def test_zz_pr_comment_smoke_segfault(): """Intentionally crashes the pytest process to exercise the crash-marker path added in PR #1191. Should produce a - PROCESS_CRASH entry in the PR comment with a SIGSEGV message.""" + PROCESS_CRASH entry in the PR comment with a SIGSEGV message. + """ os.kill(os.getpid(), signal.SIGSEGV) From 748cbec5b5100c12bad636c1291795373a86d03a Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 13 May 2026 10:30:58 -0500 Subject: [PATCH 11/19] test(temp): scope segfault smoke test to py3.11 matrix Verify the PR comment shows the CAUTION block on exactly one matrix entry (cuda*-py3.11-x86_64) while other Python versions stay green. Uses pytest.mark.skipif on sys.version_info so non-3.11 runs skip the crash without affecting the assertion-failure smoke test. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- python/cuopt/cuopt/tests/test_pr_comment_smoke.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/python/cuopt/cuopt/tests/test_pr_comment_smoke.py b/python/cuopt/cuopt/tests/test_pr_comment_smoke.py index 6f940db4f2..aa04bfcd9f 100644 --- a/python/cuopt/cuopt/tests/test_pr_comment_smoke.py +++ b/python/cuopt/cuopt/tests/test_pr_comment_smoke.py @@ -21,6 +21,9 @@ import os import signal +import sys + +import pytest def test_pr_comment_smoke_always_fails(): @@ -31,6 +34,14 @@ def test_pr_comment_smoke_always_fails(): ) +@pytest.mark.skipif( + sys.version_info[:2] != (3, 11), + reason=( + "Smoke crash is scoped to the py3.11 matrix so the CAUTION block " + "in the PR comment can be verified to appear on exactly one " + "matrix entry while other Python versions stay green-on-crash." + ), +) def test_zz_pr_comment_smoke_segfault(): """Intentionally crashes the pytest process to exercise the crash-marker path added in PR #1191. Should produce a From 868f6b8adb7907611f534574b050bb19779e4bab Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 13 May 2026 10:32:14 -0500 Subject: [PATCH 12/19] ci(pr): collapse per-crash details under the CAUTION callout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Keep the CAUTION block as the headline; move per-crash heading and the full diagnostic into a
<details> tab labelled "N crash(es) — click to expand details". Same
content, click-to-reveal.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 ci/utils/aggregate_pr.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/ci/utils/aggregate_pr.py b/ci/utils/aggregate_pr.py
index 42e6d83702..63ae24091d 100644
--- a/ci/utils/aggregate_pr.py
+++ b/ci/utils/aggregate_pr.py
@@ -244,6 +244,15 @@
             "last test invoked before the signal."
         )
         parts.append("")
+        # Collapse per-crash details under a hidden tab — the CAUTION
+        # block is the headline; details are one click away.
+        crash_word = "crash" if len(all_crashes) == 1 else "crashes"
+        parts.append("<details>")
+        parts.append(
+            f"<summary>{len(all_crashes)} {crash_word}"
+            " — click to expand details</summary>"
+        )
+        parts.append("")  # blank line so the body renders as Markdown
         for entry in all_crashes:
             heading_tag = (
                 "NEW" if entry.get("pr_classification") == "new" else "KNOWN"
@@ -263,6 +272,8 @@
                 parts.append(msg)
                 parts.append("```")
                 parts.append("")
+    parts.append("</details>")
+    parts.append("")

From 0be6ac9169d091cca73d92a9578172f8e786cede Mon Sep 17 00:00:00 2001
From: Ramakrishna Prabhu
Date: Wed, 13 May 2026 10:37:13 -0500
Subject: [PATCH 13/19] ci(pr): wrap NEW failures section in a red CAUTION
 callout

Replace the `### NEW failures (likely introduced by this PR)` plain
heading with a `> [!CAUTION]` alert block that GitHub renders with a
red border and caution icon.

Visual hierarchy is now:

- CAUTION (crashes, collapsible details)
- CAUTION (NEW failures + table)
- plain h3 (KNOWN issues + sub-tables)

so red callouts cover "stuff this PR may have caused" and plain
sections cover pre-existing issues.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 ci/utils/aggregate_pr.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/ci/utils/aggregate_pr.py b/ci/utils/aggregate_pr.py
index 63ae24091d..831362500c 100644
--- a/ci/utils/aggregate_pr.py
+++ b/ci/utils/aggregate_pr.py
@@ -275,9 +275,12 @@
     parts.append("</details>")
") parts.append("") - # --- NEW failures --- + # --- NEW failures (red CAUTION callout as the section header) --- if new_failures: - parts.append("### NEW failures (likely introduced by this PR)") + parts.append("> [!CAUTION]") + parts.append( + f"> **NEW failures ({len(new_failures)}) — likely introduced by this PR**" + ) parts.append("") parts.append( _failure_table( From fc35b3688f83a793259c5073a26ab4df6a8f735c Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 13 May 2026 14:09:07 -0500 Subject: [PATCH 14/19] ci(pr): remove smoke tests and unused _details_block helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop the temporary python/cuopt/cuopt/tests/test_pr_comment_smoke.py (verified the PR comment classifier and crash callout end-to-end). Also remove the dead _details_block helper and MAX_ERROR_SNIPPET constant from aggregate_pr.py — leftover from an early draft that was superseded by the inline CRASHES code block. Co-Authored-By: Claude Opus 4.7 (1M context) --- ci/utils/aggregate_pr.py | 13 ----- .../cuopt/tests/test_pr_comment_smoke.py | 50 ------------------- 2 files changed, 63 deletions(-) delete mode 100644 python/cuopt/cuopt/tests/test_pr_comment_smoke.py diff --git a/ci/utils/aggregate_pr.py b/ci/utils/aggregate_pr.py index 831362500c..086d5cd6e1 100644 --- a/ci/utils/aggregate_pr.py +++ b/ci/utils/aggregate_pr.py @@ -48,7 +48,6 @@ # tables so the comment stays readable. MAX_BODY_CHARS = 60000 MAX_ROWS_PER_BUCKET = 80 -MAX_ERROR_SNIPPET = 600 # Crash entries get their full message in a code block, capped only at a # generous limit since the diagnostic line is the whole point of the entry. MAX_CRASH_MESSAGE_CHARS = 2000 @@ -86,18 +85,6 @@ def _short_msg(msg, limit=300): return summary.replace("|", "\\|") -def _details_block(msg): - """Render an error message as a collapsible
block.""" - if not msg: - return "" - snippet = msg.strip() - if len(snippet) > MAX_ERROR_SNIPPET: - snippet = snippet[:MAX_ERROR_SNIPPET] + "\n…[truncated]" - return ( - f"
error\n\n```\n{snippet}\n```\n
" - ) - - def _classify_known_subgroups(recurring, flaky): """Split the KNOWN bucket into the three sub-groups for the comment. diff --git a/python/cuopt/cuopt/tests/test_pr_comment_smoke.py b/python/cuopt/cuopt/tests/test_pr_comment_smoke.py deleted file mode 100644 index aa04bfcd9f..0000000000 --- a/python/cuopt/cuopt/tests/test_pr_comment_smoke.py +++ /dev/null @@ -1,50 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 - -# TEMPORARY — DO NOT MERGE. -# -# This file verifies the PR test-classification comment end to end: -# -# 1. `test_pr_comment_smoke_always_fails` — a plain assertion failure. -# Confirms the classifier emits a normal `pr_classification=new` -# entry under "NEW failures". -# -# 2. `test_zz_pr_comment_smoke_segfault` — sends SIGSEGV to the pytest -# process to confirm PR #1191's crash-marker path: pytest dies -# mid-run, `write_pytest_crash_marker` writes -crash.xml, and -# the classifier surfaces a PROCESS_CRASH entry under NEW failures -# with a message containing "SIGSEGV". Named with a `zz_` prefix -# so it runs after the assert test in pytest's definition order -# and the assert failure still makes it into the partial JUnit XML. -# -# Remove this file before merging PR #1194. - -import os -import signal -import sys - -import pytest - - -def test_pr_comment_smoke_always_fails(): - """Intentionally fails to exercise the pr-test-summary workflow.""" - assert False, ( - "intentional failure to verify the PR comment classifier — " - "this test should not exist on main; remove with PR #1194" - ) - - -@pytest.mark.skipif( - sys.version_info[:2] != (3, 11), - reason=( - "Smoke crash is scoped to the py3.11 matrix so the CAUTION block " - "in the PR comment can be verified to appear on exactly one " - "matrix entry while other Python versions stay green-on-crash." - ), -) -def test_zz_pr_comment_smoke_segfault(): - """Intentionally crashes the pytest process to exercise the - crash-marker path added in PR #1191. Should produce a - PROCESS_CRASH entry in the PR comment with a SIGSEGV message. - """ - os.kill(os.getpid(), signal.SIGSEGV) From af327d41861844d5fa27df6876114daf65dc00e4 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Thu, 14 May 2026 11:30:39 -0500 Subject: [PATCH 15/19] =?UTF-8?q?ci(pr):=20address=20coderabbit=20review?= =?UTF-8?q?=20=E2=80=94=20type=20hints,=20docstrings,=20classifier=20bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bug fix in classify_pr_against_history (nightly_report.py): Check `is_flaky` before `status == "active"` for hard failures. A test record marked both active and flaky was previously bucketed only as `known_recurring`, never surfacing `known_flaky_nightly`. The flaky-in-PR-run loop already had this precedence; align the hard-fail loop to match. Documentation/type-hint review fixes: - aggregate_common.py: type hints + Google-style Args/Returns/Raises docstrings on download_summaries, load_local_summaries, aggregate_summaries, html_escape. - aggregate_pr.py: same on build_comment_body and main(). - nightly_report.py: same on classify_pr_against_history. - pr_comment_helper.py: same on resolve_base_ref, find_existing_comment_id, post_or_update_comment, main(). All files now `from __future__ import annotations` so the modern ``list[dict[str, Any]]`` / ``int | None`` syntax works without needing typing.List/Dict/Optional. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- ci/utils/aggregate_common.py | 104 +++++++++++++++++++++++++++------- ci/utils/aggregate_pr.py | 58 +++++++++++++++++-- ci/utils/nightly_report.py | 59 ++++++++++++++----- ci/utils/pr_comment_helper.py | 78 ++++++++++++++++++++++--- 4 files changed, 253 insertions(+), 46 deletions(-) diff --git a/ci/utils/aggregate_common.py b/ci/utils/aggregate_common.py index efe8da82b3..dee6e33498 100644 --- a/ci/utils/aggregate_common.py +++ b/ci/utils/aggregate_common.py @@ -13,10 +13,13 @@ respective aggregator scripts since their output formats diverge. """ +from __future__ import annotations + import json import os import sys from pathlib import Path +from typing import Any # Ensure ci/utils is importable when invoked from a sibling script sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) @@ -28,14 +31,32 @@ # --------------------------------------------------------------------------- -def download_summaries(s3_prefix, local_dir, s3_fallback_prefix=""): - """Download all JSON summaries from S3 prefix into local_dir. - - If s3_fallback_prefix is set and no summaries are found at s3_prefix, - retries with the fallback (used when the run-scoped path is empty - because uploads landed under the branch-scoped path). - - Returns a list of loaded summary dicts. +def download_summaries( + s3_prefix: str, + local_dir: str | os.PathLike[str], + s3_fallback_prefix: str = "", +) -> list[dict[str, Any]]: + """Download all JSON summaries from an S3 prefix into a local directory. + + If ``s3_fallback_prefix`` is set and no summaries are found at + ``s3_prefix``, retries with the fallback (used when the run-scoped + path is empty because uploads landed under the branch-scoped path). + + Args: + s3_prefix: Primary S3 URI prefix to list (e.g., + ``s3://bucket/ci_test_reports/pr/run-12345/``). + local_dir: Local directory to download into. Created if absent. + s3_fallback_prefix: Optional secondary prefix to try when + ``s3_prefix`` yields no ``*.json`` summaries. + + Returns: + List of loaded summary dicts. Files that fail to parse are + skipped with a warning to stderr; the list contains only + successfully loaded entries. + + Raises: + This function does not raise. Underlying S3 / IO / JSON parse + errors are caught and logged. """ local_dir = Path(local_dir) local_dir.mkdir(parents=True, exist_ok=True) @@ -82,8 +103,22 @@ def download_summaries(s3_prefix, local_dir, s3_fallback_prefix=""): return summaries -def load_local_summaries(local_dir): - """Load summaries from a local directory (for testing without S3).""" +def load_local_summaries( + local_dir: str | os.PathLike[str], +) -> list[dict[str, Any]]: + """Load JSON summaries from a local directory (for testing without S3). + + Args: + local_dir: Directory containing ``*.json`` per-matrix summaries. + + Returns: + List of loaded summary dicts. Files that fail to parse are + skipped with a warning to stderr. + + Raises: + This function does not raise. IO / JSON parse errors are caught + and logged. + """ local_dir = Path(local_dir) summaries = [] for json_file in sorted(local_dir.glob("*.json")): @@ -102,14 +137,32 @@ def load_local_summaries(local_dir): # --------------------------------------------------------------------------- -def aggregate_summaries(summaries): - """Merge per-matrix summaries into a consolidated view. 
-
-    Returns a dict with:
-    - matrix_grid: list of {test_type, matrix_label, status, counts, ...}
-    - totals: aggregate counts
-    - all_new_failures, all_recurring_failures, all_flaky_tests,
-      all_resolved_tests: merged lists with matrix context added
+def aggregate_summaries(
+    summaries: list[dict[str, Any]],
+) -> dict[str, Any]:
+    """Merge per-matrix summaries into a single consolidated view.
+
+    Args:
+        summaries: List of per-matrix summary dicts as produced by
+            ``nightly_report.py`` (either nightly or PR mode). Each
+            dict is expected to provide at least ``test_type``,
+            ``matrix_label``, ``counts``, and the per-bucket failure
+            lists; missing fields default to safe values.
+
+    Returns:
+        Consolidated dict with keys:
+
+        - ``matrix_grid``: list of per-matrix status dicts (sorted
+          by ``test_type`` then ``matrix_label``).
+        - ``totals``: aggregate test counts across all matrices.
+        - ``all_new_failures``, ``all_recurring_failures``,
+          ``all_flaky_tests``, ``all_resolved_tests``: merged failure
+          lists with per-entry ``test_type`` / ``matrix_label``
+          context added.
+        - ``has_new_flaky``: True iff any summary flagged a new flaky.
+
+    Raises:
+        This function does not raise. Malformed entries are tolerated.
     """
     grid = []
     totals = {
@@ -187,7 +240,20 @@
 
 
-def html_escape(text):
+def html_escape(text: Any) -> str:
+    """Escape HTML special characters in ``text``.
+
+    Args:
+        text: Any value; converted to ``str`` before escaping.
+
+    Returns:
+        ``str(text)`` with ``&``, ``<``, ``>`` and ``"`` replaced by
+        their HTML entity equivalents. Safe for inclusion in HTML
+        attribute values and element bodies.
+
+    Raises:
+        This function does not raise.
+    """
     return (
         str(text)
         .replace("&", "&amp;")
diff --git a/ci/utils/aggregate_pr.py b/ci/utils/aggregate_pr.py
index 086d5cd6e1..73f6273602 100644
--- a/ci/utils/aggregate_pr.py
+++ b/ci/utils/aggregate_pr.py
@@ -27,6 +27,8 @@
 file and the poster skips commenting.
 """
 
+from __future__ import annotations
+
 import argparse
 import json
 import os
@@ -34,6 +36,7 @@
 import sys
 from datetime import datetime, timezone
 from pathlib import Path
+from typing import Any
 
 sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
 from aggregate_common import (  # noqa: E402
@@ -154,11 +157,37 @@
 
 
 def build_comment_body(
-    agg, target_branch, github_run_url, sha="", run_date=""
-):
-    """Build the Markdown body for the sticky PR comment.
-
-    Returns an empty string if there is nothing worth commenting on.
+    agg: dict[str, Any],
+    target_branch: str,
+    github_run_url: str,
+    sha: str = "",
+    run_date: str = "",
+) -> str:
+    """Build the Markdown body for the sticky PR test-classification comment.
+
+    Renders a CAUTION callout for crashes (with collapsible details),
+    a CAUTION callout + table for NEW failures, and plain sub-sections
+    for KNOWN issues (recurring on nightly, known flaky on nightly,
+    flaked in this PR run only).
+
+    Args:
+        agg: Output of ``aggregate_summaries(...)``. Must contain
+            ``all_new_failures``, ``all_recurring_failures``,
+            ``all_flaky_tests``, and ``matrix_grid``.
+        target_branch: PR target branch (e.g., ``main``); surfaced in
+            the comment meta line.
+        github_run_url: Workflow run URL — linked from the meta line.
+        sha: PR head SHA (truncated to 12 chars for display).
+        run_date: ``YYYY-MM-DD`` run date string.
+ + Returns: + The full Markdown body, prefixed with ``COMMENT_MARKER`` and + capped at ``MAX_BODY_CHARS`` (with a "comment truncated" note + appended when the cap is hit). Empty string when there are + no failures or flakes — callers must skip posting in that case. + + Raises: + This function does not raise. """ new_failures = agg["all_new_failures"] recurring = agg["all_recurring_failures"] @@ -377,7 +406,24 @@ def build_comment_body( return body -def main(): +def main() -> int: + """CLI entry point — aggregate PR summaries and write the comment body. + + Reads per-matrix summary JSONs (from S3 or a local directory), + classifies failures, writes ``pr_comment.md`` and + ``pr_consolidated.json`` to ``--output-dir``. When all tests + pass, writes an empty ``pr_comment.md`` so the poster skips + commenting. + + Returns: + ``0`` on success (including the all-green / empty-body case), + or ``2`` when neither an S3 prefix nor a local directory was + provided. + + Raises: + SystemExit: Indirectly via ``argparse`` if argument parsing + fails. + """ parser = argparse.ArgumentParser( description="Aggregate per-matrix PR test summaries into a Markdown PR comment." ) diff --git a/ci/utils/nightly_report.py b/ci/utils/nightly_report.py index 782748e742..aad46304f3 100755 --- a/ci/utils/nightly_report.py +++ b/ci/utils/nightly_report.py @@ -34,6 +34,8 @@ --s3-summary-uri s3://bucket/ci_test_reports/nightly/summaries/2026-04-13/python-cuda12.9-py3.12-x86_64.json """ +from __future__ import annotations + import argparse import json import os @@ -41,6 +43,7 @@ from collections import defaultdict from datetime import datetime, timezone from pathlib import Path +from typing import Any from xml.etree import ElementTree # Ensure ci/utils is importable when invoked as a script @@ -373,8 +376,15 @@ def update_history(history, classified, sha, date_str): # --------------------------------------------------------------------------- -def classify_pr_against_history(classified, history): - """Classify PR run results by comparing against the nightly history. +def classify_pr_against_history( + classified: dict[str, list[dict[str, Any]]], + history: dict[str, Any], +) -> tuple[ + list[dict[str, Any]], + list[dict[str, Any]], + list[dict[str, Any]], +]: + """Classify PR run results against the nightly failure history. Read-only: never mutates ``history``. Each failure is annotated with a ``pr_classification`` field used by the PR comment renderer. @@ -384,43 +394,66 @@ def classify_pr_against_history(classified, history): - ``new_failures``: hard failures the PR introduced. ``pr_classification=new``. - ``recurring_failures``: hard failures known to nightly. - ``pr_classification`` is ``known_recurring`` (active on nightly) or - ``known_flaky_nightly`` (history flagged the test flaky). + ``pr_classification`` is ``known_flaky_nightly`` when the history + flagged the test as cross-run flaky (checked first, since it is a + more specific signal than mere "currently active"), or + ``known_recurring`` when the test is active on nightly but not + flagged flaky. - ``flaky_tests``: tests that passed on retry within the PR run. ``pr_classification`` is ``known_flaky_nightly`` (already known flaky), ``known_recurring`` (hard-failing on nightly but flaked here), or ``known_flaky_pr`` (only flaked in this PR run). - Returns ``(new_failures, recurring_failures, flaky_tests)``. 
+ Args: + classified: Output of ``classify_failures(...)`` — a mapping with + keys ``passed``, ``failed``, ``error``, ``flaky``, ``skipped`` + whose values are lists of per-testcase dicts. + history: Loaded nightly history JSON. Expected to contain a + ``tests`` mapping keyed by ``suite::classname::name``; absent + or malformed input is treated as empty history. + + Returns: + Tuple ``(new_failures, recurring_failures, flaky_tests)``. Each + list contains the original testcase dict augmented with a + ``pr_classification`` string and, where applicable, ``first_seen`` + and ``failure_count`` keys pulled from history. + + Raises: + This function does not raise. Malformed ``history`` (missing + ``tests`` mapping, missing per-test fields) is tolerated. """ tests_history = history.get("tests", {}) - new_failures = [] - recurring_failures = [] - flaky_tests = [] + new_failures: list[dict[str, Any]] = [] + recurring_failures: list[dict[str, Any]] = [] + flaky_tests: list[dict[str, Any]] = [] - def _key(entry): + def _key(entry: dict[str, Any]) -> str: return f"{entry['suite']}::{entry['classname']}::{entry['name']}" # Hard failures: failed/errored in PR run, did NOT pass on retry. + # Check ``is_flaky`` before ``status == 'active'`` so a test marked + # both active and flaky lands in ``known_flaky_nightly`` (the more + # specific signal). Matches the precedence in the flaky-in-run loop + # below. for entry in classified["failed"] + classified["error"]: rec = tests_history.get(_key(entry)) - if rec and rec.get("status") == "active": + if rec and rec.get("is_flaky"): recurring_failures.append( { **entry, "first_seen": rec.get("first_seen_date", "unknown"), "failure_count": rec.get("failure_count", 0), - "pr_classification": "known_recurring", + "pr_classification": "known_flaky_nightly", } ) - elif rec and rec.get("is_flaky"): + elif rec and rec.get("status") == "active": recurring_failures.append( { **entry, "first_seen": rec.get("first_seen_date", "unknown"), "failure_count": rec.get("failure_count", 0), - "pr_classification": "known_flaky_nightly", + "pr_classification": "known_recurring", } ) else: diff --git a/ci/utils/pr_comment_helper.py b/ci/utils/pr_comment_helper.py index 36c7075bce..ce87c6f26d 100644 --- a/ci/utils/pr_comment_helper.py +++ b/ci/utils/pr_comment_helper.py @@ -18,6 +18,8 @@ re-used by ``aggregate_pr.py`` when it builds the comment body. """ +from __future__ import annotations + import argparse import json import os @@ -64,16 +66,47 @@ def _gh_request(method, url, token, payload=None, timeout=30): return None -def resolve_base_ref(repo, pr_number, token): - """Return the PR's target branch (e.g. ``main``).""" +def resolve_base_ref(repo: str, pr_number: int, token: str) -> str: + """Return the PR's target branch (e.g. ``main``). + + Args: + repo: GitHub ``owner/name`` slug. + pr_number: Pull-request number. + token: GitHub token with at least ``pull-requests: read``. + + Returns: + The PR's base ref, or ``"main"`` if the API response lacks one. + + Raises: + RuntimeError: If the underlying GitHub API call fails. + """ data = _gh_request( "GET", f"{GITHUB_API}/repos/{repo}/pulls/{pr_number}", token ) return ((data or {}).get("base") or {}).get("ref", "main") -def find_existing_comment_id(repo, pr_number, token, marker): - """Find a comment whose body starts with ``marker``. Returns id or None.""" +def find_existing_comment_id( + repo: str, pr_number: int, token: str, marker: str +) -> int | None: + """Find the id of a PR comment whose body starts with ``marker``. 
+ + Paginates through issue comments (100 per page) until a match is + found or all pages are exhausted. + + Args: + repo: GitHub ``owner/name`` slug. + pr_number: Pull-request number. + token: GitHub token with ``pull-requests: read``. + marker: Hidden HTML-comment marker that identifies the sticky + comment (matched after stripping leading whitespace). + + Returns: + The integer comment id, or ``None`` if no comment matches. + + Raises: + RuntimeError: If a GitHub API call fails. + """ page = 1 while True: url = ( @@ -90,10 +123,29 @@ def find_existing_comment_id(repo, pr_number, token, marker): page += 1 -def post_or_update_comment(repo, pr_number, token, body, marker): - """Update the existing sticky comment if present; otherwise create one. +def post_or_update_comment( + repo: str, pr_number: int, token: str, body: str, marker: str +) -> str: + """Update the existing sticky PR comment if present; otherwise create one. + + Looks up an existing comment by ``marker``; if found, ``PATCH``es it + in place; otherwise ``POST``s a new one. + + Args: + repo: GitHub ``owner/name`` slug. + pr_number: Pull-request number. + token: GitHub token with ``pull-requests: write``. + body: Full Markdown body to post (must already include + ``marker`` somewhere near the top for future lookups). + marker: Hidden HTML-comment marker that identifies the sticky + comment. - Returns the comment URL (``html_url``) or ``""``. + Returns: + The created/updated comment's ``html_url``, or ``""`` if the + API response lacked one. + + Raises: + RuntimeError: If a GitHub API call fails. """ existing_id = find_existing_comment_id(repo, pr_number, token, marker) payload = {"body": body} @@ -143,7 +195,17 @@ def _add_common_args(sp): ) -def main(): +def main() -> int: + """Dispatch to the requested subcommand. + + Returns: + ``0`` on success, ``1`` if a GitHub API call failed, or ``2`` + if a token was not provided. + + Raises: + SystemExit: Indirectly via ``argparse`` if argument parsing + fails. + """ p = argparse.ArgumentParser(description=__doc__) sub = p.add_subparsers(dest="cmd", required=True) From df48bd2c8f4d97503bdfb5a91bdf0feac2572477 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Thu, 14 May 2026 11:45:47 -0500 Subject: [PATCH 16/19] =?UTF-8?q?ci(pr):=20address=20review=20=E2=80=94=20?= =?UTF-8?q?strict=20env,=20drop=20default-shell,=20simplify?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address jameslamb's review on PR #1194: - pr.yaml: drop the now-self-explanatory comment block above the pr-test-summary `permissions:` block. - pr_test_summary.yaml: * remove `defaults.run.shell: bash` — GHA's default already is bash -e {0}, so the override was redundant. * split `apt-get update && apt-get install` onto two lines. * drop the unused GITHUB_SERVER_URL env (now that pr_summary.sh hard-codes https://github.com). - pr_summary.sh: rewrite to be strict. * all required env vars validated up front via `: "${VAR:?}"` assertions — including GITHUB_TOKEN, which now fails the job loudly before aggregate_pr.py runs instead of after. * remove every `:-default` fallback (CUOPT_AWS_*, GITHUB_BASE_REF, GITHUB_SHA, GITHUB_SERVER_URL). * hard-code https://github.com instead of templating GITHUB_SERVER_URL — this code is GitHub-specific and there is no real use case for overriding the server URL. * remove the silent "WARNING; exit 0" fallbacks for missing CUOPT_S3_URI / GITHUB_RUN_ID — the strict assertions cover them and the job should fail loudly when misconfigured. 
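
For reference, `: "${VAR:?required}"` expands VAR and aborts the script
with a non-zero status and the message "required" when VAR is unset or
empty. A rough Python analogue of the same fail-fast contract
(illustrative only; not code from this PR):

    import os
    import sys

    for var in ("PR_NUMBER", "GITHUB_REPOSITORY", "GITHUB_RUN_ID"):
        if not os.environ.get(var):
            sys.exit(f"{var}: required")  # mirrors : "${VAR:?required}"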
Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/pr.yaml | 5 --- .github/workflows/pr_test_summary.yaml | 11 +----- ci/pr_summary.sh | 52 +++++++++++--------------- 3 files changed, 24 insertions(+), 44 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index e9a0102c9a..48e43586a3 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -639,11 +639,6 @@ jobs: - wheel-tests-cuopt-server - test-self-hosted-server if: always() - # The reusable workflow's job needs `contents: read` (for - # actions/checkout) and `pull-requests: write` (to post the sticky - # comment). Reusable workflows inherit the caller's GITHUB_TOKEN - # permissions, so the caller must grant at least the same set — - # the workflow-level default is `permissions: {}`. permissions: contents: read pull-requests: write diff --git a/.github/workflows/pr_test_summary.yaml b/.github/workflows/pr_test_summary.yaml index c4712859fc..3d983ca99d 100644 --- a/.github/workflows/pr_test_summary.yaml +++ b/.github/workflows/pr_test_summary.yaml @@ -21,13 +21,6 @@ on: CUOPT_S3_URI: required: true -# The python:3.14-slim container's default /bin/sh is dash, which doesn't -# understand `[[ ... ]]` or `set -o pipefail`. Force bash for all run -# steps in this workflow (matches self_hosted_service_test.yaml convention). -defaults: - run: - shell: bash - jobs: pr-test-summary: runs-on: linux-amd64-cpu4 @@ -42,7 +35,8 @@ jobs: persist-credentials: false - name: Install dependencies run: | - apt-get update && apt-get install -y --no-install-recommends curl + apt-get update + apt-get install -y --no-install-recommends curl pip install awscli - name: Aggregate per-matrix summaries and post sticky comment env: @@ -51,7 +45,6 @@ jobs: CUOPT_S3_URI: ${{ secrets.CUOPT_S3_URI }} GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_RUN_ID: ${{ github.run_id }} - GITHUB_SERVER_URL: ${{ github.server_url }} GITHUB_SHA: ${{ github.sha }} GITHUB_TOKEN: ${{ github.token }} run: | diff --git a/ci/pr_summary.sh b/ci/pr_summary.sh index 65f1c914e2..feed14ab99 100755 --- a/ci/pr_summary.sh +++ b/ci/pr_summary.sh @@ -11,36 +11,35 @@ # ci/utils/aggregate_pr.py for content generation and # ci/utils/pr_comment_helper.py for GitHub API interactions. # -# Required env: -# PR_NUMBER, GITHUB_TOKEN, GITHUB_REPOSITORY, GITHUB_RUN_ID, -# CUOPT_S3_URI, CUOPT_AWS_ACCESS_KEY_ID, CUOPT_AWS_SECRET_ACCESS_KEY -# Optional env: -# GITHUB_BASE_REF (default: main), GITHUB_SERVER_URL, GITHUB_SHA +# Required env (all must be set; the script exits with a loud error if any +# are missing): +# PR_NUMBER, GITHUB_REPOSITORY, GITHUB_RUN_ID, GITHUB_BASE_REF, +# GITHUB_SHA, GITHUB_TOKEN, CUOPT_S3_URI, CUOPT_AWS_ACCESS_KEY_ID, +# CUOPT_AWS_SECRET_ACCESS_KEY. set -euo pipefail +: "${PR_NUMBER:?required}" +: "${GITHUB_REPOSITORY:?required}" +: "${GITHUB_RUN_ID:?required}" +: "${GITHUB_BASE_REF:?required}" +: "${GITHUB_SHA:?required}" +: "${GITHUB_TOKEN:?required}" +: "${CUOPT_S3_URI:?required}" +: "${CUOPT_AWS_ACCESS_KEY_ID:?required}" +: "${CUOPT_AWS_SECRET_ACCESS_KEY:?required}" + SCRIPT_DIR="$(dirname "$(realpath "${BASH_SOURCE[0]}")")" OUTPUT_DIR="${PWD}/pr-aggregate-output" mkdir -p "${OUTPUT_DIR}" -: "${PR_NUMBER:?PR_NUMBER is required}" -: "${GITHUB_REPOSITORY:?GITHUB_REPOSITORY is required}" - -if [ -z "${CUOPT_S3_URI:-}" ]; then - echo "WARNING: CUOPT_S3_URI is not set; nothing to aggregate." >&2 - exit 0 -fi -if [ -z "${GITHUB_RUN_ID:-}" ]; then - echo "WARNING: GITHUB_RUN_ID is not set; cannot locate per-matrix summaries." 
>&2 - exit 0 -fi - -# aws CLI uses the standard AWS_* env vars; map the cuOpt-prefixed secrets onto them. -export AWS_ACCESS_KEY_ID="${CUOPT_AWS_ACCESS_KEY_ID:-${AWS_ACCESS_KEY_ID:-}}" -export AWS_SECRET_ACCESS_KEY="${CUOPT_AWS_SECRET_ACCESS_KEY:-${AWS_SECRET_ACCESS_KEY:-}}" +# aws CLI reads the standard AWS_* env vars; map the cuOpt-prefixed +# secrets onto them. +export AWS_ACCESS_KEY_ID="${CUOPT_AWS_ACCESS_KEY_ID}" +export AWS_SECRET_ACCESS_KEY="${CUOPT_AWS_SECRET_ACCESS_KEY}" unset AWS_SESSION_TOKEN -GITHUB_RUN_URL="${GITHUB_SERVER_URL:-https://github.com}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" +GITHUB_RUN_URL="https://github.com/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" S3_PR_SUMMARIES_PREFIX="${CUOPT_S3_URI}ci_test_reports/pr/run-${GITHUB_RUN_ID}/" COMMENT_FILE="${OUTPUT_DIR}/pr_comment.md" @@ -48,8 +47,8 @@ echo "Aggregating PR per-matrix summaries from ${S3_PR_SUMMARIES_PREFIX}" python3 "${SCRIPT_DIR}/utils/aggregate_pr.py" \ --s3-pr-summaries-prefix "${S3_PR_SUMMARIES_PREFIX}" \ --output-dir "${OUTPUT_DIR}" \ - --target-branch "${GITHUB_BASE_REF:-main}" \ - --sha "${GITHUB_SHA:-}" \ + --target-branch "${GITHUB_BASE_REF}" \ + --sha "${GITHUB_SHA}" \ --github-run-url "${GITHUB_RUN_URL}" \ --run-date "$(date +%F)" @@ -58,13 +57,6 @@ if [ ! -s "${COMMENT_FILE}" ]; then exit 0 fi -if [ -z "${GITHUB_TOKEN:-}" ]; then - echo "ERROR: GITHUB_TOKEN is not set; cannot post PR comment." >&2 - echo "--- comment body that would have been posted ---" >&2 - cat "${COMMENT_FILE}" >&2 - exit 1 -fi - python3 "${SCRIPT_DIR}/utils/pr_comment_helper.py" post \ --repo "${GITHUB_REPOSITORY}" \ --pr "${PR_NUMBER}" \ From cbb6d7e9d90ebea6c34db2d4ce85047e49af9089 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Thu, 14 May 2026 12:36:47 -0500 Subject: [PATCH 17/19] =?UTF-8?q?ci(pr):=20trim=20Python=20entry=20points?= =?UTF-8?q?=20=E2=80=94=20strict=20args,=20single=20config=20source?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Continue the simplification: aggregate_pr.py: - `--target-branch`, `--sha`, `--github-run-url` are now required. No more env-var fallbacks in argparse defaults — the shell script passes every value explicitly, and the env-var fallback was a second hidden source of configuration. - `--s3-pr-summaries-prefix` / `--local-summaries-dir` go into an argparse mutually-exclusive group with `required=True`, removing the manual ``return 2`` validation block. - Keep static defaults that are not env-fallbacks: `--output-dir`, `--run-date` (defaults to today UTC). pr_comment_helper.py: - Drop the `--token` CLI flag entirely; read `GITHUB_TOKEN` from env directly with a strict early-fail when missing. Removes the dual-source pattern (CLI default + env fallback). - Drop the `--marker` CLI flag. `COMMENT_MARKER` is the only valid value — make it the single source of truth. Public functions still accept a `marker` kwarg (default `COMMENT_MARKER`) for unit testing. Co-Authored-By: Claude Opus 4.7 (1M context) --- ci/utils/aggregate_pr.py | 29 +++++++++---------------- ci/utils/pr_comment_helper.py | 41 +++++++++++++++++------------------ 2 files changed, 30 insertions(+), 40 deletions(-) diff --git a/ci/utils/aggregate_pr.py b/ci/utils/aggregate_pr.py index 73f6273602..834593862e 100644 --- a/ci/utils/aggregate_pr.py +++ b/ci/utils/aggregate_pr.py @@ -416,29 +416,26 @@ def main() -> int: commenting. 
Returns: - ``0`` on success (including the all-green / empty-body case), - or ``2`` when neither an S3 prefix nor a local directory was - provided. + ``0`` on success (including the all-green / empty-body case). Raises: SystemExit: Indirectly via ``argparse`` if argument parsing - fails. + fails (missing required arguments). """ parser = argparse.ArgumentParser( description="Aggregate per-matrix PR test summaries into a Markdown PR comment." ) - parser.add_argument( + src = parser.add_mutually_exclusive_group(required=True) + src.add_argument( "--s3-pr-summaries-prefix", - default="", help=( "S3 prefix where ``nightly_report.py --mode pr`` uploaded " "per-matrix summaries for this run. Example: " "s3://bucket/ci_test_reports/pr/run-12345/" ), ) - parser.add_argument( + src.add_argument( "--local-summaries-dir", - default="", help="Local directory of summaries (for testing without S3).", ) parser.add_argument( @@ -448,23 +445,23 @@ def main() -> int: ) parser.add_argument( "--target-branch", - default=os.environ.get("GITHUB_BASE_REF", "main"), + required=True, help="PR target branch — surfaced in the comment for context.", ) parser.add_argument( "--sha", - default=os.environ.get("GITHUB_SHA", ""), + required=True, help="PR head SHA — surfaced in the comment for context.", ) parser.add_argument( "--github-run-url", - default="", + required=True, help="Workflow run URL — linked from the comment footer.", ) parser.add_argument( "--run-date", default=datetime.now(timezone.utc).strftime("%Y-%m-%d"), - help="Date the run started (YYYY-MM-DD).", + help="Date the run started (YYYY-MM-DD). Defaults to today (UTC).", ) args = parser.parse_args() @@ -473,16 +470,10 @@ def main() -> int: if args.local_summaries_dir: summaries = load_local_summaries(args.local_summaries_dir) - elif args.s3_pr_summaries_prefix: + else: summaries = download_summaries( args.s3_pr_summaries_prefix, output_dir / "summaries" ) - else: - print( - "ERROR: provide --s3-pr-summaries-prefix or --local-summaries-dir.", - file=sys.stderr, - ) - return 2 if not summaries: print("No PR per-matrix summaries found; nothing to comment on.") diff --git a/ci/utils/pr_comment_helper.py b/ci/utils/pr_comment_helper.py index ce87c6f26d..744bec48f1 100644 --- a/ci/utils/pr_comment_helper.py +++ b/ci/utils/pr_comment_helper.py @@ -87,7 +87,7 @@ def resolve_base_ref(repo: str, pr_number: int, token: str) -> str: def find_existing_comment_id( - repo: str, pr_number: int, token: str, marker: str + repo: str, pr_number: int, token: str, marker: str = COMMENT_MARKER ) -> int | None: """Find the id of a PR comment whose body starts with ``marker``. @@ -124,7 +124,11 @@ def find_existing_comment_id( def post_or_update_comment( - repo: str, pr_number: int, token: str, body: str, marker: str + repo: str, + pr_number: int, + token: str, + body: str, + marker: str = COMMENT_MARKER, ) -> str: """Update the existing sticky PR comment if present; otherwise create one. 
@@ -170,37 +174,36 @@ def post_or_update_comment(
     return url
 
 
-def _cmd_base_ref(args):
-    print(resolve_base_ref(args.repo, args.pr, args.token))
+def _cmd_base_ref(args: argparse.Namespace, token: str) -> int:
+    print(resolve_base_ref(args.repo, args.pr, token))
     return 0
 
 
-def _cmd_post(args):
+def _cmd_post(args: argparse.Namespace, token: str) -> int:
     with open(args.body_file) as f:
         body = f.read()
     if not body.strip():
         print("Empty body; nothing to post.")
         return 0
-    post_or_update_comment(args.repo, args.pr, args.token, body, args.marker)
+    post_or_update_comment(args.repo, args.pr, token, body)
     return 0
 
 
-def _add_common_args(sp):
+def _add_common_args(sp: argparse.ArgumentParser) -> None:
     sp.add_argument("--repo", required=True, help="owner/name")
     sp.add_argument("--pr", required=True, type=int, help="PR number")
-    sp.add_argument(
-        "--token",
-        default=os.environ.get("GITHUB_TOKEN", ""),
-        help="GitHub token (defaults to $GITHUB_TOKEN).",
-    )
 
 
 def main() -> int:
     """Dispatch to the requested subcommand.
 
+    Reads ``GITHUB_TOKEN`` from the environment (the GitHub convention);
+    there is no ``--token`` CLI flag, so configuration comes from a
+    single source.
+
     Returns:
         ``0`` on success, ``1`` if a GitHub API call failed, or ``2``
-        if a token was not provided.
+        if ``GITHUB_TOKEN`` is not set in the environment.
 
     Raises:
         SystemExit: Indirectly via ``argparse`` if argument parsing
@@ -222,19 +225,15 @@ def main() -> int:
         required=True,
         help="File whose contents are the comment body.",
     )
-    sp_post.add_argument(
-        "--marker",
-        default=COMMENT_MARKER,
-        help="Hidden HTML-comment marker that identifies the sticky comment.",
-    )
     sp_post.set_defaults(func=_cmd_post)
 
     args = p.parse_args()
-    if not args.token:
-        print("ERROR: --token or $GITHUB_TOKEN must be set.", file=sys.stderr)
+    token = os.environ.get("GITHUB_TOKEN")
+    if not token:
+        print("ERROR: GITHUB_TOKEN env var must be set.", file=sys.stderr)
         return 2
     try:
-        return args.func(args)
+        return args.func(args, token)
     except RuntimeError as exc:
         print(f"ERROR: {exc}", file=sys.stderr)
         return 1

From ade99de98ef7e21b071c4886b08e2c4336babc63 Mon Sep 17 00:00:00 2001
From: Ramakrishna Prabhu
Date: Thu, 14 May 2026 12:40:26 -0500
Subject: [PATCH 18/19] ci(pr): trim verbose comments

Remove section-divider banners and what-not-why comments now that the
function docstrings and well-named identifiers carry the same
information. Keep all WHY comments (precedence reasoning in the
classifier, constant-value rationale, the CUOPT_AWS_*-to-AWS_* mapping
note).

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 ci/pr_summary.sh              | 17 +++--------------
 ci/utils/aggregate_common.py  | 15 ---------------
 ci/utils/aggregate_pr.py      | 10 ++--------
 ci/utils/pr_comment_helper.py |  5 ++---
 4 files changed, 7 insertions(+), 40 deletions(-)

diff --git a/ci/pr_summary.sh b/ci/pr_summary.sh
index feed14ab99..ade470d5e3 100755
--- a/ci/pr_summary.sh
+++ b/ci/pr_summary.sh
@@ -2,20 +2,9 @@
 # SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
-# Aggregate per-matrix PR test summaries from S3 and post (or update) a
-# sticky comment on the PR that classifies every failure as NEW
-# (introduced by this PR) or KNOWN (recurring on nightly, known flaky on
-# nightly, or flaked in this run via pytest-rerunfailures).
-#
-# Runs as a post-test job after every PR test job finishes. See
-# ci/utils/aggregate_pr.py for content generation and
-# ci/utils/pr_comment_helper.py for GitHub API interactions.
-#
-# Required env (all must be set; the script exits with a loud error if any
-# are missing):
-#   PR_NUMBER, GITHUB_REPOSITORY, GITHUB_RUN_ID, GITHUB_BASE_REF,
-#   GITHUB_SHA, GITHUB_TOKEN, CUOPT_S3_URI, CUOPT_AWS_ACCESS_KEY_ID,
-#   CUOPT_AWS_SECRET_ACCESS_KEY.
+# Aggregate per-matrix PR test summaries from S3 and post (or update)
+# the sticky PR classification comment. See ci/utils/aggregate_pr.py
+# (content) and ci/utils/pr_comment_helper.py (GitHub API).
 
 set -euo pipefail
 
diff --git a/ci/utils/aggregate_common.py b/ci/utils/aggregate_common.py
index dee6e33498..1c000a4bc4 100644
--- a/ci/utils/aggregate_common.py
+++ b/ci/utils/aggregate_common.py
@@ -26,11 +26,6 @@
 from s3_helpers import s3_download, s3_list  # noqa: E402
 
 
-# ---------------------------------------------------------------------------
-# Download and load summaries
-# ---------------------------------------------------------------------------
-
-
 def download_summaries(
     s3_prefix: str,
     local_dir: str | os.PathLike[str],
@@ -132,11 +127,6 @@ def load_local_summaries(
     return summaries
 
 
-# ---------------------------------------------------------------------------
-# Aggregation
-# ---------------------------------------------------------------------------
-
-
 def aggregate_summaries(
     summaries: list[dict[str, Any]],
 ) -> dict[str, Any]:
@@ -235,11 +225,6 @@ def aggregate_summaries(
     }
 
 
-# ---------------------------------------------------------------------------
-# HTML escaping
-# ---------------------------------------------------------------------------
-
-
 def html_escape(text: Any) -> str:
     """Escape HTML special characters in ``text``.
 
diff --git a/ci/utils/aggregate_pr.py b/ci/utils/aggregate_pr.py
index 834593862e..1981a628c9 100644
--- a/ci/utils/aggregate_pr.py
+++ b/ci/utils/aggregate_pr.py
@@ -196,9 +196,8 @@ def build_comment_body(
     if not new_failures and not recurring and not flaky:
         return ""
 
-    # Pull crashes out so they render in a dedicated CAUTION block above
-    # the normal NEW/KNOWN tables and don't get drowned out by ordinary
-    # assertion failures.
+    # Pulled out so crashes render in their own CAUTION block and don't
+    # get drowned out by ordinary assertion failures.
     new_crashes, new_failures = _split_crashes(new_failures)
     recurring_crashes, recurring = _split_crashes(recurring)
     all_crashes = new_crashes + recurring_crashes
@@ -247,7 +246,6 @@ def build_comment_body(
     parts.append("\n<hr>")
") parts.append("") - # --- CRASHES (top of comment, GitHub red-alert callout) --- if all_crashes: parts.append("> [!CAUTION]") parts.append( @@ -260,8 +258,6 @@ def build_comment_body( "last test invoked before the signal." ) parts.append("") - # Collapse per-crash details under a hidden tab — the CAUTION - # block is the headline; details are one click away. crash_word = "crash" if len(all_crashes) == 1 else "crashes" parts.append("
") parts.append( @@ -291,7 +287,6 @@ def build_comment_body( parts.append("
") parts.append("") - # --- NEW failures (red CAUTION callout as the section header) --- if new_failures: parts.append("> [!CAUTION]") parts.append( @@ -311,7 +306,6 @@ def build_comment_body( ) parts.append("") - # --- KNOWN issues --- if known_total: parts.append("### KNOWN issues (pre-existing, not caused by this PR)") parts.append("") diff --git a/ci/utils/pr_comment_helper.py b/ci/utils/pr_comment_helper.py index 744bec48f1..c07fc97272 100644 --- a/ci/utils/pr_comment_helper.py +++ b/ci/utils/pr_comment_helper.py @@ -28,9 +28,8 @@ GITHUB_API = "https://api.github.com" -# Single source of truth for the sticky-comment marker. ``aggregate_pr.py`` -# imports this so the body it writes always matches the marker the poster -# searches for. +# Imported by aggregate_pr.py so the body it writes and the marker the +# poster searches for stay in sync. COMMENT_MARKER = "" From b4bfea9317f92d42acc9ad31dbb7d35f413eee12 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Thu, 14 May 2026 14:28:09 -0500 Subject: [PATCH 19/19] ci(pr): document why the GITHUB_BASE_REF fallback is load-bearing The fallback exists because GHA leaves `github.base_ref` empty for the `push` events the PR workflow triggers on, and the shared rapidsai test workflows don't propagate a target branch into the test container. Inline comment + a follow-up note in the PR description to centralize PR classification in pr-test-summary (which already resolves the base ref via the API) so the fallback can be removed. Co-Authored-By: Claude Opus 4.7 (1M context) --- ci/utils/nightly_report_helper.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ci/utils/nightly_report_helper.sh b/ci/utils/nightly_report_helper.sh index 2fb99c8052..113253af37 100755 --- a/ci/utils/nightly_report_helper.sh +++ b/ci/utils/nightly_report_helper.sh @@ -118,6 +118,13 @@ generate_nightly_report() { # comment aggregator picks up. mode="pr" + # GITHUB_BASE_REF is unset for the `push` events the PR workflow + # triggers on (GHA only populates it for `pull_request` events). + # The shared rapidsai test workflows don't propagate a target + # branch into the test container, so we fall back to RAPIDS_BRANCH + # then "main". Follow-up: centralize PR classification in the + # pr-test-summary job so this fallback is no longer needed + # (see PR #1194 description). local target_branch="${GITHUB_BASE_REF:-${RAPIDS_BRANCH:-main}}" local target_branch_slug target_branch_slug=$(echo "${target_branch}" | tr '/' '-')