From 8040af8ee148c44488503db2b492c6c58fc46543 Mon Sep 17 00:00:00 2001 From: Gregory Giguashvili Date: Sun, 15 Mar 2026 12:04:35 +0200 Subject: [PATCH 01/10] Script to list microshift prow jobs for a release --- .../microshift-prow-jobs-for-release.sh | 129 ++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100755 .claude/scripts/microshift-prow-jobs-for-release.sh diff --git a/.claude/scripts/microshift-prow-jobs-for-release.sh b/.claude/scripts/microshift-prow-jobs-for-release.sh new file mode 100755 index 0000000000..a7882d84c7 --- /dev/null +++ b/.claude/scripts/microshift-prow-jobs-for-release.sh @@ -0,0 +1,129 @@ +#!/bin/bash +set -euo pipefail + +# Prow Jobs Analyzer +# Analyzes status of Prow jobs for MicroShift + +PROW_URL="https://prow.ci.openshift.org/data.js" +PROW_JOB="microshift" + +# Base query - fetch all MicroShift jobs for a release +fetch_base_data() { + local release="${1}" + curl -s "${PROW_URL}" | jq -c --arg release "${release}" --arg job "${PROW_JOB}" ' + .[] | select((.job | contains($job)) and (.job | contains($release))) | + {job: .job, status: .state, started: .started, finished: .finished, duration: .duration, url: .url} + ' +} + +# Map status to icon +status_to_icon() { + case "${1}" in + success) echo "✓" ;; + failure) echo "✗" ;; + pending) echo "⋯" ;; + *) echo "?" ;; + esac +} + +# Query for status mode - show latest run for each job +query_status() { + { + echo -e "JOB\tSTATUS\tFINISHED\tDURATION\tURL" + jq -sr ' + group_by(.job) | + map(sort_by(.started | tonumber) | reverse | first) | + .[] | + .status = (if .status == "success" then "✓" + elif .status == "failure" then "✗" + elif .status == "pending" then "⋯" + else "?" 
end) | + [.job, .status, .finished, .duration, .url] | + @tsv + ' + } | column -t -s $'\t' +} + +# Query for failed mode - show only latest jobs with failure status +query_failed() { + { + echo -e "JOB\tSTATUS\tFINISHED\tDURATION\tURL" + jq -sr ' + group_by(.job) | + map(sort_by(.started | tonumber) | reverse | first) | + .[] | + select(.status == "failure") | + .status = "✗" | + [.job, .status, .finished, .duration, .url] | + @tsv + ' + } | column -t -s $'\t' +} + +# Usage +usage() { + echo "Usage: ${0} [--mode MODE] " + echo " --mode MODE: Operation mode (default: failed)" + echo " status: Show status of latest run for each job" + echo " failed: Show only latest jobs with failure status" + echo " release: OpenShift release version (e.g., 4.17, 4.16)" + exit 1 +} + +# Status mode - show latest run for each job +mode_status() { + local release="${1}" + fetch_base_data "${release}" | query_status +} + +# Failed mode - show only failed jobs +mode_failed() { + local release="${1}" + fetch_base_data "${release}" | query_failed +} + +# Main +main() { + local mode="failed" + local release="" + + # Parse arguments + while [[ ${#} -gt 0 ]]; do + case "${1}" in + --mode) + mode="${2}" + shift 2 + ;; + -*) + echo "Unknown option: ${1}" + usage + ;; + *) + release="${1}" + shift + ;; + esac + done + + # Validate arguments + if [[ -z "${release}" ]]; then + echo "Error: release argument is required" + usage + fi + + # Execute mode + case "${mode}" in + status) + mode_status "${release}" + ;; + failed) + mode_failed "${release}" + ;; + *) + echo "Error: Unknown mode '${mode}'" + usage + ;; + esac +} + +main "${@}" From ba278cd1a7049f0e5537d830f7932a3defaf0c06 Mon Sep 17 00:00:00 2001 From: Gregory Giguashvili Date: Sun, 15 Mar 2026 12:05:52 +0200 Subject: [PATCH 02/10] Develop a skill for per-release CI analysis --- .claude/commands/analyze-ci-for-release.md | 291 +++++++++++++++++++++ 1 file changed, 291 insertions(+) create mode 100644 
.claude/commands/analyze-ci-for-release.md diff --git a/.claude/commands/analyze-ci-for-release.md b/.claude/commands/analyze-ci-for-release.md new file mode 100644 index 0000000000..4a23c8a1f2 --- /dev/null +++ b/.claude/commands/analyze-ci-for-release.md @@ -0,0 +1,291 @@ +--- +name: Analyze CI for a Release +argument-hint: +description: Analyze CI for a MicroShift release using analyse-ci-test-job skill and produce a summary +allowed-tools: Skill, Bash, Read, Write, Glob, Grep, Agent +--- + +# analyze-ci-for-release + +## Name +analyze-ci-for-release - Analyze all failed periodic CI jobs for a MicroShift release + +## Synopsis +``` +/analyze-ci-for-release <release> [--limit N] +``` + +## Description +Analyzes all failed periodic jobs for a specific MicroShift release by leveraging existing tools and agents. This command orchestrates the analysis workflow by: + +1. Fetching the list of failed periodic jobs using `.claude/scripts/microshift-prow-jobs-for-release.sh` +2. Analyzing each job individually using the `openshift-ci-test-job` agent +3. Aggregating results and presenting a concise summary with common failure patterns + +This approach reuses existing analysis capabilities rather than duplicating logic. + +## Arguments +- `<release>` (required): OpenShift release version (e.g., 4.22, 4.21, 4.20) +- `--limit N` (optional): Limit analysis to first N jobs (useful for quick checks, default: all jobs) + +## Implementation Steps + +### Step 1: Validate Arguments and Fetch Failed Jobs + +**Goal**: Get the list of failed periodic jobs for the release. + +**Actions**: +1. Validate release version argument is provided +2. Execute `.claude/scripts/microshift-prow-jobs-for-release.sh <release>` to get all failed jobs +3. Filter output to only include periodic jobs (containing `-periodics-`) +4. Extract job URLs from the output +5. 
Apply limit if specified + +**Example Command**: +```bash +bash .claude/scripts/microshift-prow-jobs-for-release.sh 4.22 | grep -E "periodics-" | awk '{print $NF}' +``` + +**Expected Output**: +``` +Found 17 failed periodic jobs for release 4.22 +``` + +**Error Handling**: +- If no release version provided, show usage and exit +- If no periodic jobs found, report "No failed periodic jobs found for release X.XX" and exit successfully +- If microshift-prow-jobs-for-release.sh fails, report error and exit + +### Step 2: Analyze Each Job Using openshift-ci-test-job agent + +**Goal**: Get detailed root cause analysis for each failed job. + +**Actions**: +1. For each job URL in the list: + - Call the `openshift-ci-test-job` agent with the job URL **in parallel** + - Capture the analysis result (failure reason, error summary) + - Track common patterns across jobs + - Store all intermediate analysis files in `/tmp` + +2. Progress reporting: + - Show "Analyzing job X/Y: " for each job + - Use the agent tool to invoke `openshift-ci-test-job` for each URL + - **Run all job analyses in parallel** to maximize efficiency + +**Example**: +``` +Analyzing 17 jobs in parallel... +Job 1/17: periodic-ci-openshift-microshift-release-4.22-periodics-e2e-aws-ovn-ocp-conformance +Job 2/17: periodic-ci-openshift-microshift-release-4.22-periodics-e2e-aws-tests-bootc-nightly +... +``` + +**Data Collection**: +For each job analysis, extract: +- Job name +- Job ID +- Failure type (build failure, test failure, infrastructure issue) +- Primary error/root cause +- Affected test scenarios (if applicable) + +**File Storage**: +All intermediate analysis files are stored in `/tmp` with naming pattern: +- `/tmp/analyze-ci-release--job--.txt` + +### Step 3: Aggregate Results and Identify Patterns + +**Goal**: Find common failure patterns across all jobs from parallel execution. + +**Actions**: +1. 
Collect results from all parallel job analyses + - Read individual job analysis files from `/tmp` + - Extract key findings from each analysis + +2. Group jobs by failure type: + - Build/infrastructure failures + - Test execution failures + - Configuration/setup issues + +3. Identify most common errors: + - Count occurrences of similar error messages + - Group jobs with identical root causes + +4. Categorize by severity: + - CRITICAL: Affects multiple jobs, blocks release + - HIGH: Affects several jobs + - MEDIUM: Isolated failures + - LOW: Flaky/intermittent issues + +### Step 4: Generate Concise Summary Report + +**Goal**: Present actionable summary to the user. + +**Actions**: +1. Aggregate all job analysis results from parallel execution +2. Identify common patterns and group by failure type +3. Generate summary report and save to `/tmp/analyze-ci-release-<release>-summary.<timestamp>.txt` +4. Display the summary to the user + +**Report Structure**: + +``` +═══════════════════════════════════════════════════════════════ +MICROSHIFT 4.22 RELEASE - FAILED JOBS ANALYSIS +═══════════════════════════════════════════════════════════════ + +📊 OVERVIEW + Total Failed Jobs: 17 + Analysis Date: 2026-03-14 + Report saved to: /tmp/analyze-ci-release-4.22-summary.20260314-143022.txt + +📋 FAILURE BREAKDOWN + Build Failures: 0 jobs + Test Failures: 15 jobs + Infrastructure: 2 jobs + +🔍 TOP ISSUES (by frequency) + +1. OCP Conformance Test Failures (8 jobs) + Severity: HIGH + Pattern: Tests timeout or fail in conformance suite + Affected Jobs: + • periodic-ci-openshift-microshift-release-4.22-periodics-e2e-aws-ovn-ocp-conformance + • periodic-ci-openshift-microshift-release-4.22-periodics-e2e-aws-ovn-ocp-conformance-serial + • ... (6 more) + + Root Cause: [summarized from openshift-ci-test-job results] + Next Steps: [recommended actions] + +2. 
Bootc Image Test Failures (4 jobs) + Severity: MEDIUM + Pattern: Image build or deployment issues + Affected Jobs: + • periodic-ci-openshift-microshift-release-4.22-periodics-e2e-aws-tests-bootc-nightly + • ... (3 more) + + Root Cause: [summarized] + Next Steps: [recommended actions] + +3. Infrastructure/Timeout Issues (2 jobs) + Severity: LOW + Pattern: Jobs timeout or fail to allocate resources + Affected Jobs: + • periodic-ci-openshift-microshift-release-4.22-periodics-rebase-on-nightlies + • periodic-ci-openshift-microshift-release-4.22-periodics-update-versions-releases + + Root Cause: [summarized] + +═══════════════════════════════════════════════════════════════ + +Individual job reports available in: + /tmp/analyze-ci-release-4.22-job-*.txt +``` + +## Examples + +### Example 1: Analyze All Failed Jobs + +``` +/analyze-ci-for-release 4.22 +``` + +**Behavior**: +- Fetches all failed periodic jobs for 4.22 +- Analyzes each job using openshift-ci-test-job agent +- Presents aggregated summary + +### Example 2: Quick Analysis (First 5 Jobs) + +``` +/analyze-ci-for-release 4.22 --limit 5 +``` + +**Behavior**: +- Analyzes only first 5 failed jobs +- Useful for quick health check +- Still provides pattern analysis + +### Example 3: Different Release + +``` +/analyze-ci-for-release 4.21 +``` + +**Behavior**: +- Analyzes 4.21 release jobs +- Same workflow as 4.22 + +## Performance Considerations + +- **Execution Time**: Significantly reduced through parallel execution - typically 2-3 minutes for 15-20 jobs (depends on openshift-ci-test-job execution time) +- **Network Usage**: Moderate to high - all jobs analyzed in parallel fetch logs from GCS simultaneously +- **Parallelization**: All jobs are analyzed in parallel for maximum efficiency +- **Use --limit**: For quick checks, use --limit flag to analyze subset +- **File Storage**: All intermediate and report files are stored in `/tmp` directory + +## Prerequisites + +- 
`.claude/scripts/microshift-prow-jobs-for-release.sh` script must exist and be executable +- `openshift-ci-test-job` agent must be available +- Internet access to fetch job data from Prow/GCS +- Bash shell + +## Error Handling + +### No Failed Jobs +``` +No failed periodic jobs found for release 4.22 +This is good news - all periodic jobs are passing! ✓ +``` + +### Invalid Release Version +``` +Error: Invalid release version +Usage: /analyze-ci-for-release [--limit N] +Example: /analyze-ci-for-release 4.22 +``` + +### microshift-prow-jobs-for-release.sh Not Found +``` +Error: Could not find .claude/scripts/microshift-prow-jobs-for-release.sh +Please ensure you're in the microshift project directory. +``` + +## Related Skills + +- **openshift-ci-test-job**: Detailed analysis of a single job (used internally) +- **analyze-ci-test-scenario**: Analyze specific test scenario results +- **analyze-microshift-start**: Analyze MicroShift startup performance +- **analyze-sos-report**: Investigate runtime issues from SOS reports + +## Use Cases + +### Daily CI Health Check +``` +/analyze-ci-for-release 4.22 --limit 10 +``` +Quick morning check of CI status + +### Pre-Release Verification +``` +/analyze-ci-for-release 4.22 +``` +Complete analysis before cutting a release + +### Root Cause Investigation +``` +/analyze-ci-for-release 4.22 +``` +When multiple jobs fail, identify common issues + +### Trend Analysis +Run periodically and compare summaries over time to identify regression patterns + +## Notes + +- This skill focuses on **periodic** jobs only (not presubmit/postsubmit) +- Analysis is read-only - no modifications to CI data +- Results are saved in files in /tmp directory with a timestamp +- Provide links to the jobs in the summary +- Only present a concise analysis summary for each job +- Pattern detection improves with more jobs analyzed (avoid limiting unless needed) From 19ae9b7f5b76e3057fec5ce43cd8dc604e5d9b48 Mon Sep 17 00:00:00 2001 From: Gregory Giguashvili 
Date: Sun, 15 Mar 2026 12:06:25 +0200 Subject: [PATCH 03/10] Develop a skill for multiple release CI analysis summary --- .../analyze-ci-for-release-manager.md | 223 ++++++++++++++++++ 1 file changed, 223 insertions(+) create mode 100644 .claude/commands/analyze-ci-for-release-manager.md diff --git a/.claude/commands/analyze-ci-for-release-manager.md b/.claude/commands/analyze-ci-for-release-manager.md new file mode 100644 index 0000000000..1f5e8e0478 --- /dev/null +++ b/.claude/commands/analyze-ci-for-release-manager.md @@ -0,0 +1,223 @@ +--- +name: Analyze CI for Release Manager +argument-hint: +description: Analyze CI for multiple MicroShift releases and produce an HTML summary +allowed-tools: Skill, Bash, Read, Write, Glob, Grep, Agent +--- + +# analyze-ci-for-release-manager + +## Synopsis +``` +/analyze-ci-for-release-manager +``` + +## Description +Accepts a comma-separated list of MicroShift release versions, runs the `analyze-ci-for-release` skill for each release, and produces a single HTML summary file consolidating all per-release results. + +## Arguments +- `$ARGUMENTS` (required): Comma-separated list of release versions (e.g., `4.19,4.20,4.21,4.22`) + +## Implementation Steps + +### Step 1: Parse and Validate Arguments + +**Actions**: +1. Split `$ARGUMENTS` by comma to get a list of release versions +2. Trim whitespace from each version +3. Validate that at least one release version is provided +4. If no arguments provided, show usage and stop + +**Error Handling**: +- If `$ARGUMENTS` is empty, display: "Usage: /analyze-ci-for-release-manager " and stop + +### Step 2: Analyze Each Release + +**Actions**: +1. For each release version from the parsed list, invoke the `analyze-ci-for-release` skill: + ``` + Skill: analyze-ci-for-release, args: "" + ``` +2. Run releases **sequentially** (each skill invocation is a full analysis) +3. 
After each skill completes, note the summary report file path it produced (typically `/tmp/analyze-ci-release-<version>-summary.*.txt`) +4. Track which releases succeeded and which failed + +**Progress Reporting**: +``` +Analyzing release X/Y: <version> +``` + +### Step 3: Collect Per-Release Results + +**Actions**: +1. After all releases are analyzed, gather all summary files: + - Look for `/tmp/analyze-ci-release-<version>-summary.*.txt` for each version + - Also look for per-job files: `/tmp/analyze-ci-release-<version>-job-*.txt` +2. Read each summary file to extract the analysis content +3. If a summary file is missing for a release, note it as "Analysis failed or produced no output" + +### Step 4: Generate HTML Summary Report + +**Goal**: Create a single HTML file at `/tmp/microshift-ci-release-manager-<timestamp>.html` that consolidates all per-release analyses. + +**Actions**: +1. Generate the HTML report with the structure described below +2. Save to `/tmp/microshift-ci-release-manager-<timestamp>.html` where `<timestamp>` is `YYYYMMDD-HHMMSS` +3. **IMPORTANT**: Use the `Bash` tool with `cat <<'HTMLEOF' > /tmp/microshift-ci-release-manager-<timestamp>.html` (heredoc) to write the file, NOT the `Write` tool. This ensures the absolute `/tmp` path is used and avoids permission prompts. +4. Display the file path to the user at the end. + +**HTML Structure**: + +The HTML file must be a self-contained, single-file document with embedded CSS. Use the following structure: + +```html + + + + + MicroShift CI Release Manager Report - YYYY-MM-DD + + + +
+

MicroShift CI Release Manager Report

+

Generated: YYYY-MM-DD HH:MM:SS UTC

+ + +
+ +
+
N
+
Release X.YY Failed Jobs
+
+
+ + +
+

Releases Analyzed

+ +
+ + +
+
+

Release X.YY

+ N failed jobs +
+ + + + +
+ ... analysis content ... +
+
+ + +
+ + + + +``` + +**Content Guidelines**: +- Do NOT re-analyze or reinterpret the data from `analyze-ci-for-release` - use its output as-is +- Convert the plain text analysis reports into HTML-formatted content, preserving all information +- Ensure all Prow job URLs from the original analysis remain clickable links in the HTML +- Use appropriate badge colors: + - `badge-ok`: 0 failed jobs + - `badge-issues`: 1+ failed jobs + - `badge-critical`: 5+ failed jobs or CRITICAL severity issues present + - `badge-nodata`: analysis failed or no data +- Make per-job details collapsible to keep the page manageable +- The overview cards should show the number of failed jobs per release at a glance + +### Step 5: Report Completion + +**Actions**: +1. Display the path to the generated HTML file +2. Provide a brief text summary listing each release and its failed job count + +**Example Output**: +``` +HTML report generated: /tmp/microshift-ci-release-manager-20260315-143022.html + +Summary: + Release 4.19: 3 failed periodic jobs + Release 4.20: 7 failed periodic jobs + Release 4.21: 0 failed periodic jobs + Release 4.22: 12 failed periodic jobs +``` + +## Examples + +### Example 1: Analyze Multiple Releases +``` +/analyze-ci-for-release-manager 4.19,4.20,4.21,4.22 +``` + +### Example 2: Analyze Two Releases +``` +/analyze-ci-for-release-manager 4.21,4.22 +``` + +### Example 3: Single Release (still produces HTML) +``` +/analyze-ci-for-release-manager 4.22 +``` + +## Notes +- Each release analysis uses the `analyze-ci-for-release` skill - this command does NOT duplicate that logic +- The HTML report is self-contained (no external CSS/JS dependencies) +- All intermediate files from `analyze-ci-for-release` remain available in `/tmp` +- Releases are analyzed sequentially since each invocation is resource-intensive +- The HTML file can be opened in any browser for convenient examination +- If a release analysis fails, it is noted in the report but does not block other releases From 
686a165669a6410184a6d90628593af6a56c115b Mon Sep 17 00:00:00 2001 From: Gregory Giguashvili Date: Sun, 15 Mar 2026 12:07:00 +0200 Subject: [PATCH 04/10] Relax allowed permissions to avoid unnecessary prompts --- .claude/settings.json | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/.claude/settings.json b/.claude/settings.json index e85447cce0..0c1d983a32 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -2,17 +2,19 @@ "permissions": { "allow": [ "Read(//tmp/**)", - "Bash(mktemp:*)", - "Bash(curl:*)", - "Bash(gcloud storage cp:*)", - "Bash(gh pr view:*)", - "Bash(gh pr diff:*)", - "Bash(tar:*)", - "Bash(gh pr checks:*)", + "Write(//tmp/**)", + "Bash(*)", "WebFetch(domain:prow.ci.openshift.org)", - "Bash(gh issue list:*)" + "Skill(analyze-ci-for-release-manager)", + "Skill(analyze-ci-for-release)", + "Skill(analyze-ci-test-job)", + "Skill(analyze-ci-test-scenario)", + "Skill(analyze-sos-report)" ], "deny": [], - "ask": [] + "ask": [ + "Bash(sudo:*)", + "Bash(git:*)" + ] } } From 4ad22895928cb9b311d7517976b1f177f83eddce Mon Sep 17 00:00:00 2001 From: Gregory Giguashvili Date: Sun, 15 Mar 2026 13:18:39 +0200 Subject: [PATCH 05/10] Address AI review comments --- .claude/commands/analyze-ci-for-release.md | 2 +- .claude/scripts/microshift-prow-jobs-for-release.sh | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.claude/commands/analyze-ci-for-release.md b/.claude/commands/analyze-ci-for-release.md index 4a23c8a1f2..acf191c338 100644 --- a/.claude/commands/analyze-ci-for-release.md +++ b/.claude/commands/analyze-ci-for-release.md @@ -1,7 +1,7 @@ --- name: Analyze CI for a Release argument-hint: -description: Analyze CI for a MicroShift release using analyse-ci-test-job skill and produce a summary +description: Analyze CI for a MicroShift release using analyze-ci-test-job skill and produce a summary allowed-tools: Skill, Bash, Read, Write, Glob, Grep, Agent --- diff --git 
a/.claude/scripts/microshift-prow-jobs-for-release.sh b/.claude/scripts/microshift-prow-jobs-for-release.sh index a7882d84c7..298f88f60f 100755 --- a/.claude/scripts/microshift-prow-jobs-for-release.sh +++ b/.claude/scripts/microshift-prow-jobs-for-release.sh @@ -10,7 +10,7 @@ PROW_JOB="microshift" # Base query - fetch all MicroShift jobs for a release fetch_base_data() { local release="${1}" - curl -s "${PROW_URL}" | jq -c --arg release "${release}" --arg job "${PROW_JOB}" ' + curl -s --max-time 60 "${PROW_URL}" | jq -c --arg release "${release}" --arg job "${PROW_JOB}" ' .[] | select((.job | contains($job)) and (.job | contains($release))) | {job: .job, status: .state, started: .started, finished: .finished, duration: .duration, url: .url} ' @@ -91,6 +91,10 @@ main() { while [[ ${#} -gt 0 ]]; do case "${1}" in --mode) + if [[ ${#} -lt 2 ]]; then + echo "Error: mode requires an argument" + usage + fi mode="${2}" shift 2 ;; From 2c393b2a06b064e56af3a17d8cf8f739b95061b1 Mon Sep 17 00:00:00 2001 From: Gregory Giguashvili Date: Mon, 16 Mar 2026 07:20:00 +0200 Subject: [PATCH 06/10] Develop a skill for rebase PR test analysis --- .../commands/analyze-ci-for-pull-requests.md | 260 ++++++++++++++++++ .../microshift-prow-jobs-for-pull-requests.sh | 242 ++++++++++++++++ 2 files changed, 502 insertions(+) create mode 100644 .claude/commands/analyze-ci-for-pull-requests.md create mode 100755 .claude/scripts/microshift-prow-jobs-for-pull-requests.sh diff --git a/.claude/commands/analyze-ci-for-pull-requests.md b/.claude/commands/analyze-ci-for-pull-requests.md new file mode 100644 index 0000000000..fc6f3c056d --- /dev/null +++ b/.claude/commands/analyze-ci-for-pull-requests.md @@ -0,0 +1,260 @@ +--- +name: Analyze CI for Pull Requests +argument-hint: [--rebase] [--limit N] +description: Analyze CI for open MicroShift pull requests and produce a summary of failures +allowed-tools: Skill, Bash, Read, Write, Glob, Grep, Agent +--- + +# analyze-ci-for-pull-requests + +## Synopsis 
+``` +/analyze-ci-for-pull-requests [--rebase] [--limit N] +``` + +## Description +Fetches all open MicroShift pull requests, identifies failed Prow CI jobs for each PR, analyzes each failure using the `analyze-ci-test-job` agent, and produces a text summary report. + +This command orchestrates the analysis workflow by: + +1. Fetching the list of open PRs and their failed jobs using `.claude/scripts/microshift-prow-jobs-for-pull-requests.sh --mode detail` +2. Filtering to only PRs that have at least one failed job +3. Analyzing each failed job individually using the `analyze-ci-test-job` agent +4. Aggregating results into a summary report saved to `/tmp` + +## Arguments +- `--rebase` (optional): Only analyze rebase PRs (titles containing `NO-ISSUE: rebase-release-`) +- `--limit N` (optional): Limit analysis to first N failed jobs total (useful for quick checks, default: all failed jobs) + +## Implementation Steps + +### Step 1: Fetch Open PRs and Their Job Results + +**Goal**: Get the list of open PRs with failed Prow jobs. + +**Actions**: +1. Execute `.claude/scripts/microshift-prow-jobs-for-pull-requests.sh --mode detail` to get all open PRs and their job statuses. If `--rebase` was specified, add `--filter "NO-ISSUE: rebase-release-"` to only include rebase PRs +2. Parse the output to identify PRs with failed jobs (lines containing `✗`) +3. For each failed job, extract: + - PR number and title (from the `=== PR #NNN: ... ===` header lines) + - PR URL (line following the header) + - Job name (first column) + - Job URL (last column, the Prow URL) +4. 
Apply `--limit N` if specified + +**Example Commands**: +```bash +# All open PRs +bash .claude/scripts/microshift-prow-jobs-for-pull-requests.sh --mode detail 2>/dev/null + +# Rebase PRs only +bash .claude/scripts/microshift-prow-jobs-for-pull-requests.sh --mode detail --filter "NO-ISSUE: rebase-release-" 2>/dev/null +``` + +**Expected Output Format**: +``` +=== PR #6313: USHIFT-6636: Change test-agent impl to align with greenboot-rs === + https://github.com/openshift/microshift/pull/6313 + + JOB STATUS URL + pull-ci-openshift-microshift-main-e2e-aws-tests ✗ https://prow.ci.openshift.org/view/gs/... + pull-ci-openshift-microshift-main-e2e-aws-tests-arm ✗ https://prow.ci.openshift.org/view/gs/... + ... +``` + +**Error Handling**: +- If no open PRs found, report "No open pull requests found" and exit successfully +- If no failed jobs across all PRs, report "All PR jobs are passing" and exit successfully +- If `microshift-prow-jobs-for-pull-requests.sh` fails, report error and exit + +### Step 2: Analyze Each Failed Job Using analyze-ci-test-job Agent + +**Goal**: Get detailed root cause analysis for each failed job. + +**Actions**: +1. For each failed job URL from Step 1: + - Call the `analyze-ci-test-job` agent with the job URL **in parallel** + - Capture the analysis result (failure reason, error summary) + - Store all intermediate analysis files in `/tmp` + +2. 
Progress reporting: + - Show "Analyzing job X/Y: (PR #NNN)" for each job + - Use the Agent tool to invoke `analyze-ci-test-job` for each URL + - **Run all job analyses in parallel** to maximize efficiency + +**Data Collection**: +For each job analysis, extract: +- Job name +- Job URL +- PR number and title +- Failure type (build failure, test failure, infrastructure issue) +- Primary error/root cause +- Affected test scenarios (if applicable) + +**File Storage**: +All intermediate analysis files are stored in `/tmp` with naming pattern: +- `/tmp/analyze-ci-prs-job--pr-.txt` + +### Step 3: Aggregate Results and Identify Patterns + +**Goal**: Find common failure patterns across all PRs and jobs. + +**Actions**: +1. Collect results from all parallel job analyses + - Read individual job analysis files from `/tmp` + - Extract key findings from each analysis + +2. Group failures by PR: + - List each PR with its failed jobs and root causes + +3. Identify common errors across PRs: + - Count occurrences of similar error messages + - Group jobs with identical root causes (e.g., same infrastructure issue affecting multiple PRs) + +### Step 4: Generate Summary Report + +**Goal**: Present actionable summary to the user. + +**Actions**: +1. Aggregate all job analysis results from parallel execution +2. Identify common patterns and group by PR and failure type +3. Generate summary report and save to `/tmp/analyze-ci-prs-summary..txt` +4. 
Display the summary to the user + +**Report Structure**: + +``` +═══════════════════════════════════════════════════════════════ +MICROSHIFT OPEN PULL REQUESTS - FAILED JOBS ANALYSIS +═══════════════════════════════════════════════════════════════ + +OVERVIEW + Total Open PRs: 6 + PRs with Failures: 2 + Total Failed Jobs: 9 + Analysis Date: 2026-03-15 + Report: /tmp/analyze-ci-prs-summary.20260315-143022.txt + +PER-PR BREAKDOWN + +PR #6313: USHIFT-6636: Change test-agent impl to align with greenboot-rs + https://github.com/openshift/microshift/pull/6313 + Jobs: 8 passed, 7 failed + + Failed Jobs: + 1. pull-ci-openshift-microshift-main-e2e-aws-tests + Status: FAILURE + Root Cause: [summarized from analyze-ci-test-job] + URL: https://prow.ci.openshift.org/view/gs/... + + 2. pull-ci-openshift-microshift-main-e2e-aws-tests-arm + Status: FAILURE + Root Cause: [summarized] + URL: https://prow.ci.openshift.org/view/gs/... + + ... (more failed jobs) + +PR #6116: USHIFT-6491: Improve gitops test + https://github.com/openshift/microshift/pull/6116 + Jobs: 15 passed, 2 failed + + Failed Jobs: + 1. pull-ci-openshift-microshift-main-e2e-aws-tests-bootc-periodic + Status: FAILURE + Root Cause: [summarized] + URL: https://prow.ci.openshift.org/view/gs/... + +COMMON PATTERNS (across PRs) + If the same failure pattern appears in multiple PRs, list it here + with the affected PRs and jobs. 
+ +═══════════════════════════════════════════════════════════════ + +Individual job reports: /tmp/analyze-ci-prs-job-*.txt +``` + +## Examples + +### Example 1: Analyze All Failed PR Jobs + +``` +/analyze-ci-for-pull-requests +``` + +**Behavior**: +- Fetches all open PRs and their Prow job results +- Identifies PRs with failed jobs +- Analyzes each failed job +- Presents aggregated summary grouped by PR + +### Example 2: Analyze Only Rebase PRs + +``` +/analyze-ci-for-pull-requests --rebase +``` + +**Behavior**: +- Only includes PRs with `NO-ISSUE: rebase-release-` in the title +- Useful for release manager workflow to check rebase CI status + +### Example 3: Quick Analysis (First 5 Failed Jobs) + +``` +/analyze-ci-for-pull-requests --limit 5 +``` + +**Behavior**: +- Analyzes only first 5 failed jobs across all PRs +- Useful for quick health check + +## Performance Considerations + +- **Execution Time**: Depends on number of failed jobs; parallel execution helps significantly +- **Network Usage**: Each job analysis fetches logs from GCS +- **Parallelization**: All job analyses run in parallel for maximum efficiency +- **Use --limit**: For quick checks, use --limit flag to analyze a subset +- **File Storage**: All intermediate and report files are stored in `/tmp` directory + +## Prerequisites + +- `.claude/scripts/microshift-prow-jobs-for-pull-requests.sh` script must exist and be executable +- `analyze-ci-test-job` agent must be available +- `gh` CLI must be authenticated with access to openshift/microshift +- Internet access to fetch job data from GCS +- Bash shell + +## Error Handling + +### No Open PRs +``` +No open pull requests found. +``` + +### No Failed Jobs +``` +All open PR jobs are passing! ✓ +No failures to analyze. +``` + +### Script Not Found +``` +Error: Could not find .claude/scripts/microshift-prow-jobs-for-pull-requests.sh +Please ensure you're in the microshift project directory. 
+``` + +## Related Skills + +- **analyze-ci-test-job**: Detailed analysis of a single job (used internally) +- **analyze-ci-for-release**: Similar analysis for periodic release jobs +- **analyze-ci-for-release-manager**: Multi-release analysis with HTML output +- **analyze-ci-test-scenario**: Analyze specific test scenario results + +## Notes + +- This skill focuses on **presubmit** PR jobs (not periodic/postsubmit) +- Analysis is read-only - no modifications to CI data or PRs +- Results are saved in files in /tmp directory with a timestamp +- Provide links to the jobs in the summary +- Only present a concise analysis summary for each job +- PRs with no Prow jobs (e.g., drafts without triggered tests) are skipped +- Pattern detection improves with more jobs analyzed (avoid limiting unless needed) diff --git a/.claude/scripts/microshift-prow-jobs-for-pull-requests.sh b/.claude/scripts/microshift-prow-jobs-for-pull-requests.sh new file mode 100755 index 0000000000..8b04bf0229 --- /dev/null +++ b/.claude/scripts/microshift-prow-jobs-for-pull-requests.sh @@ -0,0 +1,242 @@ +#!/bin/bash +set -euo pipefail + +# Prow Jobs for Pull Requests +# Lists open MicroShift PRs with their associated Prow test job results +# Uses GCS bucket (test-platform-results) to query job data directly + +GCS_API="https://storage.googleapis.com/storage/v1/b/test-platform-results/o" +GCS_BASE="https://storage.googleapis.com/test-platform-results" +PROW_VIEW="https://prow.ci.openshift.org/view/gs/test-platform-results" +GH_REPO="openshift/microshift" +GCS_PR_PREFIX="pr-logs/pull/openshift_microshift" + +# Get open PRs using GitHub CLI, optionally filtered by title substring +fetch_open_prs() { + local filter="${1:-}" + local pr_data + pr_data=$(gh pr list --repo "${GH_REPO}" --state open --limit 100 --json number,title,url) + + if [[ -n "${filter}" ]]; then + echo "${pr_data}" | jq -c --arg f "${filter}" '[.[] | select(.title | contains($f))]' + else + echo "${pr_data}" + fi +} + +# List job names 
for a PR from GCS +list_pr_jobs() { + local pr="${1}" + curl -s --max-time 30 "${GCS_API}?prefix=${GCS_PR_PREFIX}/${pr}/&delimiter=/" | \ + jq -r '.prefixes[]? // empty' | \ + sed "s|${GCS_PR_PREFIX}/${pr}/||; s|/$||" +} + +# Get latest build result for a job +get_latest_build() { + local pr="${1}" job="${2}" + local build_id result + + build_id=$(curl -s --max-time 10 "${GCS_BASE}/${GCS_PR_PREFIX}/${pr}/${job}/latest-build.txt" 2>/dev/null) || return 1 + [[ -z "${build_id}" ]] && return 1 + + result=$(curl -s --max-time 10 "${GCS_BASE}/${GCS_PR_PREFIX}/${pr}/${job}/${build_id}/finished.json" 2>/dev/null | \ + jq -r '.result // "PENDING"' 2>/dev/null) || result="PENDING" + + local url="${PROW_VIEW}/${GCS_PR_PREFIX}/${pr}/${job}/${build_id}" + echo "${result} ${url}" +} + +# Map result to icon +result_to_icon() { + case "${1}" in + SUCCESS) echo "✓" ;; + FAILURE) echo "✗" ;; + ABORTED) echo "⊘" ;; + PENDING) echo "⋯" ;; + *) echo "?" ;; + esac +} + +# Usage +usage() { + echo "Usage: ${0} [--mode MODE] [--filter STRING]" + echo " --mode MODE: Operation mode (default: summary)" + echo " summary: Show table of open PRs with test job status summary" + echo " detail: Show table of open PRs with individual test job links" + echo " --filter STRING: Only include PRs whose title contains STRING" + exit 1 +} + +# Fetch job results for a single PR (parallelized) +fetch_pr_results() { + local pr="${1}" + local tmpdir="${2}" + local jobs + + jobs=$(list_pr_jobs "${pr}") + if [[ -z "${jobs}" ]]; then + return + fi + + # Fetch latest build for each job in parallel + while IFS= read -r job; do + ( + result_line=$(get_latest_build "${pr}" "${job}" 2>/dev/null) || true + if [[ -n "${result_line}" ]]; then + echo "${job} ${result_line}" > "${tmpdir}/${job}" + fi + ) & + done <<< "${jobs}" + wait +} + +# Summary mode - show PR with pass/fail counts +mode_summary() { + local filter="${1:-}" + local pr_data + + echo "Fetching open PRs..." 
>&2 + pr_data=$(fetch_open_prs "${filter}") + + local pr_count + pr_count=$(echo "${pr_data}" | jq 'length') + + if [[ "${pr_count}" -eq 0 ]]; then + echo "No open pull requests found." + return + fi + + echo "Fetching job results..." >&2 + + { + echo -e "PR\tTITLE\t✓\t✗\t⋯\tJOBS" + echo "${pr_data}" | jq -r '.[] | [.number, .title, .url] | @tsv' | while IFS=$'\t' read -r pr_number pr_title pr_url; do + local tmpdir + tmpdir=$(mktemp -d) + + fetch_pr_results "${pr_number}" "${tmpdir}" + + local success=0 failure=0 pending=0 total=0 + for f in "${tmpdir}"/*; do + [[ -f "${f}" ]] || continue + local result + result=$(cut -f2 "${f}") + total=$((total + 1)) + case "${result}" in + SUCCESS) success=$((success + 1)) ;; + FAILURE) failure=$((failure + 1)) ;; + *) pending=$((pending + 1)) ;; + esac + done + rm -rf "${tmpdir}" + + # Truncate title to 50 chars + if [[ ${#pr_title} -gt 50 ]]; then + pr_title="${pr_title:0:47}..." + fi + + echo -e "${pr_url}\t${pr_title}\t${success}\t${failure}\t${pending}\t${total}" + done + } | column -t -s $'\t' +} + +# Detail mode - show each job for each PR +mode_detail() { + local filter="${1:-}" + local pr_data + + echo "Fetching open PRs..." >&2 + pr_data=$(fetch_open_prs "${filter}") + + local pr_count + pr_count=$(echo "${pr_data}" | jq 'length') + + if [[ "${pr_count}" -eq 0 ]]; then + echo "No open pull requests found." + return + fi + + echo "${pr_data}" | jq -r '.[] | [.number, .title, .url] | @tsv' | while IFS=$'\t' read -r pr_number pr_title pr_url; do + echo "" + echo "=== PR #${pr_number}: ${pr_title} ===" + echo " ${pr_url}" + echo "" + + local tmpdir + tmpdir=$(mktemp -d) + + fetch_pr_results "${pr_number}" "${tmpdir}" + + local file_count + file_count=$(find "${tmpdir}" -maxdepth 1 -type f | wc -l) + + if [[ "${file_count}" -eq 0 ]]; then + echo " No Prow jobs found." 
+ else + { + echo -e "JOB\tSTATUS\tURL" + while IFS= read -r -d '' f; do + local job result url icon + IFS=$'\t' read -r job result url < "${f}" + icon=$(result_to_icon "${result}") + echo -e "${job}\t${icon}\t${url}" + done < <(find "${tmpdir}" -maxdepth 1 -type f -print0 | sort -z) + } | column -t -s $'\t' | sed 's/^/ /' + fi + + rm -rf "${tmpdir}" + done +} + +# Main +main() { + local mode="summary" + local filter="" + + # Parse arguments + while [[ ${#} -gt 0 ]]; do + case "${1}" in + --mode) + if [[ ${#} -lt 2 ]]; then + echo "Error: mode requires an argument" + usage + fi + mode="${2}" + shift 2 + ;; + --filter) + if [[ ${#} -lt 2 ]]; then + echo "Error: filter requires an argument" + usage + fi + filter="${2}" + shift 2 + ;; + -*) + echo "Unknown option: ${1}" + usage + ;; + *) + echo "Unknown argument: ${1}" + usage + ;; + esac + done + + # Execute mode + case "${mode}" in + summary) + mode_summary "${filter}" + ;; + detail) + mode_detail "${filter}" + ;; + *) + echo "Error: Unknown mode '${mode}'" + usage + ;; + esac +} + +main "${@}" From 02f361e32897843db7f3585ed289b5fa71ac1843 Mon Sep 17 00:00:00 2001 From: Gregory Giguashvili Date: Mon, 16 Mar 2026 07:21:28 +0200 Subject: [PATCH 07/10] Update analyze-ci-for-release-manager to support rebase PR analysis --- .../analyze-ci-for-release-manager.md | 154 +++++++++++++----- 1 file changed, 117 insertions(+), 37 deletions(-) diff --git a/.claude/commands/analyze-ci-for-release-manager.md b/.claude/commands/analyze-ci-for-release-manager.md index 1f5e8e0478..97b22f178a 100644 --- a/.claude/commands/analyze-ci-for-release-manager.md +++ b/.claude/commands/analyze-ci-for-release-manager.md @@ -13,7 +13,7 @@ allowed-tools: Skill, Bash, Read, Write, Glob, Grep, Agent ``` ## Description -Accepts a comma-separated list of MicroShift release versions, runs the `analyze-ci-for-release` skill for each release, and produces a single HTML summary file consolidating all per-release results. 
+Accepts a comma-separated list of MicroShift release versions, runs the `analyze-ci-for-release` skill for each release and the `analyze-ci-for-pull-requests --rebase` skill for open rebase PRs, and produces a single HTML summary file consolidating all results. The HTML report uses tabs to separate Periodics (per-release) and Pull Requests sections. ## Arguments - `$ARGUMENTS` (required): Comma-separated list of release versions (e.g., `4.19,4.20,4.21,4.22`) @@ -31,7 +31,7 @@ Accepts a comma-separated list of MicroShift release versions, runs the `analyze **Error Handling**: - If `$ARGUMENTS` is empty, display: "Usage: /analyze-ci-for-release-manager <release1,release2,...>" and stop -### Step 2: Analyze Each Release +### Step 2: Analyze Each Release (Periodics) **Actions**: 1. For each release version from the parsed list, invoke the `analyze-ci-for-release` skill: @@ -47,18 +47,35 @@ Accepts a comma-separated list of MicroShift release versions, runs the `analyze Analyzing release X/Y: <version> ``` -### Step 3: Collect Per-Release Results +### Step 3: Analyze Rebase Pull Requests **Actions**: -1. After all releases are analyzed, gather all summary files: - - Look for `/tmp/analyze-ci-release-<version>-summary.*.txt` for each version - - Also look for per-job files: `/tmp/analyze-ci-release-<version>-job-*.txt` +1. Invoke the `analyze-ci-for-pull-requests` skill with the `--rebase` argument: + ``` + Skill: analyze-ci-for-pull-requests, args: "--rebase" + ``` +2. After the skill completes, note the summary report file path (typically `/tmp/analyze-ci-prs-summary.*.txt`) +3. If no rebase PRs are found, note "No open rebase PRs" for the report + +**Progress Reporting**: +``` +Analyzing rebase pull requests... +``` + +### Step 4: Collect All Results + +**Actions**: +1.
After all analyses complete, gather all summary files: + - Periodics: `/tmp/analyze-ci-release-<version>-summary.*.txt` for each version + - Pull Requests: `/tmp/analyze-ci-prs-summary.*.txt` + - Per-job files: `/tmp/analyze-ci-release-<version>-job-*.txt` and `/tmp/analyze-ci-prs-job-*.txt` 2. Read each summary file to extract the analysis content 3. If a summary file is missing for a release, note it as "Analysis failed or produced no output" +4. If no PR summary file exists, note "No open rebase PRs or no failures found" -### Step 4: Generate HTML Summary Report +### Step 5: Generate HTML Summary Report -**Goal**: Create a single HTML file at `/tmp/microshift-ci-release-manager-<timestamp>.html` that consolidates all per-release analyses. +**Goal**: Create a single HTML file at `/tmp/microshift-ci-release-manager-<timestamp>.html` that consolidates all analyses with tabbed navigation. **Actions**: 1. Generate the HTML report with the structure described below @@ -68,7 +85,7 @@ Analyzing release X/Y: **HTML Structure**: -The HTML file must be a self-contained, single-file document with embedded CSS. Use the following structure: +The HTML file must be a self-contained, single-file document with embedded CSS and JS. Use the following structure: ```html @@ -112,6 +129,14 @@ The HTML file must be a self-contained, single-file document with embedded CSS.
.toc a:hover { text-decoration: underline; } .timestamp { color: #6c757d; font-size: 0.9em; } a { color: #0366d6; } + + /* Tab styling */ + .tab-bar { display: flex; gap: 0; margin: 20px 0 0 0; border-bottom: 2px solid #dee2e6; } + .tab-btn { padding: 12px 24px; border: none; background: transparent; font-size: 1em; font-weight: 600; color: #6c757d; cursor: pointer; border-bottom: 3px solid transparent; margin-bottom: -2px; transition: color 0.2s, border-color 0.2s; } + .tab-btn:hover { color: #333; } + .tab-btn.active { color: #e94560; border-bottom-color: #e94560; } + .tab-content { display: none; } + .tab-content.active { display: block; } @@ -119,42 +144,89 @@ The HTML file must be a self-contained, single-file document with embedded CSS.

MicroShift CI Release Manager Report

Generated: YYYY-MM-DD HH:MM:SS UTC

- +
N
Release X.YY Failed Jobs
+ +
+
N
+
Rebase PRs Failed Jobs
+
- -
-

Releases Analyzed

- + +
+ +
- -
-
-

Release X.YY

- N failed jobs + +
+ + +
+

Releases Analyzed

+
- - - -
- ... analysis content ... + +
+
+

Release X.YY

+ N failed jobs +
+ + +
+ ... periodics analysis content ... +
+ +
- + +
+ + +
+
+

PR #NNN: title

+ N failed jobs +
+ + +
+ ... PR analysis content ... +
+
+ + +
+

No open rebase pull requests found.

+
+