From aec47dd514ad5a8609ca04c9a78064a5c170739a Mon Sep 17 00:00:00 2001 From: Giles Odigwe Date: Mon, 27 Apr 2026 08:04:47 -0700 Subject: [PATCH 1/6] Add dotnet integration test report to CI - Add --report-junit flag to dotnet integration test step to generate JUnit XML alongside TRX, with explicit --results-directory to centralize output in IntegrationTestResults/ - Upload JUnit XML artifacts from each matrix leg (net10.0/ubuntu, net472/windows) as dotnet-test-results-{framework}-{os} - Add dotnet-integration-test-report job that downloads artifacts, runs the existing aggregate.py script, posts markdown to Job Summary, and saves trend history via actions/cache - Refactor aggregate.py to discover JUnit XML files recursively, supporting both pytest (pytest.xml) and xunit (*.junit.xml) layouts - Handle provider name derivation for dotnet artifact naming convention - Fix nodeid collision when same test runs under multiple frameworks by qualifying keys with provider when collisions are detected - Improve module extraction for dotnet C# classnames (recognizes IntegrationTests/UnitTests namespace segments) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/dotnet-build-and-test.yml | 66 ++++++++++++ python/scripts/flaky_report/aggregate.py | 113 ++++++++++++++++---- 2 files changed, 158 insertions(+), 21 deletions(-) diff --git a/.github/workflows/dotnet-build-and-test.yml b/.github/workflows/dotnet-build-and-test.yml index 6454adba31..6fb2c77fc8 100644 --- a/.github/workflows/dotnet-build-and-test.yml +++ b/.github/workflows/dotnet-build-and-test.yml @@ -257,6 +257,8 @@ jobs: -c ${{ matrix.configuration }} ` --no-build -v Normal ` --report-xunit-trx ` + --report-junit ` + --results-directory ../IntegrationTestResults/ ` --ignore-exit-code 8 ` --filter-not-trait "Category=IntegrationDisabled" ` --parallel-algorithm aggressive ` @@ -299,6 +301,14 @@ jobs: shell: pwsh run: ./dotnet/eng/scripts/dotnet-check-coverage.ps1 -JsonReportPath 
"TestResults/Reports/Summary.json" -CoverageThreshold $env:COVERAGE_THRESHOLD + - name: Upload integration test results + if: always() && github.event_name != 'pull_request' && matrix.integration-tests + uses: actions/upload-artifact@v7 + with: + name: dotnet-test-results-${{ matrix.targetFramework }}-${{ matrix.os }} + path: IntegrationTestResults/**/*.junit.xml + if-no-files-found: ignore + # This final job is required to satisfy the merge queue. It must only run (or succeed) if no tests failed dotnet-build-and-test-check: if: always() @@ -341,3 +351,59 @@ jobs: uses: actions/github-script@v8 with: script: core.setFailed('Integration Tests Cancelled!') + + # Integration test trend report (aggregates JUnit XML results from dotnet test jobs) + dotnet-integration-test-report: + name: Integration Test Report + if: > + always() && + github.event_name != 'pull_request' && + (contains(join(needs.*.result, ','), 'success') || + contains(join(needs.*.result, ','), 'failure')) + needs: [dotnet-test] + runs-on: ubuntu-latest + defaults: + run: + working-directory: python + steps: + - uses: actions/checkout@v6 + - name: Set up python and install the project + uses: ./.github/actions/python-setup + with: + python-version: "3.13" + os: ${{ runner.os }} + - name: Download all test results from current run + uses: actions/download-artifact@v4 + with: + pattern: dotnet-test-results-* + path: dotnet-test-results/ + - name: Restore report history cache + uses: actions/cache/restore@v4 + with: + path: python/dotnet-integration-report-history.json + key: dotnet-integration-report-history-${{ github.run_id }} + restore-keys: | + dotnet-integration-report-history- + - name: Generate trend report + run: > + uv run python scripts/flaky_report/aggregate.py + ../dotnet-test-results/ + dotnet-integration-report-history.json + dotnet-integration-test-report.md + - name: Post to Job Summary + if: always() + run: cat dotnet-integration-test-report.md >> $GITHUB_STEP_SUMMARY + - name: Save 
report history cache + if: always() + uses: actions/cache/save@v4 + with: + path: python/dotnet-integration-report-history.json + key: dotnet-integration-report-history-${{ github.run_id }} + - name: Upload trend report + if: always() + uses: actions/upload-artifact@v7 + with: + name: dotnet-integration-test-report + path: | + python/dotnet-integration-test-report.md + python/dotnet-integration-report-history.json diff --git a/python/scripts/flaky_report/aggregate.py b/python/scripts/flaky_report/aggregate.py index e07a5e136a..579f9ac935 100644 --- a/python/scripts/flaky_report/aggregate.py +++ b/python/scripts/flaky_report/aggregate.py @@ -2,16 +2,18 @@ """Aggregate per-provider JUnit XML test results and generate a trend report. -Parses ``pytest.xml`` (JUnit XML) files produced by each CI job, merges them -into a single run, combines with historical data, and generates a markdown -trend table — the same pattern used by ``scripts/sample_validation/aggregate.py``. +Parses JUnit XML files produced by CI jobs — both ``pytest.xml`` (Python) and +xunit v3 ``*.junit.xml`` (dotnet) — merges them into a single run, combines +with historical data, and generates a markdown trend table. Usage (from CI): python aggregate.py -The reports directory is expected to contain subdirectories named -``test-results-<provider>/`` each containing a ``pytest.xml`` file -(created by ``actions/download-artifact``). +The reports directory is expected to contain artifact subdirectories. Two +layouts are supported: + +- **Python (pytest):** ``test-results-<provider>/pytest.xml`` +- **Dotnet (xunit):** ``dotnet-test-results-<framework>-<os>/*.junit.xml`` """ from __future__ import annotations @@ -46,9 +48,21 @@ def _format_run_label(timestamp: str) -> str: def _derive_provider(directory_name: str) -> str: """Derive a provider label from a report directory name. 
- ``test-results-openai`` → ``OpenAI`` - ``test-results-azure-openai`` → ``Azure OpenAI`` + Handles both Python and dotnet naming conventions: + - ``test-results-openai`` → ``OpenAI`` + - ``test-results-azure-openai`` → ``Azure OpenAI`` + - ``dotnet-test-results-net10.0-ubuntu-latest`` → ``net10.0 (ubuntu)`` """ + # Dotnet convention: dotnet-test-results-- + if directory_name.startswith("dotnet-test-results-"): + raw = directory_name.replace("dotnet-test-results-", "") + # e.g. "net10.0-ubuntu-latest" → framework="net10.0", os="ubuntu-latest" + parts = raw.split("-", 1) + framework = parts[0] + os_label = parts[1].split("-")[0] if len(parts) > 1 else "" + return f"{framework} ({os_label})" if os_label else framework + + # Python convention: test-results- raw = directory_name.replace("test-results-", "") known = { "openai": "OpenAI", @@ -102,11 +116,21 @@ def _parse_junit_xml(xml_path: Path) -> list[dict[str, str]]: # it appends the class name, e.g.: # "packages.foundry.tests.foundry.test_foundry_embedding_client.TestFoundryEmbeddingIntegration" # We want the file-level module: "test_foundry_embedding_client" + # + # xunit (dotnet) writes classname as the full C# type, e.g.: + # "OpenAIChatCompletion.IntegrationTests.ChatCompletionTests" + # We want the project prefix: "OpenAIChatCompletion" if classname: parts = classname.rsplit(".", 2) # If the last segment starts with uppercase it's a class name — take the one before it if len(parts) >= 2 and parts[-1][0:1].isupper(): - module = parts[-2] + # For dotnet: if the penultimate part is "IntegrationTests" or "UnitTests", + # use the part before that (the project name) instead + if parts[-2] in ("IntegrationTests", "UnitTests") and len(parts) >= 3: + # parts[0] may contain dots — take the last segment of it + module = parts[0].rsplit(".", 1)[-1] + else: + module = parts[-2] else: module = parts[-1] else: @@ -148,28 +172,61 @@ def _parse_junit_xml(xml_path: Path) -> list[dict[str, str]]: # 
--------------------------------------------------------------------------- +def _discover_xml_files(reports_dir: Path) -> list[tuple[str, Path]]: + """Discover JUnit XML test result files in artifact subdirectories. + + Handles two directory layouts: + - **Python (pytest):** ``test-results-/pytest.xml`` + - **Dotnet (xunit):** ``dotnet-test-results--/*.junit.xml`` + + Returns: + List of ``(directory_name, xml_path)`` tuples. + """ + xml_files: list[tuple[str, Path]] = [] + if not reports_dir.is_dir(): + return xml_files + + for subdir in sorted(reports_dir.iterdir()): + if not subdir.is_dir(): + continue + + # Python layout: single pytest.xml per artifact + pytest_xml = subdir / "pytest.xml" + if pytest_xml.exists(): + xml_files.append((subdir.name, pytest_xml)) + continue + + # Dotnet layout: multiple *.junit.xml files per artifact + junit_files = sorted(subdir.rglob("*.junit.xml")) + for jf in junit_files: + xml_files.append((subdir.name, jf)) + + # Fallback: any .xml file that looks like JUnit (not .trx, not cobertura) + if not junit_files: + for xf in sorted(subdir.rglob("*.xml")): + if xf.suffix == ".xml" and not xf.name.endswith(".cobertura.xml"): + xml_files.append((subdir.name, xf)) + + return xml_files + + def load_current_run(reports_dir: Path) -> dict[str, Any]: """Load per-provider JUnit XML reports from the current CI run and merge. + Supports both pytest (Python) and xunit v3 (dotnet) JUnit XML formats. + Args: - reports_dir: Directory containing ``test-results-/`` subdirs. + reports_dir: Directory containing artifact subdirectories with XML reports. Returns: Merged run dict with ``timestamp``, ``summary``, ``results``. 
""" combined_results: dict[str, dict[str, str]] = {} # nodeid → {status, provider} - # actions/download-artifact creates: reports_dir/test-results-openai/pytest.xml - xml_files: list[tuple[str, Path]] = [] - if reports_dir.is_dir(): - for subdir in sorted(reports_dir.iterdir()): - if subdir.is_dir(): - xml_file = subdir / "pytest.xml" - if xml_file.exists(): - xml_files.append((subdir.name, xml_file)) + xml_files = _discover_xml_files(reports_dir) if not xml_files: - print(f"Warning: No pytest.xml files found in {reports_dir}") + print(f"Warning: No JUnit XML files found in {reports_dir}") return { "timestamp": datetime.now(timezone.utc).isoformat(), "summary": { @@ -186,7 +243,21 @@ def load_current_run(reports_dir: Path) -> dict[str, Any]: provider = _derive_provider(dir_name) tests = _parse_junit_xml(xml_file) for test in tests: - combined_results[test["nodeid"]] = { + # Use provider-qualified key when the same test runs under + # multiple providers (e.g. dotnet net10.0 vs net472). This + # prevents later results from silently overwriting earlier ones. 
+ raw_id = test["nodeid"] + key = raw_id + if key in combined_results and combined_results[key]["provider"] != provider: + # Collision: re-key existing entry and use qualified key for new one + existing = combined_results.pop(key) + combined_results[f"{existing['provider']}::{raw_id}"] = existing + key = f"{provider}::{raw_id}" + elif f"{provider}::{raw_id}" in combined_results: + # Provider-qualified key already exists (previous collision) + key = f"{provider}::{raw_id}" + + combined_results[key] = { "status": test["status"], "provider": provider, "module": test.get("module", ""), @@ -247,7 +318,7 @@ def _short_name(nodeid: str) -> str: def generate_trend_report(runs: list[dict[str, Any]]) -> str: """Generate a markdown trend report from run history.""" lines = [ - "# 🔬 Flaky Test Report", + "# 🔬 Integration Test Report", "", f"*Generated: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}*", "", From 450eab46405b8a3cd0780bb51c4404f1fe2a9206 Mon Sep 17 00:00:00 2001 From: Giles Odigwe Date: Mon, 27 Apr 2026 08:36:25 -0700 Subject: [PATCH 2/6] chore: trigger dotnet CI for report validation Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- dotnet/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/dotnet/README.md b/dotnet/README.md index 328dfdf684..2edb402a94 100644 --- a/dotnet/README.md +++ b/dotnet/README.md @@ -33,3 +33,4 @@ Console.WriteLine(await agent.RunAsync("Write a haiku about Microsoft Agent Fram - [Design Documents](../docs/design) - [Architectural Decision Records](../docs/decisions) - [MSFT Learn Docs](https://learn.microsoft.com/agent-framework/overview/agent-framework-overview) + From f48c8b3cfc1cdc97721ef860c6de130ab33972bc Mon Sep 17 00:00:00 2001 From: Giles Odigwe Date: Mon, 27 Apr 2026 09:28:45 -0700 Subject: [PATCH 3/6] fix: use .junit extension (not .junit.xml) for xunit v3 output xUnit v3 generates files with .junit extension, not .junit.xml. Update upload glob and aggregate.py discovery to match. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/dotnet-build-and-test.yml | 2 +- python/scripts/flaky_report/aggregate.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/dotnet-build-and-test.yml b/.github/workflows/dotnet-build-and-test.yml index 6fb2c77fc8..e53ccd0e1b 100644 --- a/.github/workflows/dotnet-build-and-test.yml +++ b/.github/workflows/dotnet-build-and-test.yml @@ -306,7 +306,7 @@ jobs: uses: actions/upload-artifact@v7 with: name: dotnet-test-results-${{ matrix.targetFramework }}-${{ matrix.os }} - path: IntegrationTestResults/**/*.junit.xml + path: IntegrationTestResults/**/*.junit if-no-files-found: ignore # This final job is required to satisfy the merge queue. It must only run (or succeed) if no tests failed diff --git a/python/scripts/flaky_report/aggregate.py b/python/scripts/flaky_report/aggregate.py index 579f9ac935..cd93c7db49 100644 --- a/python/scripts/flaky_report/aggregate.py +++ b/python/scripts/flaky_report/aggregate.py @@ -3,7 +3,7 @@ """Aggregate per-provider JUnit XML test results and generate a trend report. Parses JUnit XML files produced by CI jobs — both ``pytest.xml`` (Python) and -xunit v3 ``*.junit.xml`` (dotnet) — merges them into a single run, combines +xunit v3 ``*.junit`` (dotnet) — merges them into a single run, combines with historical data, and generates a markdown trend table. 
Usage (from CI): @@ -13,7 +13,7 @@ layouts are supported: - **Python (pytest):** ``test-results-<provider>/pytest.xml`` -- **Dotnet (xunit):** ``dotnet-test-results-<framework>-<os>/*.junit.xml`` +- **Dotnet (xunit):** ``dotnet-test-results-<framework>-<os>/*.junit`` """ from __future__ import annotations @@ -177,7 +177,7 @@ def _discover_xml_files(reports_dir: Path) -> list[tuple[str, Path]]: Handles two directory layouts: - **Python (pytest):** ``test-results-<provider>/pytest.xml`` - - **Dotnet (xunit):** ``dotnet-test-results-<framework>-<os>/*.junit.xml`` + - **Dotnet (xunit):** ``dotnet-test-results-<framework>-<os>/*.junit`` Returns: List of ``(directory_name, xml_path)`` tuples. @@ -196,8 +196,8 @@ def _discover_xml_files(reports_dir: Path) -> list[tuple[str, Path]]: xml_files.append((subdir.name, pytest_xml)) continue - # Dotnet layout: multiple *.junit.xml files per artifact - junit_files = sorted(subdir.rglob("*.junit.xml")) + # Dotnet layout: multiple *.junit files per artifact + junit_files = sorted(subdir.rglob("*.junit")) for jf in junit_files: xml_files.append((subdir.name, jf)) From df49b9114143196d2130a93174a1c4c74bab95e5 Mon Sep 17 00:00:00 2001 From: Giles Odigwe Date: Mon, 27 Apr 2026 12:27:28 -0700 Subject: [PATCH 4/6] fix: use deterministic provider-qualified keys for dotnet tests Always prefix dotnet test keys with provider (e.g. net10.0 (ubuntu)::TestName) to ensure stable, comparable counts across runs regardless of file parse order. Also show Executed (passed+failed) instead of Total in summary table. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- python/scripts/flaky_report/aggregate.py | 35 +++++++++++------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/python/scripts/flaky_report/aggregate.py b/python/scripts/flaky_report/aggregate.py index cd93c7db49..9f0c8aa478 100644 --- a/python/scripts/flaky_report/aggregate.py +++ b/python/scripts/flaky_report/aggregate.py @@ -238,24 +238,18 @@ def load_current_run(reports_dir: Path) -> dict[str, Any]: "results": {}, } + # Dotnet tests always run under multiple frameworks, so we always + # qualify their keys with the provider to ensure deterministic, + # stable keys across runs regardless of file parse order. + is_dotnet = any(d.startswith("dotnet-test-results-") for d, _ in xml_files) + for dir_name, xml_file in xml_files: print(f" Loading: {xml_file}") provider = _derive_provider(dir_name) tests = _parse_junit_xml(xml_file) for test in tests: - # Use provider-qualified key when the same test runs under - # multiple providers (e.g. dotnet net10.0 vs net472). This - # prevents later results from silently overwriting earlier ones. 
raw_id = test["nodeid"] - key = raw_id - if key in combined_results and combined_results[key]["provider"] != provider: - # Collision: re-key existing entry and use qualified key for new one - existing = combined_results.pop(key) - combined_results[f"{existing['provider']}::{raw_id}"] = existing - key = f"{provider}::{raw_id}" - elif f"{provider}::{raw_id}" in combined_results: - # Provider-qualified key already exists (previous collision) - key = f"{provider}::{raw_id}" + key = f"{provider}::{raw_id}" if is_dotnet else raw_id combined_results[key] = { "status": test["status"], @@ -327,19 +321,22 @@ def generate_trend_report(runs: list[dict[str, Any]]) -> str: # --- Overall status table (most recent first) --- lines.append("## Overall Status (Last 5 Runs)") lines.append("") - lines.append("| Run | Total | ✅ Passed | ❌ Failed | ⏭️ Skipped |") - lines.append("|-----|-------|-----------|-----------|------------|") + lines.append("| Run | Executed | ✅ Passed | ❌ Failed | ⏭️ Skipped |") + lines.append("|-----|----------|-----------|-----------|------------|") for run in reversed(runs): s = run.get("summary", {}) - total = s.get("total", 0) + passed = s.get("passed", 0) + failed = s.get("failed", 0) + skipped = s.get("skipped", 0) + executed = passed + failed label = _format_run_label(run["timestamp"]) lines.append( f"| {label} " - f"| {total} " - f"| {s.get('passed', 0)}/{total} " - f"| {s.get('failed', 0)}/{total} " - f"| {s.get('skipped', 0)}/{total} |" + f"| {executed} " + f"| {passed} " + f"| {failed} " + f"| {skipped} |" ) for _ in range(MAX_HISTORY - len(runs)): From 4ff5130c1c547fd6e6e3a219493d804255dfefcc Mon Sep 17 00:00:00 2001 From: Giles Odigwe Date: Mon, 27 Apr 2026 13:44:12 -0700 Subject: [PATCH 5/6] fix: match Python report summary format (Total, passed/total, etc.) 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- python/scripts/flaky_report/aggregate.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/python/scripts/flaky_report/aggregate.py b/python/scripts/flaky_report/aggregate.py index 9f0c8aa478..6bde7ecd2e 100644 --- a/python/scripts/flaky_report/aggregate.py +++ b/python/scripts/flaky_report/aggregate.py @@ -321,22 +321,19 @@ def generate_trend_report(runs: list[dict[str, Any]]) -> str: # --- Overall status table (most recent first) --- lines.append("## Overall Status (Last 5 Runs)") lines.append("") - lines.append("| Run | Executed | ✅ Passed | ❌ Failed | ⏭️ Skipped |") - lines.append("|-----|----------|-----------|-----------|------------|") + lines.append("| Run | Total | ✅ Passed | ❌ Failed | ⏭️ Skipped |") + lines.append("|-----|-------|-----------|-----------|------------|") for run in reversed(runs): s = run.get("summary", {}) - passed = s.get("passed", 0) - failed = s.get("failed", 0) - skipped = s.get("skipped", 0) - executed = passed + failed + total = s.get("total", 0) label = _format_run_label(run["timestamp"]) lines.append( f"| {label} " - f"| {executed} " - f"| {passed} " - f"| {failed} " - f"| {skipped} |" + f"| {total} " + f"| {s.get('passed', 0)}/{total} " + f"| {s.get('failed', 0)}/{total} " + f"| {s.get('skipped', 0)}/{total} |" ) for _ in range(MAX_HISTORY - len(runs)): From 2aee1d81f339c2af4aaeb2757fbc2e1dd1be4329 Mon Sep 17 00:00:00 2001 From: Giles Odigwe Date: Mon, 27 Apr 2026 14:40:26 -0700 Subject: [PATCH 6/6] feat: split dotnet report into per-framework tables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dotnet tests run on multiple frameworks (net10.0, net472). Instead of one combined table with unstable totals, show separate sections per framework — each with its own summary row and per-test table. Python reports retain the original single-table format. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- python/scripts/flaky_report/aggregate.py | 149 +++++++++++++++++++---- 1 file changed, 125 insertions(+), 24 deletions(-) diff --git a/python/scripts/flaky_report/aggregate.py b/python/scripts/flaky_report/aggregate.py index 6bde7ecd2e..708f47fcf0 100644 --- a/python/scripts/flaky_report/aggregate.py +++ b/python/scripts/flaky_report/aggregate.py @@ -257,8 +257,23 @@ def load_current_run(reports_dir: Path) -> dict[str, Any]: "module": test.get("module", ""), } - # Build summary counts using mutually exclusive status buckets. - # Errors are folded into the failed count for display purposes. + # Build per-provider summary counts so the report can show one row per + # framework (dotnet) or per provider (Python). + provider_counts: dict[str, dict[str, int]] = {} + for r in combined_results.values(): + prov = r.get("provider", "Unknown") + if prov not in provider_counts: + provider_counts[prov] = {"total": 0, "passed": 0, "failed": 0, "skipped": 0} + provider_counts[prov]["total"] += 1 + st = r["status"] + if st == "passed": + provider_counts[prov]["passed"] += 1 + elif st in ("failed", "error"): + provider_counts[prov]["failed"] += 1 + elif st == "skipped": + provider_counts[prov]["skipped"] += 1 + + # Overall summary (sum across all providers). statuses = [r["status"] for r in combined_results.values()] summary = { "total": len(statuses), @@ -270,6 +285,7 @@ def load_current_run(reports_dir: Path) -> dict[str, Any]: return { "timestamp": datetime.now(timezone.utc).isoformat(), "summary": summary, + "provider_summaries": provider_counts, "results": combined_results, } @@ -318,7 +334,29 @@ def generate_trend_report(runs: list[dict[str, Any]]) -> str: "", ] - # --- Overall status table (most recent first) --- + # Detect whether this is a dotnet report (provider-qualified keys). 
+ is_dotnet = False + for run in runs: + provider_sums = run.get("provider_summaries", {}) + if any(p.startswith("net") for p in provider_sums): + is_dotnet = True + break + + if is_dotnet: + _generate_dotnet_report(lines, runs) + else: + _generate_python_report(lines, runs) + + lines.append("") + lines.append("**Legend:** ✅ Passed · ❌ Failed · ⏭️ Skipped · ⚠️ Expected Failure (xfail) · N/A Not available") + lines.append("") + + return "\n".join(lines) + + +def _generate_python_report(lines: list[str], runs: list[dict[str, Any]]) -> None: + """Generate the original single-table Python report format.""" + # --- Overall status table --- lines.append("## Overall Status (Last 5 Runs)") lines.append("") lines.append("| Run | Total | ✅ Passed | ❌ Failed | ⏭️ Skipped |") @@ -341,27 +379,91 @@ def generate_trend_report(runs: list[dict[str, Any]]) -> str: lines.append("") - # --- Per-test results table --- - lines.append("## Per-Test Results") - lines.append("") + # --- Single per-test results table --- + _generate_per_test_table(lines, runs, "## Per-Test Results") + + +def _generate_dotnet_report(lines: list[str], runs: list[dict[str, Any]]) -> None: + """Generate per-framework tables for dotnet (net10.0, net472, etc.).""" + # Collect all providers seen across all runs, sorted for stable ordering + all_providers: set[str] = set() + for run in runs: + all_providers.update(run.get("provider_summaries", {}).keys()) + providers = sorted(all_providers) + + for provider in providers: + lines.append(f"## {provider}") + lines.append("") - # Collect all test nodeids, providers, and modules across all runs - all_tests: dict[str, str] = {} # nodeid → provider (from most recent run) - all_modules: dict[str, str] = {} # nodeid → module (from most recent run) + # --- Per-provider summary table --- + lines.append("| Run | Total | ✅ Passed | ❌ Failed | ⏭️ Skipped |") + lines.append("|-----|-------|-----------|-----------|------------|") + + for run in reversed(runs): + ps = 
run.get("provider_summaries", {}).get(provider, {}) + total = ps.get("total", 0) + label = _format_run_label(run["timestamp"]) + if total == 0: + lines.append(f"| {label} | N/A | N/A | N/A | N/A |") + else: + lines.append( + f"| {label} " + f"| {total} " + f"| {ps.get('passed', 0)}/{total} " + f"| {ps.get('failed', 0)}/{total} " + f"| {ps.get('skipped', 0)}/{total} |" + ) + + for _ in range(MAX_HISTORY - len(runs)): + lines.append("| N/A | N/A | N/A | N/A | N/A |") + + lines.append("") + + # --- Per-test table filtered to this provider --- + _generate_per_test_table( + lines, runs, + heading=None, + provider_filter=provider, + ) + + +def _generate_per_test_table( + lines: list[str], + runs: list[dict[str, Any]], + heading: str | None = None, + provider_filter: str | None = None, +) -> None: + """Emit a per-test trend table, optionally filtered to a single provider.""" + if heading: + lines.append(heading) + lines.append("") + + # Collect all test nodeids (and metadata) across all runs + all_tests: dict[str, str] = {} # nodeid → provider + all_modules: dict[str, str] = {} # nodeid → module for run in runs: for nodeid, info in run.get("results", {}).items(): - provider = info.get("provider", "Unknown") if isinstance(info, dict) else "Unknown" - module = info.get("module", "") if isinstance(info, dict) else "" - all_tests[nodeid] = provider + if not isinstance(info, dict): + continue + prov = info.get("provider", "Unknown") + if provider_filter and prov != provider_filter: + continue + module = info.get("module", "") + all_tests[nodeid] = prov all_modules[nodeid] = module if not all_tests: lines.append("*No test results available.*") - return "\n".join(lines) + lines.append("") + return - # Build header (most recent run first) - header = "| Test | File | Provider |" - separator = "|------|------|----------|" + # Build header + if provider_filter: + header = "| Test | File |" + separator = "|------|------|" + else: + header = "| Test | File | Provider |" + separator = 
"|------|------|----------|" for run in reversed(runs): label = _format_run_label(run["timestamp"]) header += f" {label} |" @@ -373,12 +475,15 @@ def generate_trend_report(runs: list[dict[str, Any]]) -> str: lines.append(header) lines.append(separator) - # Sort by provider then test name - for nodeid in sorted(all_tests, key=lambda n: (all_tests[n], n)): - provider = all_tests[nodeid] + # Sort by module then test name + for nodeid in sorted(all_tests, key=lambda n: (all_modules.get(n, ""), n)): module = all_modules.get(nodeid, "") short = _short_name(nodeid) - row = f"| `{short}` | `{module}` | {provider} |" + if provider_filter: + row = f"| `{short}` | `{module}` |" + else: + provider = all_tests[nodeid] + row = f"| `{short}` | `{module}` | {provider} |" for run in reversed(runs): result = run.get("results", {}).get(nodeid) @@ -395,10 +500,6 @@ def generate_trend_report(runs: list[dict[str, Any]]) -> str: lines.append(row) lines.append("") - lines.append("**Legend:** ✅ Passed · ❌ Failed · ⏭️ Skipped · ⚠️ Expected Failure (xfail) · N/A Not available") - lines.append("") - - return "\n".join(lines) # ---------------------------------------------------------------------------