From aec47dd514ad5a8609ca04c9a78064a5c170739a Mon Sep 17 00:00:00 2001
From: Giles Odigwe <gilesodigwe@microsoft.com>
Date: Mon, 27 Apr 2026 08:04:47 -0700
Subject: [PATCH 1/6] Add dotnet integration test report to CI

- Add --report-junit flag to dotnet integration test step to generate
  JUnit XML alongside TRX, with explicit --results-directory to
  centralize output in IntegrationTestResults/
- Upload JUnit XML artifacts from each matrix leg (net10.0/ubuntu,
  net472/windows) as dotnet-test-results-{framework}-{os}
- Add dotnet-integration-test-report job that downloads artifacts,
  runs the existing aggregate.py script, posts markdown to Job Summary,
  and saves trend history via actions/cache
- Refactor aggregate.py to discover JUnit XML files recursively,
  supporting both pytest (pytest.xml) and xunit (*.junit.xml) layouts
- Handle provider name derivation for dotnet artifact naming convention
- Fix nodeid collisions when the same test runs under multiple frameworks
  by qualifying keys with the provider when a collision is detected
- Improve module extraction for dotnet C# classnames (recognizes
  IntegrationTests/UnitTests namespace segments)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .github/workflows/dotnet-build-and-test.yml |  66 ++++++++++++
 python/scripts/flaky_report/aggregate.py    | 113 ++++++++++++++++----
 2 files changed, 158 insertions(+), 21 deletions(-)

diff --git a/.github/workflows/dotnet-build-and-test.yml b/.github/workflows/dotnet-build-and-test.yml
index 6454adba31..6fb2c77fc8 100644
--- a/.github/workflows/dotnet-build-and-test.yml
+++ b/.github/workflows/dotnet-build-and-test.yml
@@ -257,6 +257,8 @@ jobs:
             -c ${{ matrix.configuration }} `
             --no-build -v Normal `
             --report-xunit-trx `
+            --report-junit `
+            --results-directory ../IntegrationTestResults/ `
             --ignore-exit-code 8 `
             --filter-not-trait "Category=IntegrationDisabled" `
             --parallel-algorithm aggressive `
@@ -299,6 +301,14 @@ jobs:
         shell: pwsh
         run: ./dotnet/eng/scripts/dotnet-check-coverage.ps1 -JsonReportPath "TestResults/Reports/Summary.json" -CoverageThreshold $env:COVERAGE_THRESHOLD
 
+      - name: Upload integration test results
+        if: always() && github.event_name != 'pull_request' && matrix.integration-tests
+        uses: actions/upload-artifact@v7
+        with:
+          name: dotnet-test-results-${{ matrix.targetFramework }}-${{ matrix.os }}
+          path: IntegrationTestResults/**/*.junit.xml
+          if-no-files-found: ignore
+
   # This final job is required to satisfy the merge queue. It must only run (or succeed) if no tests failed
   dotnet-build-and-test-check:
     if: always()
@@ -341,3 +351,59 @@ jobs:
         uses: actions/github-script@v8
         with:
           script: core.setFailed('Integration Tests Cancelled!')
+
+  # Integration test trend report (aggregates JUnit XML results from dotnet test jobs)
+  dotnet-integration-test-report:
+    name: Integration Test Report
+    if: >
+      always() &&
+      github.event_name != 'pull_request' &&
+      (contains(join(needs.*.result, ','), 'success') ||
+       contains(join(needs.*.result, ','), 'failure'))
+    needs: [dotnet-test]
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: python
+    steps:
+      - uses: actions/checkout@v6
+      - name: Set up python and install the project
+        uses: ./.github/actions/python-setup
+        with:
+          python-version: "3.13"
+          os: ${{ runner.os }}
+      - name: Download all test results from current run
+        uses: actions/download-artifact@v4
+        with:
+          pattern: dotnet-test-results-*
+          path: dotnet-test-results/
+      - name: Restore report history cache
+        uses: actions/cache/restore@v4
+        with:
+          path: python/dotnet-integration-report-history.json
+          key: dotnet-integration-report-history-${{ github.run_id }}
+          restore-keys: |
+            dotnet-integration-report-history-
+      - name: Generate trend report
+        run: >
+          uv run python scripts/flaky_report/aggregate.py
+          ../dotnet-test-results/
+          dotnet-integration-report-history.json
+          dotnet-integration-test-report.md
+      - name: Post to Job Summary
+        if: always()
+        run: cat dotnet-integration-test-report.md >> $GITHUB_STEP_SUMMARY
+      - name: Save report history cache
+        if: always()
+        uses: actions/cache/save@v4
+        with:
+          path: python/dotnet-integration-report-history.json
+          key: dotnet-integration-report-history-${{ github.run_id }}
+      - name: Upload trend report
+        if: always()
+        uses: actions/upload-artifact@v7
+        with:
+          name: dotnet-integration-test-report
+          path: |
+            python/dotnet-integration-test-report.md
+            python/dotnet-integration-report-history.json
diff --git a/python/scripts/flaky_report/aggregate.py b/python/scripts/flaky_report/aggregate.py
index e07a5e136a..579f9ac935 100644
--- a/python/scripts/flaky_report/aggregate.py
+++ b/python/scripts/flaky_report/aggregate.py
@@ -2,16 +2,18 @@
 
 """Aggregate per-provider JUnit XML test results and generate a trend report.
 
-Parses ``pytest.xml`` (JUnit XML) files produced by each CI job, merges them
-into a single run, combines with historical data, and generates a markdown
-trend table — the same pattern used by ``scripts/sample_validation/aggregate.py``.
+Parses JUnit XML files produced by CI jobs — both ``pytest.xml`` (Python) and
+xunit v3 ``*.junit.xml`` (dotnet) — merges them into a single run, combines
+with historical data, and generates a markdown trend table.
 
 Usage (from CI):
     python aggregate.py <reports-dir> <history-file> <output-file>
 
-The reports directory is expected to contain subdirectories named
-``test-results-<provider>/`` each containing a ``pytest.xml`` file
-(created by ``actions/download-artifact``).
+The reports directory is expected to contain artifact subdirectories.  Two
+layouts are supported:
+
+- **Python (pytest):**  ``test-results-<provider>/pytest.xml``
+- **Dotnet (xunit):**   ``dotnet-test-results-<tfm>-<os>/*.junit.xml``
 """
 
 from __future__ import annotations
@@ -46,9 +48,21 @@ def _format_run_label(timestamp: str) -> str:
 def _derive_provider(directory_name: str) -> str:
     """Derive a provider label from a report directory name.
 
-    ``test-results-openai`` → ``OpenAI``
-    ``test-results-azure-openai`` → ``Azure OpenAI``
+    Handles both Python and dotnet naming conventions:
+    - ``test-results-openai`` → ``OpenAI``
+    - ``test-results-azure-openai`` → ``Azure OpenAI``
+    - ``dotnet-test-results-net10.0-ubuntu-latest`` → ``net10.0 (ubuntu)``
     """
+    # Dotnet convention: dotnet-test-results-<framework>-<os>
+    if directory_name.startswith("dotnet-test-results-"):
+        raw = directory_name.replace("dotnet-test-results-", "")
+        # e.g. "net10.0-ubuntu-latest" → framework="net10.0", os="ubuntu-latest"
+        parts = raw.split("-", 1)
+        framework = parts[0]
+        os_label = parts[1].split("-")[0] if len(parts) > 1 else ""
+        return f"{framework} ({os_label})" if os_label else framework
+
+    # Python convention: test-results-<provider>
     raw = directory_name.replace("test-results-", "")
     known = {
         "openai": "OpenAI",
@@ -102,11 +116,21 @@ def _parse_junit_xml(xml_path: Path) -> list[dict[str, str]]:
         # it appends the class name, e.g.:
         #   "packages.foundry.tests.foundry.test_foundry_embedding_client.TestFoundryEmbeddingIntegration"
         # We want the file-level module: "test_foundry_embedding_client"
+        #
+        # xunit (dotnet) writes classname as the full C# type, e.g.:
+        #   "OpenAIChatCompletion.IntegrationTests.ChatCompletionTests"
+        # We want the project prefix: "OpenAIChatCompletion"
         if classname:
             parts = classname.rsplit(".", 2)
             # If the last segment starts with uppercase it's a class name — take the one before it
             if len(parts) >= 2 and parts[-1][0:1].isupper():
-                module = parts[-2]
+                # For dotnet: if the penultimate part is "IntegrationTests" or "UnitTests",
+                # use the part before that (the project name) instead
+                if parts[-2] in ("IntegrationTests", "UnitTests") and len(parts) >= 3:
+                    # parts[0] may contain dots — take the last segment of it
+                    module = parts[0].rsplit(".", 1)[-1]
+                else:
+                    module = parts[-2]
             else:
                 module = parts[-1]
         else:
@@ -148,28 +172,61 @@ def _parse_junit_xml(xml_path: Path) -> list[dict[str, str]]:
 # ---------------------------------------------------------------------------
 
 
+def _discover_xml_files(reports_dir: Path) -> list[tuple[str, Path]]:
+    """Discover JUnit XML test result files in artifact subdirectories.
+
+    Handles two directory layouts:
+    - **Python (pytest):** ``test-results-<provider>/pytest.xml``
+    - **Dotnet (xunit):** ``dotnet-test-results-<tfm>-<os>/*.junit.xml``
+
+    Returns:
+        List of ``(directory_name, xml_path)`` tuples.
+    """
+    xml_files: list[tuple[str, Path]] = []
+    if not reports_dir.is_dir():
+        return xml_files
+
+    for subdir in sorted(reports_dir.iterdir()):
+        if not subdir.is_dir():
+            continue
+
+        # Python layout: single pytest.xml per artifact
+        pytest_xml = subdir / "pytest.xml"
+        if pytest_xml.exists():
+            xml_files.append((subdir.name, pytest_xml))
+            continue
+
+        # Dotnet layout: multiple *.junit.xml files per artifact
+        junit_files = sorted(subdir.rglob("*.junit.xml"))
+        for jf in junit_files:
+            xml_files.append((subdir.name, jf))
+
+        # Fallback: any other *.xml file, skipping Cobertura reports (.trx never matches the glob)
+        if not junit_files:
+            for xf in sorted(subdir.rglob("*.xml")):
+                if xf.suffix == ".xml" and not xf.name.endswith(".cobertura.xml"):
+                    xml_files.append((subdir.name, xf))
+
+    return xml_files
+
+
 def load_current_run(reports_dir: Path) -> dict[str, Any]:
     """Load per-provider JUnit XML reports from the current CI run and merge.
 
+    Supports both pytest (Python) and xunit v3 (dotnet) JUnit XML formats.
+
     Args:
-        reports_dir: Directory containing ``test-results-<provider>/`` subdirs.
+        reports_dir: Directory containing artifact subdirectories with XML reports.
 
     Returns:
         Merged run dict with ``timestamp``, ``summary``, ``results``.
     """
     combined_results: dict[str, dict[str, str]] = {}  # nodeid → {status, provider}
 
-    # actions/download-artifact creates: reports_dir/test-results-openai/pytest.xml
-    xml_files: list[tuple[str, Path]] = []
-    if reports_dir.is_dir():
-        for subdir in sorted(reports_dir.iterdir()):
-            if subdir.is_dir():
-                xml_file = subdir / "pytest.xml"
-                if xml_file.exists():
-                    xml_files.append((subdir.name, xml_file))
+    xml_files = _discover_xml_files(reports_dir)
 
     if not xml_files:
-        print(f"Warning: No pytest.xml files found in {reports_dir}")
+        print(f"Warning: No JUnit XML files found in {reports_dir}")
         return {
             "timestamp": datetime.now(timezone.utc).isoformat(),
             "summary": {
@@ -186,7 +243,21 @@ def load_current_run(reports_dir: Path) -> dict[str, Any]:
         provider = _derive_provider(dir_name)
         tests = _parse_junit_xml(xml_file)
         for test in tests:
-            combined_results[test["nodeid"]] = {
+            # Use provider-qualified key when the same test runs under
+            # multiple providers (e.g. dotnet net10.0 vs net472).  This
+            # prevents later results from silently overwriting earlier ones.
+            raw_id = test["nodeid"]
+            key = raw_id
+            if key in combined_results and combined_results[key]["provider"] != provider:
+                # Collision: re-key existing entry and use qualified key for new one
+                existing = combined_results.pop(key)
+                combined_results[f"{existing['provider']}::{raw_id}"] = existing
+                key = f"{provider}::{raw_id}"
+            elif f"{provider}::{raw_id}" in combined_results:
+                # Provider-qualified key already exists (previous collision)
+                key = f"{provider}::{raw_id}"
+
+            combined_results[key] = {
                 "status": test["status"],
                 "provider": provider,
                 "module": test.get("module", ""),
@@ -247,7 +318,7 @@ def _short_name(nodeid: str) -> str:
 def generate_trend_report(runs: list[dict[str, Any]]) -> str:
     """Generate a markdown trend report from run history."""
     lines = [
-        "# 🔬 Flaky Test Report",
+        "# 🔬 Integration Test Report",
         "",
         f"*Generated: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}*",
         "",

From 450eab46405b8a3cd0780bb51c4404f1fe2a9206 Mon Sep 17 00:00:00 2001
From: Giles Odigwe <gilesodigwe@microsoft.com>
Date: Mon, 27 Apr 2026 08:36:25 -0700
Subject: [PATCH 2/6] chore: trigger dotnet CI for report validation

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 dotnet/README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/dotnet/README.md b/dotnet/README.md
index 328dfdf684..2edb402a94 100644
--- a/dotnet/README.md
+++ b/dotnet/README.md
@@ -33,3 +33,4 @@ Console.WriteLine(await agent.RunAsync("Write a haiku about Microsoft Agent Fram
 - [Design Documents](../docs/design)
 - [Architectural Decision Records](../docs/decisions)
 - [MSFT Learn Docs](https://learn.microsoft.com/agent-framework/overview/agent-framework-overview)
+

From f48c8b3cfc1cdc97721ef860c6de130ab33972bc Mon Sep 17 00:00:00 2001
From: Giles Odigwe <gilesodigwe@microsoft.com>
Date: Mon, 27 Apr 2026 09:28:45 -0700
Subject: [PATCH 3/6] fix: use .junit extension (not .junit.xml) for xunit v3
 output

xUnit v3 generates files with .junit extension, not .junit.xml.
Update upload glob and aggregate.py discovery to match.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .github/workflows/dotnet-build-and-test.yml |  2 +-
 python/scripts/flaky_report/aggregate.py    | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/dotnet-build-and-test.yml b/.github/workflows/dotnet-build-and-test.yml
index 6fb2c77fc8..e53ccd0e1b 100644
--- a/.github/workflows/dotnet-build-and-test.yml
+++ b/.github/workflows/dotnet-build-and-test.yml
@@ -306,7 +306,7 @@ jobs:
         uses: actions/upload-artifact@v7
         with:
           name: dotnet-test-results-${{ matrix.targetFramework }}-${{ matrix.os }}
-          path: IntegrationTestResults/**/*.junit.xml
+          path: IntegrationTestResults/**/*.junit
           if-no-files-found: ignore
 
   # This final job is required to satisfy the merge queue. It must only run (or succeed) if no tests failed
diff --git a/python/scripts/flaky_report/aggregate.py b/python/scripts/flaky_report/aggregate.py
index 579f9ac935..cd93c7db49 100644
--- a/python/scripts/flaky_report/aggregate.py
+++ b/python/scripts/flaky_report/aggregate.py
@@ -3,7 +3,7 @@
 """Aggregate per-provider JUnit XML test results and generate a trend report.
 
 Parses JUnit XML files produced by CI jobs — both ``pytest.xml`` (Python) and
-xunit v3 ``*.junit.xml`` (dotnet) — merges them into a single run, combines
+xunit v3 ``*.junit`` (dotnet) — merges them into a single run, combines
 with historical data, and generates a markdown trend table.
 
 Usage (from CI):
@@ -13,7 +13,7 @@
 layouts are supported:
 
 - **Python (pytest):**  ``test-results-<provider>/pytest.xml``
-- **Dotnet (xunit):**   ``dotnet-test-results-<tfm>-<os>/*.junit.xml``
+- **Dotnet (xunit):**   ``dotnet-test-results-<tfm>-<os>/*.junit``
 """
 
 from __future__ import annotations
@@ -177,7 +177,7 @@ def _discover_xml_files(reports_dir: Path) -> list[tuple[str, Path]]:
 
     Handles two directory layouts:
     - **Python (pytest):** ``test-results-<provider>/pytest.xml``
-    - **Dotnet (xunit):** ``dotnet-test-results-<tfm>-<os>/*.junit.xml``
+    - **Dotnet (xunit):** ``dotnet-test-results-<tfm>-<os>/*.junit``
 
     Returns:
         List of ``(directory_name, xml_path)`` tuples.
@@ -196,8 +196,8 @@ def _discover_xml_files(reports_dir: Path) -> list[tuple[str, Path]]:
             xml_files.append((subdir.name, pytest_xml))
             continue
 
-        # Dotnet layout: multiple *.junit.xml files per artifact
-        junit_files = sorted(subdir.rglob("*.junit.xml"))
+        # Dotnet layout: multiple *.junit files per artifact
+        junit_files = sorted(subdir.rglob("*.junit"))
         for jf in junit_files:
             xml_files.append((subdir.name, jf))
 

From df49b9114143196d2130a93174a1c4c74bab95e5 Mon Sep 17 00:00:00 2001
From: Giles Odigwe <gilesodigwe@microsoft.com>
Date: Mon, 27 Apr 2026 12:27:28 -0700
Subject: [PATCH 4/6] fix: use deterministic provider-qualified keys for dotnet
 tests

Always prefix dotnet test keys with provider (e.g. net10.0 (ubuntu)::TestName)
to ensure stable, comparable counts across runs regardless of file parse order.
Also show Executed (passed+failed) instead of Total in summary table.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 python/scripts/flaky_report/aggregate.py | 35 +++++++++++-------------
 1 file changed, 16 insertions(+), 19 deletions(-)

diff --git a/python/scripts/flaky_report/aggregate.py b/python/scripts/flaky_report/aggregate.py
index cd93c7db49..9f0c8aa478 100644
--- a/python/scripts/flaky_report/aggregate.py
+++ b/python/scripts/flaky_report/aggregate.py
@@ -238,24 +238,18 @@ def load_current_run(reports_dir: Path) -> dict[str, Any]:
             "results": {},
         }
 
+    # Dotnet tests run under multiple frameworks, so when any dotnet
+    # artifact is present every key in this run is qualified with its
+    # provider, giving stable keys regardless of file parse order.
+    is_dotnet = any(d.startswith("dotnet-test-results-") for d, _ in xml_files)
+
     for dir_name, xml_file in xml_files:
         print(f"  Loading: {xml_file}")
         provider = _derive_provider(dir_name)
         tests = _parse_junit_xml(xml_file)
         for test in tests:
-            # Use provider-qualified key when the same test runs under
-            # multiple providers (e.g. dotnet net10.0 vs net472).  This
-            # prevents later results from silently overwriting earlier ones.
             raw_id = test["nodeid"]
-            key = raw_id
-            if key in combined_results and combined_results[key]["provider"] != provider:
-                # Collision: re-key existing entry and use qualified key for new one
-                existing = combined_results.pop(key)
-                combined_results[f"{existing['provider']}::{raw_id}"] = existing
-                key = f"{provider}::{raw_id}"
-            elif f"{provider}::{raw_id}" in combined_results:
-                # Provider-qualified key already exists (previous collision)
-                key = f"{provider}::{raw_id}"
+            key = f"{provider}::{raw_id}" if is_dotnet else raw_id
 
             combined_results[key] = {
                 "status": test["status"],
@@ -327,19 +321,22 @@ def generate_trend_report(runs: list[dict[str, Any]]) -> str:
     # --- Overall status table (most recent first) ---
     lines.append("## Overall Status (Last 5 Runs)")
     lines.append("")
-    lines.append("| Run | Total | ✅ Passed | ❌ Failed | ⏭️ Skipped |")
-    lines.append("|-----|-------|-----------|-----------|------------|")
+    lines.append("| Run | Executed | ✅ Passed | ❌ Failed | ⏭️ Skipped |")
+    lines.append("|-----|----------|-----------|-----------|------------|")
 
     for run in reversed(runs):
         s = run.get("summary", {})
-        total = s.get("total", 0)
+        passed = s.get("passed", 0)
+        failed = s.get("failed", 0)
+        skipped = s.get("skipped", 0)
+        executed = passed + failed
         label = _format_run_label(run["timestamp"])
         lines.append(
             f"| {label} "
-            f"| {total} "
-            f"| {s.get('passed', 0)}/{total} "
-            f"| {s.get('failed', 0)}/{total} "
-            f"| {s.get('skipped', 0)}/{total} |"
+            f"| {executed} "
+            f"| {passed} "
+            f"| {failed} "
+            f"| {skipped} |"
         )
 
     for _ in range(MAX_HISTORY - len(runs)):

From 4ff5130c1c547fd6e6e3a219493d804255dfefcc Mon Sep 17 00:00:00 2001
From: Giles Odigwe <gilesodigwe@microsoft.com>
Date: Mon, 27 Apr 2026 13:44:12 -0700
Subject: [PATCH 5/6] fix: match Python report summary format (Total,
 passed/total, etc.)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 python/scripts/flaky_report/aggregate.py | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/python/scripts/flaky_report/aggregate.py b/python/scripts/flaky_report/aggregate.py
index 9f0c8aa478..6bde7ecd2e 100644
--- a/python/scripts/flaky_report/aggregate.py
+++ b/python/scripts/flaky_report/aggregate.py
@@ -321,22 +321,19 @@ def generate_trend_report(runs: list[dict[str, Any]]) -> str:
     # --- Overall status table (most recent first) ---
     lines.append("## Overall Status (Last 5 Runs)")
     lines.append("")
-    lines.append("| Run | Executed | ✅ Passed | ❌ Failed | ⏭️ Skipped |")
-    lines.append("|-----|----------|-----------|-----------|------------|")
+    lines.append("| Run | Total | ✅ Passed | ❌ Failed | ⏭️ Skipped |")
+    lines.append("|-----|-------|-----------|-----------|------------|")
 
     for run in reversed(runs):
         s = run.get("summary", {})
-        passed = s.get("passed", 0)
-        failed = s.get("failed", 0)
-        skipped = s.get("skipped", 0)
-        executed = passed + failed
+        total = s.get("total", 0)
         label = _format_run_label(run["timestamp"])
         lines.append(
             f"| {label} "
-            f"| {executed} "
-            f"| {passed} "
-            f"| {failed} "
-            f"| {skipped} |"
+            f"| {total} "
+            f"| {s.get('passed', 0)}/{total} "
+            f"| {s.get('failed', 0)}/{total} "
+            f"| {s.get('skipped', 0)}/{total} |"
         )
 
     for _ in range(MAX_HISTORY - len(runs)):

From 2aee1d81f339c2af4aaeb2757fbc2e1dd1be4329 Mon Sep 17 00:00:00 2001
From: Giles Odigwe <gilesodigwe@microsoft.com>
Date: Mon, 27 Apr 2026 14:40:26 -0700
Subject: [PATCH 6/6] feat: split dotnet report into per-framework tables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Dotnet tests run on multiple frameworks (net10.0, net472). Instead of
one combined table with unstable totals, show separate sections per
framework — each with its own summary row and per-test table. Python
reports retain the original single-table format.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 python/scripts/flaky_report/aggregate.py | 149 +++++++++++++++++++----
 1 file changed, 125 insertions(+), 24 deletions(-)

diff --git a/python/scripts/flaky_report/aggregate.py b/python/scripts/flaky_report/aggregate.py
index 6bde7ecd2e..708f47fcf0 100644
--- a/python/scripts/flaky_report/aggregate.py
+++ b/python/scripts/flaky_report/aggregate.py
@@ -257,8 +257,23 @@ def load_current_run(reports_dir: Path) -> dict[str, Any]:
                 "module": test.get("module", ""),
             }
 
-    # Build summary counts using mutually exclusive status buckets.
-    # Errors are folded into the failed count for display purposes.
+    # Build per-provider summary counts so the report can show one row per
+    # framework (dotnet) or per provider (Python).
+    provider_counts: dict[str, dict[str, int]] = {}
+    for r in combined_results.values():
+        prov = r.get("provider", "Unknown")
+        if prov not in provider_counts:
+            provider_counts[prov] = {"total": 0, "passed": 0, "failed": 0, "skipped": 0}
+        provider_counts[prov]["total"] += 1
+        st = r["status"]
+        if st == "passed":
+            provider_counts[prov]["passed"] += 1
+        elif st in ("failed", "error"):
+            provider_counts[prov]["failed"] += 1
+        elif st == "skipped":
+            provider_counts[prov]["skipped"] += 1
+
+    # Overall summary (sum across all providers).
     statuses = [r["status"] for r in combined_results.values()]
     summary = {
         "total": len(statuses),
@@ -270,6 +285,7 @@ def load_current_run(reports_dir: Path) -> dict[str, Any]:
     return {
         "timestamp": datetime.now(timezone.utc).isoformat(),
         "summary": summary,
+        "provider_summaries": provider_counts,
         "results": combined_results,
     }
 
@@ -318,7 +334,29 @@ def generate_trend_report(runs: list[dict[str, Any]]) -> str:
         "",
     ]
 
-    # --- Overall status table (most recent first) ---
+    # Detect whether this is a dotnet report (provider labels start with "net", e.g. "net10.0 (ubuntu)").
+    is_dotnet = False
+    for run in runs:
+        provider_sums = run.get("provider_summaries", {})
+        if any(p.startswith("net") for p in provider_sums):
+            is_dotnet = True
+            break
+
+    if is_dotnet:
+        _generate_dotnet_report(lines, runs)
+    else:
+        _generate_python_report(lines, runs)
+
+    lines.append("")
+    lines.append("**Legend:** ✅ Passed · ❌ Failed · ⏭️ Skipped · ⚠️ Expected Failure (xfail) · N/A Not available")
+    lines.append("")
+
+    return "\n".join(lines)
+
+
+def _generate_python_report(lines: list[str], runs: list[dict[str, Any]]) -> None:
+    """Generate the original single-table Python report format."""
+    # --- Overall status table ---
     lines.append("## Overall Status (Last 5 Runs)")
     lines.append("")
     lines.append("| Run | Total | ✅ Passed | ❌ Failed | ⏭️ Skipped |")
@@ -341,27 +379,91 @@ def generate_trend_report(runs: list[dict[str, Any]]) -> str:
 
     lines.append("")
 
-    # --- Per-test results table ---
-    lines.append("## Per-Test Results")
-    lines.append("")
+    # --- Single per-test results table ---
+    _generate_per_test_table(lines, runs, "## Per-Test Results")
+
+
+def _generate_dotnet_report(lines: list[str], runs: list[dict[str, Any]]) -> None:
+    """Generate per-framework tables for dotnet (net10.0, net472, etc.)."""
+    # Collect all providers seen across all runs, sorted for stable ordering
+    all_providers: set[str] = set()
+    for run in runs:
+        all_providers.update(run.get("provider_summaries", {}).keys())
+    providers = sorted(all_providers)
+
+    for provider in providers:
+        lines.append(f"## {provider}")
+        lines.append("")
 
-    # Collect all test nodeids, providers, and modules across all runs
-    all_tests: dict[str, str] = {}  # nodeid → provider (from most recent run)
-    all_modules: dict[str, str] = {}  # nodeid → module (from most recent run)
+        # --- Per-provider summary table ---
+        lines.append("| Run | Total | ✅ Passed | ❌ Failed | ⏭️ Skipped |")
+        lines.append("|-----|-------|-----------|-----------|------------|")
+
+        for run in reversed(runs):
+            ps = run.get("provider_summaries", {}).get(provider, {})
+            total = ps.get("total", 0)
+            label = _format_run_label(run["timestamp"])
+            if total == 0:
+                lines.append(f"| {label} | N/A | N/A | N/A | N/A |")
+            else:
+                lines.append(
+                    f"| {label} "
+                    f"| {total} "
+                    f"| {ps.get('passed', 0)}/{total} "
+                    f"| {ps.get('failed', 0)}/{total} "
+                    f"| {ps.get('skipped', 0)}/{total} |"
+                )
+
+        for _ in range(MAX_HISTORY - len(runs)):
+            lines.append("| N/A | N/A | N/A | N/A | N/A |")
+
+        lines.append("")
+
+        # --- Per-test table filtered to this provider ---
+        _generate_per_test_table(
+            lines, runs,
+            heading=None,
+            provider_filter=provider,
+        )
+
+
+def _generate_per_test_table(
+    lines: list[str],
+    runs: list[dict[str, Any]],
+    heading: str | None = None,
+    provider_filter: str | None = None,
+) -> None:
+    """Emit a per-test trend table, optionally filtered to a single provider."""
+    if heading:
+        lines.append(heading)
+        lines.append("")
+
+    # Collect all test nodeids (and metadata) across all runs
+    all_tests: dict[str, str] = {}  # nodeid → provider
+    all_modules: dict[str, str] = {}  # nodeid → module
     for run in runs:
         for nodeid, info in run.get("results", {}).items():
-            provider = info.get("provider", "Unknown") if isinstance(info, dict) else "Unknown"
-            module = info.get("module", "") if isinstance(info, dict) else ""
-            all_tests[nodeid] = provider
+            if not isinstance(info, dict):
+                continue
+            prov = info.get("provider", "Unknown")
+            if provider_filter and prov != provider_filter:
+                continue
+            module = info.get("module", "")
+            all_tests[nodeid] = prov
             all_modules[nodeid] = module
 
     if not all_tests:
         lines.append("*No test results available.*")
-        return "\n".join(lines)
+        lines.append("")
+        return
 
-    # Build header (most recent run first)
-    header = "| Test | File | Provider |"
-    separator = "|------|------|----------|"
+    # Build header
+    if provider_filter:
+        header = "| Test | File |"
+        separator = "|------|------|"
+    else:
+        header = "| Test | File | Provider |"
+        separator = "|------|------|----------|"
     for run in reversed(runs):
         label = _format_run_label(run["timestamp"])
         header += f" {label} |"
@@ -373,12 +475,15 @@ def generate_trend_report(runs: list[dict[str, Any]]) -> str:
     lines.append(header)
     lines.append(separator)
 
-    # Sort by provider then test name
-    for nodeid in sorted(all_tests, key=lambda n: (all_tests[n], n)):
-        provider = all_tests[nodeid]
+    # Sort by module then test name
+    for nodeid in sorted(all_tests, key=lambda n: (all_modules.get(n, ""), n)):
         module = all_modules.get(nodeid, "")
         short = _short_name(nodeid)
-        row = f"| `{short}` | `{module}` | {provider} |"
+        if provider_filter:
+            row = f"| `{short}` | `{module}` |"
+        else:
+            provider = all_tests[nodeid]
+            row = f"| `{short}` | `{module}` | {provider} |"
 
         for run in reversed(runs):
             result = run.get("results", {}).get(nodeid)
@@ -395,10 +500,6 @@ def generate_trend_report(runs: list[dict[str, Any]]) -> str:
         lines.append(row)
 
     lines.append("")
-    lines.append("**Legend:** ✅ Passed · ❌ Failed · ⏭️ Skipped · ⚠️ Expected Failure (xfail) · N/A Not available")
-    lines.append("")
-
-    return "\n".join(lines)
 
 
 # ---------------------------------------------------------------------------