diff --git a/.github/workflows/dotnet-build-and-test.yml b/.github/workflows/dotnet-build-and-test.yml
index 6454adba31..e53ccd0e1b 100644
--- a/.github/workflows/dotnet-build-and-test.yml
+++ b/.github/workflows/dotnet-build-and-test.yml
@@ -257,6 +257,8 @@ jobs:
             -c ${{ matrix.configuration }} `
             --no-build -v Normal `
             --report-xunit-trx `
+            --report-junit `
+            --results-directory ../IntegrationTestResults/ `
             --ignore-exit-code 8 `
             --filter-not-trait "Category=IntegrationDisabled" `
             --parallel-algorithm aggressive `
@@ -299,6 +301,14 @@ jobs:
         shell: pwsh
         run: ./dotnet/eng/scripts/dotnet-check-coverage.ps1 -JsonReportPath "TestResults/Reports/Summary.json" -CoverageThreshold $env:COVERAGE_THRESHOLD
 
+      - name: Upload integration test results
+        if: always() && github.event_name != 'pull_request' && matrix.integration-tests
+        uses: actions/upload-artifact@v7
+        with:
+          name: dotnet-test-results-${{ matrix.targetFramework }}-${{ matrix.os }}
+          path: IntegrationTestResults/**/*.junit
+          if-no-files-found: ignore
+
   # This final job is required to satisfy the merge queue. It must only run (or succeed) if no tests failed
   dotnet-build-and-test-check:
     if: always()
@@ -341,3 +351,59 @@ jobs:
         uses: actions/github-script@v8
         with:
           script: core.setFailed('Integration Tests Cancelled!')
+
+  # Integration test trend report (aggregates JUnit XML results from dotnet test jobs)
+  dotnet-integration-test-report:
+    name: Integration Test Report
+    if: >
+      always() &&
+      github.event_name != 'pull_request' &&
+      (contains(join(needs.*.result, ','), 'success') ||
+       contains(join(needs.*.result, ','), 'failure'))
+    needs: [dotnet-test]
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: python
+    steps:
+      - uses: actions/checkout@v6
+      - name: Set up python and install the project
+        uses: ./.github/actions/python-setup
+        with:
+          python-version: "3.13"
+          os: ${{ runner.os }}
+      - name: Download all test results from current run
+        uses: actions/download-artifact@v4
+        with:
+          pattern: dotnet-test-results-*
+          path: dotnet-test-results/
+      - name: Restore report history cache
+        uses: actions/cache/restore@v4
+        with:
+          path: python/dotnet-integration-report-history.json
+          key: dotnet-integration-report-history-${{ github.run_id }}
+          restore-keys: |
+            dotnet-integration-report-history-
+      - name: Generate trend report
+        run: >
+          uv run python scripts/flaky_report/aggregate.py
+          ../dotnet-test-results/
+          dotnet-integration-report-history.json
+          dotnet-integration-test-report.md
+      - name: Post to Job Summary
+        if: always()
+        run: cat dotnet-integration-test-report.md >> $GITHUB_STEP_SUMMARY
+      - name: Save report history cache
+        if: always()
+        uses: actions/cache/save@v4
+        with:
+          path: python/dotnet-integration-report-history.json
+          key: dotnet-integration-report-history-${{ github.run_id }}
+      - name: Upload trend report
+        if: always()
+        uses: actions/upload-artifact@v7
+        with:
+          name: dotnet-integration-test-report
+          path: |
+            python/dotnet-integration-test-report.md
+            python/dotnet-integration-report-history.json
diff --git a/dotnet/README.md b/dotnet/README.md
index 328dfdf684..2edb402a94 100644
--- a/dotnet/README.md
+++ b/dotnet/README.md
@@ -33,3 +33,4 @@ Console.WriteLine(await agent.RunAsync("Write a haiku about Microsoft Agent Fram
 - [Design Documents](../docs/design)
 - [Architectural Decision Records](../docs/decisions)
 - [MSFT Learn Docs](https://learn.microsoft.com/agent-framework/overview/agent-framework-overview)
+
diff --git a/dotnet/tests/Microsoft.Agents.AI.DurableTask.IntegrationTests/ConsoleAppSamplesValidation.cs b/dotnet/tests/Microsoft.Agents.AI.DurableTask.IntegrationTests/ConsoleAppSamplesValidation.cs
index 7b8fa3a8f9..006b1c1531 100644
--- a/dotnet/tests/Microsoft.Agents.AI.DurableTask.IntegrationTests/ConsoleAppSamplesValidation.cs
+++ b/dotnet/tests/Microsoft.Agents.AI.DurableTask.IntegrationTests/ConsoleAppSamplesValidation.cs
@@ -13,8 +13,6 @@ namespace Microsoft.Agents.AI.DurableTask.IntegrationTests;
 [Trait("Category", "SampleValidation")]
 public sealed class ConsoleAppSamplesValidation(ITestOutputHelper outputHelper) : SamplesValidationBase(outputHelper)
 {
-    private const string SkipFlakyTimingTest = "Flaky: timing-dependent LLM test, see https://github.com/microsoft/agent-framework/issues/4971";
-
     private static readonly string s_samplesPath = Path.GetFullPath(
         Path.Combine(AppDomain.CurrentDomain.BaseDirectory,
             "..", "..", "..", "..", "..", "samples", "04-hosting", "DurableAgents", "ConsoleApps"));
@@ -237,7 +235,7 @@ private async Task TestSpamDetectionAsync(
         Assert.True(foundSuccess, "Orchestration did not complete successfully.");
     }
 
-    [Fact(Skip = SkipFlakyTimingTest)]
+    [Fact]
     public async Task SingleAgentOrchestrationHITLSampleValidationAsync()
     {
         string samplePath = Path.Combine(s_samplesPath, "05_AgentOrchestration_HITL");
@@ -311,14 +309,14 @@ await this.WriteInputAsync(
         });
     }
 
-    [Fact(Skip = SkipFlakyTimingTest)]
+    [Fact]
     public async Task LongRunningToolsSampleValidationAsync()
     {
         string samplePath = Path.Combine(s_samplesPath, "06_LongRunningTools");
         await this.RunSampleTestAsync(samplePath, async (process, logs) =>
         {
             // This test takes a bit longer to run due to the multiple agent interactions and the lengthy content generation.
-            using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts(TimeSpan.FromSeconds(90));
+            using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts(TimeSpan.FromSeconds(150));
 
             // Test starting an agent that schedules a content generation orchestration
             await this.WriteInputAsync(
@@ -396,14 +394,14 @@ await this.WriteInputAsync(
         });
     }
 
-    [Fact(Skip = SkipFlakyTimingTest)]
+    [Fact]
     public async Task ReliableStreamingSampleValidationAsync()
     {
         string samplePath = Path.Combine(s_samplesPath, "07_ReliableStreaming");
         await this.RunSampleTestAsync(samplePath, async (process, logs) =>
         {
             // This test takes a bit longer to run due to the multiple agent interactions and the lengthy content generation.
-            using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts(TimeSpan.FromSeconds(90));
+            using CancellationTokenSource testTimeoutCts = this.CreateTestTimeoutCts(TimeSpan.FromSeconds(150));
 
             // Test the agent endpoint with a simple prompt
             await this.WriteInputAsync(process, "Plan a 5-day trip to Seattle. Include daily activities.", testTimeoutCts.Token);
diff --git a/dotnet/tests/Microsoft.Agents.AI.DurableTask.IntegrationTests/ExternalClientTests.cs b/dotnet/tests/Microsoft.Agents.AI.DurableTask.IntegrationTests/ExternalClientTests.cs
index 134a12e688..a9bf2ee16a 100644
--- a/dotnet/tests/Microsoft.Agents.AI.DurableTask.IntegrationTests/ExternalClientTests.cs
+++ b/dotnet/tests/Microsoft.Agents.AI.DurableTask.IntegrationTests/ExternalClientTests.cs
@@ -19,11 +19,9 @@ namespace Microsoft.Agents.AI.DurableTask.IntegrationTests;
 [Trait("Category", "Integration")]
 public sealed class ExternalClientTests(ITestOutputHelper outputHelper) : IDisposable
 {
-    private const string SkipFlakyTimingTest = "Flaky: timing-dependent LLM test, see https://github.com/microsoft/agent-framework/issues/4971";
-
     private static readonly TimeSpan s_defaultTimeout = Debugger.IsAttached
         ? TimeSpan.FromMinutes(5)
-        : TimeSpan.FromSeconds(60);
+        : TimeSpan.FromSeconds(120);
 
     private static readonly IConfiguration s_configuration =
         new ConfigurationBuilder()
@@ -77,7 +75,7 @@ await simpleAgentProxy.RunAsync(
         Assert.Contains(agentLogs, log => log.EventId.Name == "LogAgentResponse");
     }
 
-    [Fact(Skip = SkipFlakyTimingTest)]
+    [Fact]
     public async Task CallFunctionToolsAsync()
     {
         int weatherToolInvocationCount = 0;
@@ -129,7 +127,7 @@ string SuggestPackingList(string weather, bool isSunny)
         Assert.Equal(1, packingListToolInvocationCount);
     }
 
-    [Fact(Skip = SkipFlakyTimingTest)]
+    [Fact]
     public async Task CallLongRunningFunctionToolsAsync()
     {
         [Description("Starts a greeting workflow and returns the workflow instance ID")]
diff --git a/dotnet/tests/Microsoft.Agents.AI.DurableTask.IntegrationTests/SamplesValidationBase.cs b/dotnet/tests/Microsoft.Agents.AI.DurableTask.IntegrationTests/SamplesValidationBase.cs
index f5ecf0354d..3f01b83e54 100644
--- a/dotnet/tests/Microsoft.Agents.AI.DurableTask.IntegrationTests/SamplesValidationBase.cs
+++ b/dotnet/tests/Microsoft.Agents.AI.DurableTask.IntegrationTests/SamplesValidationBase.cs
@@ -217,7 +217,7 @@ protected async Task WriteInputAsync(Process process, string input, Cancellation
     /// </summary>
     protected CancellationTokenSource CreateTestTimeoutCts(TimeSpan? timeout = null)
     {
-        TimeSpan testTimeout = Debugger.IsAttached ? TimeSpan.FromMinutes(5) : timeout ?? TimeSpan.FromSeconds(60);
+        TimeSpan testTimeout = Debugger.IsAttached ? TimeSpan.FromMinutes(5) : timeout ?? TimeSpan.FromSeconds(120);
 
         return new CancellationTokenSource(testTimeout);
     }
diff --git a/dotnet/tests/Microsoft.Agents.AI.Hosting.AzureFunctions.IntegrationTests/SamplesValidation.cs b/dotnet/tests/Microsoft.Agents.AI.Hosting.AzureFunctions.IntegrationTests/SamplesValidation.cs
index 078b6af790..6ccb309c12 100644
--- a/dotnet/tests/Microsoft.Agents.AI.Hosting.AzureFunctions.IntegrationTests/SamplesValidation.cs
+++ b/dotnet/tests/Microsoft.Agents.AI.Hosting.AzureFunctions.IntegrationTests/SamplesValidation.cs
@@ -15,8 +15,6 @@ namespace Microsoft.Agents.AI.Hosting.AzureFunctions.IntegrationTests;
 [Trait("Category", "SampleValidation")]
 public sealed class SamplesValidation(ITestOutputHelper outputHelper) : IAsyncLifetime
 {
-    private const string SkipFlakyTimingTest = "Flaky: timing-dependent LLM test, see https://github.com/microsoft/agent-framework/issues/4971";
-
     private const string AzureFunctionsPort = "7071";
     private const string AzuritePort = "10000";
     private const string DtsPort = "8080";
@@ -37,7 +35,7 @@ public sealed class SamplesValidation(ITestOutputHelper outputHelper) : IAsyncLi
             .Build();
 
     private static bool s_infrastructureStarted;
-    private static readonly TimeSpan s_orchestrationTimeout = TimeSpan.FromMinutes(2);
+    private static readonly TimeSpan s_orchestrationTimeout = TimeSpan.FromMinutes(3);
 
     // In CI, `dotnet run` builds the Functions project from scratch before the host starts, so 60s is not enough.
     private static readonly TimeSpan s_functionsReadyTimeout = TimeSpan.FromSeconds(180);
@@ -274,7 +272,7 @@ await this.RunSampleTestAsync(samplePath, async (logs) =>
         });
     }
 
-    [Fact(Skip = SkipFlakyTimingTest)]
+    [Fact]
     public async Task LongRunningToolsSampleValidationAsync()
     {
         string samplePath = Path.Combine(s_samplesPath, "06_LongRunningTools");
@@ -316,7 +314,7 @@ await this.WaitForConditionAsync(
                 }
             },
             message: "Orchestration is requesting human feedback",
-            timeout: TimeSpan.FromSeconds(60));
+            timeout: TimeSpan.FromSeconds(90));
 
         // Approve the content
         Uri approvalUri = new($"{runAgentUri}?thread_id={sessionId}");
@@ -336,7 +334,7 @@ await this.WaitForConditionAsync(
                 }
             },
             message: "Content published notification is logged",
-            timeout: TimeSpan.FromSeconds(60));
+            timeout: TimeSpan.FromSeconds(90));
 
         // Verify the final orchestration status by asking the agent for the status
         Uri statusUri = new($"{runAgentUri}?thread_id={sessionId}");
@@ -360,7 +358,7 @@ await this.WaitForConditionAsync(
                 return isCompleted && hasContent;
             },
             message: "Orchestration is completed",
-            timeout: TimeSpan.FromSeconds(60));
+            timeout: TimeSpan.FromSeconds(90));
         });
     }
 
@@ -404,7 +402,7 @@ await this.WaitForConditionAsync(
         });
     }
 
-    [Fact(Skip = SkipFlakyTimingTest)]
+    [Fact]
     public async Task ReliableStreamingSampleValidationAsync()
     {
         string samplePath = Path.Combine(s_samplesPath, "08_ReliableStreaming");
diff --git a/python/scripts/flaky_report/aggregate.py b/python/scripts/flaky_report/aggregate.py
index e07a5e136a..708f47fcf0 100644
--- a/python/scripts/flaky_report/aggregate.py
+++ b/python/scripts/flaky_report/aggregate.py
@@ -2,16 +2,18 @@
 """Aggregate per-provider JUnit XML test results and generate a trend report.
 
-Parses ``pytest.xml`` (JUnit XML) files produced by each CI job, merges them
-into a single run, combines with historical data, and generates a markdown
-trend table — the same pattern used by ``scripts/sample_validation/aggregate.py``.
+Parses JUnit XML files produced by CI jobs — both ``pytest.xml`` (Python) and
+xunit v3 ``*.junit`` (dotnet) — merges them into a single run, combines
+with historical data, and generates a markdown trend table.
 
 Usage (from CI):
 
     python aggregate.py <reports_dir> <history_file> <output_md>
 
-The reports directory is expected to contain subdirectories named
-``test-results-<provider>/`` each containing a ``pytest.xml`` file
-(created by ``actions/download-artifact``).
+The reports directory is expected to contain artifact subdirectories. Two
+layouts are supported:
+
+- **Python (pytest):** ``test-results-<provider>/pytest.xml``
+- **Dotnet (xunit):** ``dotnet-test-results-<framework>-<os>/*.junit``
 """
 
 from __future__ import annotations
@@ -46,9 +48,21 @@ def _format_run_label(timestamp: str) -> str:
 def _derive_provider(directory_name: str) -> str:
     """Derive a provider label from a report directory name.
 
-    ``test-results-openai`` → ``OpenAI``
-    ``test-results-azure-openai`` → ``Azure OpenAI``
+    Handles both Python and dotnet naming conventions:
+    - ``test-results-openai`` → ``OpenAI``
+    - ``test-results-azure-openai`` → ``Azure OpenAI``
+    - ``dotnet-test-results-net10.0-ubuntu-latest`` → ``net10.0 (ubuntu)``
     """
+    # Dotnet convention: dotnet-test-results-<framework>-<os>
+    if directory_name.startswith("dotnet-test-results-"):
+        raw = directory_name.replace("dotnet-test-results-", "")
+        # e.g. "net10.0-ubuntu-latest" → framework="net10.0", os="ubuntu-latest"
+        parts = raw.split("-", 1)
+        framework = parts[0]
+        os_label = parts[1].split("-")[0] if len(parts) > 1 else ""
+        return f"{framework} ({os_label})" if os_label else framework
+
+    # Python convention: test-results-<provider>
     raw = directory_name.replace("test-results-", "")
     known = {
         "openai": "OpenAI",
@@ -102,11 +116,21 @@ def _parse_junit_xml(xml_path: Path) -> list[dict[str, str]]:
         # it appends the class name, e.g.:
         # "packages.foundry.tests.foundry.test_foundry_embedding_client.TestFoundryEmbeddingIntegration"
         # We want the file-level module: "test_foundry_embedding_client"
+        #
+        # xunit (dotnet) writes classname as the full C# type, e.g.:
+        # "OpenAIChatCompletion.IntegrationTests.ChatCompletionTests"
+        # We want the project prefix: "OpenAIChatCompletion"
         if classname:
             parts = classname.rsplit(".", 2)
             # If the last segment starts with uppercase it's a class name — take the one before it
             if len(parts) >= 2 and parts[-1][0:1].isupper():
-                module = parts[-2]
+                # For dotnet: if the penultimate part is "IntegrationTests" or "UnitTests",
+                # use the part before that (the project name) instead
+                if parts[-2] in ("IntegrationTests", "UnitTests") and len(parts) >= 3:
+                    # parts[0] may contain dots — take the last segment of it
+                    module = parts[0].rsplit(".", 1)[-1]
+                else:
+                    module = parts[-2]
             else:
                 module = parts[-1]
         else:
@@ -148,28 +172,61 @@
 # ---------------------------------------------------------------------------
 
 
+def _discover_xml_files(reports_dir: Path) -> list[tuple[str, Path]]:
+    """Discover JUnit XML test result files in artifact subdirectories.
+
+    Handles two directory layouts:
+    - **Python (pytest):** ``test-results-<provider>/pytest.xml``
+    - **Dotnet (xunit):** ``dotnet-test-results-<framework>-<os>/*.junit``
+
+    Returns:
+        List of ``(directory_name, xml_path)`` tuples.
+ """ + xml_files: list[tuple[str, Path]] = [] + if not reports_dir.is_dir(): + return xml_files + + for subdir in sorted(reports_dir.iterdir()): + if not subdir.is_dir(): + continue + + # Python layout: single pytest.xml per artifact + pytest_xml = subdir / "pytest.xml" + if pytest_xml.exists(): + xml_files.append((subdir.name, pytest_xml)) + continue + + # Dotnet layout: multiple *.junit files per artifact + junit_files = sorted(subdir.rglob("*.junit")) + for jf in junit_files: + xml_files.append((subdir.name, jf)) + + # Fallback: any .xml file that looks like JUnit (not .trx, not cobertura) + if not junit_files: + for xf in sorted(subdir.rglob("*.xml")): + if xf.suffix == ".xml" and not xf.name.endswith(".cobertura.xml"): + xml_files.append((subdir.name, xf)) + + return xml_files + + def load_current_run(reports_dir: Path) -> dict[str, Any]: """Load per-provider JUnit XML reports from the current CI run and merge. + Supports both pytest (Python) and xunit v3 (dotnet) JUnit XML formats. + Args: - reports_dir: Directory containing ``test-results-/`` subdirs. + reports_dir: Directory containing artifact subdirectories with XML reports. Returns: Merged run dict with ``timestamp``, ``summary``, ``results``. """ combined_results: dict[str, dict[str, str]] = {} # nodeid → {status, provider} - # actions/download-artifact creates: reports_dir/test-results-openai/pytest.xml - xml_files: list[tuple[str, Path]] = [] - if reports_dir.is_dir(): - for subdir in sorted(reports_dir.iterdir()): - if subdir.is_dir(): - xml_file = subdir / "pytest.xml" - if xml_file.exists(): - xml_files.append((subdir.name, xml_file)) + xml_files = _discover_xml_files(reports_dir) if not xml_files: - print(f"Warning: No pytest.xml files found in {reports_dir}") + print(f"Warning: No JUnit XML files found in {reports_dir}") return { "timestamp": datetime.now(timezone.utc).isoformat(), "summary": { @@ -181,19 +238,42 @@ def load_current_run(reports_dir: Path) -> dict[str, Any]: "results": {}, } + # Dotnet tests always run under multiple frameworks, so we always + # qualify their keys with the provider to ensure deterministic, + # stable keys across runs regardless of file parse order. + is_dotnet = any(d.startswith("dotnet-test-results-") for d, _ in xml_files) + for dir_name, xml_file in xml_files: print(f" Loading: {xml_file}") provider = _derive_provider(dir_name) tests = _parse_junit_xml(xml_file) for test in tests: - combined_results[test["nodeid"]] = { + raw_id = test["nodeid"] + key = f"{provider}::{raw_id}" if is_dotnet else raw_id + + combined_results[key] = { "status": test["status"], "provider": provider, "module": test.get("module", ""), } - # Build summary counts using mutually exclusive status buckets. - # Errors are folded into the failed count for display purposes. + # Build per-provider summary counts so the report can show one row per + # framework (dotnet) or per provider (Python). + provider_counts: dict[str, dict[str, int]] = {} + for r in combined_results.values(): + prov = r.get("provider", "Unknown") + if prov not in provider_counts: + provider_counts[prov] = {"total": 0, "passed": 0, "failed": 0, "skipped": 0} + provider_counts[prov]["total"] += 1 + st = r["status"] + if st == "passed": + provider_counts[prov]["passed"] += 1 + elif st in ("failed", "error"): + provider_counts[prov]["failed"] += 1 + elif st == "skipped": + provider_counts[prov]["skipped"] += 1 + + # Overall summary (sum across all providers). 
statuses = [r["status"] for r in combined_results.values()] summary = { "total": len(statuses), @@ -205,6 +285,7 @@ def load_current_run(reports_dir: Path) -> dict[str, Any]: return { "timestamp": datetime.now(timezone.utc).isoformat(), "summary": summary, + "provider_summaries": provider_counts, "results": combined_results, } @@ -247,13 +328,35 @@ def _short_name(nodeid: str) -> str: def generate_trend_report(runs: list[dict[str, Any]]) -> str: """Generate a markdown trend report from run history.""" lines = [ - "# 🔬 Flaky Test Report", + "# 🔬 Integration Test Report", "", f"*Generated: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}*", "", ] - # --- Overall status table (most recent first) --- + # Detect whether this is a dotnet report (provider-qualified keys). + is_dotnet = False + for run in runs: + provider_sums = run.get("provider_summaries", {}) + if any(p.startswith("net") for p in provider_sums): + is_dotnet = True + break + + if is_dotnet: + _generate_dotnet_report(lines, runs) + else: + _generate_python_report(lines, runs) + + lines.append("") + lines.append("**Legend:** ✅ Passed · ❌ Failed · ⏭️ Skipped · ⚠️ Expected Failure (xfail) · N/A Not available") + lines.append("") + + return "\n".join(lines) + + +def _generate_python_report(lines: list[str], runs: list[dict[str, Any]]) -> None: + """Generate the original single-table Python report format.""" + # --- Overall status table --- lines.append("## Overall Status (Last 5 Runs)") lines.append("") lines.append("| Run | Total | ✅ Passed | ❌ Failed | ⏭️ Skipped |") @@ -276,27 +379,91 @@ def generate_trend_report(runs: list[dict[str, Any]]) -> str: lines.append("") - # --- Per-test results table --- - lines.append("## Per-Test Results") - lines.append("") + # --- Single per-test results table --- + _generate_per_test_table(lines, runs, "## Per-Test Results") + + +def _generate_dotnet_report(lines: list[str], runs: list[dict[str, Any]]) -> None: + """Generate per-framework tables for dotnet (net10.0, net472, etc.).""" + # Collect all providers seen across all runs, sorted for stable ordering + all_providers: set[str] = set() + for run in runs: + all_providers.update(run.get("provider_summaries", {}).keys()) + providers = sorted(all_providers) + + for provider in providers: + lines.append(f"## {provider}") + lines.append("") + + # --- Per-provider summary table --- + lines.append("| Run | Total | ✅ Passed | ❌ Failed | ⏭️ Skipped |") + lines.append("|-----|-------|-----------|-----------|------------|") + + for run in reversed(runs): + ps = run.get("provider_summaries", {}).get(provider, {}) + total = ps.get("total", 0) + label = _format_run_label(run["timestamp"]) + if total == 0: + lines.append(f"| {label} | N/A | N/A | N/A | N/A |") + else: + lines.append( + f"| {label} " + f"| {total} " + f"| {ps.get('passed', 0)}/{total} " + f"| {ps.get('failed', 0)}/{total} " + f"| {ps.get('skipped', 0)}/{total} |" + ) - # Collect all test nodeids, providers, and modules across all runs - all_tests: dict[str, str] = {} # nodeid → provider (from most recent run) - all_modules: dict[str, str] = {} # nodeid → module (from most recent run) + for _ in range(MAX_HISTORY - len(runs)): + lines.append("| N/A | N/A | N/A | N/A | N/A |") + + lines.append("") + + # --- Per-test table filtered to this provider --- + _generate_per_test_table( + lines, runs, + heading=None, + provider_filter=provider, + ) + + +def _generate_per_test_table( + lines: list[str], + runs: list[dict[str, Any]], + heading: str | None = None, + provider_filter: str | None 
+) -> None:
+    """Emit a per-test trend table, optionally filtered to a single provider."""
+    if heading:
+        lines.append(heading)
+        lines.append("")
+
+    # Collect all test nodeids (and metadata) across all runs
+    all_tests: dict[str, str] = {}  # nodeid → provider
+    all_modules: dict[str, str] = {}  # nodeid → module
     for run in runs:
         for nodeid, info in run.get("results", {}).items():
-            provider = info.get("provider", "Unknown") if isinstance(info, dict) else "Unknown"
-            module = info.get("module", "") if isinstance(info, dict) else ""
-            all_tests[nodeid] = provider
+            if not isinstance(info, dict):
+                continue
+            prov = info.get("provider", "Unknown")
+            if provider_filter and prov != provider_filter:
+                continue
+            module = info.get("module", "")
+            all_tests[nodeid] = prov
             all_modules[nodeid] = module
 
     if not all_tests:
         lines.append("*No test results available.*")
-        return "\n".join(lines)
+        lines.append("")
+        return
 
-    # Build header (most recent run first)
-    header = "| Test | File | Provider |"
-    separator = "|------|------|----------|"
+    # Build header
+    if provider_filter:
+        header = "| Test | File |"
+        separator = "|------|------|"
+    else:
+        header = "| Test | File | Provider |"
+        separator = "|------|------|----------|"
     for run in reversed(runs):
         label = _format_run_label(run["timestamp"])
         header += f" {label} |"
@@ -308,12 +475,15 @@ def generate_trend_report(runs: list[dict[str, Any]]) -> str:
     lines.append(header)
     lines.append(separator)
 
-    # Sort by provider then test name
-    for nodeid in sorted(all_tests, key=lambda n: (all_tests[n], n)):
-        provider = all_tests[nodeid]
+    # Sort by module then test name
+    for nodeid in sorted(all_tests, key=lambda n: (all_modules.get(n, ""), n)):
         module = all_modules.get(nodeid, "")
         short = _short_name(nodeid)
-        row = f"| `{short}` | `{module}` | {provider} |"
+        if provider_filter:
+            row = f"| `{short}` | `{module}` |"
+        else:
+            provider = all_tests[nodeid]
+            row = f"| `{short}` | `{module}` | {provider} |"
 
         for run in reversed(runs):
             result = run.get("results", {}).get(nodeid)
@@ -330,10 +500,6 @@ def generate_trend_report(runs: list[dict[str, Any]]) -> str:
         lines.append(row)
 
     lines.append("")
-    lines.append("**Legend:** ✅ Passed · ❌ Failed · ⏭️ Skipped · ⚠️ Expected Failure (xfail) · N/A Not available")
-    lines.append("")
-
-    return "\n".join(lines)
 
 
 # ---------------------------------------------------------------------------