diff --git a/src/Tools/AI Test Toolkit/Permissions/AITestToolkitObj.PermissionSet.al b/src/Tools/AI Test Toolkit/Permissions/AITestToolkitObj.PermissionSet.al index 717cff3ddf..3f1a2e0930 100644 --- a/src/Tools/AI Test Toolkit/Permissions/AITestToolkitObj.PermissionSet.al +++ b/src/Tools/AI Test Toolkit/Permissions/AITestToolkitObj.PermissionSet.al @@ -43,5 +43,7 @@ permissionset 149031 "AI Test Toolkit - Obj" page "AIT Test Suite" = X, page "AIT Test Suite List" = X, page "AIT Test Suite Language Lookup" = X, + page "AIT Log Entry Outcome Part" = X, + page "AIT Agent Log Entry Part" = X, page "AIT Run History" = X; } \ No newline at end of file diff --git a/src/Tools/AI Test Toolkit/src/Agent/AITAgentLogEntryPart.Page.al b/src/Tools/AI Test Toolkit/src/Agent/AITAgentLogEntryPart.Page.al new file mode 100644 index 0000000000..bdab620812 --- /dev/null +++ b/src/Tools/AI Test Toolkit/src/Agent/AITAgentLogEntryPart.Page.al @@ -0,0 +1,55 @@ +// ------------------------------------------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. See License.txt in the project root for license information. 
+// ------------------------------------------------------------------------------------------------ + +namespace System.TestTools.AITestToolkit; + +page 149050 "AIT Agent Log Entry Part" +{ + Caption = 'Agent Details'; + PageType = CardPart; + Editable = false; + SourceTable = "AIT Log Entry"; + + layout + { + area(Content) + { + field("Agent Task IDs"; AgentTaskIDs) + { + ApplicationArea = All; + Caption = 'Agent Tasks Executed'; + ToolTip = 'Specifies the comma-separated list of Agent Task IDs related to this log entry.'; + + trigger OnDrillDown() + begin + AgentTestContextImpl.OpenAgentTaskList(AgentTaskIDs); + end; + } + field("Copilot Credits"; CopilotCredits) + { + ApplicationArea = All; + AutoFormatType = 0; + Caption = 'Copilot Credits Consumed'; + ToolTip = 'Specifies the total Copilot Credits consumed by the Agent Tasks for this log entry.'; + + trigger OnDrillDown() + begin + AgentTestContextImpl.OpenAgentConsumptionOverview(AgentTaskIDs); + end; + } + } + } + + var + AgentTestContextImpl: Codeunit "Agent Test Context Impl."; + CopilotCredits: Decimal; + AgentTaskIDs: Text; + + trigger OnAfterGetRecord() + begin + CopilotCredits := AgentTestContextImpl.GetCopilotCreditsForLogEntry(Rec."Entry No."); + AgentTaskIDs := AgentTestContextImpl.GetAgentTaskIDsForLogEntry(Rec."Entry No."); + end; +} diff --git a/src/Tools/AI Test Toolkit/src/Agent/AgentLogEntries.PageExt.al b/src/Tools/AI Test Toolkit/src/Agent/AgentLogEntries.PageExt.al index df3fe46c30..dca373cb48 100644 --- a/src/Tools/AI Test Toolkit/src/Agent/AgentLogEntries.PageExt.al +++ b/src/Tools/AI Test Toolkit/src/Agent/AgentLogEntries.PageExt.al @@ -39,6 +39,14 @@ pageextension 149030 "Agent Log Entries" extends "AIT Log Entries" end; } } + addafter(TestOutcome) + { + part(AgentDetails; "AIT Agent Log Entry Part") + { + ApplicationArea = All; + SubPageLink = "Entry No." 
= field("Entry No."); + } + } } trigger OnAfterGetRecord() diff --git a/src/Tools/AI Test Toolkit/src/Logs/AITLogEntries.Page.al b/src/Tools/AI Test Toolkit/src/Logs/AITLogEntries.Page.al index e0945c0ff3..cf95bc3ba9 100644 --- a/src/Tools/AI Test Toolkit/src/Logs/AITLogEntries.Page.al +++ b/src/Tools/AI Test Toolkit/src/Logs/AITLogEntries.Page.al @@ -186,6 +186,14 @@ page 149033 "AIT Log Entries" } } } + area(FactBoxes) + { + part(TestOutcome; "AIT Log Entry Outcome Part") + { + ApplicationArea = All; + SubPageLink = "Entry No." = field("Entry No."); + } + } } actions { diff --git a/src/Tools/AI Test Toolkit/src/Logs/AITLogEntryOutcomePart.Page.al b/src/Tools/AI Test Toolkit/src/Logs/AITLogEntryOutcomePart.Page.al new file mode 100644 index 0000000000..65f44fff15 --- /dev/null +++ b/src/Tools/AI Test Toolkit/src/Logs/AITLogEntryOutcomePart.Page.al @@ -0,0 +1,110 @@ +// ------------------------------------------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. See License.txt in the project root for license information. +// ------------------------------------------------------------------------------------------------ + +namespace System.TestTools.AITestToolkit; + +page 149049 "AIT Log Entry Outcome Part" +{ + ApplicationArea = All; + Caption = 'Test Outcome'; + PageType = CardPart; + Editable = false; + SourceTable = "AIT Log Entry"; + Extensible = true; + + layout + { + area(Content) + { + field(Status; Rec.Status) + { + StyleExpr = StatusStyleExpr; + } + field(Accuracy; Rec."Test Method Line Accuracy") + { + Caption = 'Evaluation Result'; + ToolTip = 'Specifies the accuracy of the eval line.'; + AutoFormatType = 0; + } + field(TurnsText; TurnsText) + { + Caption = 'No. 
of Turns Passed'; + ToolTip = 'Specifies the number of turns that passed out of the total number of turns.'; + StyleExpr = TurnsStyleExpr; + } + group(ErrorMessageGroup) + { + Caption = 'Error Message'; + field(ErrorMessage; ErrorMessage) + { + ShowCaption = false; + ToolTip = 'Specifies the error message from the eval.'; + Style = Unfavorable; + Multiline = true; + + trigger OnDrillDown() + begin + Message(ErrorMessage); + end; + } + } + field(Duration; Rec."Duration (ms)") + { + Caption = 'Duration (ms)'; + ToolTip = 'Specifies the duration of the test execution in milliseconds.'; + AutoFormatType = 0; + } + } + } + + var + TurnsText: Text; + ErrorMessage: Text; + StatusStyleExpr: Text; + TurnsStyleExpr: Text; + + trigger OnAfterGetRecord() + var + AITTestSuiteMgt: Codeunit "AIT Test Suite Mgt."; + begin + TurnsText := AITTestSuiteMgt.GetTurnsAsText(Rec); + SetStatusStyleExpr(); + SetTurnsStyleExpr(); + SetErrorMessage(); + end; + + local procedure SetStatusStyleExpr() + begin + case Rec.Status of + Rec.Status::Success: + StatusStyleExpr := Format(PageStyle::Favorable); + Rec.Status::Error: + StatusStyleExpr := Format(PageStyle::Unfavorable); + Rec.Status::Skipped: + StatusStyleExpr := Format(PageStyle::Ambiguous); + else + StatusStyleExpr := ''; + end; + end; + + local procedure SetTurnsStyleExpr() + begin + case Rec."No. of Turns Passed" of + Rec."No. 
of Turns": + TurnsStyleExpr := Format(PageStyle::Favorable); + 0: + TurnsStyleExpr := Format(PageStyle::Unfavorable); + else + TurnsStyleExpr := Format(PageStyle::Ambiguous); + end; + end; + + local procedure SetErrorMessage() + begin + ErrorMessage := ''; + if Rec.Status = Rec.Status::Error then + ErrorMessage := Rec.GetMessage(); + end; +} diff --git a/src/Tools/Test Framework/Test Libraries/LibraryAgent/AI-TEST-AUTHORING.md b/src/Tools/Test Framework/Test Libraries/LibraryAgent/AI-TEST-AUTHORING.md index 49dadde375..e4cf57cf37 100644 --- a/src/Tools/Test Framework/Test Libraries/LibraryAgent/AI-TEST-AUTHORING.md +++ b/src/Tools/Test Framework/Test Libraries/LibraryAgent/AI-TEST-AUTHORING.md @@ -161,10 +161,15 @@ query: title: message: attachments: - - file: - - file: + - file: # static file + - file: # OR: dynamically generated + action_type: + action_data: + : # arbitrary data for the generator ``` +The `file` key supports two forms: a **scalar** value (static file path) or an **object** with `action_type` / `action_data` (dynamically generated file). + How keys flow into library calls: | YAML key | Flows into | @@ -172,7 +177,8 @@ How keys flow into library calls: | `query.title` | `AgentTaskBuilder.Initialize(AgentUserSecurityId, title)` — required, asserted via `Library Assert`. | | `query.from` | `AgentTaskMessageBuilder.Initialize(from, ...)`. If `from` is missing, no message is added (only the task title). | | `query.message` | `AgentTaskMessageBuilder.Initialize(..., message)`. Optional. | -| `query.attachments[].file` | `IAgentTestResourceProvider.GetResource(file, ...)` → `AgentTaskMessageBuilder.AddAttachment(...)`. Use the `RunTurnAndWait` overload that accepts a provider when YAML uses attachments. | +| `query.attachments[].file` (scalar) | `IAgentTestResourceProvider.GetResource(file, ...)` → `AgentTaskMessageBuilder.AddAttachment(...)`. Use the `RunTurnAndWait` overload that accepts a provider when YAML uses attachments. 
| +| `query.attachments[].file` (object) | `IAgentTestResourceProvider.GenerateResource(action_type, action_data, ...)` → `AgentTaskMessageBuilder.AddAttachment(...)`. The `action_data` sub-object is extracted and passed as a `Test Input Json` codeunit; `action_type` is passed separately. | ### 7.3 Intervention continuation @@ -212,6 +218,7 @@ expected_data: suggestions: # optional — list of suggestion codes that MUST be present - - + intent: "" # optional — LLM judge validates the intervention message : 1 # implemented per agent test app : Released # implemented per agent test app ``` @@ -222,10 +229,12 @@ expected_data: |---|---| | `expected_data.intervention_request.type` | `LibraryAgent.ParseUserInterventionRequestType(text)` → `Enum "Agent User Int Request Type"`. Values: `Assistance`, `Review`, `Message` (English ordinal names; no translation). | | `expected_data.intervention_request.suggestions[]` | Validated by `LibraryAgent.ValidateInterventionRequest` — every expected code must be present on the actual request. | +| `expected_data.intervention_request.intent` | Validated by an LLM judge that evaluates whether the agent's intervention message semantically matches the declared intent. The judge returns a pass/fail verdict with reasoning. | Automatic validation in `LibraryAgent.FinalizeTurn`: - If `intervention_request` is declared in YAML: the agent must have paused for an intervention with the matching `type` and including every `suggestion` code listed. +- If `intent` is declared: the framework calls an LLM judge to semantically validate that the intervention message matches the expected intent. This replaces brittle substring matching with semantic evaluation. - If `intervention_request` is **not** declared: the agent must **not** have paused for an intervention. Unexpected interventions fail the turn. 
So: declare `intervention_request` on every turn where you expect the diff --git a/src/Tools/Test Framework/Test Libraries/LibraryAgent/README.md b/src/Tools/Test Framework/Test Libraries/LibraryAgent/README.md index d7cfe73fe2..f361b1898e 100644 --- a/src/Tools/Test Framework/Test Libraries/LibraryAgent/README.md +++ b/src/Tools/Test Framework/Test Libraries/LibraryAgent/README.md @@ -2,6 +2,14 @@ Test helpers for authoring AI agent tests in Business Central. The library provides helper methods to create and manage agent tasks, messages, and user interventions, drive YAML-described turn loops via `Library - Agent.RunTurnAndWait` / `FinalizeTurn`, and integrate with the AI Test Toolkit for evaluation. +## Features + +- **Turn-loop driver** — `RunTurnAndWait` + `FinalizeTurn` handle the multi-turn lifecycle from YAML. +- **Intervention validation** — type, suggestions, and semantic intent matching. +- **LLM-as-judge** — when an `intent` key is declared in `intervention_request`, the framework uses GPT-4.1 to semantically evaluate whether the agent's intervention message matches the expected intent. Requires the `Agent Test LLM Judge` Copilot Capability (registered automatically by the library's install codeunit). +- **Dynamic file generation** — `IAgentTestResourceProvider.GenerateResource` for test attachments that must be created at runtime. +- **Placeholder engine** — date/time formula substitution in YAML values. + ## Public documentation - [AI-TEST-AUTHORING.md](AI-TEST-AUTHORING.md) — YAML format reference for AI agent tests, the placeholder syntax, and how each YAML key maps to the library methods that consume it. 
diff --git a/src/Tools/Test Framework/Test Libraries/LibraryAgent/src/AgentTestLLMJudgeCap.EnumExt.al b/src/Tools/Test Framework/Test Libraries/LibraryAgent/src/AgentTestLLMJudgeCap.EnumExt.al new file mode 100644 index 0000000000..72fe75fe68 --- /dev/null +++ b/src/Tools/Test Framework/Test Libraries/LibraryAgent/src/AgentTestLLMJudgeCap.EnumExt.al @@ -0,0 +1,15 @@ +// ------------------------------------------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. See License.txt in the project root for license information. +// ------------------------------------------------------------------------------------------------ +namespace System.TestLibraries.AI; + +using System.AI; + +enumextension 130566 "Agent Test LLM Judge Cap." extends "Copilot Capability" +{ + value(130566; "Agent Test LLM Judge") + { + Caption = 'Agent Test Library LLM Judge'; + } +} diff --git a/src/Tools/Test Framework/Test Libraries/LibraryAgent/src/IAgentTestResourceProvider.Interface.al b/src/Tools/Test Framework/Test Libraries/LibraryAgent/src/IAgentTestResourceProvider.Interface.al index a50ca74eff..ed7e675e73 100644 --- a/src/Tools/Test Framework/Test Libraries/LibraryAgent/src/IAgentTestResourceProvider.Interface.al +++ b/src/Tools/Test Framework/Test Libraries/LibraryAgent/src/IAgentTestResourceProvider.Interface.al @@ -5,6 +5,8 @@ namespace System.TestLibraries.Agents; +using System.TestTools.TestRunner; + /// /// Interface for resolving test resource files from the consuming test app. /// Implement this in your test app to provide resource file access to the agent test library. @@ -19,4 +21,19 @@ interface "IAgentTestResourceProvider" /// Returns the file name extracted from the path. /// Returns the MIME type of the file. 
procedure GetResource(ResourcePath: Text; var ResourceInStream: InStream; var FileName: Text[250]; var MIMEType: Text[100]) + + /// + /// Generates a resource dynamically from YAML-declared data. + /// Override this to support attachments whose 'file' key is an object with 'action_type' and 'action_data'. + /// GeneratorName carries the 'action_type' value used for dispatch; GeneratorData carries only the + /// 'action_data' sub-object, which holds whatever data keys the generator needs. + /// + /// The generator to dispatch to, from the 'action_type' key of the YAML 'file' object. + /// The 'action_data' sub-object of the YAML 'file' object, accessible via Test Input Json. + /// Returns the generated file content as an InStream. + /// Returns the generated file name. + /// Returns the MIME type of the generated file. + procedure GenerateResource(GeneratorName: Text; GeneratorData: Codeunit "Test Input Json"; var ResourceInStream: InStream; var FileName: Text[250]; var MIMEType: Text[100]) + begin + end; } diff --git a/src/Tools/Test Framework/Test Libraries/LibraryAgent/src/Internal/LibraryAgentImpl.Codeunit.al b/src/Tools/Test Framework/Test Libraries/LibraryAgent/src/Internal/LibraryAgentImpl.Codeunit.al index 1637aa4081..5af0dda528 100644 --- a/src/Tools/Test Framework/Test Libraries/LibraryAgent/src/Internal/LibraryAgentImpl.Codeunit.al +++ b/src/Tools/Test Framework/Test Libraries/LibraryAgent/src/Internal/LibraryAgentImpl.Codeunit.al @@ -6,6 +6,7 @@ namespace System.TestLibraries.Agents; using System.Agents; +using System.AI; using System.Environment; using System.Environment.Configuration; using System.Globalization; @@ -469,6 +470,9 @@ codeunit 130561 "Library - Agent Impl." 
AgentTaskBuilder: Codeunit "Agent Task Builder"; AgentTaskMessageBuilder: Codeunit "Agent Task Message Builder"; AttachmentsInput: Codeunit "Test Input Json"; + AttachmentElement: Codeunit "Test Input Json"; + AttachmentFileElement: Codeunit "Test Input Json"; + ActionTypeElement: Codeunit "Test Input Json"; TitleInput, FromInput, MessageInput : Codeunit "Test Input Json"; Assert: Codeunit "Library Assert"; ResourceInStream: InStream; @@ -476,7 +480,7 @@ codeunit 130561 "Library - Agent Impl." MessageValue: Text; FileName: Text[250]; MIMEType: Text[100]; - HasTitle, HasFrom, HasMessage, HasAttachments : Boolean; + HasTitle, HasFrom, HasMessage, HasAttachments, HasFile, HasActionType : Boolean; I: Integer; begin TitleInput := QueryInput.ElementExists(TitleTok, HasTitle); @@ -506,9 +510,26 @@ codeunit 130561 "Library - Agent Impl." AttachmentsInput := QueryInput.ElementExists(AttachmentsTok, HasAttachments); if HasAttachments then for I := 0 to AttachmentsInput.GetElementCount() - 1 do begin - AgentTestResourceProvider.GetResource( - AttachmentsInput.ElementAt(I).Element(FileTok).ValueAsText(), - ResourceInStream, FileName, MIMEType); + Clear(FileName); + Clear(MIMEType); + + AttachmentElement := AttachmentsInput.ElementAt(I); + AttachmentFileElement := AttachmentElement.ElementExists(FileTok, HasFile); + + if not HasFile then + continue; + + ActionTypeElement := AttachmentFileElement.ElementExists(ActionTypeTok, HasActionType); + if HasActionType then + AgentTestResourceProvider.GenerateResource( + ActionTypeElement.ValueAsText(), + AttachmentFileElement.Element(ActionDataTok), + ResourceInStream, FileName, MIMEType) + else + AgentTestResourceProvider.GetResource( + AttachmentFileElement.ValueAsText(), + ResourceInStream, FileName, MIMEType); + AgentTaskMessageBuilder.AddAttachment(FileName, MIMEType, ResourceInStream); end; end; @@ -556,18 +577,22 @@ codeunit 130561 "Library - Agent Impl." 
Assert.Fail(UnexpectedInterventionErr); if HasActualIntervention and HasExpectedIntervention then - ValidateInterventionDetails(TempUserInterventionRequest, TempSuggestion, ExpectedInterventionRequest); + ValidateInterventionDetails(AgentTask, TempUserInterventionRequest, TempAnnotation, TempSuggestion, ExpectedInterventionRequest); end; local procedure ValidateInterventionDetails( + AgentTask: Record "Agent Task"; TempUserInterventionRequest: Record "Agent User Int Request Details" temporary; + var TempAnnotation: Record "Agent Annotation" temporary; var TempSuggestion: Record "Agent Task User Int Suggestion" temporary; ExpectedInterventionRequest: Codeunit "Test Input Json") var - TypeInput, SuggestionsInput : Codeunit "Test Input Json"; + TypeInput, SuggestionsInput, IntentInput : Codeunit "Test Input Json"; Assert: Codeunit "Library Assert"; ExpectedType: Enum "Agent User Int Request Type"; - TypeExists, SuggestionsExist : Boolean; + ActualMessage: Text; + Reasoning: Text; + TypeExists, SuggestionsExist, IntentExists : Boolean; I: Integer; begin TypeInput := ExpectedInterventionRequest.ElementExists(TypeTok, TypeExists); @@ -591,6 +616,87 @@ codeunit 130561 "Library - Agent Impl." 
Assert.AreEqual(SuggestionsInput.GetElementCount(), TempSuggestion.Count(), StrSubstNo(SuggestionCountMismatchErr, SuggestionsInput.GetElementCount(), TempSuggestion.Count())); end; + + IntentInput := ExpectedInterventionRequest.ElementExists(IntentTok, IntentExists); + if IntentExists then begin + ActualMessage := GetInterventionActualMessage(AgentTask, TempUserInterventionRequest, TempAnnotation); + + Assert.IsTrue( + EvaluateIntentWithLLM(ActualMessage, IntentInput.ValueAsText(), Reasoning), + StrSubstNo(IntentMismatchErr, IntentInput.ValueAsText(), ActualMessage) + ' | Judge reasoning: ' + Reasoning); + end; + end; + + local procedure GetInterventionActualMessage( + AgentTask: Record "Agent Task"; + TempUserInterventionRequest: Record "Agent User Int Request Details" temporary; + var TempAnnotation: Record "Agent Annotation" temporary): Text + var + AgentTaskMessage: Record "Agent Task Message"; + ContentInStream: InStream; + ContentText: Text; + begin + // For ReviewMessage type, the actual content is in the related output message + if TempUserInterventionRequest.Type = TempUserInterventionRequest.Type::ReviewMessage then + if AgentTaskMessage.Get(AgentTask.ID, TempUserInterventionRequest."Message ID") then begin + AgentTaskMessage.CalcFields(Content); + AgentTaskMessage.Content.CreateInStream(ContentInStream, GetDefaultEncoding()); + ContentInStream.Read(ContentText); + exit(ContentText); + end; + + // For other types the message is in the annotation; fall back to request message + if TempAnnotation.FindFirst() then + exit(TempAnnotation.Message); + + exit(TempUserInterventionRequest.Message); + end; + + local procedure EvaluateIntentWithLLM(ActualMessage: Text; ExpectedIntent: Text; var Reasoning: Text): Boolean + var + AzureOpenAI: Codeunit "Azure OpenAI"; + AOAIChatMessages: Codeunit "AOAI Chat Messages"; + AOAIChatCompletionParams: Codeunit "AOAI Chat Completion Params"; + AOAIOperationResponse: Codeunit "AOAI Operation Response"; + AOAIDeployments: 
Codeunit "AOAI Deployments"; + ResultText: Text; + ResultJson: JsonToken; + PassToken: JsonToken; + ReasonToken: JsonToken; + begin + AzureOpenAI.SetAuthorization(Enum::"AOAI Model Type"::"Chat Completions", AOAIDeployments.GetGPT41Latest()); + AzureOpenAI.SetCopilotCapability(Enum::"Copilot Capability"::"Agent Test LLM Judge"); + + AOAIChatCompletionParams.SetTemperature(0); + AOAIChatCompletionParams.SetMaxTokens(500); + AOAIChatCompletionParams.SetJsonMode(true); + + AOAIChatMessages.AddSystemMessage(IntentJudgeSystemPromptTxt); + AOAIChatMessages.AddUserMessage(StrSubstNo(IntentJudgeUserPromptTxt, ExpectedIntent, ActualMessage)); + + AzureOpenAI.GenerateChatCompletion(AOAIChatMessages, AOAIChatCompletionParams, AOAIOperationResponse); + + if not AOAIOperationResponse.IsSuccess() then begin + Reasoning := StrSubstNo(LLMCallFailedLbl, AOAIOperationResponse.GetError()); + exit(false); + end; + + ResultText := AOAIChatMessages.GetLastMessage(); + + if not ResultJson.ReadFrom(ResultText) then begin + Reasoning := StrSubstNo(LLMInvalidJsonLbl, ResultText); + exit(false); + end; + + if not ResultJson.AsObject().Get(PassTok, PassToken) then begin + Reasoning := StrSubstNo(LLMInvalidJsonLbl, ResultText); + exit(false); + end; + + if ResultJson.AsObject().Get(ReasoningTok, ReasonToken) then + Reasoning := ReasonToken.AsValue().AsText(); + + exit(PassToken.AsValue().AsBoolean()); end; procedure GetExpectedInterventionRequest(var ExpectedInterventionRequest: Codeunit "Test Input Json"): Boolean @@ -675,8 +781,9 @@ codeunit 130561 "Library - Agent Impl." 
FromTok: Label 'from', Locked = true; AttachmentsTok: Label 'attachments', Locked = true; FileTok: Label 'file', Locked = true; + ActionTypeTok: Label 'action_type', Locked = true; + ActionDataTok: Label 'action_data', Locked = true; InterventionRequestTok: Label 'intervention_request', Locked = true; - SuggestionsTok: Label 'suggestions', Locked = true; InvalidQueryBothErr: Label 'Query cannot contain both ''title'' and ''intervention'' elements.'; InvalidQueryNeitherErr: Label 'Query must contain either a ''title'' (task input) or ''intervention'' element.'; @@ -689,4 +796,12 @@ codeunit 130561 "Library - Agent Impl." SuggestionCountMismatchErr: Label 'Expected %1 suggestions but found %2 actual suggestions.', Comment = '%1 = expected count, %2 = actual count'; UnexpectedInterventionErr: Label 'Task paused for user intervention but no intervention_request found in expected_data for this turn.'; ExpectedInterventionNotFoundErr: Label 'Expected intervention_request in expected_data but the task did not pause for user intervention.'; + IntentTok: Label 'intent', Locked = true; + IntentMismatchErr: Label 'Intervention intent mismatch: expected intent "%1" but got: %2', Comment = '%1 = expected intent, %2 = actual message'; + LLMCallFailedLbl: Label 'LLM intent judge call failed: %1', Comment = '%1 = error message'; + LLMInvalidJsonLbl: Label 'LLM intent judge returned invalid JSON: %1', Comment = '%1 = raw response'; + PassTok: Label 'pass', Locked = true; + ReasoningTok: Label 'reasoning', Locked = true; + IntentJudgeSystemPromptTxt: Label 'You are an evaluator for a Business Central agent test framework. You will receive the agent''s actual intervention message and the expected intent. Determine whether the agent''s message aligns with the expected intent. 
Respond with JSON: { "pass": true or false, "reasoning": "brief explanation" }', Locked = true; + IntentJudgeUserPromptTxt: Label 'Expected intent: %1\nActual agent message: %2', Locked = true, Comment = '%1 = expected intent, %2 = actual message'; } \ No newline at end of file diff --git a/src/Tools/Test Framework/Test Libraries/LibraryAgent/src/Internal/LibraryAgentInstall.Codeunit.al b/src/Tools/Test Framework/Test Libraries/LibraryAgent/src/Internal/LibraryAgentInstall.Codeunit.al index 4294ca8dfd..f4f45c0b5d 100644 --- a/src/Tools/Test Framework/Test Libraries/LibraryAgent/src/Internal/LibraryAgentInstall.Codeunit.al +++ b/src/Tools/Test Framework/Test Libraries/LibraryAgent/src/Internal/LibraryAgentInstall.Codeunit.al @@ -5,6 +5,9 @@ namespace System.TestLibraries.Agents; +using System.AI; +using System.TestLibraries.AI; + codeunit 130562 "Library - Agent Install" { Access = Internal; @@ -14,8 +17,12 @@ codeunit 130562 "Library - Agent Install" trigger OnInstallAppPerDatabase() var + LibraryCopilotCapability: Codeunit "Library - Copilot Capability"; LibraryAgentUtilities: Codeunit "Library - Agent Utilities"; + AppInfo: ModuleInfo; begin LibraryAgentUtilities.VerifyCanRunOnCurrentEnvironment(); + NavApp.GetCurrentModuleInfo(AppInfo); + LibraryCopilotCapability.ActivateCopilotCapability(Enum::"Copilot Capability"::"Agent Test LLM Judge", AppInfo.Id); end; } \ No newline at end of file diff --git a/src/Tools/Test Framework/Test Libraries/LibraryAgent/src/NoOpAgentTestResProvider.Codeunit.al b/src/Tools/Test Framework/Test Libraries/LibraryAgent/src/Internal/NoOpAgentTestResProvider.Codeunit.al similarity index 78% rename from src/Tools/Test Framework/Test Libraries/LibraryAgent/src/NoOpAgentTestResProvider.Codeunit.al rename to src/Tools/Test Framework/Test Libraries/LibraryAgent/src/Internal/NoOpAgentTestResProvider.Codeunit.al index 443acb9d43..db0c6a4ea8 100644 --- a/src/Tools/Test Framework/Test 
Libraries/LibraryAgent/src/NoOpAgentTestResProvider.Codeunit.al +++ b/src/Tools/Test Framework/Test Libraries/LibraryAgent/src/Internal/NoOpAgentTestResProvider.Codeunit.al @@ -5,6 +5,8 @@ namespace System.TestLibraries.Agents; +using System.TestTools.TestRunner; + /// /// No-op implementation of IAgentTestResourceProvider. /// Used by the ProvideInputAndWait overload that does not support attachments. @@ -22,6 +24,13 @@ codeunit 130565 "NoOp Agent Test Res. Provider" implements "IAgentTestResourcePr Error(NoResourceProviderErr); end; +#pragma warning disable AA0150 + procedure GenerateResource(GeneratorName: Text; GeneratorData: Codeunit "Test Input Json"; var ResourceInStream: InStream; var FileName: Text[250]; var MIMEType: Text[100]) +#pragma warning restore AA0150 + begin + Error(NoResourceProviderErr); + end; + var NoResourceProviderErr: Label 'No resource provider configured. Use the ProvideInputAndWait overload that accepts an IAgentTestResourceProvider to load attachments.'; } diff --git a/src/Tools/Test Framework/Test Libraries/LibraryAgent/src/LibraryAgent.Codeunit.al b/src/Tools/Test Framework/Test Libraries/LibraryAgent/src/LibraryAgent.Codeunit.al index fa40374aea..4746829a17 100644 --- a/src/Tools/Test Framework/Test Libraries/LibraryAgent/src/LibraryAgent.Codeunit.al +++ b/src/Tools/Test Framework/Test Libraries/LibraryAgent/src/LibraryAgent.Codeunit.al @@ -309,6 +309,7 @@ codeunit 130560 "Library - Agent" /// Validates the current intervention request against expected data from the test input. /// Uses Assert to fail the test with a descriptive message if any check fails. /// Checks that the task requires intervention, the type matches, and expected suggestions are present. + /// If an 'intent' field is declared, validates the intervention message semantically using an LLM judge. /// /// The agent task to validate. /// The expected intervention request data from the YAML. 
@@ -321,6 +322,7 @@ codeunit 130560 "Library - Agent" /// Writes the turn output and determines if the test should continue to the next turn. /// Calls Commit() after writing output. /// Validates that the task did not pause for an unexpected intervention (no intervention_request in expected_data). + /// When an intervention_request is declared, validates type, suggestions, and intent (via LLM judge if 'intent' is specified). /// /// The agent task for the current turn. /// Whether the current turn completed successfully.