From 6bcc41a8ae2ca11fcdb63e0ae4fad2298234553b Mon Sep 17 00:00:00 2001 From: notgitika Date: Tue, 24 Mar 2026 23:47:11 -0400 Subject: [PATCH 1/5] feat: support custom log group and service name for external agent online evals Allow online eval configs to monitor agents not hosted on AgentCore Runtime by accepting customLogGroupName and customServiceName instead of requiring a project agent reference. - Schema: agent is now optional; refinements require agent or both custom fields, and require the two custom fields to be set together - CLI: --custom-log-group-name and --custom-service-name flags added - TUI: new "log source" wizard step branches between project agent and external agent - CDK L3: construct uses custom fields directly for dataSourceConfig when present - Error handling: selecting "Project Agent" with zero agents shows inline error --- .../primitives/OnlineEvalConfigPrimitive.ts | 37 ++++- .../OnlineEvalConfigPrimitive.test.ts | 37 +++++ src/cli/tui/hooks/useCreateOnlineEval.ts | 8 +- .../screens/online-eval/AddOnlineEvalFlow.tsx | 8 -- .../online-eval/AddOnlineEvalScreen.tsx | 128 +++++++++++++++--- src/cli/tui/screens/online-eval/types.ts | 20 ++- .../online-eval/useAddOnlineEvalWizard.ts | 72 ++++++++-- src/schema/schemas/agentcore-project.ts | 11 +- .../__tests__/online-eval-config.test.ts | 89 +++++++++++- .../schemas/primitives/online-eval-config.ts | 70 ++++++++-- 10 files changed, 420 insertions(+), 60 deletions(-) diff --git a/src/cli/primitives/OnlineEvalConfigPrimitive.ts b/src/cli/primitives/OnlineEvalConfigPrimitive.ts index 03687047e..9645b05a7 100644 --- a/src/cli/primitives/OnlineEvalConfigPrimitive.ts +++ b/src/cli/primitives/OnlineEvalConfigPrimitive.ts @@ -9,10 +9,12 @@ import type { Command } from '@commander-js/extra-typings'; export interface AddOnlineEvalConfigOptions { name: string; - agent: string; + agent?: string; evaluators: string[]; samplingRate: number; enableOnCreate?: boolean; + customLogGroupName?: string; + customServiceName?: string; } export type 
RemovableOnlineEvalConfig = RemovableResource; @@ -109,6 +111,11 @@ export class OnlineEvalConfigPrimitive extends BasePrimitive', 'Evaluator ARN(s) [non-interactive]') .option('--sampling-rate ', 'Sampling percentage (0.01-100) [non-interactive]') .option('--enable-on-create', 'Enable evaluation immediately after deploy [non-interactive]') + .option( + '--custom-log-group-name ', + 'Custom CloudWatch log group name for external agents [non-interactive]' + ) + .option('--custom-service-name ', 'Custom service name for external agents [non-interactive]') .option('--json', 'Output as JSON [non-interactive]') .action( async (cliOptions: { @@ -118,6 +125,8 @@ export class OnlineEvalConfigPrimitive extends BasePrimitive { try { @@ -130,9 +139,23 @@ export class OnlineEvalConfigPrimitive extends BasePrimitive { expect(result).toEqual(expect.objectContaining({ success: false, error: 'no project' })); }); + + it('adds config with custom log source fields (external agent)', async () => { + mockReadProjectSpec.mockResolvedValue(makeProject()); + mockWriteProjectSpec.mockResolvedValue(undefined); + + const result = await primitive.add({ + name: 'ExternalConfig', + evaluators: ['Builtin.GoalSuccessRate'], + samplingRate: 25, + customLogGroupName: '/aws/bedrock-agentcore/runtimes/ext-agent', + customServiceName: 'ext-service', + }); + + expect(result.success).toBe(true); + expect(result).toHaveProperty('configName', 'ExternalConfig'); + + const config = mockWriteProjectSpec.mock.calls[0]![0].onlineEvalConfigs[0]; + expect(config.agent).toBeUndefined(); + expect(config.customLogGroupName).toBe('/aws/bedrock-agentcore/runtimes/ext-agent'); + expect(config.customServiceName).toBe('ext-service'); + }); + + it('omits custom fields when not provided', async () => { + mockReadProjectSpec.mockResolvedValue(makeProject()); + mockWriteProjectSpec.mockResolvedValue(undefined); + + await primitive.add({ + name: 'AgentConfig', + agent: 'MyAgent', + evaluators: ['Builtin.GoalSuccessRate'], + 
samplingRate: 10, + }); + + const config = mockWriteProjectSpec.mock.calls[0]![0].onlineEvalConfigs[0]; + expect(config.customLogGroupName).toBeUndefined(); + expect(config.customServiceName).toBeUndefined(); + }); }); describe('remove', () => { diff --git a/src/cli/tui/hooks/useCreateOnlineEval.ts b/src/cli/tui/hooks/useCreateOnlineEval.ts index 2d0190552..47d644567 100644 --- a/src/cli/tui/hooks/useCreateOnlineEval.ts +++ b/src/cli/tui/hooks/useCreateOnlineEval.ts @@ -3,10 +3,12 @@ import { useCallback, useEffect, useState } from 'react'; interface CreateOnlineEvalConfig { name: string; - agent: string; + agent?: string; evaluators: string[]; samplingRate: number; enableOnCreate: boolean; + customLogGroupName?: string; + customServiceName?: string; } export function useCreateOnlineEval() { @@ -19,10 +21,12 @@ export function useCreateOnlineEval() { try { const addResult = await onlineEvalConfigPrimitive.add({ name: config.name, - agent: config.agent, + ...(config.agent && { agent: config.agent }), evaluators: config.evaluators, samplingRate: config.samplingRate, enableOnCreate: config.enableOnCreate, + ...(config.customLogGroupName && { customLogGroupName: config.customLogGroupName }), + ...(config.customServiceName && { customServiceName: config.customServiceName }), }); if (!addResult.success) { throw new Error(addResult.error ?? 'Failed to create online eval config'); diff --git a/src/cli/tui/screens/online-eval/AddOnlineEvalFlow.tsx b/src/cli/tui/screens/online-eval/AddOnlineEvalFlow.tsx index d5322e9ed..c363f6121 100644 --- a/src/cli/tui/screens/online-eval/AddOnlineEvalFlow.tsx +++ b/src/cli/tui/screens/online-eval/AddOnlineEvalFlow.tsx @@ -62,14 +62,6 @@ export function AddOnlineEvalFlow({ isInteractive = true, onExit, onBack, onDev, const agentNames = projectSpec.runtimes.map(a => a.name); - if (agentNames.length === 0) { - setFlow({ - name: 'error', - message: 'No agents found in project. 
Add an agent first with `agentcore add agent`.', - }); - return; - } - setFlow({ name: 'create-wizard', evaluators: items, agentNames }); } catch (err) { if (!cancelled) setFlow({ name: 'error', message: getErrorMessage(err) }); diff --git a/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx b/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx index fc863a2d1..6bac6caa9 100644 --- a/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx +++ b/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx @@ -1,4 +1,4 @@ -import { OnlineEvalConfigNameSchema } from '../../../../schema'; +import { LogGroupNameSchema, OnlineEvalConfigNameSchema, ServiceNameSchema } from '../../../../schema'; import type { SelectableItem } from '../../components'; import { ConfirmReview, @@ -16,7 +16,7 @@ import type { AddOnlineEvalConfig, EvaluatorItem } from './types'; import { DEFAULT_SAMPLING_RATE, ONLINE_EVAL_STEP_LABELS } from './types'; import { useAddOnlineEvalWizard } from './useAddOnlineEvalWizard'; import { Box, Text } from 'ink'; -import React, { useMemo } from 'react'; +import React, { useCallback, useMemo, useState } from 'react'; interface AddOnlineEvalScreenProps { onComplete: (config: AddOnlineEvalConfig) => void; @@ -35,13 +35,13 @@ export function AddOnlineEvalScreen({ }: AddOnlineEvalScreenProps) { const wizard = useAddOnlineEvalWizard(agentNames.length); - // Auto-set agent when there's only one + // Auto-set agent when there's only one and using project-agent source const effectiveConfig = useMemo(() => { - if (agentNames.length === 1 && !wizard.config.agent) { + if (wizard.logSource === 'project-agent' && agentNames.length === 1 && !wizard.config.agent) { return { ...wizard.config, agent: agentNames[0]! 
}; } return wizard.config; - }, [wizard.config, agentNames]); + }, [wizard.config, wizard.logSource, agentNames]); const evaluatorItems: SelectableItem[] = useMemo(() => { return rawEvaluatorItems.map(e => ({ @@ -55,8 +55,23 @@ export function AddOnlineEvalScreen({ return agentNames.map(name => ({ id: name, title: name })); }, [agentNames]); + const logSourceItems: SelectableItem[] = useMemo( + () => [ + { id: 'project-agent', title: 'Project Agent', description: 'Monitor an agent deployed in this project' }, + { + id: 'external-agent', + title: 'External Agent', + description: 'Monitor an agent outside AgentCore Runtime via custom log source', + }, + ], + [] + ); + const isNameStep = wizard.step === 'name'; + const isLogSourceStep = wizard.step === 'logSource'; const isAgentStep = wizard.step === 'agent'; + const isCustomServiceNameStep = wizard.step === 'customServiceName'; + const isCustomLogGroupNameStep = wizard.step === 'customLogGroupName'; const isEvaluatorsStep = wizard.step === 'evaluators'; const isSamplingRateStep = wizard.step === 'samplingRate'; const isEnableOnCreateStep = wizard.step === 'enableOnCreate'; @@ -70,6 +85,27 @@ export function AddOnlineEvalScreen({ [] ); + const [noAgentsError, setNoAgentsError] = useState(false); + + const handleLogSourceSelect = useCallback( + (item: SelectableItem) => { + if (item.id === 'project-agent' && agentNames.length === 0) { + setNoAgentsError(true); + return; + } + setNoAgentsError(false); + wizard.setLogSource(item.id as 'project-agent' | 'external-agent'); + }, + [agentNames.length, wizard] + ); + + const logSourceNav = useListNavigation({ + items: logSourceItems, + onSelect: handleLogSourceSelect, + onExit: () => wizard.goBack(), + isActive: isLogSourceStep, + }); + const agentNav = useListNavigation({ items: agentItems, onSelect: item => wizard.setAgent(item.id), @@ -102,7 +138,7 @@ export function AddOnlineEvalScreen({ const helpText = isEvaluatorsStep ? 
'Space toggle · Enter confirm · Esc back' - : isAgentStep || isEnableOnCreateStep + : isAgentStep || isEnableOnCreateStep || isLogSourceStep ? HELP_TEXT.NAVIGATE_SELECT : isConfirmStep ? HELP_TEXT.CONFIRM_CANCEL @@ -112,6 +148,26 @@ export function AddOnlineEvalScreen({ ); + // Build confirm review fields based on log source + const confirmFields = useMemo(() => { + const fields = [{ label: 'Name', value: effectiveConfig.name }]; + if (effectiveConfig.agent) { + fields.push({ label: 'Agent', value: effectiveConfig.agent }); + } + if (effectiveConfig.customServiceName) { + fields.push({ label: 'Service Name', value: effectiveConfig.customServiceName }); + } + if (effectiveConfig.customLogGroupName) { + fields.push({ label: 'Log Group', value: effectiveConfig.customLogGroupName }); + } + fields.push( + { label: 'Evaluators', value: effectiveConfig.evaluators.join(', ') }, + { label: 'Sampling Rate', value: `${effectiveConfig.samplingRate}%` }, + { label: 'Enable on Deploy', value: effectiveConfig.enableOnCreate ? 'Yes' : 'No' } + ); + return fields; + }, [effectiveConfig]); + return ( @@ -127,6 +183,22 @@ export function AddOnlineEvalScreen({ /> )} + {isLogSourceStep && ( + + + {noAgentsError && ( + + No agents found in project. Add an agent first with `agentcore add agent`, or select External Agent. + + )} + + )} + {isAgentStep && ( )} + {isCustomServiceNameStep && ( + + + The service name configured in OTEL_RESOURCE_ATTRIBUTES for your external agent. This is the primary + identifier used to match log entries. + + wizard.goBack()} + schema={ServiceNameSchema} + /> + + )} + + {isCustomLogGroupNameStep && ( + + + The CloudWatch log group where your external agent sends logs. 
Typically follows the pattern: + /aws/bedrock-agentcore/runtimes/{''} + + wizard.goBack()} + schema={LogGroupNameSchema} + /> + + )} + {isEvaluatorsStep && ( )} - {isConfirmStep && ( - - )} + {isConfirmStep && } ); diff --git a/src/cli/tui/screens/online-eval/types.ts b/src/cli/tui/screens/online-eval/types.ts index 943eaee73..d081f1eed 100644 --- a/src/cli/tui/screens/online-eval/types.ts +++ b/src/cli/tui/screens/online-eval/types.ts @@ -2,20 +2,36 @@ // Online Eval Config Flow Types // ───────────────────────────────────────────────────────────────────────────── -export type AddOnlineEvalStep = 'name' | 'agent' | 'evaluators' | 'samplingRate' | 'enableOnCreate' | 'confirm'; +export type LogSourceType = 'project-agent' | 'external-agent'; + +export type AddOnlineEvalStep = + | 'name' + | 'logSource' + | 'agent' + | 'customServiceName' + | 'customLogGroupName' + | 'evaluators' + | 'samplingRate' + | 'enableOnCreate' + | 'confirm'; export interface AddOnlineEvalConfig { name: string; - agent: string; + agent?: string; evaluators: string[]; samplingRate: number; enableOnCreate: boolean; description?: string; + customLogGroupName?: string; + customServiceName?: string; } export const ONLINE_EVAL_STEP_LABELS: Record = { name: 'Name', + logSource: 'Source', agent: 'Agent', + customServiceName: 'Service', + customLogGroupName: 'Log Group', evaluators: 'Evaluators', samplingRate: 'Rate', enableOnCreate: 'Enable', diff --git a/src/cli/tui/screens/online-eval/useAddOnlineEvalWizard.ts b/src/cli/tui/screens/online-eval/useAddOnlineEvalWizard.ts index 0032469f2..91a3a74ef 100644 --- a/src/cli/tui/screens/online-eval/useAddOnlineEvalWizard.ts +++ b/src/cli/tui/screens/online-eval/useAddOnlineEvalWizard.ts @@ -1,18 +1,31 @@ -import type { AddOnlineEvalConfig, AddOnlineEvalStep } from './types'; +import type { AddOnlineEvalConfig, AddOnlineEvalStep, LogSourceType } from './types'; import { DEFAULT_SAMPLING_RATE } from './types'; import { useCallback, useState } from 'react'; 
-function getAllSteps(agentCount: number): AddOnlineEvalStep[] { +function getAllSteps(agentCount: number, logSource: LogSourceType): AddOnlineEvalStep[] { + if (logSource === 'external-agent') { + return [ + 'name', + 'logSource', + 'customServiceName', + 'customLogGroupName', + 'evaluators', + 'samplingRate', + 'enableOnCreate', + 'confirm', + ]; + } + // Project agent path — skip agent selection if only one agent if (agentCount <= 1) { - return ['name', 'evaluators', 'samplingRate', 'enableOnCreate', 'confirm']; + return ['name', 'logSource', 'evaluators', 'samplingRate', 'enableOnCreate', 'confirm']; } - return ['name', 'agent', 'evaluators', 'samplingRate', 'enableOnCreate', 'confirm']; + return ['name', 'logSource', 'agent', 'evaluators', 'samplingRate', 'enableOnCreate', 'confirm']; } function getDefaultConfig(): AddOnlineEvalConfig { return { name: '', - agent: '', + agent: undefined, evaluators: [], samplingRate: DEFAULT_SAMPLING_RATE, enableOnCreate: true, @@ -20,7 +33,8 @@ function getDefaultConfig(): AddOnlineEvalConfig { } export function useAddOnlineEvalWizard(agentCount: number) { - const allSteps = getAllSteps(agentCount); + const [logSource, setLogSourceState] = useState('project-agent'); + const allSteps = getAllSteps(agentCount, logSource); const [config, setConfig] = useState(getDefaultConfig); const [step, setStep] = useState(allSteps[0]!); @@ -48,6 +62,24 @@ export function useAddOnlineEvalWizard(agentCount: number) { [nextStep, setConfig, setStep] ); + const setLogSource = useCallback( + (source: LogSourceType) => { + setLogSourceState(source); + // Clear fields from the other path + if (source === 'external-agent') { + setConfig(c => ({ ...c, agent: undefined })); + } else { + setConfig(c => ({ ...c, customLogGroupName: undefined, customServiceName: undefined })); + } + // Steps will recalculate on next render; advance to the step after logSource + const newSteps = getAllSteps(agentCount, source); + const logSourceIdx = 
newSteps.indexOf('logSource'); + const next = newSteps[logSourceIdx + 1]; + if (next) setStep(next); + }, + [agentCount, setConfig, setStep] + ); + const setAgent = useCallback( (agent: string) => { setConfig(c => ({ ...c, agent })); @@ -57,6 +89,24 @@ export function useAddOnlineEvalWizard(agentCount: number) { [nextStep, setConfig, setStep] ); + const setCustomServiceName = useCallback( + (customServiceName: string) => { + setConfig(c => ({ ...c, customServiceName })); + const next = nextStep('customServiceName'); + if (next) setStep(next); + }, + [nextStep, setConfig, setStep] + ); + + const setCustomLogGroupName = useCallback( + (customLogGroupName: string) => { + setConfig(c => ({ ...c, customLogGroupName })); + const next = nextStep('customLogGroupName'); + if (next) setStep(next); + }, + [nextStep, setConfig, setStep] + ); + const setEvaluators = useCallback( (evaluators: string[]) => { setConfig(c => ({ ...c, evaluators })); @@ -85,18 +135,24 @@ export function useAddOnlineEvalWizard(agentCount: number) { ); const reset = useCallback(() => { + setLogSourceState('project-agent'); setConfig(getDefaultConfig()); - setStep(allSteps[0]!); - }, [allSteps, setConfig, setStep]); + const defaultSteps = getAllSteps(agentCount, 'project-agent'); + setStep(defaultSteps[0]!); + }, [agentCount, setConfig, setStep]); return { config, step, steps: allSteps, currentIndex, + logSource, goBack, setName, + setLogSource, setAgent, + setCustomServiceName, + setCustomLogGroupName, setEvaluators, setSamplingRate, setEnableOnCreate, diff --git a/src/schema/schemas/agentcore-project.ts b/src/schema/schemas/agentcore-project.ts index 8d0f35f1c..4d3af252f 100644 --- a/src/schema/schemas/agentcore-project.ts +++ b/src/schema/schemas/agentcore-project.ts @@ -32,7 +32,12 @@ export { export { EvaluationLevelSchema }; export type { MemoryStrategy, MemoryStrategyType } from './primitives/memory'; export type { OnlineEvalConfig } from './primitives/online-eval-config'; -export { 
OnlineEvalConfigSchema, OnlineEvalConfigNameSchema } from './primitives/online-eval-config'; +export { + OnlineEvalConfigSchema, + OnlineEvalConfigNameSchema, + LogGroupNameSchema, + ServiceNameSchema, +} from './primitives/online-eval-config'; export type { CodeBasedConfig, EvaluationLevel, @@ -319,8 +324,8 @@ export const AgentCoreProjectSpecSchema = z const evaluatorNames = new Set(spec.evaluators.map(e => e.name)); for (const config of spec.onlineEvalConfigs) { - // Validate agent reference - if (!agentNames.has(config.agent)) { + // Validate agent reference (only when agent is specified — external agents use custom log source) + if (config.agent && !agentNames.has(config.agent)) { ctx.addIssue({ code: z.ZodIssueCode.custom, message: `Online eval config "${config.name}" references unknown agent "${config.agent}"`, diff --git a/src/schema/schemas/primitives/__tests__/online-eval-config.test.ts b/src/schema/schemas/primitives/__tests__/online-eval-config.test.ts index e6e940948..cc855170a 100644 --- a/src/schema/schemas/primitives/__tests__/online-eval-config.test.ts +++ b/src/schema/schemas/primitives/__tests__/online-eval-config.test.ts @@ -1,4 +1,9 @@ -import { OnlineEvalConfigNameSchema, OnlineEvalConfigSchema } from '../online-eval-config'; +import { + LogGroupNameSchema, + OnlineEvalConfigNameSchema, + OnlineEvalConfigSchema, + ServiceNameSchema, +} from '../online-eval-config'; import { describe, expect, it } from 'vitest'; describe('OnlineEvalConfigNameSchema', () => { @@ -98,4 +103,86 @@ describe('OnlineEvalConfigSchema', () => { it('accepts config without description and enableOnCreate', () => { expect(OnlineEvalConfigSchema.safeParse(validConfig).success).toBe(true); }); + + // ── Custom log source (external agent) ────────────────────────── + + it('accepts config with custom log source fields and no agent', () => { + const config = { + type: 'OnlineEvaluationConfig' as const, + name: 'ExternalConfig', + evaluators: ['Builtin.GoalSuccessRate'], + 
samplingRate: 10, + customLogGroupName: '/aws/bedrock-agentcore/runtimes/my-external-agent', + customServiceName: 'my-external-service', + }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(true); + }); + + it('accepts config with both agent and custom log source fields', () => { + const config = { + ...validConfig, + customLogGroupName: '/custom/log-group', + customServiceName: 'custom-service', + }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(true); + }); + + it('rejects config with neither agent nor custom log source fields', () => { + const config = { + type: 'OnlineEvaluationConfig' as const, + name: 'NoSource', + evaluators: ['Builtin.GoalSuccessRate'], + samplingRate: 10, + }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(false); + }); + + it('rejects config with only customLogGroupName but no customServiceName', () => { + const config = { + type: 'OnlineEvaluationConfig' as const, + name: 'PartialCustom', + evaluators: ['Builtin.GoalSuccessRate'], + samplingRate: 10, + customLogGroupName: '/some/log-group', + }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(false); + }); + + it('rejects config with only customServiceName but no customLogGroupName', () => { + const config = { + type: 'OnlineEvaluationConfig' as const, + name: 'PartialCustom', + evaluators: ['Builtin.GoalSuccessRate'], + samplingRate: 10, + customServiceName: 'my-service', + }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(false); + }); +}); + +describe('LogGroupNameSchema', () => { + it('accepts valid log group names', () => { + expect(LogGroupNameSchema.safeParse('/aws/bedrock-agentcore/runtimes/abc123').success).toBe(true); + expect(LogGroupNameSchema.safeParse('aws/spans').success).toBe(true); + expect(LogGroupNameSchema.safeParse('/my/custom-log-group').success).toBe(true); + }); + + it('rejects empty string', () => { + expect(LogGroupNameSchema.safeParse('').success).toBe(false); + }); + + 
it('rejects names with invalid characters', () => { + expect(LogGroupNameSchema.safeParse('/log group with spaces').success).toBe(false); + }); +}); + +describe('ServiceNameSchema', () => { + it('accepts valid service names', () => { + expect(ServiceNameSchema.safeParse('my-service').success).toBe(true); + expect(ServiceNameSchema.safeParse('ProjectName_AgentName.DEFAULT').success).toBe(true); + }); + + it('rejects empty string', () => { + expect(ServiceNameSchema.safeParse('').success).toBe(false); + }); }); diff --git a/src/schema/schemas/primitives/online-eval-config.ts b/src/schema/schemas/primitives/online-eval-config.ts index 6dbc0787f..68b856b84 100644 --- a/src/schema/schemas/primitives/online-eval-config.ts +++ b/src/schema/schemas/primitives/online-eval-config.ts @@ -14,19 +14,61 @@ export const OnlineEvalConfigNameSchema = z 'Must begin with a letter and contain only alphanumeric characters and underscores (max 48 chars)' ); -export const OnlineEvalConfigSchema = z.object({ - name: OnlineEvalConfigNameSchema, - /** Agent name to monitor (must match a project agent) */ - agent: z.string().min(1, 'Agent name is required'), - /** Evaluator names (custom), Builtin.* IDs, or evaluator ARNs */ - evaluators: z.array(z.string().min(1)).min(1, 'At least one evaluator is required'), - /** Sampling rate as a percentage (0.01 to 100) */ - samplingRate: z.number().min(0.01).max(100), - /** Optional description for the online eval config */ - description: z.string().max(200).optional(), - /** Whether to enable execution on create (default: true) */ - enableOnCreate: z.boolean().optional(), - tags: TagsSchema.optional(), -}); +/** + * CloudWatch log group name validation. 
+ * @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/Working-with-log-groups-and-streams.html + */ +export const LogGroupNameSchema = z + .string() + .min(1, 'Log group name is required') + .max(512) + .regex( + /^[a-zA-Z0-9_\-/.*]+$/, + 'Log group name may contain alphanumeric characters, underscores, hyphens, forward slashes, dots, and asterisks' + ); + +export const ServiceNameSchema = z.string().min(1, 'Service name is required').max(256); + +export const OnlineEvalConfigSchema = z + .object({ + name: OnlineEvalConfigNameSchema, + /** Agent name to monitor (must match a project agent). Required unless customLogGroupName and customServiceName are provided. */ + agent: z.string().min(1, 'Agent name is required').optional(), + /** Evaluator names (custom), Builtin.* IDs, or evaluator ARNs */ + evaluators: z.array(z.string().min(1)).min(1, 'At least one evaluator is required'), + /** Sampling rate as a percentage (0.01 to 100) */ + samplingRate: z.number().min(0.01).max(100), + /** Optional description for the online eval config */ + description: z.string().max(200).optional(), + /** Whether to enable execution on create (default: true) */ + enableOnCreate: z.boolean().optional(), + /** Custom CloudWatch log group name for evaluating agents not hosted on AgentCore Runtime */ + customLogGroupName: LogGroupNameSchema.optional(), + /** Custom service name for evaluating agents not hosted on AgentCore Runtime */ + customServiceName: ServiceNameSchema.optional(), + tags: TagsSchema.optional(), + }) + .refine( + data => { + const hasAgent = data.agent !== undefined; + const hasCustom = data.customLogGroupName !== undefined && data.customServiceName !== undefined; + return hasAgent || hasCustom; + }, + { + message: + 'Either "agent" must be provided (for project agents) or both "customLogGroupName" and "customServiceName" (for external agents)', + } + ) + .refine( + data => { + // If one custom field is set, the other must also be set + const hasLogGroup = 
data.customLogGroupName !== undefined; + const hasServiceName = data.customServiceName !== undefined; + return hasLogGroup === hasServiceName; + }, + { + message: 'Both "customLogGroupName" and "customServiceName" must be provided together', + } + ); export type OnlineEvalConfig = z.infer; From 6aed6307ce71f7244f645d9c471ad934c7f38d88 Mon Sep 17 00:00:00 2001 From: notgitika Date: Tue, 24 Mar 2026 23:56:13 -0400 Subject: [PATCH 2/5] feat: support custom service name for on-demand evals of external agents Add --custom-service-name and --custom-log-group-name flags to `agentcore run evals` for evaluating agents not hosted on AgentCore Runtime. When provided, CloudWatch queries filter by service.name instead of cloud.resource_id, matching the external agent observability contract. --- .github/workflows/peer-dep-compat.yml | 29 ++ .../26498a12-f26a-43b7-b89d-131ac995aa62.json | 8 + .../43db174c-d800-4e28-8ac9-f1e39aa6aa7f.json | 8 + .../4971c61c-9a8f-4475-b164-105c601d2fba.json | 8 + .../4dd2cc49-8d18-4168-8460-acacf125e650.json | 8 + .../e9773b37-7fb9-4794-bd18-beca59e15738.json | 8 + ...00abd583-2ef0-45e8-9134-f395ce6d5bbe.jsonl | 13 + ...9423878a-39c1-4869-87f0-0f8a3910890b.jsonl | 16 + .omc/state/hud-state.json | 6 + .omc/state/hud-stdin-cache.json | 1 + .omc/state/idle-notif-cooldown.json | 3 + .omc/state/last-tool-error.json | 7 + .omc/state/mission-state.json | 217 ++++++++++++ .omc/state/subagent-tracking.json | 26 ++ docs/individual-memory-deployment-plan.md | 315 ++++++++++++++++++ docs/release-process.md | 129 +++++++ package/package.json | 167 ++++++++++ scripts/check-peer-dep-compat.mjs | 161 +++++++++ src/cli/commands/run/command.tsx | 12 +- .../eval/__tests__/run-eval.test.ts | 119 +++++++ src/cli/operations/eval/run-eval.ts | 98 +++++- src/cli/operations/eval/types.ts | 6 +- 22 files changed, 1353 insertions(+), 12 deletions(-) create mode 100644 .github/workflows/peer-dep-compat.yml create mode 100644 
.omc/sessions/26498a12-f26a-43b7-b89d-131ac995aa62.json create mode 100644 .omc/sessions/43db174c-d800-4e28-8ac9-f1e39aa6aa7f.json create mode 100644 .omc/sessions/4971c61c-9a8f-4475-b164-105c601d2fba.json create mode 100644 .omc/sessions/4dd2cc49-8d18-4168-8460-acacf125e650.json create mode 100644 .omc/sessions/e9773b37-7fb9-4794-bd18-beca59e15738.json create mode 100644 .omc/state/agent-replay-00abd583-2ef0-45e8-9134-f395ce6d5bbe.jsonl create mode 100644 .omc/state/agent-replay-9423878a-39c1-4869-87f0-0f8a3910890b.jsonl create mode 100644 .omc/state/hud-state.json create mode 100644 .omc/state/hud-stdin-cache.json create mode 100644 .omc/state/idle-notif-cooldown.json create mode 100644 .omc/state/last-tool-error.json create mode 100644 .omc/state/mission-state.json create mode 100644 .omc/state/subagent-tracking.json create mode 100644 docs/individual-memory-deployment-plan.md create mode 100644 docs/release-process.md create mode 100644 package/package.json create mode 100644 scripts/check-peer-dep-compat.mjs diff --git a/.github/workflows/peer-dep-compat.yml b/.github/workflows/peer-dep-compat.yml new file mode 100644 index 000000000..b3f07f45f --- /dev/null +++ b/.github/workflows/peer-dep-compat.yml @@ -0,0 +1,29 @@ +name: Peer Dependency Compatibility + +on: + push: + branches: [main] + paths: [package.json] + pull_request: + branches: [main] + paths: [package.json] + +jobs: + check-peer-deps: + name: Check Peer Dep Compatibility + runs-on: ubuntu-latest + timeout-minutes: 5 + + steps: + - uses: actions/checkout@v6 + + - uses: actions/setup-node@v6 + with: + node-version: 20.x + + - run: npm ci + + - name: Check peer dep compatibility + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: node scripts/check-peer-dep-compat.mjs diff --git a/.omc/sessions/26498a12-f26a-43b7-b89d-131ac995aa62.json b/.omc/sessions/26498a12-f26a-43b7-b89d-131ac995aa62.json new file mode 100644 index 000000000..4b184fbea --- /dev/null +++ 
b/.omc/sessions/26498a12-f26a-43b7-b89d-131ac995aa62.json @@ -0,0 +1,8 @@ +{ + "session_id": "26498a12-f26a-43b7-b89d-131ac995aa62", + "ended_at": "2026-03-29T01:49:15.010Z", + "reason": "other", + "agents_spawned": 3, + "agents_completed": 2, + "modes_used": [] +} \ No newline at end of file diff --git a/.omc/sessions/43db174c-d800-4e28-8ac9-f1e39aa6aa7f.json b/.omc/sessions/43db174c-d800-4e28-8ac9-f1e39aa6aa7f.json new file mode 100644 index 000000000..093c8dd1a --- /dev/null +++ b/.omc/sessions/43db174c-d800-4e28-8ac9-f1e39aa6aa7f.json @@ -0,0 +1,8 @@ +{ + "session_id": "43db174c-d800-4e28-8ac9-f1e39aa6aa7f", + "ended_at": "2026-03-27T19:13:10.029Z", + "reason": "clear", + "agents_spawned": 0, + "agents_completed": 0, + "modes_used": [] +} \ No newline at end of file diff --git a/.omc/sessions/4971c61c-9a8f-4475-b164-105c601d2fba.json b/.omc/sessions/4971c61c-9a8f-4475-b164-105c601d2fba.json new file mode 100644 index 000000000..d8dde0299 --- /dev/null +++ b/.omc/sessions/4971c61c-9a8f-4475-b164-105c601d2fba.json @@ -0,0 +1,8 @@ +{ + "session_id": "4971c61c-9a8f-4475-b164-105c601d2fba", + "ended_at": "2026-03-29T01:49:18.048Z", + "reason": "other", + "agents_spawned": 0, + "agents_completed": 0, + "modes_used": [] +} \ No newline at end of file diff --git a/.omc/sessions/4dd2cc49-8d18-4168-8460-acacf125e650.json b/.omc/sessions/4dd2cc49-8d18-4168-8460-acacf125e650.json new file mode 100644 index 000000000..e4b16aad4 --- /dev/null +++ b/.omc/sessions/4dd2cc49-8d18-4168-8460-acacf125e650.json @@ -0,0 +1,8 @@ +{ + "session_id": "4dd2cc49-8d18-4168-8460-acacf125e650", + "ended_at": "2026-04-02T20:47:39.078Z", + "reason": "other", + "agents_spawned": 0, + "agents_completed": 0, + "modes_used": [] +} \ No newline at end of file diff --git a/.omc/sessions/e9773b37-7fb9-4794-bd18-beca59e15738.json b/.omc/sessions/e9773b37-7fb9-4794-bd18-beca59e15738.json new file mode 100644 index 000000000..7a42ed679 --- /dev/null +++ 
b/.omc/sessions/e9773b37-7fb9-4794-bd18-beca59e15738.json @@ -0,0 +1,8 @@ +{ + "session_id": "e9773b37-7fb9-4794-bd18-beca59e15738", + "ended_at": "2026-04-09T17:29:57.919Z", + "reason": "clear", + "agents_spawned": 0, + "agents_completed": 0, + "modes_used": [] +} \ No newline at end of file diff --git a/.omc/state/agent-replay-00abd583-2ef0-45e8-9134-f395ce6d5bbe.jsonl b/.omc/state/agent-replay-00abd583-2ef0-45e8-9134-f395ce6d5bbe.jsonl new file mode 100644 index 000000000..48cb81342 --- /dev/null +++ b/.omc/state/agent-replay-00abd583-2ef0-45e8-9134-f395ce6d5bbe.jsonl @@ -0,0 +1,13 @@ +{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} +{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} +{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} +{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} +{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} +{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} +{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} +{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} +{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} +{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} +{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} +{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} +{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} diff --git a/.omc/state/agent-replay-9423878a-39c1-4869-87f0-0f8a3910890b.jsonl b/.omc/state/agent-replay-9423878a-39c1-4869-87f0-0f8a3910890b.jsonl new file mode 100644 index 000000000..ca30d8086 --- /dev/null +++ b/.omc/state/agent-replay-9423878a-39c1-4869-87f0-0f8a3910890b.jsonl @@ 
-0,0 +1,16 @@ +{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} +{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} +{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} +{"t":0,"agent":"a160d74","agent_type":"Explore","event":"agent_start","parent_mode":"none"} +{"t":0,"agent":"a740f6b","agent_type":"general-purpose","event":"agent_start","parent_mode":"none"} +{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} +{"t":0,"agent":"a160d74","agent_type":"Explore","event":"agent_stop","success":true,"duration_ms":208069} +{"t":0,"agent":"a740f6b","agent_type":"general-purpose","event":"agent_stop","success":true,"duration_ms":240447} +{"t":0,"agent":"system","event":"skill_invoked","skill_name":"commit-ready"} +{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} +{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} +{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} +{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} +{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} +{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} +{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} diff --git a/.omc/state/hud-state.json b/.omc/state/hud-state.json new file mode 100644 index 000000000..05f1a5a57 --- /dev/null +++ b/.omc/state/hud-state.json @@ -0,0 +1,6 @@ +{ + "timestamp": "2026-04-11T01:09:53.820Z", + "backgroundTasks": [], + "sessionStartTimestamp": "2026-04-10T22:43:56.788Z", + "sessionId": "00abd583-2ef0-45e8-9134-f395ce6d5bbe" +} \ No newline at end of file diff --git a/.omc/state/hud-stdin-cache.json b/.omc/state/hud-stdin-cache.json new file mode 100644 index 000000000..95305b0ac --- /dev/null +++ 
b/.omc/state/hud-stdin-cache.json @@ -0,0 +1 @@ +{"session_id":"00abd583-2ef0-45e8-9134-f395ce6d5bbe","transcript_path":"/Users/gitikavj/.claude/projects/-Volumes-workplace-agentcore-agentcore-gh/00abd583-2ef0-45e8-9134-f395ce6d5bbe.jsonl","cwd":"/Volumes/workplace/agentcore/agentcore-gh/agentcore-cli","model":{"id":"us.anthropic.claude-opus-4-6-v1","display_name":"Opus 4.6"},"workspace":{"current_dir":"/Volumes/workplace/agentcore/agentcore-gh/agentcore-cli","project_dir":"/Volumes/workplace/agentcore/agentcore-gh","added_dirs":["/Users/gitikavj/workplace/agentcore/agentcore-gh"]},"version":"2.1.101","output_style":{"name":"default"},"cost":{"total_cost_usd":6.130216400000001,"total_duration_ms":14421649,"total_api_duration_ms":681873,"total_lines_added":97,"total_lines_removed":13},"context_window":{"total_input_tokens":873,"total_output_tokens":31781,"context_window_size":200000,"current_usage":{"input_tokens":3,"output_tokens":181,"cache_creation_input_tokens":2040,"cache_read_input_tokens":81786},"used_percentage":42,"remaining_percentage":58},"exceeds_200k_tokens":false} \ No newline at end of file diff --git a/.omc/state/idle-notif-cooldown.json b/.omc/state/idle-notif-cooldown.json new file mode 100644 index 000000000..7bf99974d --- /dev/null +++ b/.omc/state/idle-notif-cooldown.json @@ -0,0 +1,3 @@ +{ + "lastSentAt": "2026-04-11T02:44:15.086Z" +} \ No newline at end of file diff --git a/.omc/state/last-tool-error.json b/.omc/state/last-tool-error.json new file mode 100644 index 000000000..43a653d86 --- /dev/null +++ b/.omc/state/last-tool-error.json @@ -0,0 +1,7 @@ +{ + "tool_name": "Read", + "tool_input_preview": "{\"file_path\":\"/Users/gitikavj/.claude/projects/-Volumes-workplace-agentcore-agentcore-gh/00abd583-2ef0-45e8-9134-f395ce6d5bbe/tool-results/b4w8ow5ys.txt\"}", + "error": "File content (31694 tokens) exceeds maximum allowed tokens (25000). 
Use offset and limit parameters to read specific portions of the file, or search for specific content instead of reading the whole file.", + "timestamp": "2026-04-11T02:44:45.598Z", + "retry_count": 4 +} \ No newline at end of file diff --git a/.omc/state/mission-state.json b/.omc/state/mission-state.json new file mode 100644 index 000000000..c6adf14d2 --- /dev/null +++ b/.omc/state/mission-state.json @@ -0,0 +1,217 @@ +{ + "updatedAt": "2026-04-10T20:10:12.773Z", + "missions": [ + { + "id": "session:1dd61909-07a8-4903-8592-a6de32ae7476:none", + "source": "session", + "name": "none", + "objective": "Session mission", + "createdAt": "2026-03-27T22:47:19.586Z", + "updatedAt": "2026-03-27T22:51:03.329Z", + "status": "done", + "workerCount": 2, + "taskCounts": { + "total": 2, + "pending": 0, + "blocked": 0, + "inProgress": 0, + "completed": 2, + "failed": 0 + }, + "agents": [ + { + "name": "code-reviewer:abd2c70", + "role": "code-reviewer", + "ownership": "abd2c70f23f3f4c90", + "status": "done", + "currentStep": null, + "latestUpdate": "completed", + "completedSummary": null, + "updatedAt": "2026-03-27T22:51:03.329Z" + }, + { + "name": "superpowers:code-reviewer:acaa328", + "role": "superpowers:code-reviewer", + "ownership": "acaa3288832d1e52d", + "status": "done", + "currentStep": null, + "latestUpdate": "completed", + "completedSummary": null, + "updatedAt": "2026-03-27T22:50:33.166Z" + } + ], + "timeline": [ + { + "id": "session-start:abd2c70f23f3f4c90:2026-03-27T22:47:19.586Z", + "at": "2026-03-27T22:47:19.586Z", + "kind": "update", + "agent": "code-reviewer:abd2c70", + "detail": "started code-reviewer:abd2c70", + "sourceKey": "session-start:abd2c70f23f3f4c90" + }, + { + "id": "session-start:acaa3288832d1e52d:2026-03-27T22:47:29.187Z", + "at": "2026-03-27T22:47:29.187Z", + "kind": "update", + "agent": "superpowers:code-reviewer:acaa328", + "detail": "started superpowers:code-reviewer:acaa328", + "sourceKey": "session-start:acaa3288832d1e52d" + }, + { + "id": 
"session-stop:acaa3288832d1e52d:2026-03-27T22:50:33.166Z", + "at": "2026-03-27T22:50:33.166Z", + "kind": "completion", + "agent": "superpowers:code-reviewer:acaa328", + "detail": "completed", + "sourceKey": "session-stop:acaa3288832d1e52d" + }, + { + "id": "session-stop:hook-agent-2163411d-aa52-477b-9545-c69759d86e3e:2026-03-27T22:51:03.329Z", + "at": "2026-03-27T22:51:03.329Z", + "kind": "completion", + "agent": "code-reviewer:abd2c70", + "detail": "completed", + "sourceKey": "session-stop:hook-agent-2163411d-aa52-477b-9545-c69759d86e3e" + } + ] + }, + { + "id": "session:9423878a-39c1-4869-87f0-0f8a3910890b:none", + "source": "session", + "name": "none", + "objective": "Session mission", + "createdAt": "2026-04-10T17:56:19.252Z", + "updatedAt": "2026-04-10T20:10:12.773Z", + "status": "done", + "workerCount": 2, + "taskCounts": { + "total": 2, + "pending": 0, + "blocked": 0, + "inProgress": 0, + "completed": 2, + "failed": 0 + }, + "agents": [ + { + "name": "Explore:a160d74", + "role": "Explore", + "ownership": "a160d74768541cb40", + "status": "done", + "currentStep": null, + "latestUpdate": "completed", + "completedSummary": null, + "updatedAt": "2026-04-10T20:10:12.773Z" + }, + { + "name": "general-purpose:a740f6b", + "role": "general-purpose", + "ownership": "a740f6b71fe059953", + "status": "done", + "currentStep": null, + "latestUpdate": "completed", + "completedSummary": null, + "updatedAt": "2026-04-10T18:00:19.760Z" + } + ], + "timeline": [ + { + "id": "session-start:a160d74768541cb40:2026-04-10T17:56:19.252Z", + "at": "2026-04-10T17:56:19.252Z", + "kind": "update", + "agent": "Explore:a160d74", + "detail": "started Explore:a160d74", + "sourceKey": "session-start:a160d74768541cb40" + }, + { + "id": "session-start:a740f6b71fe059953:2026-04-10T17:56:19.313Z", + "at": "2026-04-10T17:56:19.313Z", + "kind": "update", + "agent": "general-purpose:a740f6b", + "detail": "started general-purpose:a740f6b", + "sourceKey": "session-start:a740f6b71fe059953" + }, + { + 
"id": "session-stop:hook-agent-3e13f9d4-5816-419c-be51-de7316bd102b:2026-04-10T17:57:04.575Z", + "at": "2026-04-10T17:57:04.575Z", + "kind": "completion", + "agent": "Explore:a160d74", + "detail": "completed", + "sourceKey": "session-stop:hook-agent-3e13f9d4-5816-419c-be51-de7316bd102b" + }, + { + "id": "session-stop:a160d74768541cb40:2026-04-10T17:59:47.321Z", + "at": "2026-04-10T17:59:47.321Z", + "kind": "completion", + "agent": "Explore:a160d74", + "detail": "completed", + "sourceKey": "session-stop:a160d74768541cb40" + }, + { + "id": "session-stop:a740f6b71fe059953:2026-04-10T18:00:19.760Z", + "at": "2026-04-10T18:00:19.760Z", + "kind": "completion", + "agent": "general-purpose:a740f6b", + "detail": "completed", + "sourceKey": "session-stop:a740f6b71fe059953" + }, + { + "id": "session-stop:hook-agent-add781eb-f858-4e4f-b3aa-78dfcd6f623c:2026-04-10T18:00:33.874Z", + "at": "2026-04-10T18:00:33.874Z", + "kind": "completion", + "agent": "Explore:a160d74", + "detail": "completed", + "sourceKey": "session-stop:hook-agent-add781eb-f858-4e4f-b3aa-78dfcd6f623c" + }, + { + "id": "session-stop:hook-agent-8d2570bf-4e8f-4362-86a5-dbce42976330:2026-04-10T18:01:08.550Z", + "at": "2026-04-10T18:01:08.550Z", + "kind": "completion", + "agent": "Explore:a160d74", + "detail": "completed", + "sourceKey": "session-stop:hook-agent-8d2570bf-4e8f-4362-86a5-dbce42976330" + }, + { + "id": "session-stop:hook-agent-d5068281-907b-4984-9384-61fd5e52a8a4:2026-04-10T18:27:51.248Z", + "at": "2026-04-10T18:27:51.248Z", + "kind": "completion", + "agent": "Explore:a160d74", + "detail": "completed", + "sourceKey": "session-stop:hook-agent-d5068281-907b-4984-9384-61fd5e52a8a4" + }, + { + "id": "session-stop:hook-agent-adc45b09-b559-4c45-9f38-28bb13f97f12:2026-04-10T19:39:52.418Z", + "at": "2026-04-10T19:39:52.418Z", + "kind": "completion", + "agent": "Explore:a160d74", + "detail": "completed", + "sourceKey": "session-stop:hook-agent-adc45b09-b559-4c45-9f38-28bb13f97f12" + }, + { + "id": 
"session-stop:hook-agent-9f572f32-9c3e-4256-b0a4-25aeb14977b0:2026-04-10T20:07:41.431Z", + "at": "2026-04-10T20:07:41.431Z", + "kind": "completion", + "agent": "Explore:a160d74", + "detail": "completed", + "sourceKey": "session-stop:hook-agent-9f572f32-9c3e-4256-b0a4-25aeb14977b0" + }, + { + "id": "session-stop:hook-agent-19bb6986-af41-4f9f-a108-38b1af29c6a1:2026-04-10T20:08:48.444Z", + "at": "2026-04-10T20:08:48.444Z", + "kind": "completion", + "agent": "Explore:a160d74", + "detail": "completed", + "sourceKey": "session-stop:hook-agent-19bb6986-af41-4f9f-a108-38b1af29c6a1" + }, + { + "id": "session-stop:hook-agent-1e07eac7-0203-4a54-8173-c4ade4181f37:2026-04-10T20:10:12.773Z", + "at": "2026-04-10T20:10:12.773Z", + "kind": "completion", + "agent": "Explore:a160d74", + "detail": "completed", + "sourceKey": "session-stop:hook-agent-1e07eac7-0203-4a54-8173-c4ade4181f37" + } + ] + } + ] +} \ No newline at end of file diff --git a/.omc/state/subagent-tracking.json b/.omc/state/subagent-tracking.json new file mode 100644 index 000000000..1e2e9b0aa --- /dev/null +++ b/.omc/state/subagent-tracking.json @@ -0,0 +1,26 @@ +{ + "agents": [ + { + "agent_id": "a160d74768541cb40", + "agent_type": "Explore", + "started_at": "2026-04-10T17:56:19.252Z", + "parent_mode": "none", + "status": "completed", + "completed_at": "2026-04-10T17:59:47.321Z", + "duration_ms": 208069 + }, + { + "agent_id": "a740f6b71fe059953", + "agent_type": "general-purpose", + "started_at": "2026-04-10T17:56:19.313Z", + "parent_mode": "none", + "status": "completed", + "completed_at": "2026-04-10T18:00:19.760Z", + "duration_ms": 240447 + } + ], + "total_spawned": 1, + "total_completed": 2, + "total_failed": 0, + "last_updated": "2026-04-11T02:44:58.899Z" +} \ No newline at end of file diff --git a/docs/individual-memory-deployment-plan.md b/docs/individual-memory-deployment-plan.md new file mode 100644 index 000000000..c51918223 --- /dev/null +++ b/docs/individual-memory-deployment-plan.md @@ -0,0 +1,315 @@ 
+# Individual Memory Deployment — CLI Plan + +## Problem + +The CLI currently blocks deployment when no agents are defined: + +``` +// preflight.ts:83-93 +if (!projectSpec.agents || projectSpec.agents.length === 0) { + // ... only allows through if isTeardownDeploy + throw new Error( + 'No agents defined in project. Add at least one agent with "agentcore add agent" before deploying.' + ); +} +``` + +A user cannot `agentcore create --no-agent`, then `agentcore add memory`, then `agentcore deploy`. The schema already +supports top-level memories without agents, and the CDK constructs handle it — the CLI just needs to get out of the way. + +## User Flow (Target State) + +```bash +agentcore create --no-agent --name my-memory-project +cd my-memory-project +agentcore add memory +agentcore deploy +# => Deploys only memory resources, no agents +# => User can later: agentcore add agent && agentcore deploy +``` + +--- + +## Changes Required + +### 1. Update preflight validation to allow memory-only deployments + +**File:** `src/cli/operations/deploy/preflight.ts` + +The current check at line 83 rejects any project with zero agents (unless it's a teardown). Change this to allow +deployment when _any_ deployable resources exist (agents OR memories OR credentials with identity providers). + +```typescript +// OLD +if (!projectSpec.agents || projectSpec.agents.length === 0) { + // ... teardown check ... + throw new Error('No agents defined in project...'); +} + +// NEW +const hasDeployableResources = (projectSpec.agents?.length ?? 0) > 0 || (projectSpec.memories?.length ?? 0) > 0; + +if (!hasDeployableResources) { + let hasExistingStack = false; + try { + const deployedState = await configIO.readDeployedState(); + hasExistingStack = Object.keys(deployedState.targets).length > 0; + } catch { + // No deployed state file + } + if (!hasExistingStack) { + throw new Error( + 'No resources defined in project. 
Add an agent with "agentcore add agent" ' + + 'or a memory with "agentcore add memory" before deploying.' + ); + } + isTeardownDeploy = true; +} +``` + +Also skip `validateRuntimeNames()` and `validateContainerAgents()` when there are no agents (they already handle empty +arrays, but making it explicit is cleaner). + +### 2. Parse memory outputs from CloudFormation stack + +**File:** `src/cli/cloudformation/outputs.ts` + +Add a `parseMemoryOutputs` function alongside `parseAgentOutputs`: + +```typescript +export function parseMemoryOutputs(outputs: StackOutputs, memoryNames: string[]): Record { + const memories: Record = {}; + + // Map PascalCase memory names to original names + const memoryIdMap = new Map(memoryNames.map(name => [toPascalId(name), name])); + + const outputsByMemory: Record = {}; + + // Match pattern: ApplicationMemory{MemoryName}Memory{Id|Arn}Output + const outputPattern = /^ApplicationMemory(.+?)Memory(Id|Arn)Output/; + + for (const [key, value] of Object.entries(outputs)) { + const match = outputPattern.exec(key); + if (!match) continue; + + const logicalMemory = match[1]; + const outputType = match[2]; + if (!logicalMemory || !outputType) continue; + + const memoryName = memoryIdMap.get(logicalMemory) ?? logicalMemory; + outputsByMemory[memoryName] ??= {}; + + if (outputType === 'Id') { + outputsByMemory[memoryName].memoryId = value; + } else if (outputType === 'Arn') { + outputsByMemory[memoryName].memoryArn = value; + } + } + + for (const [memoryName, memoryOutputs] of Object.entries(outputsByMemory)) { + if (memoryOutputs.memoryId && memoryOutputs.memoryArn) { + memories[memoryName] = { + memoryId: memoryOutputs.memoryId, + memoryArn: memoryOutputs.memoryArn, + }; + } + } + + return memories; +} +``` + +### 3. 
Update `buildDeployedState` to include memory state + +**File:** `src/cli/cloudformation/outputs.ts` + +```typescript +export function buildDeployedState( + targetName: string, + stackName: string, + agents: Record, + existingState?: DeployedState, + identityKmsKeyArn?: string, + memories?: Record // NEW +): DeployedState { + const targetState: TargetDeployedState = { + resources: { + agents: Object.keys(agents).length > 0 ? agents : undefined, + memories: memories && Object.keys(memories).length > 0 ? memories : undefined, + stackName, + identityKmsKeyArn, + }, + }; + // ... +} +``` + +### 4. Update deploy action to parse and persist memory state + +**File:** `src/cli/commands/deploy/actions.ts` + +In `handleDeploy()`, after deployment succeeds, parse memory outputs alongside agent outputs: + +```typescript +// Get stack outputs and persist state +startStep('Persist deployment state'); +const outputs = await getStackOutputs(target.region, stackName); + +const agentNames = context.projectSpec.agents.map(a => a.name); +const agents = parseAgentOutputs(outputs, agentNames, stackName); + +const memoryNames = (context.projectSpec.memories ?? []).map(m => m.name); +const memories = parseMemoryOutputs(outputs, memoryNames); + +const existingState = await configIO.readDeployedState().catch(() => undefined); +const deployedState = buildDeployedState(target.name, stackName, agents, existingState, identityKmsKeyArn, memories); +await configIO.writeDeployedState(deployedState); +``` + +### 5. Update `nextSteps` to be context-aware + +**File:** `src/cli/commands/deploy/actions.ts` + +When only memories are deployed (no agents), `agentcore invoke` doesn't make sense. Make next steps conditional: + +```typescript +const hasAgents = context.projectSpec.agents.length > 0; +const nextSteps = hasAgents ? ['agentcore invoke', 'agentcore status'] : ['agentcore add agent', 'agentcore status']; +``` + +### 6. 
Update `agentcore status` to show memory resources + +**File:** `src/cli/commands/status/` (command handler) + +The status command should display deployed memory resources. When checking deployed state, also show memory IDs/ARNs. +This is an additive change — show memory info when `resources.memories` exists in deployed state. + +### 7. Update TUI deploy screen for memory-only feedback + +**File:** `src/cli/commands/deploy/` (TUI components) + +The TUI deploy screen should show appropriate messaging when deploying memory-only: + +- Progress steps still apply (validate, build, synth, deploy) +- Success message should mention memories deployed, not just agents +- The "invoke" suggestion should be conditional + +### 8. Update deployed-state schema (mirror CDK changes) + +**File:** `src/schema/schemas/deployed-state.ts` + +Add the same `MemoryDeployedState` schema as the CDK package (schemas are duplicated across packages per CLAUDE.md): + +```typescript +export const MemoryDeployedStateSchema = z.object({ + memoryId: z.string().min(1), + memoryArn: z.string().min(1), +}); + +export type MemoryDeployedState = z.infer<typeof MemoryDeployedStateSchema>; + +// Update DeployedResourceStateSchema +export const DeployedResourceStateSchema = z.object({ + agents: z.record(z.string(), AgentCoreDeployedStateSchema).optional(), + memories: z.record(z.string(), MemoryDeployedStateSchema).optional(), // NEW + mcp: McpDeployedStateSchema.optional(), + externallyManaged: ExternallyManagedStateSchema.optional(), + stackName: z.string().optional(), + identityKmsKeyArn: z.string().optional(), +}); +``` + +### 9. Update `agentcore create --no-agent` flow + +**File:** `src/cli/commands/create/action.ts` + +Currently `--no-agent` creates a project with empty arrays. This already works. But the messaging after create should +suggest `agentcore add memory` as a valid next step (not just `agentcore add agent`). + +### 10.
Consider: Allow `agentcore add memory` to prompt for deployment + +This is optional/future — after adding a memory, the CLI could suggest `agentcore deploy` if the user has a deployment +target configured. Currently it only suggests this after `add agent`. + +--- + +## Files to Modify + +| File | Change | Effort | +| ---------------------------------------- | --------------------------------------------------------- | ------- | +| `src/cli/operations/deploy/preflight.ts` | Allow memory-only deploys | Small | +| `src/cli/cloudformation/outputs.ts` | Add `parseMemoryOutputs`, update `buildDeployedState` | Medium | +| `src/cli/commands/deploy/actions.ts` | Parse memory outputs, conditional next steps | Small | +| `src/schema/schemas/deployed-state.ts` | Add `MemoryDeployedState`, update `DeployedResourceState` | Small | +| `src/schema/index.ts` | Export new types | Trivial | +| `src/cli/commands/status/` | Show memory resources in status | Small | +| `src/cli/commands/deploy/` (TUI) | Context-aware messaging | Small | + +## Files NOT changed + +- `src/cli/operations/memory/create-memory.ts` — already works correctly +- `src/cli/operations/memory/generate-memory-files.ts` — only relevant when agents exist +- `src/cli/commands/create/` — `--no-agent` scaffolding already works (post-create messaging aside; see §9) +- `src/assets/cdk/bin/cdk.ts` — CDK entry point doesn't need changes +- `src/assets/cdk/lib/cdk-stack.ts` — `AgentCoreApplication` already handles empty agents + +--- + +## Testing + +1. **E2E: Memory-only deploy** + + ```bash + agentcore create --no-agent --name memtest + cd memtest + agentcore add memory # add a short-term memory + agentcore deploy -y + # Verify: stack created, memory resources exist, deployed-state.json has memories + ``` + +2. **E2E: Memory-only then add agent** + + ```bash + # ... after memory-only deploy ...
+ agentcore add agent + agentcore deploy -y + # Verify: stack updated, both agent and memory in deployed-state.json + # Memory still accessible, no orphaned resources + ``` + +3. **Unit test: Preflight allows memory-only** + - Mock project with `agents: [], memories: [{ ... }]` + - Verify `validateProject()` does NOT throw + - Verify `isTeardownDeploy` is `false` + +4. **Unit test: Preflight still blocks empty projects** + - Mock project with `agents: [], memories: []` + - Verify `validateProject()` throws appropriate error + +5. **Unit test: parseMemoryOutputs** + - Mock CloudFormation outputs with memory patterns + - Verify correct parsing into `MemoryDeployedState` + +6. **Unit test: buildDeployedState with memories** + - Verify deployed state includes both agents and memories sections + +7. **Snapshot tests** — May need updating if CDK template assets change + +--- + +## Rollout Considerations + +- **Backwards compatibility**: Existing projects with agents + memories continue to work unchanged. The deployed-state + schema change is additive (new optional `memories` field). +- **Schema sync**: `deployed-state.ts` changes must be reflected in both `agentcore-cli` and + `agentcore-l3-cdk-constructs` packages. +- **CDK package dependency**: The CDK package needs per-memory outputs before the CLI can parse them. Ship CDK changes + first or together. + +--- + +## Complexity Assessment + +**Medium.** The core change (preflight validation) is trivial. The supporting work (output parsing, state tracking, UX +messaging) requires touching several files but each change is small and well-contained. No architectural changes needed +— the design already supports this, we just need to remove the artificial gate and add plumbing. 
diff --git a/docs/release-process.md b/docs/release-process.md new file mode 100644 index 000000000..c7e8e3c17 --- /dev/null +++ b/docs/release-process.md @@ -0,0 +1,129 @@ +# Release Process + +This document describes the release process for both AgentCore packages. Releases are always done **CDK first, then +CLI**, since the CLI depends on `@aws/agentcore-cdk`. + +## Release Order + +1. **`@aws/agentcore-cdk`** (CDK L3 Constructs) +2. **`@aws/agentcore`** (CLI) + +## Overview + +Both packages use a GitHub Actions `workflow_dispatch` workflow with the same four-stage pipeline: + +1. **Prepare Release** — bump version, update changelog, open a PR to `main` +2. **Test and Build** — lint, typecheck, build, test on the release branch +3. **Release Approval** — manual approval gate in a GitHub Environment +4. **Publish to npm** — publish, tag, and create a GitHub Release + +The workflow must be triggered from the `main` branch. + +--- + +## CDK L3 Constructs (`@aws/agentcore-cdk`) + +**Workflow:** `agentcore-l3-cdk-constructs/.github/workflows/release.yml` + +### Inputs + +| Input | Options | Notes | +| ---------------- | ------------------------------------------------ | ----------------------------------------------- | +| `bump_type` | `alpha`, `patch`, `minor`, `major`, `prerelease` | Required | +| `changelog` | free text | Optional — auto-generates from commits if empty | +| `prerelease_tag` | e.g. `alpha`, `beta`, `rc` | Only used with `prerelease` bump type | + +### Version bumping + +Runs `npx tsx scripts/bump-version.ts <bump_type>` which updates `package.json`, `package-lock.json`, and +`CHANGELOG.md`.
+ +### Pipeline details + +| Stage | Environment | Notes | +| ---------------- | -------------- | -------------------------------------------------------------------------------------------------------------- | +| Prepare Release | — | Creates `release/v<version>` branch and PR against `main` | +| Test and Build | — | Runs lint, typecheck, build; uploads `dist/` and tarball as artifacts | +| Release Approval | `npm-approval` | Manual approval required | +| Publish to npm | `npm` | Uses `NPM_SECRET` token; checks version doesn't already exist on npm; polls npm for availability after publish | + +### Auth + +Uses token-based npm auth via the `NPM_SECRET` repository secret. + +--- + +## CLI (`@aws/agentcore`) + +**Workflow:** `agentcore-cli/.github/workflows/release.yml` + +### Inputs + +| Input | Options | Notes | +| ---------------- | ------------------------------------------------------------------- | ----------------------------------------------- | +| `bump_type` | `preview`, `preview-major`, `patch`, `minor`, `major`, `prerelease` | Required | +| `changelog` | free text | Optional — auto-generates from commits if empty | +| `prerelease_tag` | e.g. `alpha`, `beta`, `rc` | Only used with `prerelease` bump type | + +### Version bumping + +Same approach as CDK — runs `npx tsx scripts/bump-version.ts <bump_type>`. The CLI additionally supports `preview` and +`preview-major` bump types for the `0.x.y-preview.N.M` versioning scheme.
+ +### Pipeline details + +| Stage | Environment | Notes | +| ---------------- | ---------------------- | -------------------------------------------------------------------------------------------------------- | +| Prepare Release | — | Creates `release/v<version>` branch and PR against `main` | +| Test and Build | — | Runs lint, typecheck, build, **unit tests**; also configures git + installs `uv` for Python test support | +| Release Approval | `npm-publish-approval` | Manual approval required | +| Publish to npm | `npm-publish` | Uses OIDC trusted publishing (no npm token needed); publishes with `--provenance --tag latest` | + +### Auth + +Uses GitHub OIDC trusted publishing — no `NPM_TOKEN` or secret needed. Requires `id-token: write` permission and npm >= +11.5.1. + +--- + +## Step-by-step: How to cut a release + +### 1. Release CDK L3 Constructs + +1. Go to **Actions > Release** in the `agentcore-l3-cdk-constructs` repo. +2. Click **Run workflow** from `main`. +3. Select `bump_type` (e.g. `alpha` for pre-GA, `patch`/`minor`/`major` for GA). +4. Optionally provide a `changelog` message. +5. Wait for the PR to be created on `release/v<version>`. +6. Review the PR — verify CHANGELOG.md and version numbers. +7. Merge the PR to `main`. +8. Approve the deployment in the `npm-approval` environment. +9. Verify the package appears on npm: `npm view @aws/agentcore-cdk@<version>`. + +### 2. Release CLI + +1. If the CLI depends on the new CDK version, update the dependency in `agentcore-cli/package.json` first and merge that + to `main`. +2. Go to **Actions > Release** in the `agentcore-cli` repo. +3. Click **Run workflow** from `main`. +4. Select `bump_type` (e.g. `preview` for pre-GA, `patch`/`minor`/`major` for GA). +5. Optionally provide a `changelog` message. +6. Wait for the PR to be created on `release/v<version>`. +7. Review the PR — verify CHANGELOG.md and version numbers. +8. Merge the PR to `main`. +9. Approve the deployment in the `npm-publish-approval` environment. +10.
Verify: `npm view @aws/agentcore@`. + +--- + +## Key differences between the two workflows + +| | CDK L3 Constructs | CLI | +| ---------------- | -------------------------------------------------- | ------------------------------------------------------------------- | +| **Package** | `@aws/agentcore-cdk` | `@aws/agentcore` | +| **npm auth** | `NPM_SECRET` token | OIDC trusted publishing | +| **Approval env** | `npm-approval` | `npm-publish-approval` | +| **Publish env** | `npm` | `npm-publish` | +| **Bump types** | `alpha`, `patch`, `minor`, `major`, `prerelease` | `preview`, `preview-major`, `patch`, `minor`, `major`, `prerelease` | +| **Extra checks** | Version existence check + npm availability polling | Installs `uv` for Python; runs unit tests in CI | +| **PR token** | `PAT_TOKEN` secret | Default `github.token` | diff --git a/package/package.json b/package/package.json new file mode 100644 index 000000000..0048d2aaa --- /dev/null +++ b/package/package.json @@ -0,0 +1,167 @@ +{ + "name": "@aws/evo-pb-cli", + "version": "0.7.1-evo-pb-cli.20260408", + "description": "CLI for Amazon Bedrock AgentCore", + "license": "Apache-2.0", + "repository": { + "type": "git", + "url": "https://github.com/aws/agentcore-cli.git" + }, + "homepage": "https://github.com/aws/agentcore-cli", + "bugs": { + "url": "https://github.com/aws/agentcore-cli/issues" + }, + "keywords": [ + "aws", + "amazon", + "bedrock", + "agentcore", + "cli", + "agents", + "ai", + "cdk", + "langchain", + "langgraph", + "openai", + "anthropic", + "google-adk", + "strands" + ], + "main": "./dist/index.js", + "types": "./dist/index.d.ts", + "bin": { + "agentcore": "dist/cli/index.mjs" + }, + "exports": { + ".": { + "types": "./dist/index.d.ts", + "require": "./dist/index.js" + } + }, + "files": [ + "dist", + "scripts", + "!dist/mcp-harness" + ], + "scripts": { + "postinstall": "node scripts/check-old-cli.mjs", + "build": "npm run build:lib && npm run build:cli && npm run build:assets", + "build:schema": "node 
scripts/generate-schema.mjs && prettier --write schemas/", + "build:lib": "tsc -p tsconfig.build.json", + "build:cli": "node esbuild.config.mjs", + "build:assets": "node scripts/copy-assets.mjs", + "build:harness": "BUILD_HARNESS=1 node esbuild.config.mjs", + "cli": "npx tsx src/cli/index.ts", + "typecheck": "tsc --noEmit", + "lint": "eslint src/", + "lint:fix": "eslint src/ --fix", + "format": "prettier --write .", + "format:check": "prettier --check .", + "secrets:check": "secretlint '**/*'", + "security:audit": "npm audit --audit-level=high --omit=dev", + "clean": "node -e \"require('fs').rmSync('dist', {recursive: true, force: true})\"", + "prepare": "husky", + "test": "vitest run --project unit", + "test:all": "vitest run", + "test:watch": "vitest --project unit", + "test:integ": "vitest run --project integ", + "test:unit": "vitest run --project unit --coverage", + "test:e2e": "vitest run --project e2e", + "test:update-snapshots": "vitest run --project unit --update", + "test:tui": "npm run build:harness && vitest run --project tui", + "bundle": "node scripts/bundle.mjs" + }, + "dependencies": { + "@aws-cdk/toolkit-lib": "^1.16.0", + "@aws-sdk/client-application-signals": "^3.1003.0", + "@aws-sdk/client-bedrock": "^3.1012.0", + "@aws-sdk/client-bedrock-agent": "^3.1012.0", + "@aws-sdk/client-bedrock-agentcore": "^3.1020.0", + "@aws-sdk/client-bedrock-agentcore-control": "^3.1020.0", + "@aws-sdk/client-bedrock-runtime": "^3.893.0", + "@aws-sdk/client-cloudformation": "^3.893.0", + "@aws-sdk/client-cloudwatch-logs": "^3.893.0", + "@aws-sdk/client-iam": "^3.1025.0", + "@aws-sdk/client-resource-groups-tagging-api": "^3.893.0", + "@aws-sdk/client-s3": "^3.1012.0", + "@aws-sdk/client-sts": "^3.893.0", + "@aws-sdk/client-xray": "^3.1003.0", + "@aws-sdk/credential-providers": "^3.893.0", + "@commander-js/extra-typings": "^14.0.0", + "@smithy/shared-ini-file-loader": "^4.4.2", + "commander": "^14.0.2", + "dotenv": "^17.2.3", + "fflate": "^0.8.2", + "handlebars": 
"^4.7.8", + "ink": "^6.6.0", + "ink-link": "5.0.0", + "ink-spinner": "^5.0.0", + "js-yaml": "^4.1.1", + "react": "^19.2.3", + "yaml": "^2.8.3", + "zod": "^4.3.5" + }, + "peerDependencies": { + "aws-cdk-lib": "^2.243.0", + "constructs": "^10.0.0" + }, + "devDependencies": { + "@aws-sdk/client-cognito-identity-provider": "^3.1018.0", + "@eslint/js": "^9.39.2", + "@modelcontextprotocol/sdk": "^1.0.0", + "@secretlint/secretlint-rule-preset-recommend": "^11.3.0", + "@trivago/prettier-plugin-sort-imports": "^6.0.2", + "@types/js-yaml": "^4.0.9", + "@types/node": "^25.0.3", + "@types/react": "^19.2.7", + "@typescript-eslint/eslint-plugin": "^8.50.0", + "@typescript-eslint/parser": "^8.50.0", + "@vitest/coverage-v8": "^4.0.18", + "@xterm/headless": "^6.0.0", + "aws-cdk-lib": "^2.243.0", + "constructs": "^10.4.4", + "esbuild": "^0.27.2", + "eslint": "^9.39.4", + "eslint-config-prettier": "^10.1.8", + "eslint-import-resolver-typescript": "^4.4.4", + "eslint-plugin-import": "^2.32.0", + "eslint-plugin-react": "^7.37.5", + "eslint-plugin-react-hooks": "^7.0.1", + "eslint-plugin-react-refresh": "^0.5.2", + "eslint-plugin-security": "^4.0.0", + "husky": "^9.1.7", + "ink-testing-library": "^4.0.0", + "lint-staged": "^16.2.7", + "node-pty": "^1.1.0", + "prettier": "^3.7.4", + "secretlint": "^11.3.0", + "tsx": "^4.21.0", + "typescript": "^5", + "typescript-eslint": "^8.50.1", + "vitest": "^4.0.18" + }, + "overridesComments": { + "minimatch": "GHSA-7r86-cg39-jmmj, GHSA-23c5-xmqv-rm74: minimatch 10.0.0-10.2.2 has ReDoS vulnerabilities. Multiple transitive deps (eslint, typescript-eslint, eslint-plugin-import, eslint-plugin-react, prettier-plugin-sort-imports, aws-cdk-lib) pin older versions. Remove this override once upstream packages update their minimatch dependency to >=10.2.3.", + "fast-xml-parser": "GHSA-8gc5-j5rx-235r, GHSA-jp2q-39xq-3w4g: fast-xml-parser <=5.5.6 has entity expansion bypass (CVE-2026-33036, CVE-2026-33349). Transitive via @aws-sdk/xml-builder. 
Remove once @aws-sdk updates to fast-xml-parser >=5.5.7.", + "@aws-sdk/xml-builder": "aws/aws-sdk-js-v3#7867: @aws-sdk/xml-builder <3.972.14 does not configure maxTotalExpansions on fast-xml-parser, causing 'Entity expansion limit exceeded' on large CloudFormation responses. Remove once @aws-sdk/client-* deps are bumped past 3.972.14." + }, + "overrides": { + "minimatch": "10.2.4", + "fast-xml-parser": "5.5.7", + "@aws-sdk/xml-builder": "3.972.15" + }, + "engines": { + "node": ">=20" + }, + "lint-staged": { + "*.{ts,tsx}": [ + "eslint --max-warnings=10" + ], + "*.{ts,tsx,js,jsx,json,md,css,html,yml,yaml}": [ + "prettier --check" + ], + "*": [ + "secretlint" + ] + } +} diff --git a/scripts/check-peer-dep-compat.mjs b/scripts/check-peer-dep-compat.mjs new file mode 100644 index 000000000..342d6da51 --- /dev/null +++ b/scripts/check-peer-dep-compat.mjs @@ -0,0 +1,161 @@ +#!/usr/bin/env node + +/** + * Cross-package peer dependency compatibility check. + * + * Verifies that the peer dependency ranges declared by this package + * overlap with those declared by its partner package (@aws/agentcore-cdk). + * Uses semver.intersects() to detect version drift that would cause + * unresolvable install errors for customers using both packages. + * + * Exit 0 = compatible, Exit 1 = incompatible or error. 
+ */ +import { readFileSync } from 'node:fs'; +import { createRequire } from 'node:module'; +import { dirname, join } from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const require = createRequire(import.meta.url); +const semver = require('semver'); + +const __dirname = dirname(fileURLToPath(import.meta.url)); + +// --- Configuration --- +const PARTNER_GITHUB_REPO = 'aws/agentcore-l3-cdk-constructs'; // owner/repo +const PARTNER_NPM_PACKAGE = '@aws/agentcore-cdk'; // npm package name +const SHARED_PEER_DEPS = ['aws-cdk-lib', 'constructs']; +const MAX_MIN_VERSION_DRIFT_MAJOR = 0; // fail if minimum versions differ by more than this many majors +// --- End Configuration --- + +async function fetchPartnerPeerDeps() { + // Try GitHub API first (works for public repos and with GITHUB_TOKEN) + const githubToken = process.env.GITHUB_TOKEN; + const githubUrl = `https://api.github.com/repos/${PARTNER_GITHUB_REPO}/contents/package.json`; + + try { + const headers = { + Accept: 'application/vnd.github.v3.raw', + 'User-Agent': 'peer-dep-compat-check', + }; + if (githubToken) { + headers.Authorization = `token ${githubToken}`; + } + + const res = await fetch(githubUrl, { headers }); + if (res.ok) { + const pkg = await res.json(); + console.log(`Fetched partner peerDependencies from GitHub (${PARTNER_GITHUB_REPO})`); + return pkg.peerDependencies || {}; + } + } catch { + // fall through to npm + } + + // Fallback: npm registry + const npmUrl = `https://registry.npmjs.org/${PARTNER_NPM_PACKAGE}/latest`; + try { + const res = await fetch(npmUrl, { + headers: { Accept: 'application/json' }, + }); + if (res.ok) { + const pkg = await res.json(); + console.log(`Fetched partner peerDependencies from npm (${PARTNER_NPM_PACKAGE})`); + return pkg.peerDependencies || {}; + } + } catch { + // fall through + } + + throw new Error(`Failed to fetch partner peerDependencies from both GitHub and npm`); +} + +function readLocalPeerDeps() { + const pkgPath = join(__dirname, '..', 
'package.json'); + const pkg = JSON.parse(readFileSync(pkgPath, 'utf-8')); + return pkg.peerDependencies || {}; +} + +function checkCompatibility(localDeps, partnerDeps) { + let hasFailure = false; + + for (const dep of SHARED_PEER_DEPS) { + const localRange = localDeps[dep]; + const partnerRange = partnerDeps[dep]; + + if (!localRange) { + console.log(` SKIP: ${dep} — not in local peerDependencies`); + continue; + } + if (!partnerRange) { + console.log(` SKIP: ${dep} — not in partner peerDependencies`); + continue; + } + + console.log(`\n Checking ${dep}:`); + console.log(` Local: ${localRange}`); + console.log(` Partner: ${partnerRange}`); + + // Check range overlap + if (!semver.intersects(localRange, partnerRange)) { + console.log(` FAIL: Ranges do not overlap!`); + hasFailure = true; + continue; + } + console.log(` OK: Ranges overlap`); + + // Check minimum version drift + const localMin = semver.minVersion(localRange); + const partnerMin = semver.minVersion(partnerRange); + if (localMin && partnerMin) { + const majorDiff = Math.abs(localMin.major - partnerMin.major); + const minorDiff = Math.abs(localMin.minor - partnerMin.minor); + + if (majorDiff > MAX_MIN_VERSION_DRIFT_MAJOR) { + console.log( + ` FAIL: Minimum versions differ by ${majorDiff} major version(s) (${localMin} vs ${partnerMin})` + ); + hasFailure = true; + } else if (minorDiff > 20) { + console.log( + ` WARN: Minimum versions differ by ${minorDiff} minor version(s) (${localMin} vs ${partnerMin})` + ); + } else { + console.log(` OK: Minimum versions are close (${localMin} vs ${partnerMin})`); + } + } + } + + return !hasFailure; +} + +async function main() { + console.log('Peer Dependency Compatibility Check'); + console.log('====================================\n'); + + const localDeps = readLocalPeerDeps(); + console.log('Local peerDependencies:', JSON.stringify(localDeps, null, 2)); + + let partnerDeps; + try { + partnerDeps = await fetchPartnerPeerDeps(); + } catch (err) { + 
console.error(`\nERROR: ${err.message}`); + console.error('Cannot verify compatibility — treating as failure for safety.'); + process.exit(1); + } + console.log('Partner peerDependencies:', JSON.stringify(partnerDeps, null, 2)); + + const compatible = checkCompatibility(localDeps, partnerDeps); + + if (compatible) { + console.log('\n✅ All shared peer dependencies are compatible.'); + process.exit(0); + } else { + console.log('\n❌ Peer dependency incompatibility detected!'); + console.log('Customers installing both packages will encounter version conflicts.'); + console.log('Please align the peer dependency ranges before releasing.'); + process.exit(1); + } +} + +main(); diff --git a/src/cli/commands/run/command.tsx b/src/cli/commands/run/command.tsx index fffe23a5a..0bcb4b00a 100644 --- a/src/cli/commands/run/command.tsx +++ b/src/cli/commands/run/command.tsx @@ -72,6 +72,11 @@ export const registerRun = (program: Command) => { .option('-A, --assertion ', 'Assertion the agent should satisfy (repeatable)') .option('--expected-trajectory ', 'Expected tool calls in order (comma-separated)') .option('--expected-response ', 'Expected agent response text') + .option( + '--custom-service-name ', + 'Custom service name for external agents — filters by service.name instead of cloud.resource_id' + ) + .option('--custom-log-group-name ', 'Custom CloudWatch log group name for external agents') .option('--output ', 'Custom output file path for results') .option('--json', 'Output as JSON') .action( @@ -88,11 +93,14 @@ export const registerRun = (program: Command) => { expectedTrajectory?: string; expectedResponse?: string; days: string; + customServiceName?: string; + customLogGroupName?: string; output?: string; json?: boolean; }) => { const isArnMode = !!(cliOptions.runtimeArn && cliOptions.evaluatorArn); - if (!isArnMode) { + const isCustomMode = !!cliOptions.customServiceName; + if (!isArnMode && !isCustomMode) { requireProject(); } @@ -120,6 +128,8 @@ export const 
registerRun = (program: Command) => { ? cliOptions.expectedTrajectory.split(',').map(s => s.trim()) : undefined, expectedResponse: cliOptions.expectedResponse, + customServiceName: cliOptions.customServiceName, + customLogGroupName: cliOptions.customLogGroupName, days: parseInt(cliOptions.days, 10), output: cliOptions.output, json: cliOptions.json, diff --git a/src/cli/operations/eval/__tests__/run-eval.test.ts b/src/cli/operations/eval/__tests__/run-eval.test.ts index 39314af69..06c6854f8 100644 --- a/src/cli/operations/eval/__tests__/run-eval.test.ts +++ b/src/cli/operations/eval/__tests__/run-eval.test.ts @@ -1053,6 +1053,125 @@ describe('handleRunEval', () => { expect(query).not.toContain("sess'"); }); + // ─── Custom mode (external agents) ────────────────────────────────────── + + it('resolves context from custom service name without project config', async () => { + setupCloudWatchToReturn([makeOtelSpanRow('s1', 't1')]); + mockEvaluate.mockResolvedValue({ + evaluationResults: [{ value: 4.0, context: { spanContext: { sessionId: 's1' } } }], + }); + + const result = await handleRunEval({ + evaluator: ['Builtin.Helpfulness'], + customServiceName: 'my-external-agent', + customLogGroupName: '/custom/log-group', + region: 'us-west-2', + days: 7, + }); + + expect(result.success).toBe(true); + expect(result.run!.agent).toBe('my-external-agent'); + expect(mockLoadDeployedProjectConfig).not.toHaveBeenCalled(); + expect(mockResolveAgent).not.toHaveBeenCalled(); + }); + + it('returns error when custom mode is missing region', async () => { + const result = await handleRunEval({ + evaluator: ['Builtin.Helpfulness'], + customServiceName: 'my-external-agent', + customLogGroupName: '/custom/log-group', + days: 7, + }); + + expect(result.success).toBe(false); + expect(result.error).toContain('--region is required'); + }); + + it('returns error when custom mode is missing log group name', async () => { + const result = await handleRunEval({ + evaluator: 
['Builtin.Helpfulness'], + customServiceName: 'my-external-agent', + region: 'us-west-2', + days: 7, + }); + + expect(result.success).toBe(false); + expect(result.error).toContain('--custom-log-group-name'); + }); + + it('rejects custom evaluator names in custom mode', async () => { + const result = await handleRunEval({ + evaluator: ['MyCustomEval'], + customServiceName: 'my-external-agent', + customLogGroupName: '/custom/log-group', + region: 'us-west-2', + days: 7, + }); + + expect(result.success).toBe(false); + expect(result.error).toContain('cannot be resolved in custom mode'); + }); + + it('resolves evaluator ARNs in custom mode', async () => { + setupCloudWatchToReturn([makeOtelSpanRow('s1', 't1')]); + mockEvaluate.mockResolvedValue({ + evaluationResults: [{ value: 5.0, context: { spanContext: { sessionId: 's1' } } }], + }); + + const result = await handleRunEval({ + evaluator: [], + evaluatorArn: ['arn:aws:bedrock-agentcore:us-west-2:123456789012:evaluator/eval-custom-ext'], + customServiceName: 'my-external-agent', + customLogGroupName: '/custom/log-group', + region: 'us-west-2', + days: 7, + }); + + expect(result.success).toBe(true); + expect(mockEvaluate).toHaveBeenCalledWith(expect.objectContaining({ evaluatorId: 'eval-custom-ext' })); + }); + + it('filters CloudWatch query by service.name in custom mode', async () => { + setupCloudWatchToReturn([makeOtelSpanRow('s1', 't1')]); + mockEvaluate.mockResolvedValue({ + evaluationResults: [{ value: 3.0, context: { spanContext: { sessionId: 's1' } } }], + }); + + await handleRunEval({ + evaluator: ['Builtin.GoalSuccessRate'], + customServiceName: 'my-external-agent', + customLogGroupName: '/custom/log-group', + region: 'us-west-2', + days: 7, + }); + + const query = getFirstQueryString(); + expect(query).toContain("resource.attributes.service.name = 'my-external-agent'"); + expect(query).not.toContain('cloud.resource_id'); + }); + + it('saves to cwd in custom mode when no --output is specified', async () => { + 
setupCloudWatchToReturn([makeOtelSpanRow('s1', 't1')]); + mockEvaluate.mockResolvedValue({ + evaluationResults: [{ value: 4.0, context: { spanContext: { sessionId: 's1' } } }], + }); + + const result = await handleRunEval({ + evaluator: ['Builtin.Helpfulness'], + customServiceName: 'my-external-agent', + customLogGroupName: '/custom/log-group', + region: 'us-west-2', + days: 7, + }); + + expect(result.success).toBe(true); + expect(mockSaveEvalRun).not.toHaveBeenCalled(); + expect(mockWriteFileSync).toHaveBeenCalledWith( + expect.stringContaining('eval_2025-01-15_10-00-00.json'), + expect.any(String) + ); + }); + // ─── Query sanitization ─────────────────────────────────────────────────── it('sanitizes runtimeId in CloudWatch query to prevent injection', async () => { diff --git a/src/cli/operations/eval/run-eval.ts b/src/cli/operations/eval/run-eval.ts index d130438ff..96805aa2a 100644 --- a/src/cli/operations/eval/run-eval.ts +++ b/src/cli/operations/eval/run-eval.ts @@ -24,10 +24,12 @@ const SUPPORTED_SCOPES = new Set([ interface ResolvedEvalContext { agentLabel: string; region: string; - runtimeId: string; + runtimeId?: string; runtimeLogGroup: string; evaluatorIds: string[]; evaluatorLabels: string[]; + /** When set, queries filter by service.name instead of cloud.resource_id */ + customServiceName?: string; } type ResolveResult = { success: true; ctx: ResolvedEvalContext } | { success: false; error: string }; @@ -170,6 +172,62 @@ function resolveFromProject(context: DeployedProjectConfig, options: RunEvalOpti }; } +/** + * Custom mode: resolve context from explicit custom service name and log group. + * Evaluators must come from --evaluator-arn or Builtin.* names. 
+ */ +function resolveFromCustom(options: RunEvalOptions): ResolveResult { + const { customServiceName, customLogGroupName } = options; + + if (!customServiceName || !customLogGroupName) { + return { + success: false, + error: 'Both --custom-service-name and --custom-log-group-name are required for external agent evaluation', + }; + } + + if (!options.region) { + return { success: false, error: '--region is required when using --custom-service-name' }; + } + + const evaluatorIds: string[] = []; + const evaluatorLabels: string[] = []; + + for (const evalName of options.evaluator) { + if (evalName.startsWith('Builtin.')) { + evaluatorIds.push(evalName); + evaluatorLabels.push(evalName); + } else { + return { + success: false, + error: `Custom evaluator "${evalName}" cannot be resolved in custom mode. Use --evaluator-arn with an evaluator ARN or ID, or use Builtin.* evaluators.`, + }; + } + } + + if (options.evaluatorArn) { + const resolved = resolveEvaluatorArns(options.evaluatorArn); + evaluatorIds.push(...resolved); + evaluatorLabels.push(...options.evaluatorArn); + } + + if (evaluatorIds.length === 0) { + return { success: false, error: 'No evaluators specified. Use -e/--evaluator with Builtin.* or --evaluator-arn.' 
}; + } + + return { + success: true, + ctx: { + agentLabel: customServiceName, + region: options.region, + runtimeLogGroup: customLogGroupName, + evaluatorIds, + evaluatorLabels, + customServiceName, + }, + }; +} + type EvaluatorLevel = 'SESSION' | 'TRACE' | 'TOOL_CALL'; const BUILTIN_EVALUATOR_LEVELS: Record = { @@ -369,9 +427,11 @@ function sanitizeQueryValue(value: string): string { const MAX_DISCOVERED_SESSIONS = 50; export interface DiscoverSessionsOptions { - runtimeId: string; + runtimeId?: string; region: string; lookbackDays: number; + /** When set, filter by service.name instead of cloud.resource_id */ + customServiceName?: string; } /** @@ -389,9 +449,16 @@ export async function discoverSessions(opts: DiscoverSessionsOptions): Promise { let resolution: ResolveResult; - if (options.agentArn) { + if (options.customServiceName) { + resolution = resolveFromCustom(options); + } else if (options.agentArn) { resolution = resolveFromArn(options); } else { const context = await loadDeployedProjectConfig(); @@ -582,6 +659,7 @@ export async function handleRunEval(options: RunEvalOptions): Promise Date: Wed, 25 Mar 2026 00:35:45 -0400 Subject: [PATCH 3/5] feat: add --input-path flag for local trace evaluation Allow `agentcore run evals` to evaluate local trace files instead of querying CloudWatch. Accepts a JSON file (traces get output or raw span array) or a directory of JSON files. Spans are grouped by session.id and fed into the evaluate API. Requires --region and Builtin.* or --evaluator-arn evaluators. 
--- src/cli/commands/run/command.tsx | 6 +- .../eval/__tests__/run-eval.test.ts | 180 +++++++++++++++ src/cli/operations/eval/run-eval.ts | 208 ++++++++++++++++-- src/cli/operations/eval/types.ts | 2 + 4 files changed, 374 insertions(+), 22 deletions(-) diff --git a/src/cli/commands/run/command.tsx b/src/cli/commands/run/command.tsx index 0bcb4b00a..bd7721573 100644 --- a/src/cli/commands/run/command.tsx +++ b/src/cli/commands/run/command.tsx @@ -77,6 +77,7 @@ export const registerRun = (program: Command) => { 'Custom service name for external agents — filters by service.name instead of cloud.resource_id' ) .option('--custom-log-group-name ', 'Custom CloudWatch log group name for external agents') + .option('--input-path ', 'Path to a local trace file or directory — skips CloudWatch discovery') .option('--output ', 'Custom output file path for results') .option('--json', 'Output as JSON') .action( @@ -95,12 +96,14 @@ export const registerRun = (program: Command) => { days: string; customServiceName?: string; customLogGroupName?: string; + inputPath?: string; output?: string; json?: boolean; }) => { const isArnMode = !!(cliOptions.runtimeArn && cliOptions.evaluatorArn); const isCustomMode = !!cliOptions.customServiceName; - if (!isArnMode && !isCustomMode) { + const isInputMode = !!cliOptions.inputPath; + if (!isArnMode && !isCustomMode && !isInputMode) { requireProject(); } @@ -130,6 +133,7 @@ export const registerRun = (program: Command) => { expectedResponse: cliOptions.expectedResponse, customServiceName: cliOptions.customServiceName, customLogGroupName: cliOptions.customLogGroupName, + inputPath: cliOptions.inputPath, days: parseInt(cliOptions.days, 10), output: cliOptions.output, json: cliOptions.json, diff --git a/src/cli/operations/eval/__tests__/run-eval.test.ts b/src/cli/operations/eval/__tests__/run-eval.test.ts index 06c6854f8..f0feda750 100644 --- a/src/cli/operations/eval/__tests__/run-eval.test.ts +++ 
b/src/cli/operations/eval/__tests__/run-eval.test.ts @@ -12,6 +12,10 @@ const mockGenerateFilename = vi.fn(); const mockSend = vi.fn(); const mockGetCredentialProvider = vi.fn().mockReturnValue({}); const mockWriteFileSync = vi.fn(); +const mockExistsSync = vi.fn(); +const mockReadFileSync = vi.fn(); +const mockReaddirSync = vi.fn(); +const mockStatSync = vi.fn(); vi.mock('../../resolve-agent', () => ({ loadDeployedProjectConfig: () => mockLoadDeployedProjectConfig(), @@ -40,6 +44,10 @@ vi.mock('fs', async importOriginal => { return { ...original, writeFileSync: (...args: unknown[]) => mockWriteFileSync(...args), + existsSync: (...args: unknown[]) => mockExistsSync(...args), + readFileSync: (...args: unknown[]) => mockReadFileSync(...args), + readdirSync: (...args: unknown[]) => mockReaddirSync(...args), + statSync: (...args: unknown[]) => mockStatSync(...args), }; }); @@ -1172,6 +1180,178 @@ describe('handleRunEval', () => { ); }); + // ─── Input-path mode (local traces) ───────────────────────────────────── + + function makeTracesGetEntry(sessionId: string, traceId: string, spanBody: Record = {}) { + return { + '@timestamp': '2025-01-15 10:00:00.000', + '@message': { + scope: { name: 'strands.telemetry.tracer' }, + body: spanBody, + traceId, + attributes: { 'session.id': sessionId }, + }, + }; + } + + function makeRawSpanEntry(sessionId: string, traceId: string) { + return { + scope: { name: 'strands.telemetry.tracer' }, + traceId, + attributes: { 'session.id': sessionId }, + body: {}, + }; + } + + it('loads spans from a local trace file and evaluates', async () => { + const traceData = [makeTracesGetEntry('session-1', 'trace-1')]; + mockExistsSync.mockReturnValue(true); + mockStatSync.mockReturnValue({ isDirectory: () => false }); + mockReadFileSync.mockReturnValue(JSON.stringify(traceData)); + + mockEvaluate.mockResolvedValue({ + evaluationResults: [{ value: 4.0, context: { spanContext: { sessionId: 'session-1' } } }], + }); + + const result = await 
handleRunEval({ + evaluator: ['Builtin.GoalSuccessRate'], + inputPath: '/tmp/trace.json', + region: 'us-east-1', + days: 7, + }); + + expect(result.success).toBe(true); + expect(result.run!.agent).toBe('local'); + expect(result.run!.sessionCount).toBe(1); + expect(mockLoadDeployedProjectConfig).not.toHaveBeenCalled(); + expect(mockSend).not.toHaveBeenCalled(); // No CloudWatch calls + }); + + it('loads spans from a directory of trace files', async () => { + mockExistsSync.mockReturnValue(true); + mockStatSync.mockReturnValue({ isDirectory: () => true }); + mockReaddirSync.mockReturnValue(['trace1.json', 'trace2.json', 'readme.txt']); + mockReadFileSync.mockImplementation((filePath: string) => { + if (filePath.includes('trace1')) { + return JSON.stringify([makeTracesGetEntry('session-1', 'trace-1')]); + } + return JSON.stringify([makeTracesGetEntry('session-2', 'trace-2')]); + }); + + mockEvaluate.mockResolvedValue({ + evaluationResults: [{ value: 3.0, context: { spanContext: { sessionId: 'session-1' } } }], + }); + + const result = await handleRunEval({ + evaluator: ['Builtin.GoalSuccessRate'], + inputPath: '/tmp/traces/', + region: 'us-east-1', + days: 7, + }); + + expect(result.success).toBe(true); + expect(result.run!.sessionCount).toBe(2); + }); + + it('supports raw span format (without @message wrapper)', async () => { + const traceData = [makeRawSpanEntry('session-1', 'trace-1')]; + mockExistsSync.mockReturnValue(true); + mockStatSync.mockReturnValue({ isDirectory: () => false }); + mockReadFileSync.mockReturnValue(JSON.stringify(traceData)); + + mockEvaluate.mockResolvedValue({ + evaluationResults: [{ value: 5.0, context: { spanContext: { sessionId: 'session-1' } } }], + }); + + const result = await handleRunEval({ + evaluator: ['Builtin.Helpfulness'], + inputPath: '/tmp/raw-spans.json', + region: 'us-east-1', + days: 7, + }); + + expect(result.success).toBe(true); + expect(result.run!.sessionCount).toBe(1); + }); + + it('returns error when input path does 
not exist', async () => { + mockExistsSync.mockReturnValue(false); + + const result = await handleRunEval({ + evaluator: ['Builtin.GoalSuccessRate'], + inputPath: '/tmp/nonexistent.json', + region: 'us-east-1', + days: 7, + }); + + expect(result.success).toBe(false); + expect(result.error).toContain('Path not found'); + }); + + it('returns error when directory has no JSON files', async () => { + mockExistsSync.mockReturnValue(true); + mockStatSync.mockReturnValue({ isDirectory: () => true }); + mockReaddirSync.mockReturnValue(['readme.txt', 'notes.md']); + + const result = await handleRunEval({ + evaluator: ['Builtin.GoalSuccessRate'], + inputPath: '/tmp/empty-dir/', + region: 'us-east-1', + days: 7, + }); + + expect(result.success).toBe(false); + expect(result.error).toContain('No .json files found'); + }); + + it('returns error when --region is missing in input-path mode', async () => { + const result = await handleRunEval({ + evaluator: ['Builtin.GoalSuccessRate'], + inputPath: '/tmp/trace.json', + days: 7, + }); + + expect(result.success).toBe(false); + expect(result.error).toContain('--region is required'); + }); + + it('rejects custom evaluator names in input-path mode', async () => { + const result = await handleRunEval({ + evaluator: ['MyCustomEval'], + inputPath: '/tmp/trace.json', + region: 'us-east-1', + days: 7, + }); + + expect(result.success).toBe(false); + expect(result.error).toContain('cannot be resolved in input-path mode'); + }); + + it('saves to cwd in input-path mode when no --output is specified', async () => { + const traceData = [makeTracesGetEntry('session-1', 'trace-1')]; + mockExistsSync.mockReturnValue(true); + mockStatSync.mockReturnValue({ isDirectory: () => false }); + mockReadFileSync.mockReturnValue(JSON.stringify(traceData)); + + mockEvaluate.mockResolvedValue({ + evaluationResults: [{ value: 4.0, context: { spanContext: { sessionId: 'session-1' } } }], + }); + + const result = await handleRunEval({ + evaluator: 
['Builtin.GoalSuccessRate'], + inputPath: '/tmp/trace.json', + region: 'us-east-1', + days: 7, + }); + + expect(result.success).toBe(true); + expect(mockSaveEvalRun).not.toHaveBeenCalled(); + expect(mockWriteFileSync).toHaveBeenCalledWith( + expect.stringContaining('eval_2025-01-15_10-00-00.json'), + expect.any(String) + ); + }); + // ─── Query sanitization ─────────────────────────────────────────────────── it('sanitizes runtimeId in CloudWatch query to prevent injection', async () => { diff --git a/src/cli/operations/eval/run-eval.ts b/src/cli/operations/eval/run-eval.ts index 96805aa2a..a81961702 100644 --- a/src/cli/operations/eval/run-eval.ts +++ b/src/cli/operations/eval/run-eval.ts @@ -10,7 +10,7 @@ import type { EvaluationReferenceInput } from '@aws-sdk/client-bedrock-agentcore import { CloudWatchLogsClient, GetQueryResultsCommand, StartQueryCommand } from '@aws-sdk/client-cloudwatch-logs'; import type { ResultField } from '@aws-sdk/client-cloudwatch-logs'; import type { DocumentType } from '@smithy/types'; -import { writeFileSync } from 'fs'; +import { existsSync, readFileSync, readdirSync, statSync, writeFileSync } from 'fs'; import { join } from 'path'; const SPANS_LOG_GROUP = 'aws/spans'; @@ -228,6 +228,160 @@ function resolveFromCustom(options: RunEvalOptions): ResolveResult { }; } +/** + * Input-path mode: resolve context for local trace files. + * Evaluators must come from Builtin.* names or --evaluator-arn. Requires --region. 
+ */ +function resolveFromInput(options: RunEvalOptions): ResolveResult { + if (!options.region) { + return { success: false, error: '--region is required when using --input-path' }; + } + + const evaluatorIds: string[] = []; + const evaluatorLabels: string[] = []; + + for (const evalName of options.evaluator) { + if (evalName.startsWith('Builtin.')) { + evaluatorIds.push(evalName); + evaluatorLabels.push(evalName); + } else { + return { + success: false, + error: `Custom evaluator "${evalName}" cannot be resolved in input-path mode. Use --evaluator-arn with an evaluator ARN or ID, or use Builtin.* evaluators.`, + }; + } + } + + if (options.evaluatorArn) { + const resolved = resolveEvaluatorArns(options.evaluatorArn); + evaluatorIds.push(...resolved); + evaluatorLabels.push(...options.evaluatorArn); + } + + if (evaluatorIds.length === 0) { + return { success: false, error: 'No evaluators specified. Use -e/--evaluator with Builtin.* or --evaluator-arn.' }; + } + + return { + success: true, + ctx: { + agentLabel: 'local', + region: options.region, + runtimeLogGroup: '', + evaluatorIds, + evaluatorLabels, + }, + }; +} + +/** + * Load span documents from a local file or directory of JSON files. + * + * Supports two formats: + * 1. `traces get` output: array of objects with `@message` field (parsed or string) + * 2. Raw span array: array of OTel span documents directly + * + * Returns spans grouped by session ID. 
+ */ +function loadSpansFromPath( + inputPath: string +): { success: true; sessions: SessionSpans[] } | { success: false; error: string } { + if (!existsSync(inputPath)) { + return { success: false, error: `Path not found: ${inputPath}` }; + } + + const filePaths: string[] = []; + const stat = statSync(inputPath); + + if (stat.isDirectory()) { + const entries = readdirSync(inputPath).filter(f => f.endsWith('.json')); + if (entries.length === 0) { + return { success: false, error: `No .json files found in directory: ${inputPath}` }; + } + filePaths.push(...entries.map(f => join(inputPath, f))); + } else { + filePaths.push(inputPath); + } + + const sessionMap = new Map(); + + for (const filePath of filePaths) { + let parsed: unknown; + try { + parsed = JSON.parse(readFileSync(filePath, 'utf-8')); + } catch { + return { success: false, error: `Failed to parse JSON from: ${filePath}` }; + } + + if (!Array.isArray(parsed)) { + return { success: false, error: `Expected a JSON array in: ${filePath}` }; + } + + for (const entry of parsed) { + const doc = extractSpanDoc(entry as Record); + if (!doc) continue; + + const sessionId = extractSessionId(doc) ?? 'unknown'; + if (!sessionMap.has(sessionId)) { + sessionMap.set(sessionId, []); + } + sessionMap.get(sessionId)!.push(doc as DocumentType); + } + } + + const sessions: SessionSpans[] = []; + for (const [sessionId, spans] of sessionMap) { + if (spans.length > 0) { + sessions.push({ sessionId, spans }); + } + } + + return { success: true, sessions }; +} + +/** + * Extract a span document from an entry. Handles both: + * - `traces get` format: `{ "@message": { ... }, "@timestamp": ... }` + * - Raw span format: `{ traceId: ..., scope: ..., ... 
}` + */ +function extractSpanDoc(entry: Record): Record | undefined { + // traces get format: @message contains the actual span + if ('@message' in entry) { + const msg = entry['@message']; + if (typeof msg === 'string') { + try { + return JSON.parse(msg) as Record; + } catch { + return undefined; + } + } + if (msg && typeof msg === 'object') { + return msg as Record; + } + return undefined; + } + + // Raw span document (has traceId or scope) + if ('traceId' in entry || 'scope' in entry || 'body' in entry) { + return entry; + } + + return undefined; +} + +/** Extract session ID from a span document's attributes. */ +function extractSessionId(doc: Record): string | undefined { + const attrs = doc.attributes as Record | undefined; + if (attrs?.['session.id']) return attrs['session.id'] as string; + + // Some spans nest under resource.attributes + const resource = doc.resource as Record | undefined; + const resourceAttrs = resource?.attributes as Record | undefined; + if (resourceAttrs?.['session.id']) return resourceAttrs['session.id'] as string; + + return undefined; +} + type EvaluatorLevel = 'SESSION' | 'TRACE' | 'TOOL_CALL'; const BUILTIN_EVALUATOR_LEVELS: Record = { @@ -636,7 +790,9 @@ export interface RunEvalResult { export async function handleRunEval(options: RunEvalOptions): Promise { let resolution: ResolveResult; - if (options.customServiceName) { + if (options.inputPath) { + resolution = resolveFromInput(options); + } else if (options.customServiceName) { resolution = resolveFromCustom(options); } else if (options.agentArn) { resolution = resolveFromArn(options); @@ -651,28 +807,38 @@ export async function handleRunEval(options: RunEvalOptions): Promise 0) { - const selected = new Set(options.sessionIds); - sessions = sessions.filter(s => selected.has(s.sessionId)); + // Filter to selected session IDs if provided (from TUI multi-select) + if (options.sessionIds && options.sessionIds.length > 0) { + const selected = new Set(options.sessionIds); + sessions 
= sessions.filter(s => selected.has(s.sessionId)); + } } if (sessions.length === 0) { - return { - success: false, - error: `No session spans found for agent "${ctx.agentLabel}" in the last ${options.days} day(s). Has the agent been invoked?`, - }; + const errorDetail = options.inputPath + ? `No span documents found in: ${options.inputPath}` + : `No session spans found for agent "${ctx.agentLabel}" in the last ${options.days} day(s). Has the agent been invoked?`; + return { success: false, error: errorDetail }; } // Resolve evaluator levels to determine how to send spans @@ -829,7 +995,7 @@ export async function handleRunEval(options: RunEvalOptions): Promise Date: Fri, 10 Apr 2026 23:52:44 -0400 Subject: [PATCH 4/5] chore: remove unrelated files accidentally included in branch --- .github/workflows/peer-dep-compat.yml | 29 -- .../26498a12-f26a-43b7-b89d-131ac995aa62.json | 8 - .../43db174c-d800-4e28-8ac9-f1e39aa6aa7f.json | 8 - .../4971c61c-9a8f-4475-b164-105c601d2fba.json | 8 - .../4dd2cc49-8d18-4168-8460-acacf125e650.json | 8 - .../e9773b37-7fb9-4794-bd18-beca59e15738.json | 8 - ...00abd583-2ef0-45e8-9134-f395ce6d5bbe.jsonl | 13 - ...9423878a-39c1-4869-87f0-0f8a3910890b.jsonl | 16 - .omc/state/hud-state.json | 6 - .omc/state/hud-stdin-cache.json | 1 - .omc/state/idle-notif-cooldown.json | 3 - .omc/state/last-tool-error.json | 7 - .omc/state/mission-state.json | 217 ------------ .omc/state/subagent-tracking.json | 26 -- docs/individual-memory-deployment-plan.md | 315 ------------------ docs/release-process.md | 129 ------- package/package.json | 167 ---------- scripts/check-peer-dep-compat.mjs | 161 --------- 18 files changed, 1130 deletions(-) delete mode 100644 .github/workflows/peer-dep-compat.yml delete mode 100644 .omc/sessions/26498a12-f26a-43b7-b89d-131ac995aa62.json delete mode 100644 .omc/sessions/43db174c-d800-4e28-8ac9-f1e39aa6aa7f.json delete mode 100644 .omc/sessions/4971c61c-9a8f-4475-b164-105c601d2fba.json delete mode 100644 
.omc/sessions/4dd2cc49-8d18-4168-8460-acacf125e650.json delete mode 100644 .omc/sessions/e9773b37-7fb9-4794-bd18-beca59e15738.json delete mode 100644 .omc/state/agent-replay-00abd583-2ef0-45e8-9134-f395ce6d5bbe.jsonl delete mode 100644 .omc/state/agent-replay-9423878a-39c1-4869-87f0-0f8a3910890b.jsonl delete mode 100644 .omc/state/hud-state.json delete mode 100644 .omc/state/hud-stdin-cache.json delete mode 100644 .omc/state/idle-notif-cooldown.json delete mode 100644 .omc/state/last-tool-error.json delete mode 100644 .omc/state/mission-state.json delete mode 100644 .omc/state/subagent-tracking.json delete mode 100644 docs/individual-memory-deployment-plan.md delete mode 100644 docs/release-process.md delete mode 100644 package/package.json delete mode 100644 scripts/check-peer-dep-compat.mjs diff --git a/.github/workflows/peer-dep-compat.yml b/.github/workflows/peer-dep-compat.yml deleted file mode 100644 index b3f07f45f..000000000 --- a/.github/workflows/peer-dep-compat.yml +++ /dev/null @@ -1,29 +0,0 @@ -name: Peer Dependency Compatibility - -on: - push: - branches: [main] - paths: [package.json] - pull_request: - branches: [main] - paths: [package.json] - -jobs: - check-peer-deps: - name: Check Peer Dep Compatibility - runs-on: ubuntu-latest - timeout-minutes: 5 - - steps: - - uses: actions/checkout@v6 - - - uses: actions/setup-node@v6 - with: - node-version: 20.x - - - run: npm ci - - - name: Check peer dep compatibility - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: node scripts/check-peer-dep-compat.mjs diff --git a/.omc/sessions/26498a12-f26a-43b7-b89d-131ac995aa62.json b/.omc/sessions/26498a12-f26a-43b7-b89d-131ac995aa62.json deleted file mode 100644 index 4b184fbea..000000000 --- a/.omc/sessions/26498a12-f26a-43b7-b89d-131ac995aa62.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "session_id": "26498a12-f26a-43b7-b89d-131ac995aa62", - "ended_at": "2026-03-29T01:49:15.010Z", - "reason": "other", - "agents_spawned": 3, - "agents_completed": 2, - 
"modes_used": [] -} \ No newline at end of file diff --git a/.omc/sessions/43db174c-d800-4e28-8ac9-f1e39aa6aa7f.json b/.omc/sessions/43db174c-d800-4e28-8ac9-f1e39aa6aa7f.json deleted file mode 100644 index 093c8dd1a..000000000 --- a/.omc/sessions/43db174c-d800-4e28-8ac9-f1e39aa6aa7f.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "session_id": "43db174c-d800-4e28-8ac9-f1e39aa6aa7f", - "ended_at": "2026-03-27T19:13:10.029Z", - "reason": "clear", - "agents_spawned": 0, - "agents_completed": 0, - "modes_used": [] -} \ No newline at end of file diff --git a/.omc/sessions/4971c61c-9a8f-4475-b164-105c601d2fba.json b/.omc/sessions/4971c61c-9a8f-4475-b164-105c601d2fba.json deleted file mode 100644 index d8dde0299..000000000 --- a/.omc/sessions/4971c61c-9a8f-4475-b164-105c601d2fba.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "session_id": "4971c61c-9a8f-4475-b164-105c601d2fba", - "ended_at": "2026-03-29T01:49:18.048Z", - "reason": "other", - "agents_spawned": 0, - "agents_completed": 0, - "modes_used": [] -} \ No newline at end of file diff --git a/.omc/sessions/4dd2cc49-8d18-4168-8460-acacf125e650.json b/.omc/sessions/4dd2cc49-8d18-4168-8460-acacf125e650.json deleted file mode 100644 index e4b16aad4..000000000 --- a/.omc/sessions/4dd2cc49-8d18-4168-8460-acacf125e650.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "session_id": "4dd2cc49-8d18-4168-8460-acacf125e650", - "ended_at": "2026-04-02T20:47:39.078Z", - "reason": "other", - "agents_spawned": 0, - "agents_completed": 0, - "modes_used": [] -} \ No newline at end of file diff --git a/.omc/sessions/e9773b37-7fb9-4794-bd18-beca59e15738.json b/.omc/sessions/e9773b37-7fb9-4794-bd18-beca59e15738.json deleted file mode 100644 index 7a42ed679..000000000 --- a/.omc/sessions/e9773b37-7fb9-4794-bd18-beca59e15738.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "session_id": "e9773b37-7fb9-4794-bd18-beca59e15738", - "ended_at": "2026-04-09T17:29:57.919Z", - "reason": "clear", - "agents_spawned": 0, - "agents_completed": 0, - "modes_used": [] -} \ No newline at 
end of file diff --git a/.omc/state/agent-replay-00abd583-2ef0-45e8-9134-f395ce6d5bbe.jsonl b/.omc/state/agent-replay-00abd583-2ef0-45e8-9134-f395ce6d5bbe.jsonl deleted file mode 100644 index 48cb81342..000000000 --- a/.omc/state/agent-replay-00abd583-2ef0-45e8-9134-f395ce6d5bbe.jsonl +++ /dev/null @@ -1,13 +0,0 @@ -{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} -{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} -{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} -{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} -{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} -{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} -{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} -{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} -{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} -{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} -{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} -{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} -{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} diff --git a/.omc/state/agent-replay-9423878a-39c1-4869-87f0-0f8a3910890b.jsonl b/.omc/state/agent-replay-9423878a-39c1-4869-87f0-0f8a3910890b.jsonl deleted file mode 100644 index ca30d8086..000000000 --- a/.omc/state/agent-replay-9423878a-39c1-4869-87f0-0f8a3910890b.jsonl +++ /dev/null @@ -1,16 +0,0 @@ -{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} -{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} -{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} 
-{"t":0,"agent":"a160d74","agent_type":"Explore","event":"agent_start","parent_mode":"none"} -{"t":0,"agent":"a740f6b","agent_type":"general-purpose","event":"agent_start","parent_mode":"none"} -{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} -{"t":0,"agent":"a160d74","agent_type":"Explore","event":"agent_stop","success":true,"duration_ms":208069} -{"t":0,"agent":"a740f6b","agent_type":"general-purpose","event":"agent_stop","success":true,"duration_ms":240447} -{"t":0,"agent":"system","event":"skill_invoked","skill_name":"commit-ready"} -{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} -{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} -{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} -{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} -{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} -{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} -{"t":0,"agent":"hook-ag","agent_type":"unknown","event":"agent_stop","success":true} diff --git a/.omc/state/hud-state.json b/.omc/state/hud-state.json deleted file mode 100644 index 05f1a5a57..000000000 --- a/.omc/state/hud-state.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "timestamp": "2026-04-11T01:09:53.820Z", - "backgroundTasks": [], - "sessionStartTimestamp": "2026-04-10T22:43:56.788Z", - "sessionId": "00abd583-2ef0-45e8-9134-f395ce6d5bbe" -} \ No newline at end of file diff --git a/.omc/state/hud-stdin-cache.json b/.omc/state/hud-stdin-cache.json deleted file mode 100644 index 95305b0ac..000000000 --- a/.omc/state/hud-stdin-cache.json +++ /dev/null @@ -1 +0,0 @@ 
-{"session_id":"00abd583-2ef0-45e8-9134-f395ce6d5bbe","transcript_path":"/Users/gitikavj/.claude/projects/-Volumes-workplace-agentcore-agentcore-gh/00abd583-2ef0-45e8-9134-f395ce6d5bbe.jsonl","cwd":"/Volumes/workplace/agentcore/agentcore-gh/agentcore-cli","model":{"id":"us.anthropic.claude-opus-4-6-v1","display_name":"Opus 4.6"},"workspace":{"current_dir":"/Volumes/workplace/agentcore/agentcore-gh/agentcore-cli","project_dir":"/Volumes/workplace/agentcore/agentcore-gh","added_dirs":["/Users/gitikavj/workplace/agentcore/agentcore-gh"]},"version":"2.1.101","output_style":{"name":"default"},"cost":{"total_cost_usd":6.130216400000001,"total_duration_ms":14421649,"total_api_duration_ms":681873,"total_lines_added":97,"total_lines_removed":13},"context_window":{"total_input_tokens":873,"total_output_tokens":31781,"context_window_size":200000,"current_usage":{"input_tokens":3,"output_tokens":181,"cache_creation_input_tokens":2040,"cache_read_input_tokens":81786},"used_percentage":42,"remaining_percentage":58},"exceeds_200k_tokens":false} \ No newline at end of file diff --git a/.omc/state/idle-notif-cooldown.json b/.omc/state/idle-notif-cooldown.json deleted file mode 100644 index 7bf99974d..000000000 --- a/.omc/state/idle-notif-cooldown.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "lastSentAt": "2026-04-11T02:44:15.086Z" -} \ No newline at end of file diff --git a/.omc/state/last-tool-error.json b/.omc/state/last-tool-error.json deleted file mode 100644 index 43a653d86..000000000 --- a/.omc/state/last-tool-error.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "tool_name": "Read", - "tool_input_preview": "{\"file_path\":\"/Users/gitikavj/.claude/projects/-Volumes-workplace-agentcore-agentcore-gh/00abd583-2ef0-45e8-9134-f395ce6d5bbe/tool-results/b4w8ow5ys.txt\"}", - "error": "File content (31694 tokens) exceeds maximum allowed tokens (25000). 
Use offset and limit parameters to read specific portions of the file, or search for specific content instead of reading the whole file.", - "timestamp": "2026-04-11T02:44:45.598Z", - "retry_count": 4 -} \ No newline at end of file diff --git a/.omc/state/mission-state.json b/.omc/state/mission-state.json deleted file mode 100644 index c6adf14d2..000000000 --- a/.omc/state/mission-state.json +++ /dev/null @@ -1,217 +0,0 @@ -{ - "updatedAt": "2026-04-10T20:10:12.773Z", - "missions": [ - { - "id": "session:1dd61909-07a8-4903-8592-a6de32ae7476:none", - "source": "session", - "name": "none", - "objective": "Session mission", - "createdAt": "2026-03-27T22:47:19.586Z", - "updatedAt": "2026-03-27T22:51:03.329Z", - "status": "done", - "workerCount": 2, - "taskCounts": { - "total": 2, - "pending": 0, - "blocked": 0, - "inProgress": 0, - "completed": 2, - "failed": 0 - }, - "agents": [ - { - "name": "code-reviewer:abd2c70", - "role": "code-reviewer", - "ownership": "abd2c70f23f3f4c90", - "status": "done", - "currentStep": null, - "latestUpdate": "completed", - "completedSummary": null, - "updatedAt": "2026-03-27T22:51:03.329Z" - }, - { - "name": "superpowers:code-reviewer:acaa328", - "role": "superpowers:code-reviewer", - "ownership": "acaa3288832d1e52d", - "status": "done", - "currentStep": null, - "latestUpdate": "completed", - "completedSummary": null, - "updatedAt": "2026-03-27T22:50:33.166Z" - } - ], - "timeline": [ - { - "id": "session-start:abd2c70f23f3f4c90:2026-03-27T22:47:19.586Z", - "at": "2026-03-27T22:47:19.586Z", - "kind": "update", - "agent": "code-reviewer:abd2c70", - "detail": "started code-reviewer:abd2c70", - "sourceKey": "session-start:abd2c70f23f3f4c90" - }, - { - "id": "session-start:acaa3288832d1e52d:2026-03-27T22:47:29.187Z", - "at": "2026-03-27T22:47:29.187Z", - "kind": "update", - "agent": "superpowers:code-reviewer:acaa328", - "detail": "started superpowers:code-reviewer:acaa328", - "sourceKey": "session-start:acaa3288832d1e52d" - }, - { - "id": 
"session-stop:acaa3288832d1e52d:2026-03-27T22:50:33.166Z", - "at": "2026-03-27T22:50:33.166Z", - "kind": "completion", - "agent": "superpowers:code-reviewer:acaa328", - "detail": "completed", - "sourceKey": "session-stop:acaa3288832d1e52d" - }, - { - "id": "session-stop:hook-agent-2163411d-aa52-477b-9545-c69759d86e3e:2026-03-27T22:51:03.329Z", - "at": "2026-03-27T22:51:03.329Z", - "kind": "completion", - "agent": "code-reviewer:abd2c70", - "detail": "completed", - "sourceKey": "session-stop:hook-agent-2163411d-aa52-477b-9545-c69759d86e3e" - } - ] - }, - { - "id": "session:9423878a-39c1-4869-87f0-0f8a3910890b:none", - "source": "session", - "name": "none", - "objective": "Session mission", - "createdAt": "2026-04-10T17:56:19.252Z", - "updatedAt": "2026-04-10T20:10:12.773Z", - "status": "done", - "workerCount": 2, - "taskCounts": { - "total": 2, - "pending": 0, - "blocked": 0, - "inProgress": 0, - "completed": 2, - "failed": 0 - }, - "agents": [ - { - "name": "Explore:a160d74", - "role": "Explore", - "ownership": "a160d74768541cb40", - "status": "done", - "currentStep": null, - "latestUpdate": "completed", - "completedSummary": null, - "updatedAt": "2026-04-10T20:10:12.773Z" - }, - { - "name": "general-purpose:a740f6b", - "role": "general-purpose", - "ownership": "a740f6b71fe059953", - "status": "done", - "currentStep": null, - "latestUpdate": "completed", - "completedSummary": null, - "updatedAt": "2026-04-10T18:00:19.760Z" - } - ], - "timeline": [ - { - "id": "session-start:a160d74768541cb40:2026-04-10T17:56:19.252Z", - "at": "2026-04-10T17:56:19.252Z", - "kind": "update", - "agent": "Explore:a160d74", - "detail": "started Explore:a160d74", - "sourceKey": "session-start:a160d74768541cb40" - }, - { - "id": "session-start:a740f6b71fe059953:2026-04-10T17:56:19.313Z", - "at": "2026-04-10T17:56:19.313Z", - "kind": "update", - "agent": "general-purpose:a740f6b", - "detail": "started general-purpose:a740f6b", - "sourceKey": "session-start:a740f6b71fe059953" - }, - { - 
"id": "session-stop:hook-agent-3e13f9d4-5816-419c-be51-de7316bd102b:2026-04-10T17:57:04.575Z", - "at": "2026-04-10T17:57:04.575Z", - "kind": "completion", - "agent": "Explore:a160d74", - "detail": "completed", - "sourceKey": "session-stop:hook-agent-3e13f9d4-5816-419c-be51-de7316bd102b" - }, - { - "id": "session-stop:a160d74768541cb40:2026-04-10T17:59:47.321Z", - "at": "2026-04-10T17:59:47.321Z", - "kind": "completion", - "agent": "Explore:a160d74", - "detail": "completed", - "sourceKey": "session-stop:a160d74768541cb40" - }, - { - "id": "session-stop:a740f6b71fe059953:2026-04-10T18:00:19.760Z", - "at": "2026-04-10T18:00:19.760Z", - "kind": "completion", - "agent": "general-purpose:a740f6b", - "detail": "completed", - "sourceKey": "session-stop:a740f6b71fe059953" - }, - { - "id": "session-stop:hook-agent-add781eb-f858-4e4f-b3aa-78dfcd6f623c:2026-04-10T18:00:33.874Z", - "at": "2026-04-10T18:00:33.874Z", - "kind": "completion", - "agent": "Explore:a160d74", - "detail": "completed", - "sourceKey": "session-stop:hook-agent-add781eb-f858-4e4f-b3aa-78dfcd6f623c" - }, - { - "id": "session-stop:hook-agent-8d2570bf-4e8f-4362-86a5-dbce42976330:2026-04-10T18:01:08.550Z", - "at": "2026-04-10T18:01:08.550Z", - "kind": "completion", - "agent": "Explore:a160d74", - "detail": "completed", - "sourceKey": "session-stop:hook-agent-8d2570bf-4e8f-4362-86a5-dbce42976330" - }, - { - "id": "session-stop:hook-agent-d5068281-907b-4984-9384-61fd5e52a8a4:2026-04-10T18:27:51.248Z", - "at": "2026-04-10T18:27:51.248Z", - "kind": "completion", - "agent": "Explore:a160d74", - "detail": "completed", - "sourceKey": "session-stop:hook-agent-d5068281-907b-4984-9384-61fd5e52a8a4" - }, - { - "id": "session-stop:hook-agent-adc45b09-b559-4c45-9f38-28bb13f97f12:2026-04-10T19:39:52.418Z", - "at": "2026-04-10T19:39:52.418Z", - "kind": "completion", - "agent": "Explore:a160d74", - "detail": "completed", - "sourceKey": "session-stop:hook-agent-adc45b09-b559-4c45-9f38-28bb13f97f12" - }, - { - "id": 
"session-stop:hook-agent-9f572f32-9c3e-4256-b0a4-25aeb14977b0:2026-04-10T20:07:41.431Z", - "at": "2026-04-10T20:07:41.431Z", - "kind": "completion", - "agent": "Explore:a160d74", - "detail": "completed", - "sourceKey": "session-stop:hook-agent-9f572f32-9c3e-4256-b0a4-25aeb14977b0" - }, - { - "id": "session-stop:hook-agent-19bb6986-af41-4f9f-a108-38b1af29c6a1:2026-04-10T20:08:48.444Z", - "at": "2026-04-10T20:08:48.444Z", - "kind": "completion", - "agent": "Explore:a160d74", - "detail": "completed", - "sourceKey": "session-stop:hook-agent-19bb6986-af41-4f9f-a108-38b1af29c6a1" - }, - { - "id": "session-stop:hook-agent-1e07eac7-0203-4a54-8173-c4ade4181f37:2026-04-10T20:10:12.773Z", - "at": "2026-04-10T20:10:12.773Z", - "kind": "completion", - "agent": "Explore:a160d74", - "detail": "completed", - "sourceKey": "session-stop:hook-agent-1e07eac7-0203-4a54-8173-c4ade4181f37" - } - ] - } - ] -} \ No newline at end of file diff --git a/.omc/state/subagent-tracking.json b/.omc/state/subagent-tracking.json deleted file mode 100644 index 1e2e9b0aa..000000000 --- a/.omc/state/subagent-tracking.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "agents": [ - { - "agent_id": "a160d74768541cb40", - "agent_type": "Explore", - "started_at": "2026-04-10T17:56:19.252Z", - "parent_mode": "none", - "status": "completed", - "completed_at": "2026-04-10T17:59:47.321Z", - "duration_ms": 208069 - }, - { - "agent_id": "a740f6b71fe059953", - "agent_type": "general-purpose", - "started_at": "2026-04-10T17:56:19.313Z", - "parent_mode": "none", - "status": "completed", - "completed_at": "2026-04-10T18:00:19.760Z", - "duration_ms": 240447 - } - ], - "total_spawned": 1, - "total_completed": 2, - "total_failed": 0, - "last_updated": "2026-04-11T02:44:58.899Z" -} \ No newline at end of file diff --git a/docs/individual-memory-deployment-plan.md b/docs/individual-memory-deployment-plan.md deleted file mode 100644 index c51918223..000000000 --- a/docs/individual-memory-deployment-plan.md +++ /dev/null @@ -1,315 
+0,0 @@ -# Individual Memory Deployment — CLI Plan - -## Problem - -The CLI currently blocks deployment when no agents are defined: - -``` -// preflight.ts:83-93 -if (!projectSpec.agents || projectSpec.agents.length === 0) { - // ... only allows through if isTeardownDeploy - throw new Error( - 'No agents defined in project. Add at least one agent with "agentcore add agent" before deploying.' - ); -} -``` - -A user cannot `agentcore create --no-agent`, then `agentcore add memory`, then `agentcore deploy`. The schema already -supports top-level memories without agents, and the CDK constructs handle it — the CLI just needs to get out of the way. - -## User Flow (Target State) - -```bash -agentcore create --no-agent --name my-memory-project -cd my-memory-project -agentcore add memory -agentcore deploy -# => Deploys only memory resources, no agents -# => User can later: agentcore add agent && agentcore deploy -``` - ---- - -## Changes Required - -### 1. Update preflight validation to allow memory-only deployments - -**File:** `src/cli/operations/deploy/preflight.ts` - -The current check at line 83 rejects any project with zero agents (unless it's a teardown). Change this to allow -deployment when _any_ deployable resources exist (agents OR memories OR credentials with identity providers). - -```typescript -// OLD -if (!projectSpec.agents || projectSpec.agents.length === 0) { - // ... teardown check ... - throw new Error('No agents defined in project...'); -} - -// NEW -const hasDeployableResources = (projectSpec.agents?.length ?? 0) > 0 || (projectSpec.memories?.length ?? 0) > 0; - -if (!hasDeployableResources) { - let hasExistingStack = false; - try { - const deployedState = await configIO.readDeployedState(); - hasExistingStack = Object.keys(deployedState.targets).length > 0; - } catch { - // No deployed state file - } - if (!hasExistingStack) { - throw new Error( - 'No resources defined in project. 
Add an agent with "agentcore add agent" ' + - 'or a memory with "agentcore add memory" before deploying.' - ); - } - isTeardownDeploy = true; -} -``` - -Also skip `validateRuntimeNames()` and `validateContainerAgents()` when there are no agents (they already handle empty -arrays, but making it explicit is cleaner). - -### 2. Parse memory outputs from CloudFormation stack - -**File:** `src/cli/cloudformation/outputs.ts` - -Add a `parseMemoryOutputs` function alongside `parseAgentOutputs`: - -```typescript -export function parseMemoryOutputs(outputs: StackOutputs, memoryNames: string[]): Record { - const memories: Record = {}; - - // Map PascalCase memory names to original names - const memoryIdMap = new Map(memoryNames.map(name => [toPascalId(name), name])); - - const outputsByMemory: Record = {}; - - // Match pattern: ApplicationMemory{MemoryName}Memory{Id|Arn}Output - const outputPattern = /^ApplicationMemory(.+?)Memory(Id|Arn)Output/; - - for (const [key, value] of Object.entries(outputs)) { - const match = outputPattern.exec(key); - if (!match) continue; - - const logicalMemory = match[1]; - const outputType = match[2]; - if (!logicalMemory || !outputType) continue; - - const memoryName = memoryIdMap.get(logicalMemory) ?? logicalMemory; - outputsByMemory[memoryName] ??= {}; - - if (outputType === 'Id') { - outputsByMemory[memoryName].memoryId = value; - } else if (outputType === 'Arn') { - outputsByMemory[memoryName].memoryArn = value; - } - } - - for (const [memoryName, memoryOutputs] of Object.entries(outputsByMemory)) { - if (memoryOutputs.memoryId && memoryOutputs.memoryArn) { - memories[memoryName] = { - memoryId: memoryOutputs.memoryId, - memoryArn: memoryOutputs.memoryArn, - }; - } - } - - return memories; -} -``` - -### 3. 
Update `buildDeployedState` to include memory state - -**File:** `src/cli/cloudformation/outputs.ts` - -```typescript -export function buildDeployedState( - targetName: string, - stackName: string, - agents: Record, - existingState?: DeployedState, - identityKmsKeyArn?: string, - memories?: Record // NEW -): DeployedState { - const targetState: TargetDeployedState = { - resources: { - agents: Object.keys(agents).length > 0 ? agents : undefined, - memories: memories && Object.keys(memories).length > 0 ? memories : undefined, - stackName, - identityKmsKeyArn, - }, - }; - // ... -} -``` - -### 4. Update deploy action to parse and persist memory state - -**File:** `src/cli/commands/deploy/actions.ts` - -In `handleDeploy()`, after deployment succeeds, parse memory outputs alongside agent outputs: - -```typescript -// Get stack outputs and persist state -startStep('Persist deployment state'); -const outputs = await getStackOutputs(target.region, stackName); - -const agentNames = context.projectSpec.agents.map(a => a.name); -const agents = parseAgentOutputs(outputs, agentNames, stackName); - -const memoryNames = (context.projectSpec.memories ?? []).map(m => m.name); -const memories = parseMemoryOutputs(outputs, memoryNames); - -const existingState = await configIO.readDeployedState().catch(() => undefined); -const deployedState = buildDeployedState(target.name, stackName, agents, existingState, identityKmsKeyArn, memories); -await configIO.writeDeployedState(deployedState); -``` - -### 5. Update `nextSteps` to be context-aware - -**File:** `src/cli/commands/deploy/actions.ts` - -When only memories are deployed (no agents), `agentcore invoke` doesn't make sense. Make next steps conditional: - -```typescript -const hasAgents = context.projectSpec.agents.length > 0; -const nextSteps = hasAgents ? ['agentcore invoke', 'agentcore status'] : ['agentcore add agent', 'agentcore status']; -``` - -### 6. 
Update `agentcore status` to show memory resources - -**File:** `src/cli/commands/status/` (command handler) - -The status command should display deployed memory resources. When checking deployed state, also show memory IDs/ARNs. -This is an additive change — show memory info when `resources.memories` exists in deployed state. - -### 7. Update TUI deploy screen for memory-only feedback - -**File:** `src/cli/commands/deploy/` (TUI components) - -The TUI deploy screen should show appropriate messaging when deploying memory-only: - -- Progress steps still apply (validate, build, synth, deploy) -- Success message should mention memories deployed, not just agents -- The "invoke" suggestion should be conditional - -### 8. Update deployed-state schema (mirror CDK changes) - -**File:** `src/schema/schemas/deployed-state.ts` - -Add the same `MemoryDeployedState` schema as the CDK package (schemas are duplicated across packages per CLAUDE.md): - -```typescript -export const MemoryDeployedStateSchema = z.object({ - memoryId: z.string().min(1), - memoryArn: z.string().min(1), -}); - -export type MemoryDeployedState = z.infer; - -// Update DeployedResourceStateSchema -export const DeployedResourceStateSchema = z.object({ - agents: z.record(z.string(), AgentCoreDeployedStateSchema).optional(), - memories: z.record(z.string(), MemoryDeployedStateSchema).optional(), // NEW - mcp: McpDeployedStateSchema.optional(), - externallyManaged: ExternallyManagedStateSchema.optional(), - stackName: z.string().optional(), - identityKmsKeyArn: z.string().optional(), -}); -``` - -### 9. Update `agentcore create --no-agent` flow - -**File:** `src/cli/commands/create/action.ts` - -Currently `--no-agent` creates a project with empty arrays. This already works. But the messaging after create should -suggest `agentcore add memory` as a valid next step (not just `agentcore add agent`). - -### 10. 
Consider: Allow `agentcore add memory` to prompt for deployment - -This is optional/future — after adding a memory, the CLI could suggest `agentcore deploy` if the user has a deployment -target configured. Currently it only suggests this after `add agent`. - ---- - -## Files to Modify - -| File | Change | Effort | -| ---------------------------------------- | --------------------------------------------------------- | ------- | -| `src/cli/operations/deploy/preflight.ts` | Allow memory-only deploys | Small | -| `src/cli/cloudformation/outputs.ts` | Add `parseMemoryOutputs`, update `buildDeployedState` | Medium | -| `src/cli/commands/deploy/actions.ts` | Parse memory outputs, conditional next steps | Small | -| `src/schema/schemas/deployed-state.ts` | Add `MemoryDeployedState`, update `DeployedResourceState` | Small | -| `src/schema/index.ts` | Export new types | Trivial | -| `src/cli/commands/status/` | Show memory resources in status | Small | -| `src/cli/commands/deploy/` (TUI) | Context-aware messaging | Small | - -## Files NOT changed - -- `src/cli/operations/memory/create-memory.ts` — already works correctly -- `src/cli/operations/memory/generate-memory-files.ts` — only relevant when agents exist -- `src/cli/commands/create/` — `--no-agent` flow already works -- `src/assets/cdk/bin/cdk.ts` — CDK entry point doesn't need changes -- `src/assets/cdk/lib/cdk-stack.ts` — `AgentCoreApplication` already handles empty agents - ---- - -## Testing - -1. **E2E: Memory-only deploy** - - ```bash - agentcore create --no-agent --name memtest - cd memtest - agentcore add memory # add a short-term memory - agentcore deploy -y - # Verify: stack created, memory resources exist, deployed-state.json has memories - ``` - -2. **E2E: Memory-only then add agent** - - ```bash - # ... after memory-only deploy ... 
- agentcore add agent - agentcore deploy -y - # Verify: stack updated, both agent and memory in deployed-state.json - # Memory still accessible, no orphaned resources - ``` - -3. **Unit test: Preflight allows memory-only** - - Mock project with `agents: [], memories: [{ ... }]` - - Verify `validateProject()` does NOT throw - - Verify `isTeardownDeploy` is `false` - -4. **Unit test: Preflight still blocks empty projects** - - Mock project with `agents: [], memories: []` - - Verify `validateProject()` throws appropriate error - -5. **Unit test: parseMemoryOutputs** - - Mock CloudFormation outputs with memory patterns - - Verify correct parsing into `MemoryDeployedState` - -6. **Unit test: buildDeployedState with memories** - - Verify deployed state includes both agents and memories sections - -7. **Snapshot tests** — May need updating if CDK template assets change - ---- - -## Rollout Considerations - -- **Backwards compatibility**: Existing projects with agents + memories continue to work unchanged. The deployed-state - schema change is additive (new optional `memories` field). -- **Schema sync**: `deployed-state.ts` changes must be reflected in both `agentcore-cli` and - `agentcore-l3-cdk-constructs` packages. -- **CDK package dependency**: The CDK package needs per-memory outputs before the CLI can parse them. Ship CDK changes - first or together. - ---- - -## Complexity Assessment - -**Medium.** The core change (preflight validation) is trivial. The supporting work (output parsing, state tracking, UX -messaging) requires touching several files but each change is small and well-contained. No architectural changes needed -— the design already supports this, we just need to remove the artificial gate and add plumbing. 
diff --git a/docs/release-process.md b/docs/release-process.md deleted file mode 100644 index c7e8e3c17..000000000 --- a/docs/release-process.md +++ /dev/null @@ -1,129 +0,0 @@ -# Release Process - -This document describes the release process for both AgentCore packages. Releases are always done **CDK first, then -CLI**, since the CLI depends on `@aws/agentcore-cdk`. - -## Release Order - -1. **`@aws/agentcore-cdk`** (CDK L3 Constructs) -2. **`@aws/agentcore`** (CLI) - -## Overview - -Both packages use a GitHub Actions `workflow_dispatch` workflow with the same four-stage pipeline: - -1. **Prepare Release** — bump version, update changelog, open a PR to `main` -2. **Test and Build** — lint, typecheck, build, test on the release branch -3. **Release Approval** — manual approval gate in a GitHub Environment -4. **Publish to npm** — publish, tag, and create a GitHub Release - -The workflow must be triggered from the `main` branch. - ---- - -## CDK L3 Constructs (`@aws/agentcore-cdk`) - -**Workflow:** `agentcore-l3-cdk-constructs/.github/workflows/release.yml` - -### Inputs - -| Input | Options | Notes | -| ---------------- | ------------------------------------------------ | ----------------------------------------------- | -| `bump_type` | `alpha`, `patch`, `minor`, `major`, `prerelease` | Required | -| `changelog` | free text | Optional — auto-generates from commits if empty | -| `prerelease_tag` | e.g. `alpha`, `beta`, `rc` | Only used with `prerelease` bump type | - -### Version bumping - -Runs `npx tsx scripts/bump-version.ts ` which updates `package.json`, `package-lock.json`, and -`CHANGELOG.md`. 
- -### Pipeline details - -| Stage | Environment | Notes | -| ---------------- | -------------- | -------------------------------------------------------------------------------------------------------------- | -| Prepare Release | — | Creates `release/v` branch and PR against `main` | -| Test and Build | — | Runs lint, typecheck, build; uploads `dist/` and tarball as artifacts | -| Release Approval | `npm-approval` | Manual approval required | -| Publish to npm | `npm` | Uses `NPM_SECRET` token; checks version doesn't already exist on npm; polls npm for availability after publish | - -### Auth - -Uses token-based npm auth via the `NPM_SECRET` repository secret. - ---- - -## CLI (`@aws/agentcore`) - -**Workflow:** `agentcore-cli/.github/workflows/release.yml` - -### Inputs - -| Input | Options | Notes | -| ---------------- | ------------------------------------------------------------------- | ----------------------------------------------- | -| `bump_type` | `preview`, `preview-major`, `patch`, `minor`, `major`, `prerelease` | Required | -| `changelog` | free text | Optional — auto-generates from commits if empty | -| `prerelease_tag` | e.g. `alpha`, `beta`, `rc` | Only used with `prerelease` bump type | - -### Version bumping - -Same approach as CDK — runs `npx tsx scripts/bump-version.ts `. The CLI additionally supports `preview` and -`preview-major` bump types for the `0.x.y-preview.N.M` versioning scheme. 
- -### Pipeline details - -| Stage | Environment | Notes | -| ---------------- | ---------------------- | -------------------------------------------------------------------------------------------------------- | -| Prepare Release | — | Creates `release/v` branch and PR against `main` | -| Test and Build | — | Runs lint, typecheck, build, **unit tests**; also configures git + installs `uv` for Python test support | -| Release Approval | `npm-publish-approval` | Manual approval required | -| Publish to npm | `npm-publish` | Uses OIDC trusted publishing (no npm token needed); publishes with `--provenance --tag latest` | - -### Auth - -Uses GitHub OIDC trusted publishing — no `NPM_TOKEN` or secret needed. Requires `id-token: write` permission and npm >= -11.5.1. - ---- - -## Step-by-step: How to cut a release - -### 1. Release CDK L3 Constructs - -1. Go to **Actions > Release** in the `agentcore-l3-cdk-constructs` repo. -2. Click **Run workflow** from `main`. -3. Select `bump_type` (e.g. `alpha` for pre-GA, `patch`/`minor`/`major` for GA). -4. Optionally provide a `changelog` message. -5. Wait for the PR to be created on `release/v`. -6. Review the PR — verify CHANGELOG.md and version numbers. -7. Merge the PR to `main`. -8. Approve the deployment in the `npm-approval` environment. -9. Verify the package appears on npm: `npm view @aws/agentcore-cdk@`. - -### 2. Release CLI - -1. If the CLI depends on the new CDK version, update the dependency in `agentcore-cli/package.json` first and merge that - to `main`. -2. Go to **Actions > Release** in the `agentcore-cli` repo. -3. Click **Run workflow** from `main`. -4. Select `bump_type` (e.g. `preview` for pre-GA, `patch`/`minor`/`major` for GA). -5. Optionally provide a `changelog` message. -6. Wait for the PR to be created on `release/v`. -7. Review the PR — verify CHANGELOG.md and version numbers. -8. Merge the PR to `main`. -9. Approve the deployment in the `npm-publish-approval` environment. -10. 
Verify: `npm view @aws/agentcore@`. - ---- - -## Key differences between the two workflows - -| | CDK L3 Constructs | CLI | -| ---------------- | -------------------------------------------------- | ------------------------------------------------------------------- | -| **Package** | `@aws/agentcore-cdk` | `@aws/agentcore` | -| **npm auth** | `NPM_SECRET` token | OIDC trusted publishing | -| **Approval env** | `npm-approval` | `npm-publish-approval` | -| **Publish env** | `npm` | `npm-publish` | -| **Bump types** | `alpha`, `patch`, `minor`, `major`, `prerelease` | `preview`, `preview-major`, `patch`, `minor`, `major`, `prerelease` | -| **Extra checks** | Version existence check + npm availability polling | Installs `uv` for Python; runs unit tests in CI | -| **PR token** | `PAT_TOKEN` secret | Default `github.token` | diff --git a/package/package.json b/package/package.json deleted file mode 100644 index 0048d2aaa..000000000 --- a/package/package.json +++ /dev/null @@ -1,167 +0,0 @@ -{ - "name": "@aws/evo-pb-cli", - "version": "0.7.1-evo-pb-cli.20260408", - "description": "CLI for Amazon Bedrock AgentCore", - "license": "Apache-2.0", - "repository": { - "type": "git", - "url": "https://github.com/aws/agentcore-cli.git" - }, - "homepage": "https://github.com/aws/agentcore-cli", - "bugs": { - "url": "https://github.com/aws/agentcore-cli/issues" - }, - "keywords": [ - "aws", - "amazon", - "bedrock", - "agentcore", - "cli", - "agents", - "ai", - "cdk", - "langchain", - "langgraph", - "openai", - "anthropic", - "google-adk", - "strands" - ], - "main": "./dist/index.js", - "types": "./dist/index.d.ts", - "bin": { - "agentcore": "dist/cli/index.mjs" - }, - "exports": { - ".": { - "types": "./dist/index.d.ts", - "require": "./dist/index.js" - } - }, - "files": [ - "dist", - "scripts", - "!dist/mcp-harness" - ], - "scripts": { - "postinstall": "node scripts/check-old-cli.mjs", - "build": "npm run build:lib && npm run build:cli && npm run build:assets", - "build:schema": 
"node scripts/generate-schema.mjs && prettier --write schemas/", - "build:lib": "tsc -p tsconfig.build.json", - "build:cli": "node esbuild.config.mjs", - "build:assets": "node scripts/copy-assets.mjs", - "build:harness": "BUILD_HARNESS=1 node esbuild.config.mjs", - "cli": "npx tsx src/cli/index.ts", - "typecheck": "tsc --noEmit", - "lint": "eslint src/", - "lint:fix": "eslint src/ --fix", - "format": "prettier --write .", - "format:check": "prettier --check .", - "secrets:check": "secretlint '**/*'", - "security:audit": "npm audit --audit-level=high --omit=dev", - "clean": "node -e \"require('fs').rmSync('dist', {recursive: true, force: true})\"", - "prepare": "husky", - "test": "vitest run --project unit", - "test:all": "vitest run", - "test:watch": "vitest --project unit", - "test:integ": "vitest run --project integ", - "test:unit": "vitest run --project unit --coverage", - "test:e2e": "vitest run --project e2e", - "test:update-snapshots": "vitest run --project unit --update", - "test:tui": "npm run build:harness && vitest run --project tui", - "bundle": "node scripts/bundle.mjs" - }, - "dependencies": { - "@aws-cdk/toolkit-lib": "^1.16.0", - "@aws-sdk/client-application-signals": "^3.1003.0", - "@aws-sdk/client-bedrock": "^3.1012.0", - "@aws-sdk/client-bedrock-agent": "^3.1012.0", - "@aws-sdk/client-bedrock-agentcore": "^3.1020.0", - "@aws-sdk/client-bedrock-agentcore-control": "^3.1020.0", - "@aws-sdk/client-bedrock-runtime": "^3.893.0", - "@aws-sdk/client-cloudformation": "^3.893.0", - "@aws-sdk/client-cloudwatch-logs": "^3.893.0", - "@aws-sdk/client-iam": "^3.1025.0", - "@aws-sdk/client-resource-groups-tagging-api": "^3.893.0", - "@aws-sdk/client-s3": "^3.1012.0", - "@aws-sdk/client-sts": "^3.893.0", - "@aws-sdk/client-xray": "^3.1003.0", - "@aws-sdk/credential-providers": "^3.893.0", - "@commander-js/extra-typings": "^14.0.0", - "@smithy/shared-ini-file-loader": "^4.4.2", - "commander": "^14.0.2", - "dotenv": "^17.2.3", - "fflate": "^0.8.2", - "handlebars": 
"^4.7.8", - "ink": "^6.6.0", - "ink-link": "5.0.0", - "ink-spinner": "^5.0.0", - "js-yaml": "^4.1.1", - "react": "^19.2.3", - "yaml": "^2.8.3", - "zod": "^4.3.5" - }, - "peerDependencies": { - "aws-cdk-lib": "^2.243.0", - "constructs": "^10.0.0" - }, - "devDependencies": { - "@aws-sdk/client-cognito-identity-provider": "^3.1018.0", - "@eslint/js": "^9.39.2", - "@modelcontextprotocol/sdk": "^1.0.0", - "@secretlint/secretlint-rule-preset-recommend": "^11.3.0", - "@trivago/prettier-plugin-sort-imports": "^6.0.2", - "@types/js-yaml": "^4.0.9", - "@types/node": "^25.0.3", - "@types/react": "^19.2.7", - "@typescript-eslint/eslint-plugin": "^8.50.0", - "@typescript-eslint/parser": "^8.50.0", - "@vitest/coverage-v8": "^4.0.18", - "@xterm/headless": "^6.0.0", - "aws-cdk-lib": "^2.243.0", - "constructs": "^10.4.4", - "esbuild": "^0.27.2", - "eslint": "^9.39.4", - "eslint-config-prettier": "^10.1.8", - "eslint-import-resolver-typescript": "^4.4.4", - "eslint-plugin-import": "^2.32.0", - "eslint-plugin-react": "^7.37.5", - "eslint-plugin-react-hooks": "^7.0.1", - "eslint-plugin-react-refresh": "^0.5.2", - "eslint-plugin-security": "^4.0.0", - "husky": "^9.1.7", - "ink-testing-library": "^4.0.0", - "lint-staged": "^16.2.7", - "node-pty": "^1.1.0", - "prettier": "^3.7.4", - "secretlint": "^11.3.0", - "tsx": "^4.21.0", - "typescript": "^5", - "typescript-eslint": "^8.50.1", - "vitest": "^4.0.18" - }, - "overridesComments": { - "minimatch": "GHSA-7r86-cg39-jmmj, GHSA-23c5-xmqv-rm74: minimatch 10.0.0-10.2.2 has ReDoS vulnerabilities. Multiple transitive deps (eslint, typescript-eslint, eslint-plugin-import, eslint-plugin-react, prettier-plugin-sort-imports, aws-cdk-lib) pin older versions. Remove this override once upstream packages update their minimatch dependency to >=10.2.3.", - "fast-xml-parser": "GHSA-8gc5-j5rx-235r, GHSA-jp2q-39xq-3w4g: fast-xml-parser <=5.5.6 has entity expansion bypass (CVE-2026-33036, CVE-2026-33349). Transitive via @aws-sdk/xml-builder. 
Remove once @aws-sdk updates to fast-xml-parser >=5.5.7.", - "@aws-sdk/xml-builder": "aws/aws-sdk-js-v3#7867: @aws-sdk/xml-builder <3.972.14 does not configure maxTotalExpansions on fast-xml-parser, causing 'Entity expansion limit exceeded' on large CloudFormation responses. Remove once @aws-sdk/client-* deps are bumped past 3.972.14." - }, - "overrides": { - "minimatch": "10.2.4", - "fast-xml-parser": "5.5.7", - "@aws-sdk/xml-builder": "3.972.15" - }, - "engines": { - "node": ">=20" - }, - "lint-staged": { - "*.{ts,tsx}": [ - "eslint --max-warnings=10" - ], - "*.{ts,tsx,js,jsx,json,md,css,html,yml,yaml}": [ - "prettier --check" - ], - "*": [ - "secretlint" - ] - } -} diff --git a/scripts/check-peer-dep-compat.mjs b/scripts/check-peer-dep-compat.mjs deleted file mode 100644 index 342d6da51..000000000 --- a/scripts/check-peer-dep-compat.mjs +++ /dev/null @@ -1,161 +0,0 @@ -#!/usr/bin/env node - -/** - * Cross-package peer dependency compatibility check. - * - * Verifies that the peer dependency ranges declared by this package - * overlap with those declared by its partner package (@aws/agentcore-cdk). - * Uses semver.intersects() to detect version drift that would cause - * unresolvable install errors for customers using both packages. - * - * Exit 0 = compatible, Exit 1 = incompatible or error. 
- */ -import { readFileSync } from 'node:fs'; -import { createRequire } from 'node:module'; -import { dirname, join } from 'node:path'; -import { fileURLToPath } from 'node:url'; - -const require = createRequire(import.meta.url); -const semver = require('semver'); - -const __dirname = dirname(fileURLToPath(import.meta.url)); - -// --- Configuration --- -const PARTNER_GITHUB_REPO = 'aws/agentcore-l3-cdk-constructs'; // owner/repo -const PARTNER_NPM_PACKAGE = '@aws/agentcore-cdk'; // npm package name -const SHARED_PEER_DEPS = ['aws-cdk-lib', 'constructs']; -const MAX_MIN_VERSION_DRIFT_MAJOR = 0; // fail if minimum versions differ by more than this many majors -// --- End Configuration --- - -async function fetchPartnerPeerDeps() { - // Try GitHub API first (works for public repos and with GITHUB_TOKEN) - const githubToken = process.env.GITHUB_TOKEN; - const githubUrl = `https://api.github.com/repos/${PARTNER_GITHUB_REPO}/contents/package.json`; - - try { - const headers = { - Accept: 'application/vnd.github.v3.raw', - 'User-Agent': 'peer-dep-compat-check', - }; - if (githubToken) { - headers.Authorization = `token ${githubToken}`; - } - - const res = await fetch(githubUrl, { headers }); - if (res.ok) { - const pkg = await res.json(); - console.log(`Fetched partner peerDependencies from GitHub (${PARTNER_GITHUB_REPO})`); - return pkg.peerDependencies || {}; - } - } catch { - // fall through to npm - } - - // Fallback: npm registry - const npmUrl = `https://registry.npmjs.org/${PARTNER_NPM_PACKAGE}/latest`; - try { - const res = await fetch(npmUrl, { - headers: { Accept: 'application/json' }, - }); - if (res.ok) { - const pkg = await res.json(); - console.log(`Fetched partner peerDependencies from npm (${PARTNER_NPM_PACKAGE})`); - return pkg.peerDependencies || {}; - } - } catch { - // fall through - } - - throw new Error(`Failed to fetch partner peerDependencies from both GitHub and npm`); -} - -function readLocalPeerDeps() { - const pkgPath = join(__dirname, '..', 
'package.json'); - const pkg = JSON.parse(readFileSync(pkgPath, 'utf-8')); - return pkg.peerDependencies || {}; -} - -function checkCompatibility(localDeps, partnerDeps) { - let hasFailure = false; - - for (const dep of SHARED_PEER_DEPS) { - const localRange = localDeps[dep]; - const partnerRange = partnerDeps[dep]; - - if (!localRange) { - console.log(` SKIP: ${dep} — not in local peerDependencies`); - continue; - } - if (!partnerRange) { - console.log(` SKIP: ${dep} — not in partner peerDependencies`); - continue; - } - - console.log(`\n Checking ${dep}:`); - console.log(` Local: ${localRange}`); - console.log(` Partner: ${partnerRange}`); - - // Check range overlap - if (!semver.intersects(localRange, partnerRange)) { - console.log(` FAIL: Ranges do not overlap!`); - hasFailure = true; - continue; - } - console.log(` OK: Ranges overlap`); - - // Check minimum version drift - const localMin = semver.minVersion(localRange); - const partnerMin = semver.minVersion(partnerRange); - if (localMin && partnerMin) { - const majorDiff = Math.abs(localMin.major - partnerMin.major); - const minorDiff = Math.abs(localMin.minor - partnerMin.minor); - - if (majorDiff > MAX_MIN_VERSION_DRIFT_MAJOR) { - console.log( - ` FAIL: Minimum versions differ by ${majorDiff} major version(s) (${localMin} vs ${partnerMin})` - ); - hasFailure = true; - } else if (minorDiff > 20) { - console.log( - ` WARN: Minimum versions differ by ${minorDiff} minor version(s) (${localMin} vs ${partnerMin})` - ); - } else { - console.log(` OK: Minimum versions are close (${localMin} vs ${partnerMin})`); - } - } - } - - return !hasFailure; -} - -async function main() { - console.log('Peer Dependency Compatibility Check'); - console.log('====================================\n'); - - const localDeps = readLocalPeerDeps(); - console.log('Local peerDependencies:', JSON.stringify(localDeps, null, 2)); - - let partnerDeps; - try { - partnerDeps = await fetchPartnerPeerDeps(); - } catch (err) { - 
console.error(`\nERROR: ${err.message}`); - console.error('Cannot verify compatibility — treating as failure for safety.'); - process.exit(1); - } - console.log('Partner peerDependencies:', JSON.stringify(partnerDeps, null, 2)); - - const compatible = checkCompatibility(localDeps, partnerDeps); - - if (compatible) { - console.log('\n✅ All shared peer dependencies are compatible.'); - process.exit(0); - } else { - console.log('\n❌ Peer dependency incompatibility detected!'); - console.log('Customers installing both packages will encounter version conflicts.'); - console.log('Please align the peer dependency ranges before releasing.'); - process.exit(1); - } -} - -main(); From f7640bd745ac4b60d47bdedc11320cfe128c07fd Mon Sep 17 00:00:00 2001 From: notgitika Date: Fri, 10 Apr 2026 23:56:03 -0400 Subject: [PATCH 5/5] fix: enforce XOR between agent and custom source in online eval config --- src/cli/commands/run/command.tsx | 2 +- .../__tests__/online-eval-config.test.ts | 4 ++-- .../schemas/primitives/online-eval-config.ts | 21 ++++++++++--------- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/cli/commands/run/command.tsx b/src/cli/commands/run/command.tsx index bd7721573..c7c928187 100644 --- a/src/cli/commands/run/command.tsx +++ b/src/cli/commands/run/command.tsx @@ -101,7 +101,7 @@ export const registerRun = (program: Command) => { json?: boolean; }) => { const isArnMode = !!(cliOptions.runtimeArn && cliOptions.evaluatorArn); - const isCustomMode = !!cliOptions.customServiceName; + const isCustomMode = !!(cliOptions.customServiceName && cliOptions.customLogGroupName); const isInputMode = !!cliOptions.inputPath; if (!isArnMode && !isCustomMode && !isInputMode) { requireProject(); diff --git a/src/schema/schemas/primitives/__tests__/online-eval-config.test.ts b/src/schema/schemas/primitives/__tests__/online-eval-config.test.ts index cc855170a..d4eee1b11 100644 --- a/src/schema/schemas/primitives/__tests__/online-eval-config.test.ts +++ 
b/src/schema/schemas/primitives/__tests__/online-eval-config.test.ts @@ -118,13 +118,13 @@ describe('OnlineEvalConfigSchema', () => { expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(true); }); - it('accepts config with both agent and custom log source fields', () => { + it('rejects config with both agent and custom log source fields', () => { const config = { ...validConfig, customLogGroupName: '/custom/log-group', customServiceName: 'custom-service', }; - expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(true); + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(false); }); it('rejects config with neither agent nor custom log source fields', () => { diff --git a/src/schema/schemas/primitives/online-eval-config.ts b/src/schema/schemas/primitives/online-eval-config.ts index 68b856b84..ad8bec23c 100644 --- a/src/schema/schemas/primitives/online-eval-config.ts +++ b/src/schema/schemas/primitives/online-eval-config.ts @@ -50,24 +50,25 @@ export const OnlineEvalConfigSchema = z }) .refine( data => { - const hasAgent = data.agent !== undefined; - const hasCustom = data.customLogGroupName !== undefined && data.customServiceName !== undefined; - return hasAgent || hasCustom; + // Custom fields must be provided together + const hasLogGroup = data.customLogGroupName !== undefined; + const hasServiceName = data.customServiceName !== undefined; + return hasLogGroup === hasServiceName; }, { - message: - 'Either "agent" must be provided (for project agents) or both "customLogGroupName" and "customServiceName" (for external agents)', + message: 'Both "customLogGroupName" and "customServiceName" must be provided together', } ) .refine( data => { - // If one custom field is set, the other must also be set - const hasLogGroup = data.customLogGroupName !== undefined; - const hasServiceName = data.customServiceName !== undefined; - return hasLogGroup === hasServiceName; + const hasAgent = data.agent !== undefined; + const hasCustom = 
data.customLogGroupName !== undefined && data.customServiceName !== undefined; + // Exactly one source must be specified, not both + return (hasAgent || hasCustom) && !(hasAgent && hasCustom); }, { - message: 'Both "customLogGroupName" and "customServiceName" must be provided together', + message: + 'Specify either "agent" (for project agents) or both "customLogGroupName" and "customServiceName" (for external agents), but not both', } );