diff --git a/e2e/exec_command_agent_test.go b/e2e/exec_command_agent_test.go new file mode 100644 index 000000000..bd46a20b9 --- /dev/null +++ b/e2e/exec_command_agent_test.go @@ -0,0 +1,18 @@ +package e2e_test + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +// TestExec_CommandTargetsAgent verifies that invoking a /command that targets +// a sub-agent sends the instructions directly to that agent, bypassing the root +// agent (no transfer_task round-trip). The recorded cassette contains only a +// single request, carrying the specialist agent's system prompt and the text +// after /ask as the user message, which proves the specialist was reached directly. +func TestExec_CommandTargetsAgent(t *testing.T) { + out := runCLI(t, "run", "--exec", "testdata/command_agent.yaml", "/ask What's 2+2?") + + require.Equal(t, "SPECIALIST: 4", out) +} diff --git a/e2e/testdata/cassettes/TestExec_CommandTargetsAgent.yaml b/e2e/testdata/cassettes/TestExec_CommandTargetsAgent.yaml new file mode 100644 index 000000000..40fdba857 --- /dev/null +++ b/e2e/testdata/cassettes/TestExec_CommandTargetsAgent.yaml @@ -0,0 +1,39 @@ +--- +version: 2 +interactions: + - id: 0 + request: + proto: HTTP/1.1 + proto_major: 1 + proto_minor: 1 + content_length: 0 + host: api.openai.com + body: '{"messages":[{"content":"You are the SPECIALIST agent. Answer the user''s question directly and\nconcisely. 
Always prefix your answer with the single word: SPECIALIST.\n","role":"system"},{"content":"What''s 2+2?","role":"user"}],"model":"gpt-3.5-turbo","stream_options":{"include_usage":true},"stream":true}' + url: https://api.openai.com/v1/chat/completions + method: POST + response: + proto: HTTP/2.0 + proto_major: 2 + proto_minor: 0 + content_length: -1 + body: |+ + data: {"id":"chatcmpl-CmdAgentTest000000000000000000","object":"chat.completion.chunk","created":1768842358,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"P0QZ1Sie"} + + data: {"id":"chatcmpl-CmdAgentTest000000000000000000","object":"chat.completion.chunk","created":1768842358,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"SPECIALIST"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"cGnJBQBwX"} + + data: {"id":"chatcmpl-CmdAgentTest000000000000000000","object":"chat.completion.chunk","created":1768842358,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":":"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"qOtIXzvm"} + + data: {"id":"chatcmpl-CmdAgentTest000000000000000000","object":"chat.completion.chunk","created":1768842358,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" "},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"fPfIfp9O4"} + + data: {"id":"chatcmpl-CmdAgentTest000000000000000000","object":"chat.completion.chunk","created":1768842358,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"4"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Oh8CzMr2d"} + + data: 
{"id":"chatcmpl-CmdAgentTest000000000000000000","object":"chat.completion.chunk","created":1768842358,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"q1lX"} + + data: {"id":"chatcmpl-CmdAgentTest000000000000000000","object":"chat.completion.chunk","created":1768842358,"model":"gpt-3.5-turbo-0125","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":41,"completion_tokens":4,"total_tokens":45,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"OBW5xAJ5rP"} + + data: [DONE] + + headers: {} + status: 200 OK + code: 200 + duration: 1.612958419s diff --git a/e2e/testdata/command_agent.yaml b/e2e/testdata/command_agent.yaml new file mode 100644 index 000000000..6cf61a66d --- /dev/null +++ b/e2e/testdata/command_agent.yaml @@ -0,0 +1,20 @@ +version: "2" + +agents: + root: + model: openai/gpt-3.5-turbo + instruction: | + You are the ROOT agent. Whatever you are asked, you must refuse and + only reply with the single word: ROOT. + sub_agents: + - specialist + commands: + ask: + description: Hand the question off to the specialist sub-agent + agent: specialist + + specialist: + model: openai/gpt-3.5-turbo + instruction: | + You are the SPECIALIST agent. Answer the user's question directly and + concisely. Always prefix your answer with the single word: SPECIALIST. 
diff --git a/pkg/js/expand.go b/pkg/js/expand.go index cda94aa1e..95fc308b4 100644 --- a/pkg/js/expand.go +++ b/pkg/js/expand.go @@ -125,10 +125,12 @@ func (exp *Expander) ExpandCommands(ctx context.Context, cmds types.Commands) ty expanded := make(types.Commands, len(cmds)) for k, cmd := range cmds { - expanded[k] = types.Command{ - Description: runExpansion(vm, cmd.Description), - Instruction: runExpansion(vm, cmd.Instruction), - } + // cmd is the loop's own copy of the map value, so assigning to its fields + // leaves the caller's cmds untouched. Only the text fields are expanded; + // every other field (e.g. Agent, and any added to types.Command later) is kept. + cmd.Description = runExpansion(vm, cmd.Description) + cmd.Instruction = runExpansion(vm, cmd.Instruction) + expanded[k] = cmd } return expanded } diff --git a/pkg/js/expand_agent_test.go b/pkg/js/expand_agent_test.go new file mode 100644 index 000000000..80422d8d4 --- /dev/null +++ b/pkg/js/expand_agent_test.go @@ -0,0 +1,28 @@ +package js + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/docker/docker-agent/pkg/config/types" +) + +// TestExpandCommandsPreservesAgent verifies the agent-switch target survives +// command expansion. Dropping it makes agent-only slash commands silently run +// on the root agent instead of handing off to the named sub-agent. +func TestExpandCommandsPreservesAgent(t *testing.T) { + t.Parallel() + + env := testEnvProvider(map[string]string{}) + expander := NewJsExpander(&env) + + expanded := expander.ExpandCommands(t.Context(), types.Commands{ + "ask": { + Description: "Hand off to the specialist", + Agent: "specialist", + }, + }) + + assert.Equal(t, "specialist", expanded["ask"].Agent) +}