diff --git a/src/scenarios/index.ts b/src/scenarios/index.ts index 0e2191a..e82a16c 100644 --- a/src/scenarios/index.ts +++ b/src/scenarios/index.ts @@ -63,6 +63,16 @@ import { import { DNSRebindingProtectionScenario } from './server/dns-rebinding'; +import { TasksLifecycleScenario } from './server/tasks/lifecycle'; +import { TasksCapabilityNegotiationScenario } from './server/tasks/capability'; +import { TasksWireFieldsScenario } from './server/tasks/wire-fields'; +import { TasksRequestStateScenario } from './server/tasks/request-state'; +import { TasksMRTRInputScenario } from './server/tasks/mrtr-input'; +import { TasksRequestHeadersScenario } from './server/tasks/headers'; +import { TasksDispatchScenario } from './server/tasks/dispatch'; +import { TasksStatusNotificationsScenario } from './server/tasks/notifications'; +import { MrtrEphemeralFlowScenario } from './server/mrtr/ephemeral-flow'; + import { authScenariosList, backcompatScenariosList, @@ -81,7 +91,28 @@ const pendingClientScenariosList: ClientScenario[] = [ // On hold until server-side SSE improvements are made // https://github.com/modelcontextprotocol/typescript-sdk/pull/1129 - new ServerSSEPollingScenario() + new ServerSSEPollingScenario(), + + // SEP-2663 Tasks extension lifecycle. + // The SEP is still in draft (see PR 2663) and the everything-server + // does not yet implement the io.modelcontextprotocol/tasks extension, + // so all-scenarios.test.ts cannot exercise this against the default + // fixture. Active runs target a SEP-2663-conformant server via the + // dedicated tasks/lifecycle.test.ts harness. + new TasksLifecycleScenario(), + new TasksCapabilityNegotiationScenario(), + new TasksWireFieldsScenario(), + new TasksRequestStateScenario(), + new TasksMRTRInputScenario(), + new TasksRequestHeadersScenario(), + new TasksDispatchScenario(), + new TasksStatusNotificationsScenario(), + + // SEP-2322 MRTR (ephemeral IncompleteResult flow). 
+ // Targets a different fixture than tasks scenarios; the dedicated + // mrtr/all-scenarios.test.ts runner points at an MRTR-conformant + // server via MRTR_SERVER_URL / MRTR_SERVER_CMD. + new MrtrEphemeralFlowScenario() ]; // All client scenarios @@ -139,7 +170,26 @@ const allClientScenariosList: ClientScenario[] = [ new PromptsGetWithImageScenario(), // Security scenarios - new DNSRebindingProtectionScenario() + new DNSRebindingProtectionScenario(), + + // SEP-2663 Tasks extension (draft). + // Listed here so the CLI can find it by name and so the active/pending + // filter sees it; pendingClientScenariosList below excludes it from + // automatic runs against the everything-server (which doesn't implement + // io.modelcontextprotocol/tasks yet). + new TasksLifecycleScenario(), + new TasksCapabilityNegotiationScenario(), + new TasksWireFieldsScenario(), + new TasksRequestStateScenario(), + new TasksMRTRInputScenario(), + new TasksRequestHeadersScenario(), + new TasksDispatchScenario(), + new TasksStatusNotificationsScenario(), + + // SEP-2322 MRTR (ephemeral IncompleteResult flow). Targets a + // dedicated MRTR fixture — out of scope for the default + // everything-server until SEP-2322 lands there. + new MrtrEphemeralFlowScenario() ]; // Active client scenarios (excludes pending) diff --git a/src/scenarios/server/_shared/test-runner.ts b/src/scenarios/server/_shared/test-runner.ts new file mode 100644 index 0000000..5eb044b --- /dev/null +++ b/src/scenarios/server/_shared/test-runner.ts @@ -0,0 +1,56 @@ +/** + * Test-runner utilities for server-conformance scenarios. + * + * Used by `*.test.ts` runner files that auto-spawn a fixture binary + * before running scenarios. These helpers are language-agnostic and + * harness-only — they don't touch MCP protocol, so they don't belong + * in the SDK. + * + * Single responsibility today: TCP readiness polling. 
import { connect } from 'net';

/** Upper bound for a single TCP connect attempt. */
const CONNECT_ATTEMPT_TIMEOUT_MS = 1_000;

/** Pause between failed connect attempts. */
const RETRY_DELAY_MS = 200;

/**
 * Open one TCP connection to `host:port` and close it again.
 *
 * Resolves as soon as the connection is established; rejects on a
 * socket error or when `attemptTimeoutMs` passes without activity.
 */
function tryConnect(
  host: string,
  port: number,
  attemptTimeoutMs: number
): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    const socket = connect({ host, port });
    socket.setTimeout(attemptTimeoutMs);
    socket.once('connect', () => {
      socket.end();
      resolve();
    });
    // `on` rather than `once`: a late error after a successful connect
    // must still have a listener, otherwise Node treats it as uncaught.
    socket.on('error', (err) => {
      socket.destroy();
      reject(err);
    });
    socket.once('timeout', () => {
      socket.destroy();
      reject(new Error('connect timeout'));
    });
  });
}

/**
 * Poll the host/port of the given URL until a TCP connection succeeds
 * or the timeout elapses. Language-agnostic readiness check — works
 * for any server that binds before serving requests.
 *
 * At least one connection attempt is always made, even when
 * `timeoutMs` is zero or negative, so the rejection always carries a
 * real "last error". Per-attempt timeouts and back-off pauses are
 * clamped to the time remaining so the call does not overrun the
 * deadline by a full attempt.
 *
 * @param url - URL whose hostname and port are probed. The port
 *   defaults to 443 for `https:` and 80 otherwise.
 * @param timeoutMs - Overall budget in milliseconds.
 * @returns Resolves once a TCP connection has been accepted.
 * @throws Error when no connection was accepted within the budget; the
 *   message includes the last connection error.
 */
export async function waitForServerReady(
  url: string,
  timeoutMs: number
): Promise<void> {
  const target = new URL(url);
  const port = Number.parseInt(
    target.port || (target.protocol === 'https:' ? '443' : '80'),
    10
  );
  // WHATWG URL keeps the brackets on IPv6 literals ("[::1]"); net.connect
  // expects the bare address.
  const host =
    target.hostname.startsWith('[') && target.hostname.endsWith(']')
      ? target.hostname.slice(1, -1)
      : target.hostname;
  const deadline = Date.now() + timeoutMs;
  let lastErr: Error | null = null;

  do {
    const remaining = deadline - Date.now();
    try {
      await tryConnect(
        host,
        port,
        Math.min(CONNECT_ATTEMPT_TIMEOUT_MS, Math.max(remaining, 1))
      );
      return;
    } catch (err) {
      lastErr = err instanceof Error ? err : new Error(String(err));
    }
    const pause = Math.min(RETRY_DELAY_MS, deadline - Date.now());
    if (pause > 0) {
      await new Promise<void>((r) => setTimeout(r, pause));
    }
  } while (Date.now() < deadline);

  throw new Error(
    `${host}:${port} did not accept TCP connections (last: ${lastErr?.message ?? 'unknown'})`
  );
}
/**
 * ISO-8601 timestamp prefix (YYYY-MM-DDThh:mm:ss). Tolerant about
 * the timezone tail (`Z`, `+00:00`, `+0000`) and sub-second precision —
 * matches what real servers emit (Go `time.RFC3339Nano`,
 * Python `datetime.isoformat()`, JavaScript `toISOString()`).
 *
 * The fields are range-checked (month 01-12, day 01-31, hour 00-23,
 * minute 00-59, second 00-60 to allow a leap second) so values such as
 * `2026-99-99T99:99:99` are rejected. Day-of-month is not validated
 * against the specific month. Everything after the seconds field is
 * still unconstrained.
 *
 * Why a regex over `Date.parse` / `new Date(s).toISOString() === s` /
 * `Temporal.Instant.from`:
 *   - `Date.parse` accepts RFC-2822, "May 4 2026", and other
 *     non-ISO strings — too permissive.
 *   - `new Date(s).toISOString() === s` is too strict — rejects
 *     valid `+00:00`-style offsets that don't survive the canonical
 *     `Z` round-trip.
 *   - `Temporal.Instant.from` is Node 24+ experimental.
 *
 * Swap this constant for a stdlib validator if/when one becomes
 * broadly available.
 */
export const ISO_8601_PATTERN =
  /^\d{4}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])T(?:[01]\d|2[0-3]):[0-5]\d:(?:[0-5]\d|60)/;

/**
 * Returns true when the input is a string matching the ISO-8601 prefix.
 *
 * Declared as a type guard so callers can use the value as a `string`
 * after a successful check.
 *
 * @param s - Any value taken off the wire.
 */
export function isIso8601(s: unknown): s is string {
  return typeof s === 'string' && ISO_8601_PATTERN.test(s);
}
+ +## Specs covered + +| SEP | What it adds | Where it shows up | +| -------- | ---------------------------------------------------------------------------------------------------------------- | ----------------------------- | +| SEP-2322 | Ephemeral MRTR — `resultType` discriminator, `inputRequests` / `inputResponses` keyed maps, `requestState` token | every check | +| SEP-2663 | MRTR → Tasks composition (final round returns `CreateTaskResult`) | mrtr-08 (SKIPPED — see below) | + +## ClientScenario classes + +### `mrtr-ephemeral-flow` (`ephemeral-flow.ts`) + +A single scenario covering the full ephemeral MRTR contract — per the +AGENTS.md "fewer scenarios, more checks" rule. A server that +implemented elicitation round-trips but not sampling round-trips would +be incoherent, so they bundle. + +| Check | What it tests | +| ---------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- | +| `mrtr-basic-elicitation-round-trip` | Round 1 returns `IncompleteResult` with `elicitation/create`; round 2 completes with the answer reflected | +| `mrtr-sampling-round-trip` | Same flow with `sampling/createMessage` | +| `mrtr-roots-list-round-trip` | Same flow with `roots/list` | +| `mrtr-request-state-round-trip` | When server emits `requestState`, it's a non-empty string and the server validates the echo | +| `mrtr-multiple-input-requests-one-round` | A single `IncompleteResult` MAY carry inputRequests for `elicitation/create` + `sampling/createMessage` + `roots/list` together | +| `mrtr-multi-round-flow` | A handler MAY take 2+ rounds; each round mints a fresh `requestState`; final result reflects answers from every round | +| `mrtr-wrong-input-key-rerequests` | When client sends a wrong `inputResponses` key, server SHOULD re-request via `IncompleteResult` rather than erroring | +| `mrtr-tasks-composition` | **SKIPPED** — see "Open issues" below | + +## Required 
server fixtures + +The fixture server MUST register these tools: + +| Tool | Behavior | +| ---------------------------------------- | ------------------------------------------------------------------------------------------- | +| `test_tool_with_elicitation` | One `elicitation/create` round, completes with answer reflected | +| `test_incomplete_result_sampling` | One `sampling/createMessage` round | +| `test_incomplete_result_list_roots` | One `roots/list` round | +| `test_incomplete_result_request_state` | Exercises `requestState` validation; final result includes `state-ok` to confirm validation | +| `test_incomplete_result_multiple_inputs` | Emits 3+ inputRequests of different methods in one round | +| `test_incomplete_result_multi_round` | Drives 2+ MRTR rounds, final result references every answer | +| `test_incomplete_result_elicitation` | Emits inputRequest for `user_name`; server re-requests on wrong-key responses | + +The fixture can be implemented in any language; one example reference +implementation lives at +[`panyam/mcpkit/examples/mrtr`](https://github.com/panyam/mcpkit/tree/main/examples/mrtr). + +## Running + +```bash +# Against an already-running server +MRTR_SERVER_URL=http://localhost:8080/mcp \ + npx vitest run src/scenarios/server/mrtr/all-scenarios.test.ts + +# Auto-spawn a fixture in beforeAll +MRTR_SERVER_URL=http://localhost:18093/mcp \ +MRTR_SERVER_CMD="/path/to/mrtr-server --port 18093" \ + npx vitest run src/scenarios/server/mrtr/all-scenarios.test.ts +``` + +## Open issues + +### `mrtr-tasks-composition` deferred + +SEP-2663 commit `451f5e1` (Apr 30) made the MRTR → Tasks composition +flow normative: a `tools/call` MAY exchange `IncompleteResult` rounds +to gather input, then return `CreateTaskResult` to go async on a +subsequent round. Two blockers prevent enabling the check today: + +1. 
**Spec watch — discriminator value.** SEP-2322 (MRTR base) and + SEP-2663 (Tasks Extension) currently disagree on the wire value for + the "needs more input" discriminator: SEP-2322's draft uses + `"input_required"`, SEP-2663's draft uses `"incomplete"`. Awaiting + alignment between the SEP authors. The current literal lives in + `MRTR_INCOMPLETE_RESULT_TYPE` (helpers.ts) so it's a one-line flip + when the spec converges. + +2. **Reference-impl gap.** The natural server-side implementation + pattern for tasks (mint task up-front, run handler in a goroutine / + async task) means the handler's `IncompleteResult` signal isn't + visible to the middleware in time — by the time the handler returns + `IsIncomplete`, the `CreateTaskResult` is already on the wire. SDKs + in any language need an inverted middleware pattern that runs the + first round synchronously and only spins up the task once the + handler signals async-promotion. + ([panyam/mcpkit issue 347](https://github.com/panyam/mcpkit/issues/347) + tracks this for one example impl; SDKs in any language hit the + same architectural choice.) + +The check is registered with `status: 'SKIPPED'` so it's discoverable +but doesn't fail conformance runs. When both blockers resolve, remove +the SKIPPED short-circuit in `ephemeral-flow.ts` Check 8. + +## Design notes + +### Why the MRTR scenarios share helpers with `tasks/` + +`MRTR_INCOMPLETE_RESULT_TYPE`, the result-type predicates +(`isIncompleteResult`, `isCompleteResult`), and the elicitation/sampling/ +roots mocks live in `mrtr/helpers.ts`. The raw-fetch primitives +(`initRawSession`, `rawRequest`) are imported from the sibling +`../tasks/helpers` because both scenario sets share the same wire-shape +problem (SDK Zod schemas strip extension fields). When the upstream +SDK gains schemas for SEP-2322 / SEP-2663 shapes, those import paths +collapse back into the SDK. 
diff --git a/src/scenarios/server/mrtr/all-scenarios.test.ts b/src/scenarios/server/mrtr/all-scenarios.test.ts new file mode 100644 index 0000000..1e8154a --- /dev/null +++ b/src/scenarios/server/mrtr/all-scenarios.test.ts @@ -0,0 +1,115 @@ +/** + * SEP-2322 MRTR test runner. + * + * Iterates the MRTR scenario classes against a SEP-2322-conformant + * server. Configuration is brand-neutral and language-agnostic: + * + * 1. Point at an already-running server: + * MRTR_SERVER_URL=http://localhost:8080/mcp npm test -- mrtr/all-scenarios.test.ts + * + * 2. Auto-spawn a fixture before tests (any language): + * MRTR_SERVER_URL=http://localhost:18093/mcp \ + * MRTR_SERVER_CMD="/path/to/server --port 18093" \ + * npm test -- mrtr/all-scenarios.test.ts + * + * If MRTR_SERVER_URL is unset the suite is skipped — keeping CI runs + * against the everything-server green. + * + * The fixture server can be implemented in any language as long as it + * exposes a SEP-2322 conformant Streamable HTTP MCP endpoint. Anyone is + * free to bring their own; one example reference implementation lives + * at https://github.com/panyam/mcpkit/tree/main/examples/mrtr. + */ + +import { spawn, ChildProcess } from 'child_process'; +import { describe, it, expect, beforeAll, afterAll } from 'vitest'; +import { MrtrEphemeralFlowScenario } from './ephemeral-flow'; +import { waitForServerReady } from '../_shared/test-runner'; + +const SERVER_URL = process.env.MRTR_SERVER_URL; +const SERVER_CMD = process.env.MRTR_SERVER_CMD; +const SERVER_STARTUP_TIMEOUT_MS = 15_000; +const SHOULD_SPAWN = Boolean(SERVER_URL && SERVER_CMD); +const HAVE_TARGET = Boolean(SERVER_URL); + +const MRTR_SCENARIOS = [new MrtrEphemeralFlowScenario()]; + +const describeIfTarget = HAVE_TARGET ? 
const describeIfTarget = HAVE_TARGET ? describe : describe.skip;

/**
 * True while the child has neither exited nor been terminated by a
 * signal. `ChildProcess.killed` is not usable for this: it only records
 * that a signal was sent, not that the process is gone.
 */
function isRunning(child: ChildProcess | null): child is ChildProcess {
  return child !== null && child.exitCode === null && child.signalCode === null;
}

describeIfTarget('SEP-2322 MRTR — server conformance', () => {
  let serverProcess: ChildProcess | null = null;

  // Optionally spawn the fixture (MRTR_SERVER_CMD) and wait until its
  // TCP port accepts connections before any scenario runs.
  beforeAll(async () => {
    if (!SHOULD_SPAWN) return;

    const child = spawn('sh', ['-c', SERVER_CMD!], {
      stdio: ['ignore', 'pipe', 'pipe'],
      detached: false
    });
    serverProcess = child;

    // Buffer fixture output so failures can show what the server printed.
    let stdoutBuf = '';
    let stderrBuf = '';
    child.stdout?.on('data', (b) => {
      stdoutBuf += b.toString();
    });
    child.stderr?.on('data', (b) => {
      stderrBuf += b.toString();
    });

    // Without an 'error' listener a failed spawn surfaces as an uncaught
    // exception that takes the whole test process down.
    child.on('error', (err) => {
      console.error(`mrtr fixture could not be spawned: ${err.message}`);
    });

    child.on('exit', (code) => {
      if (code !== null && code !== 0) {
        console.error(
          `mrtr fixture exited unexpectedly with code ${code}.\nSTDOUT: ${stdoutBuf}\nSTDERR: ${stderrBuf}`
        );
      }
    });

    try {
      await waitForServerReady(SERVER_URL!, SERVER_STARTUP_TIMEOUT_MS);
    } catch (err) {
      if (isRunning(child)) {
        child.kill('SIGKILL');
      }
      const reason = err instanceof Error ? err.message : String(err);
      throw new Error(
        `mrtr fixture did not become reachable within ${SERVER_STARTUP_TIMEOUT_MS}ms: ${reason}\nSTDOUT: ${stdoutBuf}\nSTDERR: ${stderrBuf}`
      );
    }
  }, SERVER_STARTUP_TIMEOUT_MS + 5_000);

  // Stop the spawned fixture: SIGTERM first, SIGKILL if it has not
  // exited after 3 s.
  afterAll(async () => {
    const child = serverProcess;
    serverProcess = null;
    // Nothing to do when we did not spawn, or the fixture already exited.
    if (!SHOULD_SPAWN || !isRunning(child)) return;

    await new Promise<void>((resolve) => {
      const timer = setTimeout(() => {
        // Escalate based on actual liveness; `killed` is already true
        // here because SIGTERM was sent.
        if (isRunning(child)) {
          child.kill('SIGKILL');
        }
        resolve();
      }, 3_000);
      child.once('exit', () => {
        clearTimeout(timer);
        resolve();
      });
      child.kill('SIGTERM');
    });
  });

  for (const scenario of MRTR_SCENARIOS) {
    it(`${scenario.name} — all checks succeed against fixture`, async () => {
      const checks = await scenario.run(SERVER_URL!);
      expect(checks.length).toBeGreaterThan(0);
      // WARNING counts as a failure here: the fixture is expected to be
      // fully conformant. SKIPPED checks are ignored.
      const failures = checks.filter(
        (c) => c.status === 'FAILURE' || c.status === 'WARNING'
      );
      if (failures.length > 0) {
        const detail = failures
          .map((c) => `  - ${c.id}: ${c.errorMessage ?? '(no message)'}`)
          .join('\n');
        throw new Error(
          `${failures.length}/${checks.length} checks failed:\n${detail}`
        );
      }
    });
  }
});
/**
 * SEP-2322 MRTR ephemeral IncompleteResult flow.
 *
 * Tests the multi-round-trip-request contract end-to-end against any
 * server that implements SEP-2322's ephemeral path: tools/call returns
 * `IncompleteResult` to gather input, the client retries the SAME
 * tools/call with `inputResponses` (and echoed `requestState`), and
 * the server eventually returns a normal `ToolResult`. No task
 * envelope, no separate methods.
 *
 * Required server fixtures (tools/list output must include all):
 *   - test_tool_with_elicitation             — single elicitation/create round
 *   - test_incomplete_result_sampling        — single sampling/createMessage round
 *   - test_incomplete_result_list_roots      — single roots/list round
 *   - test_incomplete_result_request_state   — exercises requestState validation
 *   - test_incomplete_result_multiple_inputs — emits 3+ inputRequests in one round
 *   - test_incomplete_result_multi_round     — drives 2+ MRTR rounds
 *   - test_incomplete_result_elicitation     — emits inputRequest for "user_name";
 *                                              server re-requests on wrong key
 */

import {
  ClientScenario,
  ConformanceCheck,
  ScenarioSpecTag,
  DRAFT_PROTOCOL_VERSION
} from '../../../types';
import { initRawSession, rawRequest } from '../tasks/helpers';
import {
  MRTR_INCOMPLETE_RESULT_TYPE,
  SEP_2322_REF,
  errMsg,
  failureCheck,
  isCompleteResult,
  isIncompleteResult,
  mockElicitResponse,
  mockListRootsResponse,
  mockSamplingResponse
} from './helpers';

/** Identity of one conformance check plus the tool it drives. */
interface RoundTripSpec {
  id: string;
  name: string;
  description: string;
  tool: string;
  method: string;
  response: unknown;
}

/** Send a tools/call for `tool` with empty arguments plus any MRTR retry fields. */
function callTool(
  serverUrl: string,
  sessionId: string,
  tool: string,
  extra: Record<string, unknown> = {}
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- raw wire payload, shape is what is under test
): Promise<any> {
  return rawRequest(
    serverUrl,
    'tools/call',
    { name: tool, arguments: {}, ...extra },
    { sessionId }
  );
}

/** Echo `requestState` only when the previous round actually emitted one. */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function echoState(prev: any): Record<string, unknown> {
  return prev?.requestState !== undefined
    ? { requestState: prev.requestState }
    : {};
}

/** First key of `inputRequests`, or undefined when the map is missing or empty. */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function firstInputKey(result: any): string | undefined {
  const requests = result?.inputRequests;
  return requests !== null && typeof requests === 'object'
    ? Object.keys(requests)[0]
    : undefined;
}

/** Concatenate every string `text` field found in `result.content`. */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function textOf(result: any): string {
  const content: unknown[] = Array.isArray(result?.content)
    ? result.content
    : [];
  return content
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    .map((c: any) => (typeof c?.text === 'string' ? c.text : ''))
    .join('\n');
}

/** Build the SUCCESS/FAILURE check record from the collected error list. */
function resultCheck(
  id: string,
  name: string,
  description: string,
  errs: string[]
): ConformanceCheck {
  return {
    id,
    name,
    description,
    status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
    timestamp: new Date().toISOString(),
    errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
    specReferences: [SEP_2322_REF]
  };
}

const NO_INPUT_REQUESTS =
  'IncompleteResult MUST carry at least one inputRequests entry';

/**
 * Conformance scenario for the SEP-2322 ephemeral MRTR flow.
 *
 * `run` opens one raw session and executes eight checks in sequence
 * against it. A thrown request error is recorded as a failure of the
 * check it occurred in and does not stop later checks.
 */
export class MrtrEphemeralFlowScenario implements ClientScenario {
  name = 'mrtr-ephemeral-flow';
  // MRTR is in draft alongside SEP-2322; tagged 'extension' because it
  // introduces an ephemeral resultType discriminator that's not on the
  // dated-spec timeline yet.
  specVersions: ScenarioSpecTag[] = ['extension', DRAFT_PROTOCOL_VERSION];
  description = `Test SEP-2322 ephemeral MRTR (Multi Round-Trip Request) flow.

**Server Implementation Requirements:**

Every \`tools/call\` response in the MRTR contract is one of:
- \`resultType:"${MRTR_INCOMPLETE_RESULT_TYPE}"\` — server is asking for
  more input; carries an \`inputRequests\` map keyed by server-minted
  opaque ids and (optionally) a \`requestState\` token to echo on the
  next round.
- \`resultType:"complete"\` (or absent — current SDKs may strip the
  discriminator on responses without one) — the tools/call has finished;
  the body is a normal \`ToolResult\` with \`content[]\`.

**Round-trip rules (SEP-2322):**
- Round 1 with no \`inputResponses\` MUST return \`IncompleteResult\`
  with \`inputRequests\`.
- The client retries the SAME tools/call (same name + arguments) with
  \`inputResponses\` keyed against the previously-emitted ids, plus the
  echoed \`requestState\` if one was provided.
- The server MUST validate the echoed \`requestState\` and complete on
  the next round.

**Multi-method support:**
- A single \`IncompleteResult\` can carry \`inputRequests\` for
  \`elicitation/create\`, \`sampling/createMessage\`, and \`roots/list\`
  in any combination.

**Multi-round + state accumulation:**
- A handler MAY take more than two rounds. Each MRTR round mints a
  fresh \`requestState\`; the prior token MUST NOT be reused. Answers
  from prior rounds MUST be available to the handler on the final
  round (server forwards them via \`requestState\`).

**Wrong-key tolerance:**
- When a client retries with an \`inputResponses\` key the server did
  not emit, the server SHOULD re-request via \`IncompleteResult\`
  rather than erroring. (The spec is soft here; this scenario asserts
  the re-request path.)`;

  /**
   * Run every MRTR check against `serverUrl`.
   *
   * @param serverUrl - MCP endpoint of the server under test.
   * @returns One record per check. When the initialize handshake fails,
   *   only a single `mrtr-session-bootstrap` failure is returned.
   */
  async run(serverUrl: string): Promise<ConformanceCheck[]> {
    const checks: ConformanceCheck[] = [];

    let sessionId: string;
    try {
      ({ sessionId } = await initRawSession(serverUrl, {
        capabilities: {
          elicitation: {},
          sampling: {},
          roots: {}
        }
      }));
    } catch (error) {
      checks.push({
        id: 'mrtr-session-bootstrap',
        name: 'MrtrSessionBootstrap',
        description:
          'Initialize handshake declaring elicitation/sampling/roots capabilities succeeds',
        status: 'FAILURE',
        timestamp: new Date().toISOString(),
        errorMessage: `Failed to initialize: ${errMsg(error)}`,
        specReferences: [SEP_2322_REF]
      });
      return checks;
    }

    // Check 1: basic elicitation round-trip.
    {
      const id = 'mrtr-basic-elicitation-round-trip';
      const name = 'MrtrBasicElicitationRoundTrip';
      const description =
        'tools/call returns IncompleteResult on round 1 (elicitation/create); completes on round 2 with the answer reflected in the result';
      const tool = 'test_tool_with_elicitation';
      try {
        const r1 = await callTool(serverUrl, sessionId, tool);
        const errs: string[] = [];
        if (!isIncompleteResult(r1)) {
          errs.push(
            `round 1 MUST be IncompleteResult; got ${JSON.stringify(r1)}`
          );
        }
        if (r1?.resultType !== MRTR_INCOMPLETE_RESULT_TYPE) {
          errs.push(
            `resultType MUST be "${MRTR_INCOMPLETE_RESULT_TYPE}"; got ${JSON.stringify(r1?.resultType)}`
          );
        }
        const request = r1?.inputRequests?.user_name;
        if (!request) {
          errs.push(
            'IncompleteResult MUST carry inputRequests with the "user_name" key'
          );
        } else if (request.method !== 'elicitation/create') {
          errs.push(
            `inputRequest method MUST be "elicitation/create"; got ${JSON.stringify(request.method)}`
          );
        }

        // Round 2 is attempted even after round-1 errors so the report
        // shows how the server reacts to the retry.
        const r2 = await callTool(serverUrl, sessionId, tool, {
          inputResponses: { user_name: mockElicitResponse({ name: 'Alice' }) },
          ...echoState(r1)
        });
        if (!isCompleteResult(r2)) {
          errs.push(`round 2 MUST be complete; got ${JSON.stringify(r2)}`);
        }
        if (!/Alice/.test(textOf(r2))) {
          errs.push(
            'response text SHOULD reference the answered name ("Alice")'
          );
        }
        checks.push(resultCheck(id, name, description, errs));
      } catch (error) {
        checks.push(failureCheck(id, name, description, error));
      }
    }

    // Check 2: sampling round-trip.
    checks.push(
      await this.singleMethodRoundTrip(serverUrl, sessionId, {
        id: 'mrtr-sampling-round-trip',
        name: 'MrtrSamplingRoundTrip',
        description:
          'IncompleteResult with sampling/createMessage round-trips through the inputResponses retry',
        tool: 'test_incomplete_result_sampling',
        method: 'sampling/createMessage',
        response: mockSamplingResponse('Paris')
      })
    );

    // Check 3: roots/list round-trip.
    checks.push(
      await this.singleMethodRoundTrip(serverUrl, sessionId, {
        id: 'mrtr-roots-list-round-trip',
        name: 'MrtrRootsListRoundTrip',
        description:
          'IncompleteResult with roots/list round-trips through the inputResponses retry',
        tool: 'test_incomplete_result_list_roots',
        method: 'roots/list',
        response: mockListRootsResponse()
      })
    );

    // Check 4: requestState round-trip validation.
    {
      const id = 'mrtr-request-state-round-trip';
      const name = 'MrtrRequestStateRoundTrip';
      const description =
        'When server emits requestState on round 1, it MUST be a non-empty string and the server MUST validate the echo on round 2';
      const tool = 'test_incomplete_result_request_state';
      try {
        const r1 = await callTool(serverUrl, sessionId, tool);
        const errs: string[] = [];
        if (!isIncompleteResult(r1)) {
          errs.push('round 1 MUST be IncompleteResult');
        }
        if (typeof r1?.requestState !== 'string') {
          errs.push(
            `requestState MUST be a string when emitted; got ${typeof r1?.requestState}`
          );
        } else if (r1.requestState.length === 0) {
          errs.push(
            'requestState MUST be non-empty when emitted (omit instead of "")'
          );
        }
        const key = firstInputKey(r1);
        if (key === undefined) {
          // Without a key round 2 cannot be driven; report it rather than
          // letting the check pass without exercising the echo.
          errs.push(NO_INPUT_REQUESTS);
        } else {
          const r2 = await callTool(serverUrl, sessionId, tool, {
            inputResponses: { [key]: mockElicitResponse({ ok: true }) },
            requestState: r1.requestState
          });
          if (!isCompleteResult(r2)) {
            errs.push('round 2 MUST be complete after valid requestState echo');
          }
          if (!/state-ok/.test(textOf(r2))) {
            errs.push(
              'final response SHOULD include "state-ok" to confirm the server validated requestState'
            );
          }
        }
        checks.push(resultCheck(id, name, description, errs));
      } catch (error) {
        checks.push(failureCheck(id, name, description, error));
      }
    }

    // Check 5: multiple inputRequests of different methods in one round.
    {
      const id = 'mrtr-multiple-input-requests-one-round';
      const name = 'MrtrMultipleInputRequestsOneRound';
      const description =
        'A single IncompleteResult MAY carry inputRequests for elicitation/create + sampling/createMessage + roots/list together';
      const tool = 'test_incomplete_result_multiple_inputs';
      try {
        const r1 = await callTool(serverUrl, sessionId, tool);
        const errs: string[] = [];
        if (!isIncompleteResult(r1)) {
          errs.push('round 1 MUST be IncompleteResult');
        } else {
          // eslint-disable-next-line @typescript-eslint/no-explicit-any
          const entries: Array<[string, any]> = Object.entries(
            r1.inputRequests ?? {}
          );
          if (entries.length < 3) {
            errs.push(
              `expected at least 3 inputRequests in one round; got ${entries.length}`
            );
          }
          const methods = new Set(entries.map(([, req]) => req?.method));
          for (const expected of [
            'elicitation/create',
            'sampling/createMessage',
            'roots/list'
          ]) {
            if (!methods.has(expected)) {
              errs.push(`inputRequests MUST include method "${expected}"`);
            }
          }
          // Answer every request the server emitted, by method.
          const inputResponses: Record<string, unknown> = {};
          for (const [key, req] of entries) {
            if (req?.method === 'elicitation/create') {
              inputResponses[key] = mockElicitResponse({ name: 'Alice' });
            } else if (req?.method === 'sampling/createMessage') {
              inputResponses[key] = mockSamplingResponse('hi');
            } else if (req?.method === 'roots/list') {
              inputResponses[key] = mockListRootsResponse();
            }
          }
          const r2 = await callTool(serverUrl, sessionId, tool, {
            inputResponses,
            ...echoState(r1)
          });
          if (!isCompleteResult(r2)) {
            errs.push('round 2 MUST be complete with all three answers');
          }
        }
        checks.push(resultCheck(id, name, description, errs));
      } catch (error) {
        checks.push(failureCheck(id, name, description, error));
      }
    }

    // Check 6: multi-round flow accumulates answers via requestState.
    {
      const id = 'mrtr-multi-round-flow';
      const name = 'MrtrMultiRoundFlow';
      const description =
        'A handler may take 2+ MRTR rounds; each round mints a fresh requestState; final result MUST reflect answers from every round';
      const tool = 'test_incomplete_result_multi_round';
      try {
        const r1 = await callTool(serverUrl, sessionId, tool);
        const errs: string[] = [];
        if (!isIncompleteResult(r1)) {
          errs.push('round 1 MUST be IncompleteResult');
        }
        if (!r1?.requestState) {
          errs.push('round 1 MUST mint requestState for multi-round flow');
        }
        const k1 = firstInputKey(r1);
        if (k1 === undefined) {
          // Stop here: retrying with an "undefined" key would only produce
          // misleading follow-on errors.
          errs.push(`round 1: ${NO_INPUT_REQUESTS}`);
        } else {
          const r2 = await callTool(serverUrl, sessionId, tool, {
            inputResponses: { [k1]: mockElicitResponse({ name: 'Alice' }) },
            requestState: r1.requestState
          });
          if (!isIncompleteResult(r2)) {
            errs.push(
              'round 2 MUST still be IncompleteResult (asks for step2)'
            );
          }
          if (!r2?.requestState) {
            errs.push('round 2 MUST mint a fresh requestState');
          } else if (r2.requestState === r1.requestState) {
            errs.push(
              'round 2 requestState MUST differ from round 1 (each round mints a fresh token)'
            );
          }
          const k2 = firstInputKey(r2);
          if (k2 === undefined) {
            errs.push(`round 2: ${NO_INPUT_REQUESTS}`);
          } else {
            const r3 = await callTool(serverUrl, sessionId, tool, {
              inputResponses: { [k2]: mockElicitResponse({ color: 'blue' }) },
              requestState: r2.requestState
            });
            if (!isCompleteResult(r3)) {
              errs.push('round 3 MUST be complete');
            }
            const text = textOf(r3);
            if (!/Alice/.test(text)) {
              errs.push(
                'final result MUST reflect round 1 answer (server forwards via requestState)'
              );
            }
            if (!/blue/.test(text)) {
              errs.push('final result MUST reflect round 2 answer');
            }
          }
        }
        checks.push(resultCheck(id, name, description, errs));
      } catch (error) {
        checks.push(failureCheck(id, name, description, error));
      }
    }

    // Check 7: wrong-key inputResponses → server re-requests.
    {
      const id = 'mrtr-wrong-input-key-rerequests';
      const name = 'MrtrWrongInputKeyRerequests';
      const description =
        'When the client sends inputResponses with a key the server did not emit, the server SHOULD re-request via IncompleteResult';
      try {
        const r1 = await callTool(
          serverUrl,
          sessionId,
          'test_incomplete_result_elicitation',
          { inputResponses: { wrong_key: mockElicitResponse({ data: 'wrong' }) } }
        );
        const errs: string[] = [];
        if (!isIncompleteResult(r1)) {
          errs.push(
            `expected IncompleteResult re-request when inputResponses key is wrong; got ${JSON.stringify(r1)}`
          );
        }
        checks.push(resultCheck(id, name, description, errs));
      } catch (error) {
        checks.push(failureCheck(id, name, description, error));
      }
    }

    // Check 8: SKIPPED — MRTR → Tasks composition.
    // Tracking placeholder; spec made this normative in commit 451f5e1
    // (Apr 30) but two blockers remain before this can be enabled:
    //   (a) Spec watch on the MRTR resultType discriminator value
    //       (input_required vs incomplete; see helpers.ts SPEC WATCH).
    //   (b) Reference servers need middleware that observes the
    //       handler's IncompleteResult signal BEFORE creating a task —
    //       the natural implementation pattern (create task up-front,
    //       run handler in goroutine) doesn't expose the signal in time.
    //       Tracked in https://github.com/panyam/mcpkit/issues/347 as
    //       one example impl that hits this; SDKs in any language will
    //       need an equivalent fix.
    checks.push({
      id: 'mrtr-tasks-composition',
      name: 'MrtrTasksComposition',
      description:
        'MRTR loop gathers input then final round returns CreateTaskResult (SEP-2663 451f5e1; deferred — spec authors disagree on the resultType discriminator value, and reference implementations still in flight)',
      status: 'SKIPPED',
      timestamp: new Date().toISOString(),
      errorMessage:
        "Skipped: deferred until (a) spec authors converge on the MRTR resultType value (input_required vs incomplete) and (b) reference servers can observe the handler's IsIncomplete signal before creating a task.",
      specReferences: [
        SEP_2322_REF,
        {
          id: 'SEP-2663',
          url: 'https://github.com/modelcontextprotocol/specification/pull/2663'
        }
      ]
    });

    return checks;
  }

  /**
   * Two-round check for a tool that asks for exactly one kind of input:
   * round 1 must be an IncompleteResult whose first inputRequest uses
   * `spec.method`; round 2 answers it with `spec.response` and must
   * complete.
   */
  private async singleMethodRoundTrip(
    serverUrl: string,
    sessionId: string,
    spec: RoundTripSpec
  ): Promise<ConformanceCheck> {
    const { id, name, description, tool, method, response } = spec;
    try {
      const r1 = await callTool(serverUrl, sessionId, tool);
      const errs: string[] = [];
      const key = firstInputKey(r1);
      if (!isIncompleteResult(r1)) {
        errs.push('round 1 MUST be IncompleteResult');
      } else if (key === undefined) {
        // An IncompleteResult may be detected via requestState alone;
        // report the missing map instead of throwing on Object.keys.
        errs.push(NO_INPUT_REQUESTS);
      } else {
        const emitted = r1.inputRequests[key]?.method;
        if (emitted !== method) {
          errs.push(
            `inputRequest method MUST be "${method}"; got ${JSON.stringify(emitted)}`
          );
        }
        const r2 = await callTool(serverUrl, sessionId, tool, {
          inputResponses: { [key]: response },
          ...echoState(r1)
        });
        if (!isCompleteResult(r2)) {
          errs.push('round 2 MUST be complete');
        }
      }
      return resultCheck(id, name, description, errs);
    } catch (error) {
      return failureCheck(id, name, description, error);
    }
  }
}
import type { ConformanceCheck, SpecReference } from '../../../types';

/** Spec reference attached by default to every MRTR (SEP-2322) check. */
export const SEP_2322_REF: SpecReference = {
  id: 'SEP-2322',
  url: 'https://github.com/modelcontextprotocol/specification/pull/2322'
};

// SPEC WATCH — MRTR resultType discriminator value
// SEP-2322 (MRTR) and SEP-2663 (Tasks Extension) currently disagree on
// the wire value: SEP-2322's draft uses "input_required", SEP-2663's
// draft uses "incomplete". Awaiting alignment between SEP authors
// (PR 2663 comment 4381885336 + PR 2322 comment 4381884825). When the
// spec converges, this single constant flips.
export const MRTR_INCOMPLETE_RESULT_TYPE = 'incomplete';

/** True for non-null objects — the only operands the `in` operator accepts. */
function isObjectLike(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null;
}

/**
 * Whether `result` is an MRTR IncompleteResult.
 *
 * Matches on the `resultType` discriminator first, then falls back to the
 * payload shape (`inputRequests` / `requestState` present) so a server that
 * uses the other draft's discriminator value is still recognised.
 * Non-object input (null, undefined, primitives) is never incomplete.
 */
export function isIncompleteResult(result: unknown): boolean {
  if (!isObjectLike(result)) return false;
  if (result.resultType === MRTR_INCOMPLETE_RESULT_TYPE) return true;
  return 'inputRequests' in result || 'requestState' in result;
}

/**
 * Whether `result` is a final (non-MRTR) result.
 *
 * An explicit `resultType: "complete"` wins. Otherwise the result is
 * complete exactly when it is not incomplete — including when the
 * discriminator is absent, so a payload that carries `inputRequests` /
 * `requestState` without a `resultType` is no longer reported as both
 * complete and incomplete.
 */
export function isCompleteResult(result: unknown): boolean {
  if (!isObjectLike(result)) return false;
  if (result.resultType === 'complete') return true;
  return !isIncompleteResult(result);
}

/** Build an ElicitResult-shaped mock response payload. */
export function mockElicitResponse(
  content: Record<string, unknown>
): Record<string, unknown> {
  return { action: 'accept', content };
}

/** Build a CreateMessageResult-shaped mock response payload. */
export function mockSamplingResponse(text: string): Record<string, unknown> {
  return {
    role: 'assistant',
    content: { type: 'text', text },
    model: 'test-model',
    stopReason: 'endTurn'
  };
}

/** Build a ListRootsResult-shaped mock response payload. */
export function mockListRootsResponse(): Record<string, unknown> {
  return { roots: [{ uri: 'file:///test/root', name: 'Test Root' }] };
}

/**
 * Human-readable message for anything that can be thrown.
 *
 * `Error` instances and plain objects with a string `message` (the shape
 * of a JSON-RPC error object) yield that message; everything else is
 * stringified.
 */
export function errMsg(error: unknown): string {
  if (error instanceof Error) return error.message;
  if (isObjectLike(error) && typeof error.message === 'string') {
    return error.message;
  }
  return String(error);
}

/**
 * Build a FAILURE check record from a thrown value.
 *
 * @param id - Stable check id.
 * @param name - Check name.
 * @param description - What the check asserts.
 * @param error - The caught value; rendered through {@link errMsg}.
 * @param specReferences - Spec links for the record; defaults to SEP-2322.
 * @returns A check with status `FAILURE` and a fresh ISO timestamp.
 */
export function failureCheck(
  id: string,
  name: string,
  description: string,
  error: unknown,
  specReferences: SpecReference[] = [SEP_2322_REF]
): ConformanceCheck {
  return {
    id,
    name,
    description,
    status: 'FAILURE',
    timestamp: new Date().toISOString(),
    errorMessage: errMsg(error),
    specReferences
  };
}
+ +## Specs covered + +| SEP | What it adds | Where it shows up | +| -------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------- | +| SEP-2663 | Tasks Extension — `io.modelcontextprotocol/tasks` capability, flat `CreateTaskResult` (`Result & Task`), `DetailedTask` on `tasks/get` (with inlined result/error/inputRequests/requestState), `tasks/update` for MRTR resume, ack-only `tasks/cancel`, wire-field renames (`ttlSeconds`, `pollIntervalMilliseconds`) | every scenario | +| SEP-2322 | MRTR base types — `inputRequests`/`inputResponses` keyed maps, `requestState`, `resultType` discriminator (`"task"`/`"complete"`/`"incomplete"`) | request-state, mrtr-input, dispatch | +| SEP-2575 | Per-request capability override via `_meta.io.modelcontextprotocol/clientCapabilities` | capability | +| SEP-2243 | Server tolerates `Mcp-Method` / `Mcp-Name` request headers as informational routing metadata; body is authoritative | headers | + +## ClientScenario classes + +Per the AGENTS.md "fewer scenarios, more checks" rule, related checks +are bundled into one scenario class with multiple `ConformanceCheck` +records. Each row below is one class. + +### `tasks-lifecycle` (`lifecycle.ts`) + +Sync vs async dispatch, DetailedTask shape on tasks/get, tool errors +vs protocol errors, cancellation semantics. 
+ +| Check | What it tests | +| ------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------ | +| `tasks-sync-tool-call` | Sync tool returns `resultType:"complete"`; no top-level `taskId` | +| `tasks-server-task-creation` | Task-supporting tool returns flat `CreateTaskResult` (no nested `task` wrapper); MUST NOT carry `result`/`error`/`inputRequests` on the envelope | +| `tasks-get-during-working` | `tasks/get` on an active task returns status + metadata | +| `tasks-get-terminal-inlined-result` | Completed task `tasks/get` inlines `result.content[]` (no separate `tasks/result`) | +| `tasks-tool-error-completed-iserror` | Tool execution errors → `status:"completed"` + `result.isError:true` (NOT `failed`) | +| `tasks-protocol-error-failed-shape` | Protocol errors → `status:"failed"` with inlined `error{code,message}`; no `result` | +| `tasks-cancel-empty-ack` | `tasks/cancel` returns `{resultType:"complete"}`; status settles to cancelled | +| `tasks-cancel-terminal-rejected` | `tasks/cancel` on a terminal task returns `-32602` (clarified in spec commit `d963ad0`) | + +### `tasks-capability-negotiation` (`capability.ts`) + +| Check | What it tests | +| ----------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------ | +| `tasks-extension-advertised` | Server advertises `io.modelcontextprotocol/tasks` under `capabilities.extensions`; v1 `capabilities.tasks` slot stays absent | +| `tasks-methods-gated-without-extension` | `tasks/get`, `tasks/update`, `tasks/cancel` return `-32601` for sessions that didn't negotiate the extension | +| `tasks-tools-call-without-extension-sync` | `tools/call` from a non-negotiated session falls through to sync (no `CreateTaskResult`) | +| `tasks-per-request-meta-opt-in` | SEP-2575 — per-request 
`_meta.io.modelcontextprotocol/clientCapabilities` produces `CreateTaskResult` even without session-level extension | + +### `tasks-wire-fields` (`wire-fields.ts`) + +| Check | What it tests | +| ---------------------------------------------- | -------------------------------------------------------------------------------------------- | +| `tasks-wire-field-renames` | `ttlSeconds` + `pollIntervalMilliseconds` present; legacy `ttl` / `pollInterval` keys absent | +| `tasks-no-early-ttl-expiry` | Task remains accessible via `tasks/get` for the duration of its `ttlSeconds` | +| `tasks-no-related-task-meta-on-inlined-result` | v1 `io.modelcontextprotocol/related-task` `_meta` key absent on tasks/get's inlined `result` | + +### `tasks-request-state` (`request-state.ts`) + +| Check | What it tests | +| ------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | +| `tasks-request-state-shape` | When emitted, `requestState` is a non-empty string (`INFO` if server omits it; emission is optional per SEP-2322) | +| `tasks-request-state-echo` | Server accepts `tasks/get` with the previously-emitted `requestState` echoed back | +| `tasks-request-state-stale-tolerance` | Earlier (stale-but-still-valid) `requestState` MUST still be accepted after a newer one is minted | + +### `tasks-mrtr-input` (`mrtr-input.ts`) + +| Check | What it tests | +| ---------------------------------------- | --------------------------------------------------------------------------------------------------------------- | +| `tasks-mrtr-input-requests-on-tasks-get` | `tasks/get` on `input_required` task surfaces non-empty `inputRequests` map | +| `tasks-mrtr-tasks-update-resumes` | `tasks/update` with matching `inputResponses` is acked with `{resultType:"complete"}`; task resumes to terminal | +| `tasks-mrtr-partial-fulfillment` | A subset-of-keys `tasks/update` keeps the task in `input_required` with only 
the unanswered key remaining | + +### `tasks-request-headers` (`headers.ts`) + +| Check | What it tests | +| ----------------------------------------------------- | ------------------------------------------------------------------------------------------------ | +| `tasks-headers-tolerate-mcp-method-on-tools-call` | Server tolerates `Mcp-Method` request header on `tools/call` (sync dispatch unaffected) | +| `tasks-headers-tolerate-routing-headers-on-tasks-get` | Server tolerates `Mcp-Method` + `Mcp-Name` request headers on `tasks/get` (body taskId resolves) | +| `tasks-headers-body-method-authoritative` | When `Mcp-Method` header disagrees with body, server MUST dispatch on body method | + +> SEP-2243 defines these as **request** headers (client → server) used by HTTP infrastructure for routing. Whether the server _also_ echoes them on responses for downstream observability is implementation-defined and out of scope here. + +### `tasks-dispatch-and-envelope` (`dispatch.ts`) + +| Check | What it tests | +| -------------------------------------------------- | -------------------------------------------------------------------------------------------------------- | +| `tasks-removed-tasks-result` | `tasks/result` removed in v2 → `-32601` | +| `tasks-removed-tasks-list` | `tasks/list` removed in v2 → `-32601` | +| `tasks-server-directed-creation-no-hint` | `tools/call` without client `task` hint still produces `CreateTaskResult` | +| `tasks-legacy-task-param-ignored` | Legacy v1 `task` param tolerated AND ignored on a sync tool (no error, no promotion) | +| `tasks-immediate-result-shortcut` | Fast operation MAY skip task creation and return a sync `ToolResult` | +| `tasks-result-type-complete-on-non-task-responses` | Sync `tools/call`, `tasks/get`, `tasks/update` ack, `tasks/cancel` ack all carry `resultType:"complete"` | +| `tasks-strong-consistency-immediate-get` | `tasks/get` immediately after `CreateTaskResult` MUST resolve (no -32602) | +| 
`tasks-get-unknown-task-id-rejected` | `tasks/get` with unknown taskId returns `-32602` | + +### `tasks-status-notifications` (`notifications.ts`) + +| Check | What it tests | +| ---------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `tasks-status-notifications-shape` | Optional check — when sent, each `notifications/tasks/status` carries `taskId` + `status`; terminal notifications SHOULD inline `result` (DetailedTask) | + +> Notifications are optional per SEP-2663. The check emits `INFO` (not `FAILURE`) when no notifications are received, so a server that doesn't implement the optional path stays conformant. + +## Required server fixtures + +The fixture server MUST register these tools: + +| Tool | Behavior | +| -------------------- | --------------------------------------------------------------------------------------- | +| `greet` | Sync — returns `Hello, {name}!` | +| `slow_compute` | Async — `seconds`-second sleep, returns result; `seconds:0` for immediate path | +| `failing_job` | Async — always returns tool error after ~1s | +| `protocol_error_job` | Async — panics, surfaces as protocol error | +| `confirm_delete` | Async — calls `TaskElicit` (single inputRequest) | +| `multi_input` | Async — fans out two `TaskElicit` calls in parallel (used by partial-fulfillment check) | + +The fixture can be implemented in any language; one example reference +implementation lives at +[`panyam/mcpkit/examples/tasks-v2`](https://github.com/panyam/mcpkit/tree/main/examples/tasks-v2). + +## Running + +The runner is brand-neutral and language-agnostic — it just shells out +to a command line and waits for the URL to become reachable. 
+ +### Against an already-running server + +```bash +TASKS_SERVER_URL=http://localhost:8080/mcp \ + npx vitest run src/scenarios/server/tasks/all-scenarios.test.ts +``` + +### Auto-spawn a fixture in `beforeAll` + +```bash +TASKS_SERVER_URL=http://localhost:18092/mcp \ +TASKS_SERVER_CMD="/path/to/tasks-server --port 18092" \ + npx vitest run src/scenarios/server/tasks/all-scenarios.test.ts +``` + +If `TASKS_SERVER_URL` is unset, the suite is `describe.skip`'d so CI +runs against the upstream `everything-server` stay green until that +fixture grows SEP-2663 support. + +## Open spec questions + +Where the spec is silent or ambiguous, this suite picks the louder / +safer option (typically `-32602` over silent ack) so a misbehaving +server fails loudly rather than appearing well-formed. Today: + +1. **Invalid `requestState`** — silent ack vs `-32602`. Suite asserts `-32602` (a server that silently accepts a forged token is a security hazard). +2. **SEP-2575 per-request capabilities envelope shape** — covered by `tasks-per-request-meta-opt-in`; the suite asserts only the observable behavior (`CreateTaskResult` produced) so the inner shape can evolve without churn. +3. **`tasks/update` / `tasks/cancel` for unknown taskId** — silent ack vs `-32602`. The suite asserts `-32602` only where the spec wording is firm: `tasks/get` with an unknown taskId and `tasks/cancel` on a terminal task. The upstream wording for `tasks/update` / `tasks/cancel` against an unknown taskId is too soft to assert against here. 
+ +## Wire-format diff vs MCP Tasks v1 (spec 2025-11-25) + +| Aspect | v1 | SEP-2663 | +| -------------------------- | ------------------------------ | ---------------------------------------------------------------------------------------------- | +| Capability slot | `capabilities.tasks` | `capabilities.extensions["io.modelcontextprotocol/tasks"]` | +| Client opt-in | (none) | MUST declare extension at session OR per-request (SEP-2575) | +| Task creation | Client sends `task` hint param | Server decides unilaterally | +| `resultType` discriminator | absent | `"task"` (CreateTaskResult) / `"complete"` (everything else) / `"incomplete"` (MRTR ephemeral) | +| `CreateTaskResult` shape | `{task: {...}}` (nested) | flat: `{resultType, taskId, status, ttlSeconds, ...}` (no nested wrapper) | +| `tasks/get` response | flat `TaskInfo` only | `DetailedTask` with inlined `result`/`error`/`inputRequests`/`requestState` | +| `tasks/update` | n/a | new — MRTR resume path, returns `{resultType:"complete"}` ack | +| `tasks/cancel` response | rich task envelope | `{resultType:"complete"}` ack (no task state) | +| `tasks/result` | separate blocking method | **removed** (result inlined on `tasks/get`) | +| `tasks/list` | session-scoped list | **removed** | +| TTL field | `ttl` (ms by convention) | `ttlSeconds` (units in name) | +| Poll-interval field | `pollInterval` | `pollIntervalMilliseconds` | +| `parentTaskId` | present | removed | +| Tool errors | `status:failed` | `status:completed, result.isError:true` | +| Mcp-Name HTTP header | not set | request-side routing header (SEP-2243) | + +## Design notes + +### Raw fetch escape hatch + +The MCP TS SDK ships with strict Zod schemas that strip SEP-2663 / +SEP-2322 wire fields from responses (`resultType`, `taskId`, +`inputRequests`, `requestState`, inlined result/error). Scenarios that +exercise those fields use the raw-fetch helpers in `helpers.ts` rather +than the SDK client. 
When the SDK gains schemas for the SEP-2663 +shapes, those call sites switch back to +`client.request(..., AnyResult)` and the helpers shrink (or disappear). + +### Severity follows the spec keyword + +Per AGENTS.md: MUST / MUST NOT → `FAILURE`; SHOULD / SHOULD NOT → +`WARNING`; optional emission with no presence → `INFO`. CI treats +`WARNING` as a failure, so SHOULD-level requirements still gate. diff --git a/src/scenarios/server/tasks/all-scenarios.test.ts b/src/scenarios/server/tasks/all-scenarios.test.ts new file mode 100644 index 0000000..d6ad16d --- /dev/null +++ b/src/scenarios/server/tasks/all-scenarios.test.ts @@ -0,0 +1,137 @@ +/** + * SEP-2663 Tasks extension test runner. + * + * Iterates the tasks server scenarios against a SEP-2663-conformant + * server. Configuration is brand-neutral and language-agnostic: + * + * 1. Point at an already-running server: + * TASKS_SERVER_URL=http://localhost:8080/mcp npm test -- tasks/all-scenarios.test.ts + * + * 2. Auto-spawn a fixture before tests (any language; the runner just + * shells out to TASKS_SERVER_CMD and waits until TASKS_SERVER_URL + * becomes reachable): + * TASKS_SERVER_URL=http://localhost:18092/mcp \ + * TASKS_SERVER_CMD="/path/to/server --port 18092" \ + * npm test -- tasks/all-scenarios.test.ts + * + * If TASKS_SERVER_URL is unset, the suite is skipped — letting CI runs + * against the everything-server stay green until the upstream fixture + * grows SEP-2663 support. + * + * Readiness is detected by polling the URL's host/port for a TCP + * connection (deliberately language-agnostic — no log-line scanning). + * + * The fixture server can be implemented in any language as long as it + * exposes a SEP-2663 conformant Streamable HTTP MCP endpoint. Anyone is + * free to bring their own; one example reference implementation lives + * at https://github.com/panyam/mcpkit/tree/main/examples/tasks-v2. 
import { spawn, ChildProcess } from 'child_process';
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import { TasksLifecycleScenario } from './lifecycle';
import { TasksCapabilityNegotiationScenario } from './capability';
import { TasksWireFieldsScenario } from './wire-fields';
import { TasksRequestStateScenario } from './request-state';
import { TasksMRTRInputScenario } from './mrtr-input';
import { TasksRequestHeadersScenario } from './headers';
import { TasksDispatchScenario } from './dispatch';
import { TasksStatusNotificationsScenario } from './notifications';
import { waitForServerReady } from '../_shared/test-runner';

const SERVER_URL = process.env.TASKS_SERVER_URL;
const SERVER_CMD = process.env.TASKS_SERVER_CMD;
const SERVER_STARTUP_TIMEOUT_MS = 15_000;
// How long afterAll waits for a graceful exit before escalating to SIGKILL.
const SERVER_SHUTDOWN_GRACE_MS = 3_000;
const SHOULD_SPAWN = Boolean(SERVER_URL && SERVER_CMD);
const HAVE_TARGET = Boolean(SERVER_URL);

const TASKS_SCENARIOS = [
  new TasksLifecycleScenario(),
  new TasksCapabilityNegotiationScenario(),
  new TasksWireFieldsScenario(),
  new TasksRequestStateScenario(),
  new TasksMRTRInputScenario(),
  new TasksRequestHeadersScenario(),
  new TasksDispatchScenario(),
  new TasksStatusNotificationsScenario()
];

/**
 * True once the child has actually terminated.
 *
 * `ChildProcess.killed` only records that a signal was delivered by
 * `kill()`; it says nothing about termination. `exitCode` / `signalCode`
 * stay null until the process is really gone.
 */
function hasExited(child: ChildProcess): boolean {
  return child.exitCode !== null || child.signalCode !== null;
}

// Without a target URL the whole suite is skipped rather than failed.
const describeIfTarget = HAVE_TARGET ? describe : describe.skip;

describeIfTarget('SEP-2663 Tasks — server conformance', () => {
  let serverProcess: ChildProcess | null = null;

  // Optionally spawn the fixture and block until its URL is reachable.
  beforeAll(async () => {
    if (!SERVER_URL || !SERVER_CMD) return;

    const child = spawn('sh', ['-c', SERVER_CMD], {
      stdio: ['ignore', 'pipe', 'pipe'],
      detached: false
    });
    serverProcess = child;

    // Output is buffered so it can be attached to a startup failure.
    let stdoutBuf = '';
    let stderrBuf = '';
    child.stdout?.on('data', (b) => {
      stdoutBuf += b.toString();
    });
    child.stderr?.on('data', (b) => {
      stderrBuf += b.toString();
    });

    // A failed spawn emits 'error'; without a listener the event would be
    // thrown as an uncaught exception instead of surfacing in the
    // readiness failure below.
    child.on('error', (err) => {
      stderrBuf += `\n[spawn error] ${err.message}`;
    });

    child.on('exit', (code) => {
      if (code !== null && code !== 0) {
        console.error(
          `tasks fixture exited unexpectedly with code ${code}.\nSTDOUT: ${stdoutBuf}\nSTDERR: ${stderrBuf}`
        );
      }
    });

    try {
      await waitForServerReady(SERVER_URL, SERVER_STARTUP_TIMEOUT_MS);
    } catch (err) {
      if (!hasExited(child)) {
        child.kill('SIGKILL');
      }
      const reason = err instanceof Error ? err.message : String(err);
      throw new Error(
        `tasks fixture did not become reachable within ${SERVER_STARTUP_TIMEOUT_MS}ms: ${reason}\nSTDOUT: ${stdoutBuf}\nSTDERR: ${stderrBuf}`
      );
    }
  }, SERVER_STARTUP_TIMEOUT_MS + 5_000);

  // Stop the fixture: SIGTERM first, SIGKILL if it has not exited in time.
  afterAll(async () => {
    if (!SHOULD_SPAWN) return;
    const child = serverProcess;
    serverProcess = null;
    // Nothing to wait for if the fixture never started or already died.
    if (!child || hasExited(child)) return;

    await new Promise<void>((resolve) => {
      const timer = setTimeout(() => {
        // Decide on real termination state, not `killed`: `killed` is
        // already true after the SIGTERM below, which would make this
        // escalation unreachable.
        if (!hasExited(child)) {
          child.kill('SIGKILL');
        }
        resolve();
      }, SERVER_SHUTDOWN_GRACE_MS);
      child.once('exit', () => {
        clearTimeout(timer);
        resolve();
      });
      child.kill('SIGTERM');
    });
  });

  for (const scenario of TASKS_SCENARIOS) {
    it(`${scenario.name} — all checks succeed against fixture`, async () => {
      // The suite only runs when SERVER_URL is set (see describeIfTarget).
      const checks = await scenario.run(SERVER_URL!);
      expect(checks.length).toBeGreaterThan(0);
      // WARNING gates as well as FAILURE.
      const failures = checks.filter(
        (c) => c.status === 'FAILURE' || c.status === 'WARNING'
      );
      if (failures.length > 0) {
        const detail = failures
          .map((c) => `  - ${c.id}: ${c.errorMessage ?? '(no message)'}`)
          .join('\n');
        throw new Error(
          `${failures.length}/${checks.length} checks failed:\n${detail}`
        );
      }
    });
  }
});
/**
 * SEP-2663 Tasks Extension — capability negotiation conformance.
 *
 * Tests that the server advertises the io.modelcontextprotocol/tasks
 * extension correctly, gates the v2 task surface on negotiation, and
 * supports SEP-2575 per-request capability overrides.
 *
 * Required server fixtures:
 *   - greet         — sync-only, returns "Hello, {name}!"
 *   - slow_compute  — task-supporting, sleeps N seconds
 */

import {
  ClientScenario,
  ConformanceCheck,
  ScenarioSpecTag,
  DRAFT_PROTOCOL_VERSION
} from '../../../types';
import {
  TASKS_EXTENSION_ID,
  SEP_2663_REF,
  SEP_2575_REF,
  errMsg,
  failureCheck,
  initRawSession,
  rawRequest
} from './helpers';

/**
 * Build the record for a check that collected zero or more violations:
 * SUCCESS when the list is empty, otherwise FAILURE with the violations
 * joined by "; ". `details` is attached only when provided.
 */
function verdictCheck(
  id: string,
  name: string,
  description: string,
  violations: string[],
  specReferences: ConformanceCheck['specReferences'],
  details?: ConformanceCheck['details']
): ConformanceCheck {
  return {
    id,
    name,
    description,
    status: violations.length === 0 ? 'SUCCESS' : 'FAILURE',
    timestamp: new Date().toISOString(),
    errorMessage: violations.length > 0 ? violations.join('; ') : undefined,
    specReferences,
    ...(details === undefined ? {} : { details })
  };
}

/** Read `code` off a thrown value, or undefined when it has none. */
function thrownCode(error: unknown): unknown {
  if (typeof error !== 'object' || error === null) return undefined;
  return (error as { code?: unknown }).code;
}

/**
 * Conformance scenario for SEP-2663 capability negotiation. Runs four
 * checks against two raw sessions — one that declares the tasks extension
 * at `initialize` and one that does not.
 */
export class TasksCapabilityNegotiationScenario implements ClientScenario {
  name = 'tasks-capability-negotiation';
  specVersions: ScenarioSpecTag[] = ['extension', DRAFT_PROTOCOL_VERSION];
  description = `Test SEP-2663 capability negotiation for the tasks extension.

**Server Implementation Requirements:**

**Capability advertisement (SEP-2663):**
- The server MUST advertise \`io.modelcontextprotocol/tasks\` under
  \`capabilities.extensions\` in its \`initialize\` response.
- It MUST NOT use a v1-style \`capabilities.tasks\` slot (the v1 surface
  is replaced by the extension).

**Gating without negotiation (SEP-2663):**
- For sessions that did NOT declare the \`io.modelcontextprotocol/tasks\`
  extension during \`initialize\`, the server MUST reject \`tasks/get\`,
  \`tasks/update\`, and \`tasks/cancel\` with JSON-RPC \`-32601\`
  (MethodNotFound) — clients that didn't negotiate the surface should
  not see it.
- A \`tools/call\` from such a session MUST NOT return
  \`CreateTaskResult\`. Task-supporting tools fall through to synchronous
  execution and return a plain \`ToolResult\` with
  \`resultType:"complete"\`.

**Per-request opt-in (SEP-2575):**
- A session that did not declare the extension at session level can
  opt into task creation for a single \`tools/call\` by including the
  extension under \`_meta.io.modelcontextprotocol/clientCapabilities.extensions\`.
  The server MUST honor the per-request opt-in and produce a
  \`CreateTaskResult\` for that call.`;

  /**
   * Run every capability-negotiation check against `serverUrl`.
   *
   * @param serverUrl - MCP endpoint of the server under test.
   * @returns One record per check, or a single `tasks-session-bootstrap`
   *   FAILURE when either initialize handshake fails.
   */
  async run(serverUrl: string): Promise<ConformanceCheck[]> {
    const checks: ConformanceCheck[] = [];

    // Two sessions: one declares the extension, one does NOT.
    let withExt: { sessionId: string; serverCapabilities: any };
    let withoutExt: { sessionId: string };
    try {
      withExt = await initRawSession(serverUrl, {
        capabilities: {
          elicitation: {},
          sampling: {},
          extensions: { [TASKS_EXTENSION_ID]: {} }
        }
      });
      withoutExt = await initRawSession(serverUrl, { capabilities: {} });
    } catch (error) {
      checks.push({
        id: 'tasks-session-bootstrap',
        name: 'TasksSessionBootstrap',
        description: 'Initialize handshakes (with + without extension) succeed',
        status: 'FAILURE',
        timestamp: new Date().toISOString(),
        errorMessage: `Failed to initialize: ${errMsg(error)}`,
        specReferences: [SEP_2663_REF]
      });
      return checks;
    }

    // Check 1: server advertises extension under capabilities.extensions.
    {
      const caps = withExt.serverCapabilities ?? {};
      const errs: string[] = [];
      if (caps.tasks) {
        errs.push(
          'v1-style capabilities.tasks slot MUST NOT be used; tasks lives under capabilities.extensions'
        );
      }
      if (!caps.extensions) {
        errs.push('capabilities.extensions MUST be advertised');
      } else if (!caps.extensions[TASKS_EXTENSION_ID]) {
        errs.push(
          `capabilities.extensions["${TASKS_EXTENSION_ID}"] MUST be present`
        );
      }
      checks.push(
        verdictCheck(
          'tasks-extension-advertised',
          'TasksExtensionAdvertised',
          `Server advertises ${TASKS_EXTENSION_ID} under capabilities.extensions (and not capabilities.tasks)`,
          errs,
          [SEP_2663_REF],
          {
            hasExtensions: Boolean(caps.extensions),
            hasTasksExtension: Boolean(caps.extensions?.[TASKS_EXTENSION_ID]),
            hasV1TasksSlot: Boolean(caps.tasks)
          }
        )
      );
    }

    // Check 2: tasks/* methods rejected without extension negotiation.
    {
      const cases: Array<{ method: string; params: any }> = [
        { method: 'tasks/get', params: { taskId: 'gate-test' } },
        {
          method: 'tasks/update',
          params: { taskId: 'gate-test', inputResponses: {} }
        },
        { method: 'tasks/cancel', params: { taskId: 'gate-test' } }
      ];
      const errs: string[] = [];
      // Sequential on purpose: one session, deterministic ordering.
      for (const tc of cases) {
        let rejection: unknown;
        let rejected = false;
        try {
          await rawRequest(serverUrl, tc.method, tc.params, {
            sessionId: withoutExt.sessionId
          });
        } catch (e: unknown) {
          rejected = true;
          rejection = e;
        }
        if (!rejected) {
          errs.push(`${tc.method} MUST reject (it returned a result)`);
          continue;
        }
        const code = thrownCode(rejection);
        if (code !== -32601) {
          // A thrown value without a code (e.g. a transport failure) is
          // reported as <none> rather than as an empty string.
          errs.push(
            `${tc.method} MUST return -32601; got ${code ?? '<none>'}`
          );
        }
      }
      checks.push(
        verdictCheck(
          'tasks-methods-gated-without-extension',
          'TasksMethodsGatedWithoutExtension',
          'tasks/get, tasks/update, tasks/cancel return -32601 when extension was not negotiated',
          errs,
          [SEP_2663_REF]
        )
      );
    }

    // Check 3: tools/call without extension returns sync ToolResult, not task.
    {
      const id = 'tasks-tools-call-without-extension-sync';
      const name = 'TasksToolsCallWithoutExtensionSync';
      const description =
        'tools/call from a session without the extension MUST fall through to sync (no CreateTaskResult, even for task-supporting tools)';
      try {
        const result = await rawRequest(
          serverUrl,
          'tools/call',
          {
            name: 'slow_compute',
            arguments: { seconds: 0, label: 'capability-no-ext' }
          },
          { sessionId: withoutExt.sessionId }
        );
        const errs: string[] = [];
        if (result.resultType === 'task') {
          errs.push(
            'tools/call without extension MUST NOT return resultType:"task"'
          );
        }
        if (result.taskId) {
          errs.push(
            `tools/call without extension MUST NOT carry top-level taskId; got ${result.taskId}`
          );
        }
        if (result.task) {
          errs.push(
            'tools/call without extension MUST NOT carry the v1-style nested `task` envelope'
          );
        }
        if (!result.content) {
          errs.push(
            'tools/call without extension MUST return sync ToolResult with content[]'
          );
        }
        checks.push(
          verdictCheck(id, name, description, errs, [SEP_2663_REF], {
            resultType: result.resultType,
            hasTaskId: Boolean(result.taskId)
          })
        );
      } catch (error) {
        checks.push(failureCheck(id, name, description, error, [SEP_2663_REF]));
      }
    }

    // Check 4: SEP-2575 per-request _meta opt-in produces CreateTaskResult.
    {
      const id = 'tasks-per-request-meta-opt-in';
      const name = 'TasksPerRequestMetaOptIn';
      const description =
        'tools/call with extension declared in _meta.io.modelcontextprotocol/clientCapabilities produces a CreateTaskResult even when the session did not negotiate the extension';
      try {
        const result = await rawRequest(
          serverUrl,
          'tools/call',
          {
            name: 'slow_compute',
            arguments: { seconds: 1, label: 'capability-meta-opt' }
          },
          {
            sessionId: withoutExt.sessionId,
            meta: {
              'io.modelcontextprotocol/clientCapabilities': {
                extensions: { [TASKS_EXTENSION_ID]: {} }
              }
            }
          }
        );
        const errs: string[] = [];
        if (result.resultType !== 'task') {
          errs.push(
            `expected resultType:"task" via per-request opt-in; got ${JSON.stringify(result.resultType)}`
          );
        }
        if (!result.taskId) {
          errs.push(
            'per-request opt-in MUST produce a CreateTaskResult with top-level taskId'
          );
        }
        if (result.task) {
          errs.push(
            'CreateTaskResult MUST be flat (no nested `task` wrapper) even on per-request opt-in path'
          );
        }
        // Best-effort cleanup: cancel the task so the ~1s job does not keep
        // running on the server. Issued from the extension-negotiated
        // session because tasks/* is expected to be gated (-32601) for
        // `withoutExt` (see check 2).
        // NOTE(review): if the server scopes tasks per session this cancel
        // is rejected and swallowed — confirm whether the creating session
        // plus the per-request opt-in should be used instead.
        if (result.taskId) {
          try {
            await rawRequest(
              serverUrl,
              'tasks/cancel',
              { taskId: result.taskId },
              { sessionId: withExt.sessionId }
            );
          } catch {
            /* swallow — cleanup best-effort */
          }
        }
        checks.push(
          verdictCheck(
            id,
            name,
            description,
            errs,
            [SEP_2575_REF, SEP_2663_REF],
            {
              resultType: result.resultType,
              taskId: result.taskId
            }
          )
        );
      } catch (error) {
        checks.push(failureCheck(id, name, description, error, [SEP_2575_REF]));
      }
    }

    return checks;
  }
}
+ * + * Required server fixtures: + * - greet — sync-only + * - slow_compute — task-supporting (seconds:0 = instant) + * - confirm_delete — task-supporting, parks for elicitation + * - failing_job — task-supporting, returns tool error + */ + +import { + ClientScenario, + ConformanceCheck, + ScenarioSpecTag, + DRAFT_PROTOCOL_VERSION +} from '../../../types'; +import { + TASKS_EXTENSION_ID, + SEP_2322_REF, + SEP_2663_REF, + errMsg, + failureCheck, + initRawSession, + rawRequest, + waitForStatus, + waitForTerminal +} from './helpers'; + +export class TasksDispatchScenario implements ClientScenario { + name = 'tasks-dispatch-and-envelope'; + specVersions: ScenarioSpecTag[] = ['extension', DRAFT_PROTOCOL_VERSION]; + description = `Test SEP-2663 dispatch / envelope rules across the tasks surface. + +**Server Implementation Requirements:** + +**Removed v1 methods (SEP-2663):** +- \`tasks/result\` is removed in v2 — the result is inlined on + \`tasks/get\`. Servers MUST reject the method with JSON-RPC \`-32601\`. +- \`tasks/list\` is removed in v2. Servers MUST reject it with + \`-32601\`. + +**Server-directed task creation (SEP-2663):** +- The client does NOT send a \`task\` hint param. The server alone + decides whether to create a task. A \`tools/call\` against a + task-supporting tool MUST produce \`CreateTaskResult\` even with no + client hint. + +**Legacy \`task\` param tolerated (SEP-2663):** +- A v1 client may still send \`task: { ttl, pollInterval }\` on + \`tools/call\`. The server MUST tolerate it (no error) AND MUST NOT + promote a sync-only tool to a task on its presence. The body + arguments + tool registration are authoritative. + +**Immediate-result shortcut (SEP-2663):** +- A server MAY return a sync \`ToolResult\` for task-supporting tools + when the operation completes fast enough. Either return a + \`CreateTaskResult\` (with \`resultType:"task"\`) or a sync + \`ToolResult\` (with \`resultType:"complete"\`); both are valid. 
+ +**resultType:"complete" on non-task responses (SEP-2322):** +- Every JSON-RPC response on the tools+tasks surface other than a + CreateTaskResult MUST carry \`resultType:"complete"\`. This applies + to: sync \`tools/call\`, \`tasks/get\`, \`tasks/update\` ack, + \`tasks/cancel\` ack. + +**Strong consistency / durable create (SEP-2663):** +- A server MUST NOT return \`CreateTaskResult\` until the task is + durably created — that is, until a \`tasks/get\` for the returned + \`taskId\` would resolve. Issuing \`tasks/get\` immediately after the + CreateTaskResult arrives MUST succeed, not -32602. + +**Unknown taskId on tasks/get (SEP-2663):** +- \`tasks/get\` for a taskId the server doesn't recognize MUST return + JSON-RPC \`-32602\` (InvalidParams). Mirrors the same rule for + \`tasks/cancel\` (clarified upstream in spec commit d963ad0).`; + + async run(serverUrl: string): Promise { + const checks: ConformanceCheck[] = []; + + let sessionId: string; + try { + ({ sessionId } = await initRawSession(serverUrl, { + capabilities: { + elicitation: {}, + sampling: {}, + extensions: { [TASKS_EXTENSION_ID]: {} } + } + })); + } catch (error) { + checks.push({ + id: 'tasks-session-bootstrap', + name: 'TasksSessionBootstrap', + description: + 'Initialize handshake declaring io.modelcontextprotocol/tasks extension succeeds', + status: 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: `Failed to initialize: ${errMsg(error)}`, + specReferences: [SEP_2663_REF] + }); + return checks; + } + + // Check 1: tasks/result removed. 
+    {
+      const id = 'tasks-removed-tasks-result';
+      const name = 'TasksRemovedTasksResult';
+      const description =
+        'tasks/result is removed in v2 and MUST reject with -32601';
+      try {
+        await rawRequest(
+          serverUrl,
+          'tasks/result',
+          { taskId: 'any' },
+          { sessionId }
+        );
+        // rawRequest throws when the response carries a JSON-RPC error, so
+        // reaching this line means the removed method produced a result.
+        checks.push({
+          id,
+          name,
+          description,
+          status: 'FAILURE',
+          timestamp: new Date().toISOString(),
+          errorMessage: 'tasks/result returned a result instead of -32601',
+          specReferences: [SEP_2663_REF]
+        });
+      } catch (e: any) {
+        const errs: string[] = [];
+        if (e?.code !== -32601) {
+          // A rejection without `code` did not come from a JSON-RPC error
+          // body; include its message so the report says what went wrong.
+          errs.push(
+            `expected -32601; got ${e?.code ?? `no JSON-RPC error code (${errMsg(e)})`}`
+          );
+        }
+        checks.push({
+          id,
+          name,
+          description,
+          status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+          timestamp: new Date().toISOString(),
+          errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+          specReferences: [SEP_2663_REF]
+        });
+      }
+    }
+
+    // Check 2: tasks/list removed.
+    {
+      const id = 'tasks-removed-tasks-list';
+      const name = 'TasksRemovedTasksList';
+      const description =
+        'tasks/list is removed in v2 and MUST reject with -32601';
+      try {
+        await rawRequest(serverUrl, 'tasks/list', {}, { sessionId });
+        // Same reasoning as above: no throw means the method still exists.
+        checks.push({
+          id,
+          name,
+          description,
+          status: 'FAILURE',
+          timestamp: new Date().toISOString(),
+          errorMessage: 'tasks/list returned a result instead of -32601',
+          specReferences: [SEP_2663_REF]
+        });
+      } catch (e: any) {
+        const errs: string[] = [];
+        if (e?.code !== -32601) {
+          errs.push(
+            `expected -32601; got ${e?.code ?? `no JSON-RPC error code (${errMsg(e)})`}`
+          );
+        }
+        checks.push({
+          id,
+          name,
+          description,
+          status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+          timestamp: new Date().toISOString(),
+          errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+          specReferences: [SEP_2663_REF]
+        });
+      }
+    }
+
+    // Check 3: server-directed task creation without client hint.
+ { + const id = 'tasks-server-directed-creation-no-hint'; + const name = 'TasksServerDirectedCreationNoHint'; + const description = + 'tools/call with no client `task` hint param MUST still produce CreateTaskResult for task-supporting tools'; + try { + const result = await rawRequest( + serverUrl, + 'tools/call', + { name: 'failing_job', arguments: {} }, + { sessionId } + ); + const errs: string[] = []; + if (result.resultType !== 'task' || !result.taskId) { + errs.push( + `expected CreateTaskResult; got resultType=${JSON.stringify(result.resultType)}, taskId=${JSON.stringify(result.taskId)}` + ); + } + // Best-effort wait so we don't leak. + if (result.taskId) { + try { + await waitForTerminal(serverUrl, sessionId, result.taskId); + } catch { + /* swallow */ + } + } + checks.push({ + id, + name, + description, + status: errs.length === 0 ? 'SUCCESS' : 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: errs.length > 0 ? errs.join('; ') : undefined, + specReferences: [SEP_2663_REF] + }); + } catch (error) { + checks.push(failureCheck(id, name, description, error, [SEP_2663_REF])); + } + } + + // Check 4: legacy `task` param tolerated + ignored on sync tool. + { + const id = 'tasks-legacy-task-param-ignored'; + const name = 'TasksLegacyTaskParamIgnored'; + const description = + 'tools/call with legacy `task` param against a sync tool MUST NOT error and MUST NOT be promoted to a task'; + try { + const result = await rawRequest( + serverUrl, + 'tools/call', + { + name: 'greet', + arguments: { name: 'legacy-hint' }, + // Legacy v1 hint that the server MUST ignore. 
+ task: { ttl: 60_000, pollInterval: 100 } + }, + { sessionId } + ); + const errs: string[] = []; + if (result.resultType === 'task') { + errs.push( + 'legacy `task` param MUST NOT promote a sync tool to a task' + ); + } + if (result.taskId) { + errs.push( + `sync tool with legacy hint MUST NOT carry top-level taskId; got ${result.taskId}` + ); + } + if (!Array.isArray(result.content) || result.content.length === 0) { + errs.push('sync tool MUST still return content[]'); + } + checks.push({ + id, + name, + description, + status: errs.length === 0 ? 'SUCCESS' : 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: errs.length > 0 ? errs.join('; ') : undefined, + specReferences: [SEP_2663_REF] + }); + } catch (error) { + checks.push(failureCheck(id, name, description, error, [SEP_2663_REF])); + } + } + + // Check 5: immediate-result shortcut. Either CreateTaskResult OR + // sync ToolResult is acceptable for an instant operation. + { + const id = 'tasks-immediate-result-shortcut'; + const name = 'TasksImmediateResultShortcut'; + const description = + 'For a fast operation, a task-supporting tool MAY skip task creation and return a sync ToolResult; either path is valid'; + try { + const result = await rawRequest( + serverUrl, + 'tools/call', + { + name: 'slow_compute', + arguments: { seconds: 0, label: 'instant' } + }, + { sessionId } + ); + const errs: string[] = []; + if (result.resultType === 'task') { + if (!result.taskId) { + errs.push( + 'task-path response MUST carry top-level taskId on CreateTaskResult' + ); + } + } else { + // Sync path + if (!Array.isArray(result.content)) { + errs.push( + 'sync-path response MUST carry content[] for the immediate ToolResult' + ); + } + } + checks.push({ + id, + name, + description, + status: errs.length === 0 ? 'SUCCESS' : 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: errs.length > 0 ? 
errs.join('; ') : undefined, + specReferences: [SEP_2663_REF], + details: { resultType: result.resultType } + }); + } catch (error) { + checks.push(failureCheck(id, name, description, error, [SEP_2663_REF])); + } + } + + // Check 6: resultType:"complete" on every non-task response. + { + const id = 'tasks-result-type-complete-on-non-task-responses'; + const name = 'TasksResultTypeCompleteOnNonTaskResponses'; + const description = + 'Sync tools/call, tasks/get, tasks/update ack, and tasks/cancel ack MUST all carry resultType:"complete"'; + const errs: string[] = []; + try { + // Sync tools/call. + const sync = await rawRequest( + serverUrl, + 'tools/call', + { name: 'greet', arguments: { name: 'rt' } }, + { sessionId } + ); + if (sync.resultType !== 'complete') { + errs.push( + `sync tools/call resultType = ${JSON.stringify(sync.resultType)}, want "complete"` + ); + } + + // tasks/get against a fresh task. + const created = await rawRequest( + serverUrl, + 'tools/call', + { + name: 'slow_compute', + arguments: { seconds: 0, label: 'rt-get' } + }, + { sessionId } + ); + const taskIdForGet = created.taskId; + if (taskIdForGet) { + await waitForTerminal(serverUrl, sessionId, taskIdForGet); + const got = await rawRequest( + serverUrl, + 'tasks/get', + { taskId: taskIdForGet }, + { sessionId } + ); + if (got.resultType !== 'complete') { + errs.push( + `tasks/get resultType = ${JSON.stringify(got.resultType)}, want "complete"` + ); + } + } + + // tasks/cancel ack on a fresh long-running task. 
+ const longLived = await rawRequest( + serverUrl, + 'tools/call', + { + name: 'slow_compute', + arguments: { seconds: 60, label: 'rt-cancel' } + }, + { sessionId } + ); + if (longLived.taskId) { + const cancelAck = await rawRequest( + serverUrl, + 'tasks/cancel', + { taskId: longLived.taskId }, + { sessionId } + ); + if (cancelAck.resultType !== 'complete') { + errs.push( + `tasks/cancel ack resultType = ${JSON.stringify(cancelAck.resultType)}, want "complete"` + ); + } + } + + // tasks/update ack on a parked elicitation task. + const elicit = await rawRequest( + serverUrl, + 'tools/call', + { name: 'confirm_delete', arguments: { filename: 'rt.txt' } }, + { sessionId } + ); + const elicitTaskId = elicit.taskId; + if (elicitTaskId) { + await waitForStatus( + serverUrl, + sessionId, + elicitTaskId, + 'input_required', + 5_000 + ); + const updateAck = await rawRequest( + serverUrl, + 'tasks/update', + { + taskId: elicitTaskId, + inputResponses: { 'unknown-key': { ignored: true } } + }, + { sessionId } + ); + if (updateAck.resultType !== 'complete') { + errs.push( + `tasks/update ack resultType = ${JSON.stringify(updateAck.resultType)}, want "complete"` + ); + } + try { + await rawRequest( + serverUrl, + 'tasks/cancel', + { taskId: elicitTaskId }, + { sessionId } + ); + } catch { + /* swallow */ + } + } + checks.push({ + id, + name, + description, + status: errs.length === 0 ? 'SUCCESS' : 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: errs.length > 0 ? errs.join('; ') : undefined, + specReferences: [SEP_2322_REF, SEP_2663_REF] + }); + } catch (error) { + checks.push(failureCheck(id, name, description, error, [SEP_2322_REF])); + } + } + + // Check 7: strong consistency — immediate tasks/get after CreateTaskResult. 
+    {
+      const id = 'tasks-strong-consistency-immediate-get';
+      const name = 'TasksStrongConsistencyImmediateGet';
+      const description =
+        'tasks/get issued immediately after CreateTaskResult arrives MUST resolve (server MUST NOT return CreateTaskResult before the task is durably created)';
+      try {
+        const created = await rawRequest(
+          serverUrl,
+          'tools/call',
+          {
+            name: 'slow_compute',
+            arguments: { seconds: 60, label: 'consistency' }
+          },
+          { sessionId }
+        );
+        const taskId = created.taskId;
+        if (!taskId) {
+          checks.push({
+            id,
+            name,
+            description,
+            status: 'FAILURE',
+            timestamp: new Date().toISOString(),
+            errorMessage: 'slow_compute did not create a task',
+            specReferences: [SEP_2663_REF]
+          });
+        } else {
+          const errs: string[] = [];
+          try {
+            // No await/sleep between create and get — codifies the
+            // strong-consistency ordering.
+            const got = await rawRequest(
+              serverUrl,
+              'tasks/get',
+              { taskId },
+              { sessionId }
+            );
+            if (got.taskId !== taskId) {
+              errs.push(
+                `immediate tasks/get MUST resolve the same taskId; got ${got.taskId}`
+              );
+            }
+          } finally {
+            // Cleanup runs even when tasks/get rejects (the failure this
+            // check exists to catch), so the 60-second task is not left
+            // running. A rejection still propagates to the outer catch.
+            try {
+              await rawRequest(
+                serverUrl,
+                'tasks/cancel',
+                { taskId },
+                { sessionId }
+              );
+            } catch {
+              /* swallow — cleanup is best-effort */
+            }
+          }
+          checks.push({
+            id,
+            name,
+            description,
+            status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+            timestamp: new Date().toISOString(),
+            errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+            specReferences: [SEP_2663_REF]
+          });
+        }
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, [SEP_2663_REF]));
+      }
+    }
+
+    // Check 8: tasks/get with unknown taskId returns -32602.
+ { + const id = 'tasks-get-unknown-task-id-rejected'; + const name = 'TasksGetUnknownTaskIdRejected'; + const description = + 'tasks/get for a taskId the server does not recognize MUST return -32602'; + try { + await rawRequest( + serverUrl, + 'tasks/get', + { taskId: 'tasks-conformance-nonexistent-12345' }, + { sessionId } + ); + checks.push({ + id, + name, + description, + status: 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: 'tasks/get with unknown taskId returned a result', + specReferences: [SEP_2663_REF] + }); + } catch (e: any) { + const errs: string[] = []; + if (e.code !== -32602) { + errs.push(`expected -32602; got ${e.code ?? ''}`); + } + checks.push({ + id, + name, + description, + status: errs.length === 0 ? 'SUCCESS' : 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: errs.length > 0 ? errs.join('; ') : undefined, + specReferences: [SEP_2663_REF] + }); + } + } + + return checks; + } +} diff --git a/src/scenarios/server/tasks/headers.ts b/src/scenarios/server/tasks/headers.ts new file mode 100644 index 0000000..0d5ebdd --- /dev/null +++ b/src/scenarios/server/tasks/headers.ts @@ -0,0 +1,243 @@ +/** + * SEP-2243 Mcp-Method / Mcp-Name request-header tolerance. + * + * SEP-2243 defines Mcp-Method and Mcp-Name as REQUEST headers (client → + * server) used by HTTP infrastructure (proxies, gateways, observability) + * to route or shape JSON-RPC traffic without parsing the body. They are + * informational; the JSON-RPC body is authoritative. A conformant + * server MUST tolerate the headers without changing dispatch. + * + * Whether the server *also* echoes these headers on responses for + * downstream observability is implementation-defined and out of scope + * for SEP-2243 conformance. + * + * Required server fixtures: + * - greet — sync-only, returns "Hello, {name}!" 
+ * - slow_compute — task-supporting, sleeps N seconds + */ + +import { + ClientScenario, + ConformanceCheck, + ScenarioSpecTag, + DRAFT_PROTOCOL_VERSION +} from '../../../types'; +import { + TASKS_EXTENSION_ID, + SEP_2243_REF, + errMsg, + failureCheck, + initRawSession, + rawRequest +} from './helpers'; + +export class TasksRequestHeadersScenario implements ClientScenario { + name = 'tasks-request-headers'; + specVersions: ScenarioSpecTag[] = ['extension', DRAFT_PROTOCOL_VERSION]; + description = `Test SEP-2243 Mcp-Method / Mcp-Name request-header tolerance. + +**Server Implementation Requirements:** + +SEP-2243 defines two informational request headers used by HTTP +infrastructure (proxies, gateways, observability) to route or shape +JSON-RPC traffic without parsing the body: + +- \`Mcp-Method: \` — set on every JSON-RPC request. +- \`Mcp-Name: \` — set on resume operations (\`tasks/get\`, + \`tasks/update\`, \`tasks/cancel\`). + +The JSON-RPC body is authoritative. The server MUST tolerate the +headers, MUST NOT require them, and MUST NOT change dispatch behavior +based on them — including when the headers disagree with the body.`; + + async run(serverUrl: string): Promise { + const checks: ConformanceCheck[] = []; + + let sessionId: string; + try { + ({ sessionId } = await initRawSession(serverUrl, { + capabilities: { extensions: { [TASKS_EXTENSION_ID]: {} } } + })); + } catch (error) { + checks.push({ + id: 'tasks-session-bootstrap', + name: 'TasksSessionBootstrap', + description: + 'Initialize handshake declaring io.modelcontextprotocol/tasks extension succeeds', + status: 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: `Failed to initialize: ${errMsg(error)}`, + specReferences: [SEP_2243_REF] + }); + return checks; + } + + // Check 1: Mcp-Method on tools/call against a sync tool. 
+    {
+      const id = 'tasks-headers-tolerate-mcp-method-on-tools-call';
+      const name = 'TasksHeadersTolerateMcpMethodOnToolsCall';
+      const description =
+        'Server tolerates Mcp-Method request header on tools/call (sync tool dispatch unaffected)';
+      try {
+        // The header agrees with the body method here; only tolerance of
+        // its presence is under test.
+        const reply = await rawRequest(
+          serverUrl,
+          'tools/call',
+          { name: 'greet', arguments: { name: 'sep-2243' } },
+          { sessionId, headers: { 'Mcp-Method': 'tools/call' } }
+        );
+        const problems: string[] = [];
+        if (reply.resultType !== 'complete') {
+          problems.push(
+            `sync ToolResult.resultType MUST be "complete" regardless of routing header; got ${JSON.stringify(reply.resultType)}`
+          );
+        }
+        // The greeting must come back exactly as it would without the header.
+        const greeted =
+          Array.isArray(reply.content) &&
+          reply.content.length !== 0 &&
+          reply.content[0]?.text === 'Hello, sep-2243!';
+        if (!greeted) {
+          problems.push(
+            'tool result content MUST be unaffected by the Mcp-Method header'
+          );
+        }
+        const passed = problems.length === 0;
+        checks.push({
+          id,
+          name,
+          description,
+          status: passed ? 'SUCCESS' : 'FAILURE',
+          timestamp: new Date().toISOString(),
+          errorMessage: passed ? undefined : problems.join('; '),
+          specReferences: [SEP_2243_REF]
+        });
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, [SEP_2243_REF]));
+      }
+    }
+
+    // Check 2: Mcp-Method + Mcp-Name on tasks/get (drive a task first
+    // so we have a real taskId to route on).
+    let routingTaskId: string | undefined;
+    {
+      const id = 'tasks-headers-tolerate-routing-headers-on-tasks-get';
+      const name = 'TasksHeadersTolerateRoutingHeadersOnTasksGet';
+      const description =
+        'Server tolerates Mcp-Method + Mcp-Name request headers on tasks/get (body taskId resolves regardless of routing headers)';
+      try {
+        // Long-running task so it is still around when tasks/get is sent;
+        // it is cancelled once the remaining checks have run.
+        const spawned = await rawRequest(
+          serverUrl,
+          'tools/call',
+          {
+            name: 'slow_compute',
+            arguments: { seconds: 60, label: 'headers-tasks-get' }
+          },
+          { sessionId }
+        );
+        routingTaskId = spawned.taskId;
+        if (routingTaskId) {
+          const routingHeaders = {
+            'Mcp-Method': 'tasks/get',
+            'Mcp-Name': routingTaskId
+          };
+          const fetched = await rawRequest(
+            serverUrl,
+            'tasks/get',
+            { taskId: routingTaskId },
+            { sessionId, headers: routingHeaders }
+          );
+          const problems: string[] = [];
+          if (fetched.taskId !== routingTaskId) {
+            problems.push(
+              `tasks/get MUST resolve body taskId regardless of routing headers; got ${fetched.taskId}`
+            );
+          }
+          if (!fetched.status) {
+            problems.push(
+              'tasks/get MUST still return status when routing headers are set'
+            );
+          }
+          const passed = problems.length === 0;
+          checks.push({
+            id,
+            name,
+            description,
+            status: passed ? 'SUCCESS' : 'FAILURE',
+            timestamp: new Date().toISOString(),
+            errorMessage: passed ? undefined : problems.join('; '),
+            specReferences: [SEP_2243_REF]
+          });
+        } else {
+          // Without a task there is nothing to route on.
+          checks.push({
+            id,
+            name,
+            description,
+            status: 'FAILURE',
+            timestamp: new Date().toISOString(),
+            errorMessage: 'slow_compute did not create a task',
+            specReferences: [SEP_2243_REF]
+          });
+        }
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, [SEP_2243_REF]));
+      }
+    }
+
+    // Check 3: Body method is authoritative when Mcp-Method header
+    // disagrees with body.
+ { + const id = 'tasks-headers-body-method-authoritative'; + const name = 'TasksHeadersBodyMethodAuthoritative'; + const description = + 'When Mcp-Method header disagrees with body, server MUST dispatch on body method (header is informational)'; + try { + const result = await rawRequest( + serverUrl, + 'tools/call', + { name: 'greet', arguments: { name: 'header-mismatch' } }, + { sessionId, headers: { 'Mcp-Method': 'tasks/get' } } + ); + const errs: string[] = []; + if (result.resultType !== 'complete') { + errs.push( + `server MUST dispatch on body method (tools/call → resultType:"complete"); got ${JSON.stringify(result.resultType)}` + ); + } + if ( + !Array.isArray(result.content) || + result.content[0]?.text !== 'Hello, header-mismatch!' + ) { + errs.push( + 'tool result MUST reflect the body method, not the header claim' + ); + } + checks.push({ + id, + name, + description, + status: errs.length === 0 ? 'SUCCESS' : 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: errs.length > 0 ? errs.join('; ') : undefined, + specReferences: [SEP_2243_REF] + }); + } catch (error) { + checks.push(failureCheck(id, name, description, error, [SEP_2243_REF])); + } + } + + // Cleanup the long-lived task. + if (routingTaskId) { + try { + await rawRequest( + serverUrl, + 'tasks/cancel', + { taskId: routingTaskId }, + { sessionId } + ); + } catch { + /* swallow */ + } + } + + return checks; + } +} diff --git a/src/scenarios/server/tasks/helpers.ts b/src/scenarios/server/tasks/helpers.ts new file mode 100644 index 0000000..2eea4e7 --- /dev/null +++ b/src/scenarios/server/tasks/helpers.ts @@ -0,0 +1,301 @@ +/** + * Shared helpers for SEP-2663 Tasks server-conformance scenarios. + * + * The MCP TS SDK's typed schemas (CallToolResultSchema, etc.) strip the + * SEP-2663 / SEP-2322 wire fields — `resultType`, `taskId`, `inputRequests`, + * `requestState`, inlined `result`/`error` on tasks/get's DetailedTask. 
So
+ * scenarios that exercise those fields use raw fetch instead. This file
+ * centralizes the bootstrap + RPC + polling primitives.
+ *
+ * If/when the SDK gains schemas for the SEP-2663 wire shapes, the call
+ * sites in scenarios switch back to `client.request(..., AnyResult)`
+ * and this file shrinks (or disappears).
+ */
+
+import type { ConformanceCheck, SpecReference } from '../../../types';
+
+/** Extension identifier used as the key under `capabilities.extensions`. */
+export const TASKS_EXTENSION_ID = 'io.modelcontextprotocol/tasks';
+
+// Spec references attached to checks; each points at the specification
+// pull request for the corresponding SEP.
+export const SEP_2663_REF: SpecReference = {
+  id: 'SEP-2663',
+  url: 'https://github.com/modelcontextprotocol/specification/pull/2663'
+};
+export const SEP_2322_REF: SpecReference = {
+  id: 'SEP-2322',
+  url: 'https://github.com/modelcontextprotocol/specification/pull/2322'
+};
+export const SEP_2243_REF: SpecReference = {
+  id: 'SEP-2243',
+  url: 'https://github.com/modelcontextprotocol/specification/pull/2243'
+};
+export const SEP_2575_REF: SpecReference = {
+  id: 'SEP-2575',
+  url: 'https://github.com/modelcontextprotocol/specification/pull/2575'
+};
+
+/**
+ * Render any thrown value as text: the `message` of an Error, otherwise
+ * the value's `String()` form.
+ */
+export function errMsg(error: unknown): string {
+  return error instanceof Error ? error.message : String(error);
+}
+
+/** Build a FAILURE check from a thrown error, preserving id/name/description. */
+export function failureCheck(
+  id: string,
+  name: string,
+  description: string,
+  error: unknown,
+  specReferences: SpecReference[]
+): ConformanceCheck {
+  return {
+    id,
+    name,
+    description,
+    status: 'FAILURE',
+    // Stamped when the check is built, not when the error was thrown.
+    timestamp: new Date().toISOString(),
+    errorMessage: errMsg(error),
+    specReferences
+  };
+}
+
+/** Build a SKIPPED check (preserves id stability so Ctrl+F still finds it).
+ */
+export function skipCheck(
+  id: string,
+  name: string,
+  description: string,
+  reason: string,
+  specReferences: SpecReference[] = [SEP_2663_REF]
+): ConformanceCheck {
+  return {
+    id,
+    name,
+    description,
+    status: 'SKIPPED',
+    timestamp: new Date().toISOString(),
+    errorMessage: `Skipped: ${reason}`,
+    specReferences
+  };
+}
+
+// NOTE(review): the type arguments on Record/Promise below were missing from
+// the reviewed text (bare `Record` / `Promise` do not compile). The choices
+// here are the loosest that keep call sites type-checking — confirm against
+// the original file.
+export interface InitOpts {
+  /** Negotiated wire protocolVersion. Defaults to '2025-11-25'. */
+  protocolVersion?: string;
+  /** Client capabilities (extensions, elicitation, sampling, …). */
+  capabilities?: Record<string, unknown>;
+  /** Optional clientInfo override. */
+  clientInfo?: { name: string; version: string };
+}
+
+export interface InitResult {
+  /** Mcp-Session-Id minted by the server during initialize. */
+  sessionId: string;
+  /** capabilities object the server advertised in its initialize response. */
+  serverCapabilities: Record<string, any>;
+  /** Negotiated protocolVersion echoed back by the server. */
+  serverProtocolVersion?: string;
+  /** Server info (name, version, …). */
+  serverInfo?: Record<string, any>;
+}
+
+/**
+ * Decode the JSON-RPC message that answers request `id` from either a plain
+ * JSON body or an SSE stream (`data:` lines). Returns undefined when the
+ * body is empty, is not JSON, or holds no message with that id.
+ */
+async function readInitializeBody(resp: Response, id: string): Promise<any> {
+  const text = await resp.text();
+  const contentType = resp.headers.get('content-type') || '';
+  if (!contentType.includes('text/event-stream')) {
+    try {
+      return text ? JSON.parse(text) : undefined;
+    } catch {
+      return undefined;
+    }
+  }
+  for (const line of text.split('\n')) {
+    const trimmed = line.trim();
+    if (!trimmed.startsWith('data:')) continue;
+    const payload = trimmed.slice(5).trimStart();
+    if (!payload.startsWith('{')) continue;
+    const parsed = JSON.parse(payload);
+    if (parsed.id === id) return parsed;
+  }
+  return undefined;
+}
+
+/**
+ * Run a fresh initialize handshake and return session id + the server's
+ * advertised capabilities. Bypasses the SDK so callers can declare
+ * extension capabilities the SDK's typed wrappers don't yet know about,
+ * and so the SDK's Zod schemas don't strip extension fields off the
+ * server response.
+ *
+ * Both POSTs advertise `application/json` and `text/event-stream` in
+ * Accept, and the initialize response is decoded from either format.
+ *
+ * @throws Error when the server answers with a JSON-RPC error, a non-2xx
+ *   status, no Mcp-Session-Id header, or no decodable JSON-RPC message.
+ */
+export async function initRawSession(
+  serverUrl: string,
+  opts: InitOpts = {}
+): Promise<InitResult> {
+  const protocolVersion = opts.protocolVersion ?? '2025-11-25';
+  const capabilities = opts.capabilities ?? {};
+  const clientInfo = opts.clientInfo ?? {
+    name: 'mcp-conformance',
+    version: '1.0'
+  };
+
+  const initResp = await fetch(serverUrl, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      Accept: 'application/json, text/event-stream'
+    },
+    body: JSON.stringify({
+      jsonrpc: '2.0',
+      id: 'init-raw',
+      method: 'initialize',
+      params: { protocolVersion, clientInfo, capabilities }
+    })
+  });
+
+  // Decode the body before looking at the session header, so a rejected
+  // initialize reports its real cause rather than "missing Mcp-Session-Id".
+  const initBody = await readInitializeBody(initResp, 'init-raw');
+  if (initBody?.error) {
+    throw new Error(
+      `initialize returned JSON-RPC error: ${JSON.stringify(initBody.error)}`
+    );
+  }
+  if (!initResp.ok) {
+    throw new Error(`initialize failed with HTTP ${initResp.status}`);
+  }
+  const sid = initResp.headers.get('mcp-session-id') || '';
+  if (!sid) throw new Error('initialize response missing Mcp-Session-Id');
+  if (!initBody) {
+    throw new Error('initialize response carried no JSON-RPC message');
+  }
+  const result = initBody.result ?? {};
+
+  // Complete the handshake; the response to this notification is not inspected.
+  await fetch(serverUrl, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      Accept: 'application/json, text/event-stream',
+      'Mcp-Session-Id': sid
+    },
+    body: JSON.stringify({
+      jsonrpc: '2.0',
+      method: 'notifications/initialized'
+    })
+  });
+  return {
+    sessionId: sid,
+    serverCapabilities: result.capabilities ?? {},
+    serverProtocolVersion: result.protocolVersion,
+    serverInfo: result.serverInfo
+  };
+}
+
+export interface RawRequestOpts {
+  sessionId: string;
+  /** Optional _meta object merged into the JSON-RPC params. */
+  meta?: Record<string, unknown>;
+  /** Optional HTTP request headers merged after the harness defaults. */
+  headers?: Record<string, string>;
+}
+
+export interface RawRequestResult {
+  /** The JSON-RPC `result` body, when the response carried one. */
+  result: any;
+  /** The raw fetch Response so callers can inspect transport-level headers. */
+  response: Response;
+}
+
+let nextId = 1;
+
+/**
+ * Send a raw JSON-RPC request via fetch, parsing SSE `data:` lines or
+ * plain JSON depending on Content-Type. Throws an Error decorated with
+ * `code` / `data` when the response carries a JSON-RPC error.
+ */
+export async function rawRequest(
+  serverUrl: string,
+  method: string,
+  params: any,
+  opts: RawRequestOpts
+): Promise<any> {
+  const { result } = await rawRequestFull(serverUrl, method, params, opts);
+  return result;
+}
+
+/**
+ * Like rawRequest, but also returns the raw fetch Response so callers
+ * can inspect transport-level headers (e.g., SEP-2243 routing headers).
+ *
+ * Request ids come from a module-level counter. For SSE responses only the
+ * `data:` message whose `id` matches the request is used; other messages
+ * on the stream are skipped.
+ *
+ * @throws Error with `code` / `data` copied from the JSON-RPC error when the
+ *   response carries one; a plain Error when no JSON-RPC message could be
+ *   read from the response.
+ */
+export async function rawRequestFull(
+  serverUrl: string,
+  method: string,
+  params: any,
+  opts: RawRequestOpts
+): Promise<RawRequestResult> {
+  const id = nextId++;
+  const requestParams = opts.meta ? { ...params, _meta: opts.meta } : params;
+  const resp = await fetch(serverUrl, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      Accept: 'text/event-stream, application/json',
+      'Mcp-Session-Id': opts.sessionId,
+      // Caller headers go last so they can override the defaults above.
+      ...(opts.headers ?? {})
+    },
+    body: JSON.stringify({
+      jsonrpc: '2.0',
+      id,
+      method,
+      params: requestParams
+    })
+  });
+  const ct = resp.headers.get('content-type') || '';
+  const text = await resp.text();
+  let body: any;
+  if (ct.includes('text/event-stream')) {
+    for (const line of text.split('\n')) {
+      const trimmed = line.trim();
+      if (trimmed.startsWith('data:')) {
+        const payload = trimmed.slice(5).trimStart();
+        if (payload.startsWith('{')) {
+          const parsed = JSON.parse(payload);
+          if (parsed.id === id) {
+            body = parsed;
+            break;
+          }
+        }
+      }
+    }
+  } else if (text) {
+    try {
+      body = JSON.parse(text);
+    } catch {
+      // e.g. a plain-text or HTML error page from the HTTP layer; report
+      // the status rather than a bare JSON SyntaxError.
+      throw new Error(
+        `Non-JSON response for ${method} (HTTP ${resp.status})`
+      );
+    }
+  }
+  if (!body) throw new Error(`No JSON-RPC response for ${method}`);
+  if (body.error) {
+    const err: any = new Error(body.error.message);
+    err.code = body.error.code;
+    err.data = body.error.data;
+    throw err;
+  }
+  return { result: body.result, response: resp };
+}
+
+/** Poll tasks/get until the task reaches a terminal state.
+ */
+export async function waitForTerminal(
+  serverUrl: string,
+  sessionId: string,
+  taskId: string,
+  timeoutMs = 10_000
+): Promise<any> {
+  // NOTE(review): the return type argument was missing from the reviewed
+  // text; `any` matches how callers read fields off the result — confirm.
+  const start = Date.now();
+  while (Date.now() - start < timeoutMs) {
+    const task = await rawRequest(
+      serverUrl,
+      'tasks/get',
+      { taskId },
+      { sessionId }
+    );
+    if (['completed', 'failed', 'cancelled'].includes(task.status)) {
+      return task;
+    }
+    // 200 ms between polls.
+    await new Promise((r) => setTimeout(r, 200));
+  }
+  throw new Error(
+    `Task ${taskId} did not reach terminal state within ${timeoutMs}ms`
+  );
+}
+
+/**
+ * Poll tasks/get until a specific status (or any terminal state).
+ *
+ * A terminal status also ends the wait, so callers must inspect
+ * `status` on the returned task rather than assume it equals `status`.
+ */
+export async function waitForStatus(
+  serverUrl: string,
+  sessionId: string,
+  taskId: string,
+  status: string,
+  timeoutMs = 10_000
+): Promise<any> {
+  const start = Date.now();
+  while (Date.now() - start < timeoutMs) {
+    const task = await rawRequest(
+      serverUrl,
+      'tasks/get',
+      { taskId },
+      { sessionId }
+    );
+    if (
+      task.status === status ||
+      ['completed', 'failed', 'cancelled'].includes(task.status)
+    ) {
+      return task;
+    }
+    await new Promise((r) => setTimeout(r, 200));
+  }
+  throw new Error(
+    `Task ${taskId} did not reach status ${status} within ${timeoutMs}ms`
+  );
+}
diff --git a/src/scenarios/server/tasks/lifecycle.ts b/src/scenarios/server/tasks/lifecycle.ts
new file mode 100644
index 0000000..21c5a13
--- /dev/null
+++ b/src/scenarios/server/tasks/lifecycle.ts
@@ -0,0 +1,574 @@
+/**
+ * SEP-2663 Tasks Extension — server lifecycle conformance.
+ *
+ * Tests a server that implements the io.modelcontextprotocol/tasks
+ * extension end-to-end: sync vs async dispatch, DetailedTask shape on
+ * tasks/get, tool errors vs protocol errors, and cancellation
+ * semantics.
+ *
+ * Required server fixtures (tools/list output must include all):
+ * - greet — sync-only, returns "Hello, {name}!"
+ *   - slow_compute — task-supporting, sleeps N seconds
+ *   - failing_job — task-supporting, returns a tool error
+ *   - protocol_error_job — task-supporting, panics into a protocol error
+ */
+
+import {
+  ClientScenario,
+  ConformanceCheck,
+  ScenarioSpecTag,
+  DRAFT_PROTOCOL_VERSION
+} from '../../../types';
+import {
+  TASKS_EXTENSION_ID,
+  SEP_2663_REF,
+  SEP_2322_REF,
+  errMsg,
+  failureCheck,
+  skipCheck,
+  initRawSession,
+  rawRequest,
+  waitForTerminal
+} from './helpers';
+import { isIso8601 } from '../_shared/wire-format';
+
+/**
+ * Build the verdict for one check from its accumulated violations.
+ *
+ * An empty `errs` yields SUCCESS; otherwise FAILURE with the violations
+ * joined by "; ". The `details` key is only present when details are
+ * supplied, so checks that never reported details keep the same shape.
+ */
+function verdictCheck(
+  id: string,
+  name: string,
+  description: string,
+  errs: string[],
+  specReferences: ConformanceCheck['specReferences'],
+  details?: ConformanceCheck['details']
+): ConformanceCheck {
+  return {
+    id,
+    name,
+    description,
+    status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+    timestamp: new Date().toISOString(),
+    errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+    specReferences,
+    ...(details !== undefined ? { details } : {})
+  };
+}
+
+export class TasksLifecycleScenario implements ClientScenario {
+  name = 'tasks-lifecycle';
+  // 'extension' tags this as off the dated-version timeline (selectable
+  // via `--suite extensions`); DRAFT_PROTOCOL_VERSION lets `--spec-version
+  // draft` runs include it before SEP-2663 lands in a dated release.
+  specVersions: ScenarioSpecTag[] = ['extension', DRAFT_PROTOCOL_VERSION];
+  description = `Test SEP-2663 Tasks extension lifecycle on the server.
+
+**Server Implementation Requirements (SEP-2663):**
+
+The server MUST advertise \`io.modelcontextprotocol/tasks\` under
+\`capabilities.extensions\` and gate the task surface on negotiation.
+
+**Sync dispatch (no task created):**
+- A \`tools/call\` against a sync-only tool MUST return a flat
+  \`ToolResult\` with \`resultType:"complete"\` and a \`content[]\` array.
+- It MUST NOT carry \`taskId\` at the top level (that would imply a
+  CreateTaskResult).
+
+**Server-directed task creation:**
+- For task-supporting tools, the server decides whether to create a task —
+  the client MUST NOT need to opt in via a request param.
+- The response MUST be a \`CreateTaskResult\` — a flat \`Result & Task\`
+  intersection: \`resultType:"task"\`, plus \`taskId\` / \`status\` /
+  \`createdAt\` / \`lastUpdatedAt\` / \`ttlSeconds\` at the top level.
+  There MUST NOT be a nested \`task\` wrapper key.
+
+**tasks/get DetailedTask:**
+- Working tasks return \`status\` and basic metadata; result/error are
+  absent.
+- Completed tasks MUST inline the original tool result under \`result\`
+  with \`content[]\`. There is no separate \`tasks/result\` method.
+
+**Tool errors vs protocol errors (SEP-2663 §error-semantics):**
+- A tool that ran but reported an error MUST surface as
+  \`status:"completed"\` with \`result.isError:true\`. The status
+  \`"failed"\` is reserved for protocol-level errors.
+- A protocol-level error (server crash, internal failure) MUST surface
+  as \`status:"failed"\` with an inlined \`error\` object (JSON-RPC
+  error shape: code/message/data) and MUST NOT carry \`result\`.
+
+**Cancellation:**
+- \`tasks/cancel\` MUST return an empty
+  \`{resultType:"complete"}\` ack — no task envelope (SEP-2322
+  discriminator). The cancelled status is observed via the next
+  \`tasks/get\`.
+- \`tasks/cancel\` against a terminal task MUST return JSON-RPC
+  \`-32602\` (InvalidParams). Clarified upstream in spec commit d963ad0.`;
+
+  /**
+   * Run the eight lifecycle checks against `serverUrl`.
+   *
+   * A failed initialize handshake short-circuits with a single
+   * `tasks-session-bootstrap` FAILURE. Otherwise every check appends
+   * exactly one entry; a thrown request becomes a FAILURE for that
+   * check only and the remaining checks still run.
+   *
+   * @param serverUrl - endpoint of the server under test
+   * @returns the ordered list of check results
+   */
+  async run(serverUrl: string): Promise<ConformanceCheck[]> {
+    const checks: ConformanceCheck[] = [];
+
+    let sessionId: string;
+    try {
+      ({ sessionId } = await initRawSession(serverUrl, {
+        capabilities: {
+          elicitation: {},
+          sampling: {},
+          extensions: { [TASKS_EXTENSION_ID]: {} }
+        }
+      }));
+    } catch (error) {
+      checks.push({
+        id: 'tasks-session-bootstrap',
+        name: 'TasksSessionBootstrap',
+        description:
+          'Initialize handshake declaring io.modelcontextprotocol/tasks extension succeeds',
+        status: 'FAILURE',
+        timestamp: new Date().toISOString(),
+        errorMessage: `Failed to initialize: ${errMsg(error)}`,
+        specReferences: [SEP_2663_REF]
+      });
+      return checks;
+    }
+
+    // Check 1: sync tool call returns ToolResult, no task creation.
+    {
+      const id = 'tasks-sync-tool-call';
+      const name = 'TasksSyncToolCall';
+      const description =
+        'Sync tool returns ToolResult (resultType:"complete"), no taskId at top level';
+      // Same references on success and failure paths.
+      const refs = [SEP_2663_REF, SEP_2322_REF];
+      try {
+        const result = await rawRequest(
+          serverUrl,
+          'tools/call',
+          { name: 'greet', arguments: { name: 'World' } },
+          { sessionId }
+        );
+        const errs: string[] = [];
+        if (result.resultType === 'task') {
+          errs.push('sync tool result MUST NOT carry resultType:"task"');
+        }
+        if (result.taskId) {
+          errs.push(
+            `sync tool result MUST NOT carry top-level taskId; got ${result.taskId}`
+          );
+        }
+        if (!Array.isArray(result.content) || result.content.length === 0) {
+          errs.push('sync tool result MUST carry a non-empty content[] array');
+        }
+        checks.push(
+          verdictCheck(id, name, description, errs, refs, {
+            resultType: result.resultType,
+            hasTaskId: Boolean(result.taskId),
+            contentLength: result.content?.length
+          })
+        );
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, refs));
+      }
+    }
+
+    // Check 2: server-directed task creation produces flat CreateTaskResult.
+    // The created task id feeds Checks 3 and 4.
+    let workingTaskId: string | undefined;
+    {
+      const id = 'tasks-server-task-creation';
+      const name = 'TasksServerTaskCreation';
+      const description =
+        'Task-supporting tool returns flat CreateTaskResult (no nested `task` wrapper)';
+      try {
+        const result = await rawRequest(
+          serverUrl,
+          'tools/call',
+          {
+            name: 'slow_compute',
+            arguments: { seconds: 2, label: 'lifecycle-create' }
+          },
+          { sessionId }
+        );
+        const errs: string[] = [];
+        if (result.resultType !== 'task') {
+          errs.push(
+            `expected resultType:"task"; got ${JSON.stringify(result.resultType)}`
+          );
+        }
+        if (result.task) {
+          errs.push(
+            'CreateTaskResult MUST be flat (Result & Task); there must be no nested `task` wrapper key'
+          );
+        }
+        if (!result.taskId) {
+          errs.push('CreateTaskResult MUST carry top-level taskId');
+        }
+        if (!result.status) {
+          errs.push('CreateTaskResult MUST carry top-level status');
+        }
+        if ('result' in result) {
+          errs.push(
+            'CreateTaskResult MUST NOT carry `result` (lives on tasks/get DetailedTask)'
+          );
+        }
+        if ('error' in result) {
+          errs.push(
+            'CreateTaskResult MUST NOT carry `error` (lives on tasks/get DetailedTask)'
+          );
+        }
+        if ('inputRequests' in result) {
+          errs.push(
+            'CreateTaskResult MUST NOT carry `inputRequests` (lives on tasks/get DetailedTask)'
+          );
+        }
+        // Timestamps — both keys present, both ISO-8601 formatted. Per
+        // SEP-2663 these are required on every TaskInfoV2. See
+        // `_shared/wire-format.ts` for the regex rationale.
+        if (!isIso8601(result.createdAt)) {
+          errs.push(
+            `createdAt MUST be an ISO-8601 string; got ${JSON.stringify(result.createdAt)}`
+          );
+        }
+        if (!isIso8601(result.lastUpdatedAt)) {
+          errs.push(
+            `lastUpdatedAt MUST be an ISO-8601 string; got ${JSON.stringify(result.lastUpdatedAt)}`
+          );
+        }
+        if (result.taskId) workingTaskId = result.taskId;
+        checks.push(
+          verdictCheck(id, name, description, errs, [SEP_2663_REF], {
+            resultType: result.resultType,
+            taskId: result.taskId,
+            status: result.status
+          })
+        );
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, [SEP_2663_REF]));
+      }
+    }
+
+    // Check 3: tasks/get during working state returns status + metadata.
+    {
+      const id = 'tasks-get-during-working';
+      const name = 'TasksGetDuringWorking';
+      const description =
+        'tasks/get returns status + metadata for an active task';
+      if (!workingTaskId) {
+        checks.push(skipCheck(id, name, description, 'no task created'));
+      } else {
+        try {
+          const task = await rawRequest(
+            serverUrl,
+            'tasks/get',
+            { taskId: workingTaskId },
+            { sessionId }
+          );
+          const errs: string[] = [];
+          if (task.taskId !== workingTaskId) {
+            errs.push(
+              `taskId mismatch: expected ${workingTaskId}, got ${task.taskId}`
+            );
+          }
+          if (!task.status) errs.push('tasks/get response MUST carry status');
+          checks.push(
+            verdictCheck(id, name, description, errs, [SEP_2663_REF], {
+              status: task.status
+            })
+          );
+        } catch (error) {
+          checks.push(
+            failureCheck(id, name, description, error, [SEP_2663_REF])
+          );
+        }
+      }
+    }
+
+    // Check 4: terminal tasks/get inlines result with content[].
+    {
+      const id = 'tasks-get-terminal-inlined-result';
+      const name = 'TasksGetTerminalInlinedResult';
+      const description =
+        'Completed task tasks/get inlines result with content[] (no separate tasks/result method)';
+      if (!workingTaskId) {
+        checks.push(skipCheck(id, name, description, 'no task created'));
+      } else {
+        try {
+          const terminal = await waitForTerminal(
+            serverUrl,
+            sessionId,
+            workingTaskId
+          );
+          const errs: string[] = [];
+          if (terminal.status !== 'completed') {
+            errs.push(
+              `expected status:"completed"; got ${JSON.stringify(terminal.status)}`
+            );
+          }
+          if (!terminal.result) {
+            errs.push('completed task MUST inline `result`');
+          } else if (
+            !Array.isArray(terminal.result.content) ||
+            terminal.result.content.length === 0
+          ) {
+            errs.push(
+              'completed task `result.content[]` MUST be a non-empty array'
+            );
+          }
+          checks.push(
+            verdictCheck(id, name, description, errs, [SEP_2663_REF], {
+              status: terminal.status,
+              hasResult: Boolean(terminal.result),
+              contentLength: terminal.result?.content?.length
+            })
+          );
+        } catch (error) {
+          checks.push(
+            failureCheck(id, name, description, error, [SEP_2663_REF])
+          );
+        }
+      }
+    }
+
+    // Check 5: tool execution error → completed with isError:true.
+    {
+      const id = 'tasks-tool-error-completed-iserror';
+      const name = 'TasksToolErrorCompletedIsError';
+      const description =
+        'Tool execution error reports as completed + result.isError (NOT failed)';
+      try {
+        const created = await rawRequest(
+          serverUrl,
+          'tools/call',
+          { name: 'failing_job', arguments: {} },
+          { sessionId }
+        );
+        const errs: string[] = [];
+        if (!created.taskId) {
+          errs.push('failing_job MUST create a task');
+        } else {
+          const terminal = await waitForTerminal(
+            serverUrl,
+            sessionId,
+            created.taskId
+          );
+          if (terminal.status !== 'completed') {
+            errs.push(
+              `tool error MUST surface as completed (not "${terminal.status}")`
+            );
+          }
+          if (!terminal.result) {
+            errs.push('completed task with tool error MUST carry `result`');
+          } else if (terminal.result.isError !== true) {
+            errs.push('result.isError MUST be true for tool execution errors');
+          }
+        }
+        checks.push(verdictCheck(id, name, description, errs, [SEP_2663_REF]));
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, [SEP_2663_REF]));
+      }
+    }
+
+    // Check 6: protocol-level error → failed with inlined error, no result.
+    {
+      const id = 'tasks-protocol-error-failed-shape';
+      const name = 'TasksProtocolErrorFailedShape';
+      const description =
+        'Protocol-level error reports as failed + inlined error{code,message}, no result';
+      try {
+        const created = await rawRequest(
+          serverUrl,
+          'tools/call',
+          { name: 'protocol_error_job', arguments: {} },
+          { sessionId }
+        );
+        const errs: string[] = [];
+        if (!created.taskId) {
+          errs.push('protocol_error_job MUST create a task');
+        } else {
+          const terminal = await waitForTerminal(
+            serverUrl,
+            sessionId,
+            created.taskId
+          );
+          if (terminal.status !== 'failed') {
+            errs.push(
+              `protocol error MUST surface as failed (not "${terminal.status}")`
+            );
+          }
+          if (!terminal.error) {
+            errs.push('failed task MUST carry inlined `error`');
+          } else {
+            if (typeof terminal.error.code !== 'number') {
+              errs.push('failed task error MUST carry numeric `code`');
+            }
+            if (typeof terminal.error.message !== 'string') {
+              errs.push('failed task error MUST carry string `message`');
+            }
+          }
+          if (terminal.result) {
+            errs.push('failed task MUST NOT carry `result`');
+          }
+        }
+        checks.push(verdictCheck(id, name, description, errs, [SEP_2663_REF]));
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, [SEP_2663_REF]));
+      }
+    }
+
+    // Check 7: tasks/cancel returns empty {resultType:"complete"} ack;
+    // status settles to cancelled.
+    {
+      const id = 'tasks-cancel-empty-ack';
+      const name = 'TasksCancelEmptyAck';
+      const description =
+        'tasks/cancel returns {resultType:"complete"} ack; status settles to cancelled';
+      // Same references on success and failure paths.
+      const refs = [SEP_2663_REF, SEP_2322_REF];
+      try {
+        const created = await rawRequest(
+          serverUrl,
+          'tools/call',
+          {
+            name: 'slow_compute',
+            arguments: { seconds: 60, label: 'lifecycle-cancel' }
+          },
+          { sessionId }
+        );
+        const cancelTaskId: string | undefined = created.taskId;
+        if (!cancelTaskId) {
+          checks.push(
+            verdictCheck(
+              id,
+              name,
+              description,
+              ['slow_compute did not create a task'],
+              refs
+            )
+          );
+        } else {
+          const ack = await rawRequest(
+            serverUrl,
+            'tasks/cancel',
+            { taskId: cancelTaskId },
+            { sessionId }
+          );
+          const errs: string[] = [];
+          // Ack carries only the SEP-2322 discriminator — no task envelope.
+          if (
+            JSON.stringify(ack) !== JSON.stringify({ resultType: 'complete' })
+          ) {
+            errs.push(
+              `cancel ack MUST be {resultType:"complete"}; got ${JSON.stringify(ack)}`
+            );
+          }
+          // Status settles to cancelled — observe via tasks/get.
+          const after = await rawRequest(
+            serverUrl,
+            'tasks/get',
+            { taskId: cancelTaskId },
+            { sessionId }
+          );
+          if (after.status !== 'cancelled') {
+            errs.push(
+              `tasks/get after cancel MUST report cancelled; got ${after.status}`
+            );
+          }
+          checks.push(
+            verdictCheck(id, name, description, errs, refs, {
+              cancelAck: ack,
+              statusAfterCancel: after.status
+            })
+          );
+        }
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, refs));
+      }
+    }
+
+    // Check 8: tasks/cancel on a terminal task MUST return -32602.
+    {
+      const id = 'tasks-cancel-terminal-rejected';
+      const name = 'TasksCancelTerminalRejected';
+      const description =
+        'tasks/cancel on a terminal task returns -32602 (per spec commit d963ad0)';
+      try {
+        const created = await rawRequest(
+          serverUrl,
+          'tools/call',
+          {
+            name: 'slow_compute',
+            arguments: { seconds: 1, label: 'lifecycle-cancel-terminal' }
+          },
+          { sessionId }
+        );
+        const completedTaskId = created.taskId;
+        if (!completedTaskId) {
+          checks.push(
+            verdictCheck(
+              id,
+              name,
+              description,
+              ['slow_compute did not create a task'],
+              [SEP_2663_REF]
+            )
+          );
+        } else {
+          await waitForTerminal(serverUrl, sessionId, completedTaskId);
+          // Now cancel — must throw -32602.
+          let thrown: any;
+          try {
+            await rawRequest(
+              serverUrl,
+              'tasks/cancel',
+              { taskId: completedTaskId },
+              { sessionId }
+            );
+          } catch (e) {
+            thrown = e;
+          }
+          const errs: string[] = [];
+          if (!thrown) {
+            errs.push(
+              'tasks/cancel on terminal task MUST return JSON-RPC error'
+            );
+          } else if (thrown.code !== -32602) {
+            // Placeholder keeps the message readable when the thrown
+            // value has no `code` at all.
+            errs.push(
+              `expected error code -32602; got ${thrown.code ?? '<none>'}`
+            );
+          }
+          checks.push(
+            verdictCheck(id, name, description, errs, [SEP_2663_REF], {
+              observedCode: thrown?.code
+            })
+          );
+        }
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, [SEP_2663_REF]));
+      }
+    }
+
+    return checks;
+  }
+}
diff --git a/src/scenarios/server/tasks/mrtr-input.ts b/src/scenarios/server/tasks/mrtr-input.ts
new file mode 100644
index 0000000..49cfacc
--- /dev/null
+++ b/src/scenarios/server/tasks/mrtr-input.ts
@@ -0,0 +1,416 @@
+/**
+ * SEP-2322 / SEP-2663 — MRTR input flow on the tasks surface.
+ *
+ * Tests the input_required → tasks/update → resume loop, including
+ * partial inputResponses fulfillment when a tool fans out multiple
+ * simultaneous input requests.
+ *
+ * Required server fixtures:
+ *   - confirm_delete — task-supporting, calls TaskElicit once
+ *   - multi_input — task-supporting, fans out two TaskElicits in
+ *     parallel so two keys are pending at once
+ */
+
+import {
+  ClientScenario,
+  ConformanceCheck,
+  ScenarioSpecTag,
+  DRAFT_PROTOCOL_VERSION
+} from '../../../types';
+import {
+  TASKS_EXTENSION_ID,
+  SEP_2322_REF,
+  SEP_2663_REF,
+  errMsg,
+  failureCheck,
+  initRawSession,
+  rawRequest,
+  waitForStatus,
+  waitForTerminal
+} from './helpers';
+
+/**
+ * Build the verdict for one check from its accumulated violations:
+ * SUCCESS when `errs` is empty, otherwise FAILURE with the violations
+ * joined by "; ".
+ */
+function verdictCheck(
+  id: string,
+  name: string,
+  description: string,
+  errs: string[],
+  specReferences: ConformanceCheck['specReferences']
+): ConformanceCheck {
+  return {
+    id,
+    name,
+    description,
+    status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+    timestamp: new Date().toISOString(),
+    errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+    specReferences
+  };
+}
+
+/**
+ * Best-effort tasks/cancel so a check never leaves a task parked in
+ * input_required. Errors are deliberately swallowed: the task may
+ * already be terminal, in which case the cancel is expected to fail.
+ */
+async function cancelQuietly(
+  serverUrl: string,
+  sessionId: string,
+  taskId: string
+): Promise<void> {
+  try {
+    await rawRequest(serverUrl, 'tasks/cancel', { taskId }, { sessionId });
+  } catch {
+    /* swallow */
+  }
+}
+
+export class TasksMRTRInputScenario implements ClientScenario {
+  name = 'tasks-mrtr-input';
+  specVersions: ScenarioSpecTag[] = ['extension', DRAFT_PROTOCOL_VERSION];
+  description = `Test SEP-2322 MRTR input flow on the tasks surface.
+
+**Server Implementation Requirements:**
+
+**Surfacing inputRequests (SEP-2322):**
+- A task waiting on client input MUST report \`status:"input_required"\`
+  on tasks/get and surface a non-empty \`inputRequests\` map keyed by
+  server-minted opaque ids. Each entry carries the underlying request
+  (\`elicitation/create\`, \`sampling/createMessage\`, etc.).
+
+**Resuming via tasks/update (SEP-2663):**
+- The client delivers responses through \`tasks/update\` with
+  \`inputResponses\` keyed to match the server-emitted ids. The server
+  MUST return an empty \`{resultType:"complete"}\` ack on the
+  tasks/update response — the resulting task state is observed via the
+  next tasks/get.
+- After the response is delivered, the task MUST resume execution and
+  proceed to a terminal state (or back to input_required for another
+  round).
+
+**Partial fulfillment (SEP-2663):**
+- A tool that emits multiple simultaneous input requests parks the task
+  with multiple keys in \`inputRequests\`. A client MAY answer them one
+  at a time:
+  - tasks/update with a subset of keys MUST be acked.
+  - The task MUST stay in \`input_required\` until every pending request
+    has been answered.
+  - tasks/get after a partial update MUST surface only the still-pending
+    keys; the answered key MUST be removed.`;
+
+  /**
+   * Run the three MRTR input checks against `serverUrl`.
+   *
+   * A failed initialize handshake short-circuits with a single
+   * `tasks-session-bootstrap` FAILURE. Each check creates its own task
+   * and cancels it on the way out unless it was seen reaching a
+   * terminal state.
+   *
+   * @param serverUrl - endpoint of the server under test
+   * @returns the ordered list of check results
+   */
+  async run(serverUrl: string): Promise<ConformanceCheck[]> {
+    const checks: ConformanceCheck[] = [];
+    const bothRefs = [SEP_2322_REF, SEP_2663_REF];
+
+    let sessionId: string;
+    try {
+      ({ sessionId } = await initRawSession(serverUrl, {
+        capabilities: {
+          elicitation: {},
+          sampling: {},
+          extensions: { [TASKS_EXTENSION_ID]: {} }
+        }
+      }));
+    } catch (error) {
+      checks.push({
+        id: 'tasks-session-bootstrap',
+        name: 'TasksSessionBootstrap',
+        description:
+          'Initialize handshake declaring io.modelcontextprotocol/tasks extension succeeds',
+        status: 'FAILURE',
+        timestamp: new Date().toISOString(),
+        errorMessage: `Failed to initialize: ${errMsg(error)}`,
+        specReferences: [SEP_2322_REF]
+      });
+      return checks;
+    }
+
+    // Check 1: tasks/get surfaces inputRequests when status=input_required.
+    {
+      const id = 'tasks-mrtr-input-requests-on-tasks-get';
+      const name = 'TasksMRTRInputRequestsOnTasksGet';
+      const description =
+        'tasks/get on an input_required task MUST surface a non-empty inputRequests map';
+      let openTaskId: string | undefined;
+      try {
+        const created = await rawRequest(
+          serverUrl,
+          'tools/call',
+          {
+            name: 'confirm_delete',
+            arguments: { filename: 'mrtr-input.txt' }
+          },
+          { sessionId }
+        );
+        const taskId = created.taskId;
+        if (!taskId) {
+          checks.push(
+            verdictCheck(
+              id,
+              name,
+              description,
+              ['confirm_delete did not create a task'],
+              [SEP_2322_REF]
+            )
+          );
+        } else {
+          // This check never answers the request, so the task is always
+          // cancelled in `finally`.
+          openTaskId = taskId;
+          const task = await waitForStatus(
+            serverUrl,
+            sessionId,
+            taskId,
+            'input_required',
+            5_000
+          );
+          const errs: string[] = [];
+          if (task.status !== 'input_required') {
+            errs.push(
+              `expected status:"input_required"; got ${JSON.stringify(task.status)}`
+            );
+          }
+          if (
+            !task.inputRequests ||
+            typeof task.inputRequests !== 'object' ||
+            Array.isArray(task.inputRequests)
+          ) {
+            errs.push('inputRequests MUST be a non-null object (map)');
+          } else {
+            const keys = Object.keys(task.inputRequests);
+            if (keys.length === 0) {
+              errs.push('inputRequests MUST have at least one entry');
+            } else if (keys.some((key) => !task.inputRequests[key]?.method)) {
+              // Every entry is inspected, matching the "each" wording.
+              errs.push(
+                'each inputRequest MUST carry a `method` (e.g., elicitation/create)'
+              );
+            }
+          }
+          checks.push(verdictCheck(id, name, description, errs, bothRefs));
+        }
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, bothRefs));
+      } finally {
+        // Cancel so we don't leave the task parked.
+        if (openTaskId) await cancelQuietly(serverUrl, sessionId, openTaskId);
+      }
+    }
+
+    // Check 2: tasks/update delivers inputResponses + resumes the task.
+    {
+      const id = 'tasks-mrtr-tasks-update-resumes';
+      const name = 'TasksMRTRTasksUpdateResumes';
+      const description =
+        'tasks/update with matching inputResponses MUST be acked with {resultType:"complete"} and resume the task to a terminal state';
+      let openTaskId: string | undefined;
+      try {
+        const created = await rawRequest(
+          serverUrl,
+          'tools/call',
+          {
+            name: 'confirm_delete',
+            arguments: { filename: 'mrtr-resume.txt' }
+          },
+          { sessionId }
+        );
+        const taskId = created.taskId;
+        if (!taskId) {
+          checks.push(
+            verdictCheck(
+              id,
+              name,
+              description,
+              ['confirm_delete did not create a task'],
+              bothRefs
+            )
+          );
+        } else {
+          openTaskId = taskId;
+          const inputTask = await waitForStatus(
+            serverUrl,
+            sessionId,
+            taskId,
+            'input_required',
+            5_000
+          );
+          const errs: string[] = [];
+          const pendingKeys = Object.keys(inputTask.inputRequests ?? {});
+          // waitForStatus also returns on terminal states, so confirm the
+          // task is really parked before sending responses. Without this
+          // an empty inputResponses map would be sent to a finished task.
+          if (
+            inputTask.status !== 'input_required' ||
+            pendingKeys.length === 0
+          ) {
+            errs.push(
+              `task MUST park in input_required with pending inputRequests before tasks/update; got status ${JSON.stringify(inputTask.status)} with ${pendingKeys.length} inputRequests`
+            );
+          } else {
+            const responses: Record<string, unknown> = {};
+            for (const key of pendingKeys) {
+              responses[key] = {
+                action: 'accept',
+                content: { confirm: true }
+              };
+            }
+            const ack = await rawRequest(
+              serverUrl,
+              'tasks/update',
+              {
+                taskId,
+                inputResponses: responses,
+                requestState: inputTask.requestState
+              },
+              { sessionId }
+            );
+            if (
+              JSON.stringify(ack) !== JSON.stringify({ resultType: 'complete' })
+            ) {
+              errs.push(
+                `tasks/update ack MUST be {resultType:"complete"}; got ${JSON.stringify(ack)}`
+              );
+            }
+            const terminal = await waitForTerminal(
+              serverUrl,
+              sessionId,
+              taskId
+            );
+            // Terminal reached: nothing left to cancel.
+            openTaskId = undefined;
+            if (terminal.status !== 'completed') {
+              errs.push(
+                `task MUST resume to completed after tasks/update; got status ${JSON.stringify(terminal.status)}`
+              );
+            }
+          }
+          checks.push(verdictCheck(id, name, description, errs, bothRefs));
+        }
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, bothRefs));
+      } finally {
+        if (openTaskId) await cancelQuietly(serverUrl, sessionId, openTaskId);
+      }
+    }
+
+    // Check 3: partial inputResponses fulfillment leaves the rest pending.
+    {
+      const id = 'tasks-mrtr-partial-fulfillment';
+      const name = 'TasksMRTRPartialFulfillment';
+      const description =
+        'tasks/update with a subset of keys MUST keep the task in input_required with only the unanswered key remaining';
+      let openTaskId: string | undefined;
+      try {
+        const created = await rawRequest(
+          serverUrl,
+          'tools/call',
+          { name: 'multi_input', arguments: {} },
+          { sessionId }
+        );
+        const taskId = created.taskId;
+        if (!taskId) {
+          checks.push(
+            verdictCheck(
+              id,
+              name,
+              description,
+              ['multi_input did not create a task'],
+              bothRefs
+            )
+          );
+        } else {
+          openTaskId = taskId;
+          // Wait until two keys are pending (the fan-out tool races two
+          // TaskElicits, so we may briefly see one before the second).
+          let inputTask: any;
+          const start = Date.now();
+          while (Date.now() - start < 5_000) {
+            inputTask = await rawRequest(
+              serverUrl,
+              'tasks/get',
+              { taskId },
+              { sessionId }
+            );
+            if (
+              inputTask.status === 'input_required' &&
+              inputTask.inputRequests &&
+              Object.keys(inputTask.inputRequests).length >= 2
+            ) {
+              break;
+            }
+            await new Promise((r) => setTimeout(r, 100));
+          }
+          const errs: string[] = [];
+          if (inputTask.status !== 'input_required') {
+            errs.push(
+              `task with two parallel elicits MUST be input_required; got ${JSON.stringify(inputTask.status)}`
+            );
+          }
+          const keys = Object.keys(inputTask.inputRequests ?? {});
+          if (keys.length < 2) {
+            errs.push(
+              `multi_input MUST surface 2 inputRequests; got ${keys.length}`
+            );
+          } else {
+            const [firstKey, secondKey] = keys;
+
+            // Answer first key only. requestState is echoed the same way
+            // Check 2 does; it is undefined when the server issued none.
+            const firstAck = await rawRequest(
+              serverUrl,
+              'tasks/update',
+              {
+                taskId,
+                inputResponses: {
+                  [firstKey]: {
+                    action: 'accept',
+                    content: { name: 'partial-1', confirm: true }
+                  }
+                },
+                requestState: inputTask.requestState
+              },
+              { sessionId }
+            );
+            if (firstAck.resultType !== 'complete') {
+              errs.push(
+                `partial tasks/update ack MUST carry resultType:"complete"; got ${JSON.stringify(firstAck)}`
+              );
+            }
+
+            // Status MUST still be input_required with only the second
+            // key remaining.
+            const afterFirst = await rawRequest(
+              serverUrl,
+              'tasks/get',
+              { taskId },
+              { sessionId }
+            );
+            if (afterFirst.status !== 'input_required') {
+              errs.push(
+                `task MUST stay input_required while another input is still pending; got ${JSON.stringify(afterFirst.status)}`
+              );
+            }
+            const remaining = Object.keys(afterFirst.inputRequests ?? {});
+            if (!remaining.includes(secondKey)) {
+              errs.push(
+                `unanswered key MUST remain in inputRequests; got ${JSON.stringify(remaining)}`
+              );
+            }
+            if (remaining.includes(firstKey)) {
+              errs.push(
+                `answered key MUST be removed from inputRequests; still saw ${firstKey}`
+              );
+            }
+
+            // Answer second key — task resumes and finishes.
+            const secondAck = await rawRequest(
+              serverUrl,
+              'tasks/update',
+              {
+                taskId,
+                inputResponses: {
+                  [secondKey]: {
+                    action: 'accept',
+                    content: { name: 'partial-2', confirm: true }
+                  }
+                },
+                requestState: afterFirst.requestState ?? inputTask.requestState
+              },
+              { sessionId }
+            );
+            if (secondAck.resultType !== 'complete') {
+              errs.push(
+                `final tasks/update ack MUST carry resultType:"complete"; got ${JSON.stringify(secondAck)}`
+              );
+            }
+            const terminal = await waitForTerminal(
+              serverUrl,
+              sessionId,
+              taskId
+            );
+            // Terminal reached: nothing left to cancel.
+            openTaskId = undefined;
+            if (terminal.status !== 'completed') {
+              errs.push(
+                `task MUST complete after both inputs are satisfied; got ${JSON.stringify(terminal.status)}`
+              );
+            }
+          }
+          checks.push(verdictCheck(id, name, description, errs, bothRefs));
+        }
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, bothRefs));
+      } finally {
+        if (openTaskId) await cancelQuietly(serverUrl, sessionId, openTaskId);
+      }
+    }
+
+    return checks;
+  }
+}
diff --git a/src/scenarios/server/tasks/notifications.ts b/src/scenarios/server/tasks/notifications.ts
new file mode 100644
index 0000000..a3881a2
--- /dev/null
+++ b/src/scenarios/server/tasks/notifications.ts
@@ -0,0 +1,188 @@
+/**
+ * SEP-2663 Tasks Extension — status notifications conformance.
+ *
+ * Status notifications are OPTIONAL. The check pattern is:
+ *   - INFO when no notifications are received (well-formed silence).
+ *   - SUCCESS when notifications arrive and carry the SEP-2663 shape
+ *     (DetailedTask: taskId + status, with inlined result on terminal).
+ *   - FAILURE only if a notification was emitted but is malformed.
+ *
+ * The raw HTTP harness can't open a long-lived GET SSE stream from the
+ * scenario layer easily, so this check observes notifications via the
+ * POST tools/call SSE response stream. That captures the status
+ * transitions emitted while the task is running. This is a best-effort
+ * smoke test — passing servers may still emit additional notifications
+ * on the persistent GET stream that this harness doesn't see.
+ *
+ * Required server fixtures:
+ *   - slow_compute — task-supporting, sleeps N seconds
+ */
+
+import {
+  ClientScenario,
+  ConformanceCheck,
+  ScenarioSpecTag,
+  DRAFT_PROTOCOL_VERSION
+} from '../../../types';
+import {
+  TASKS_EXTENSION_ID,
+  SEP_2663_REF,
+  errMsg,
+  failureCheck,
+  initRawSession,
+  waitForTerminal
+} from './helpers';
+
+export class TasksStatusNotificationsScenario implements ClientScenario {
+  name = 'tasks-status-notifications';
+  specVersions: ScenarioSpecTag[] = ['extension', DRAFT_PROTOCOL_VERSION];
+  description = `Test SEP-2663 status notifications (optional).
+
+**Server Implementation Requirements:**
+
+Servers MAY emit \`notifications/tasks/status\` to inform clients of
+task state changes without polling. Notifications are optional — a
+server is conformant whether it sends them or not. When sent, the
+notification params MUST carry:
+
+- \`taskId\`: the task the notification refers to.
+- \`status\`: the new task status.
+- For terminal statuses (\`completed\`/\`failed\`/\`cancelled\`),
+  notifications MAY inline the corresponding \`result\` or \`error\`
+  per the SEP-2663 DetailedTask shape.`;
+
+  /**
+   * Issue one `slow_compute` call and validate any status
+   * notifications seen on its POST response stream.
+   *
+   * Result is INFO when no notifications were seen, SUCCESS when every
+   * notification carries taskId + status, and FAILURE when one is
+   * malformed or the tools/call itself failed. A completed
+   * notification that omits `result` is reported as a warning in
+   * `details`, not as a failure, because inlining is optional.
+   *
+   * @param serverUrl - endpoint of the server under test
+   * @returns the bootstrap failure or the single shape check
+   */
+  async run(serverUrl: string): Promise<ConformanceCheck[]> {
+    const checks: ConformanceCheck[] = [];
+
+    let sessionId: string;
+    try {
+      ({ sessionId } = await initRawSession(serverUrl, {
+        capabilities: { extensions: { [TASKS_EXTENSION_ID]: {} } }
+      }));
+    } catch (error) {
+      checks.push({
+        id: 'tasks-session-bootstrap',
+        name: 'TasksSessionBootstrap',
+        description:
+          'Initialize handshake declaring io.modelcontextprotocol/tasks extension succeeds',
+        status: 'FAILURE',
+        timestamp: new Date().toISOString(),
+        errorMessage: `Failed to initialize: ${errMsg(error)}`,
+        specReferences: [SEP_2663_REF]
+      });
+      return checks;
+    }
+
+    const id = 'tasks-status-notifications-shape';
+    const name = 'TasksStatusNotificationsShape';
+    const description =
+      'When status notifications are emitted, each MUST carry taskId + status (SEP-2663 DetailedTask)';
+
+    // Issue tools/call with SSE-accepting headers and capture every
+    // `data:` payload. Some are JSON-RPC responses (with id), some are
+    // notifications (no id). We ingest all and classify by the body.
+    let taskId: string | undefined;
+    const notifications: any[] = [];
+    try {
+      const resp = await fetch(serverUrl, {
+        method: 'POST',
+        headers: {
+          'Content-Type': 'application/json',
+          Accept: 'text/event-stream, application/json',
+          'Mcp-Session-Id': sessionId
+        },
+        body: JSON.stringify({
+          jsonrpc: '2.0',
+          id: 'notif-test',
+          method: 'tools/call',
+          params: {
+            name: 'slow_compute',
+            arguments: { seconds: 1, label: 'notif' }
+          }
+        })
+      });
+      // A rejected call must not be reported as "no notifications".
+      if (!resp.ok) {
+        throw new Error(`tools/call returned HTTP ${resp.status}`);
+      }
+      const ct = resp.headers.get('content-type') || '';
+      if (ct.includes('text/event-stream')) {
+        const text = await resp.text();
+        for (const payload of extractSseDataPayloads(text)) {
+          if (!payload.startsWith('{')) continue;
+          const parsed = JSON.parse(payload);
+          if (parsed.id === 'notif-test' && parsed.error) {
+            throw new Error(
+              `tools/call failed: ${JSON.stringify(parsed.error)}`
+            );
+          }
+          if (parsed.id === 'notif-test' && parsed.result) {
+            taskId = parsed.result.taskId;
+          } else if (parsed.method === 'notifications/tasks/status') {
+            notifications.push(parsed.params);
+          }
+        }
+      } else {
+        const body = await resp.json();
+        if (body.error) {
+          throw new Error(`tools/call failed: ${JSON.stringify(body.error)}`);
+        }
+        taskId = body.result?.taskId;
+      }
+    } catch (error) {
+      checks.push(failureCheck(id, name, description, error, [SEP_2663_REF]));
+      return checks;
+    }
+
+    // Drain to a terminal so the server has emitted everything it's
+    // going to (best-effort — the persistent GET stream might be
+    // collecting more, but we're done with this scenario regardless).
+    if (taskId) {
+      try {
+        await waitForTerminal(serverUrl, sessionId, taskId);
+      } catch {
+        /* swallow */
+      }
+    }
+
+    if (notifications.length === 0) {
+      checks.push({
+        id,
+        name,
+        description,
+        status: 'INFO',
+        timestamp: new Date().toISOString(),
+        errorMessage:
+          'No status notifications received on the tools/call POST SSE stream (notifications are optional)',
+        specReferences: [SEP_2663_REF]
+      });
+      return checks;
+    }
+
+    const errs: string[] = [];
+    for (const evt of notifications) {
+      // `evt` is undefined when the notification had no `params`; treat
+      // that as missing both required fields instead of throwing.
+      if (!evt?.taskId) {
+        errs.push('status notification MUST carry taskId');
+      }
+      if (!evt?.status) {
+        errs.push('status notification MUST carry status');
+      }
+    }
+    // Optional terminal-with-inlined-result check: if the suite saw a
+    // completed notification for our taskId, it SHOULD include result.
+    // Inlining is optional, so this is reported as a warning only.
+    const warnings: string[] = [];
+    const terminalForOurs = notifications.find(
+      (n: any) => n?.taskId === taskId && n?.status === 'completed'
+    );
+    if (terminalForOurs && !terminalForOurs.result) {
+      warnings.push(
+        'completed status notification SHOULD inline result (DetailedTask shape)'
+      );
+    }
+
+    checks.push({
+      id,
+      name,
+      description,
+      status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+      timestamp: new Date().toISOString(),
+      errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+      specReferences: [SEP_2663_REF],
+      details: {
+        notificationCount: notifications.length,
+        ...(warnings.length > 0 ? { warnings } : {})
+      }
+    });
+
+    return checks;
+  }
+}
+
+/**
+ * Split an SSE body into one data payload per event.
+ *
+ * Consecutive `data:` lines belong to the same event and are joined
+ * with "\n"; a blank line ends the event. A trailing event with no
+ * closing blank line is still returned, which is more lenient than a
+ * browser EventSource but suits a finished response body.
+ */
+function extractSseDataPayloads(text: string): string[] {
+  const payloads: string[] = [];
+  let dataLines: string[] = [];
+  const flush = (): void => {
+    if (dataLines.length > 0) {
+      payloads.push(dataLines.join('\n'));
+      dataLines = [];
+    }
+  };
+  for (const line of text.split(/\r\n|\r|\n/)) {
+    const trimmed = line.trim();
+    if (trimmed === '') {
+      flush();
+    } else if (trimmed.startsWith('data:')) {
+      dataLines.push(trimmed.slice(5).trimStart());
+    }
+  }
+  flush();
+  return payloads;
+}
diff --git a/src/scenarios/server/tasks/request-state.ts b/src/scenarios/server/tasks/request-state.ts
new file mode 100644
index 0000000..8c2b165
--- /dev/null
+++ b/src/scenarios/server/tasks/request-state.ts
@@ -0,0 +1,290 @@
+/**
+ * SEP-2322 / SEP-2663 — requestState conformance.
+ *
+ * Tests the optional opaque session-continuation token:
+ *   - Server MAY include requestState on tasks/get responses.
+ *   - Clients MUST echo it back on subsequent tasks/get / tasks/update /
+ *     tasks/cancel for the same task — server MUST accept the echo.
+ *   - Servers MUST tolerate a stale but still-valid token (one minted
+ *     before a newer one but still within its TTL window).
+ *
+ * If the server does not issue requestState at all (it's optional per
+ * SEP-2322), the dependent checks emit INFO rather than failing — the
+ * spec allows omission.
+ *
+ * Required server fixtures:
+ *   - slow_compute — task-supporting, sleeps N seconds
+ */
+
+import {
+  ClientScenario,
+  ConformanceCheck,
+  ScenarioSpecTag,
+  DRAFT_PROTOCOL_VERSION
+} from '../../../types';
+import {
+  TASKS_EXTENSION_ID,
+  SEP_2322_REF,
+  SEP_2663_REF,
+  errMsg,
+  failureCheck,
+  initRawSession,
+  rawRequest
+} from './helpers';
+
+/**
+ * Conformance scenario for the optional `requestState` continuation token
+ * on the tasks surface.
+ *
+ * A single `slow_compute` task (requested with `seconds: 60`) is created
+ * once and reused by three checks: token shape, echo acceptance and
+ * stale-but-valid tolerance. When the first `tasks/get` carries no usable
+ * token, the two dependent checks are reported as INFO instead of FAILURE.
+ */
+export class TasksRequestStateScenario implements ClientScenario {
+  name = 'tasks-request-state';
+  specVersions: ScenarioSpecTag[] = ['extension', DRAFT_PROTOCOL_VERSION];
+  description = `Test SEP-2322 requestState semantics on the tasks surface.
+
+**Server Implementation Requirements:**
+
+**Optional emission (SEP-2322):**
+- A server MAY include a non-empty string \`requestState\` on tasks/get
+  responses to allow stateless deployments to resume the conversation.
+  When present, it MUST be a non-empty string.
+
+**Echo acceptance:**
+- A client that receives a \`requestState\` from tasks/get MUST be able
+  to echo it back on a subsequent \`tasks/get\`/\`tasks/update\`/
+  \`tasks/cancel\` for the same task. The server MUST accept the echo.
+
+**Stale-but-valid tolerance (SEP-2663):**
+- Each tasks/get may mint a new requestState (e.g., for a refreshed
+  TTL). After a fresh tasks/get returns a newer token, echoing the
+  earlier one MUST still succeed as long as the earlier token has not
+  itself expired. (Servers MUST tolerate stale-but-valid tokens
+  gracefully.)`;
+
+  /**
+   * Runs the requestState checks against the server at `serverUrl`.
+   *
+   * @param serverUrl - Endpoint of the server under test.
+   * @returns The recorded checks. A failed initialize handshake or a
+   *   failed task creation short-circuits with a single FAILURE entry;
+   *   otherwise one entry per check (shape, echo, stale tolerance).
+   */
+  async run(serverUrl: string): Promise<ConformanceCheck[]> {
+    const checks: ConformanceCheck[] = [];
+
+    // Bootstrap a session that declares the tasks extension.
+    let sessionId: string;
+    try {
+      ({ sessionId } = await initRawSession(serverUrl, {
+        capabilities: { extensions: { [TASKS_EXTENSION_ID]: {} } }
+      }));
+    } catch (error) {
+      checks.push({
+        id: 'tasks-session-bootstrap',
+        name: 'TasksSessionBootstrap',
+        description:
+          'Initialize handshake declaring io.modelcontextprotocol/tasks extension succeeds',
+        status: 'FAILURE',
+        timestamp: new Date().toISOString(),
+        errorMessage: `Failed to initialize: ${errMsg(error)}`,
+        specReferences: [SEP_2322_REF]
+      });
+      return checks;
+    }
+
+    // Drive a long-running task once and reuse it for every check.
+    let taskId: string | undefined;
+    try {
+      const created = await rawRequest(
+        serverUrl,
+        'tools/call',
+        {
+          name: 'slow_compute',
+          arguments: { seconds: 60, label: 'request-state' }
+        },
+        { sessionId }
+      );
+      taskId = created.taskId;
+    } catch (error) {
+      checks.push(
+        failureCheck(
+          'tasks-request-state-setup',
+          'TasksRequestStateSetup',
+          'Failed to create a long-running task to exercise requestState',
+          error,
+          [SEP_2322_REF]
+        )
+      );
+      return checks;
+    }
+    if (!taskId) {
+      checks.push({
+        id: 'tasks-request-state-setup',
+        name: 'TasksRequestStateSetup',
+        description:
+          'slow_compute did not produce a task; cannot exercise requestState',
+        status: 'FAILURE',
+        timestamp: new Date().toISOString(),
+        errorMessage: 'no taskId in CreateTaskResult',
+        specReferences: [SEP_2322_REF]
+      });
+      return checks;
+    }
+
+    // Token captured by Check 1; Checks 2 and 3 are skipped (INFO) when
+    // it stays undefined.
+    let firstToken: string | undefined;
+
+    // Check 1: tasks/get response shape — requestState (optional) must
+    // be a non-empty string when present.
+    {
+      const id = 'tasks-request-state-shape';
+      const name = 'TasksRequestStateShape';
+      const description =
+        'tasks/get may include requestState; when present it MUST be a non-empty string';
+      try {
+        const task = await rawRequest(
+          serverUrl,
+          'tasks/get',
+          { taskId },
+          { sessionId }
+        );
+        const errs: string[] = [];
+        if (task.requestState !== undefined) {
+          if (typeof task.requestState !== 'string') {
+            errs.push(
+              `requestState MUST be a string when present; got ${typeof task.requestState}`
+            );
+          } else if (task.requestState.length === 0) {
+            errs.push(
+              'requestState MUST be non-empty when present (omit the field instead of emitting "")'
+            );
+          } else {
+            firstToken = task.requestState;
+          }
+        }
+        // Emission is optional, so omission is never a failure:
+        // SUCCESS when a well-formed token was emitted, INFO when the
+        // server omitted it, FAILURE only for a malformed token.
+        const status: 'SUCCESS' | 'INFO' | 'FAILURE' =
+          errs.length === 0 ? (firstToken ? 'SUCCESS' : 'INFO') : 'FAILURE';
+        checks.push({
+          id,
+          name,
+          description,
+          status,
+          timestamp: new Date().toISOString(),
+          errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+          specReferences: [SEP_2322_REF],
+          details: {
+            emitted: Boolean(firstToken),
+            tokenLength: firstToken?.length
+          }
+        });
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, [SEP_2322_REF]));
+      }
+    }
+
+    // Check 2: client echoes requestState; server accepts the echo.
+    {
+      const id = 'tasks-request-state-echo';
+      const name = 'TasksRequestStateEcho';
+      const description =
+        'Server accepts a tasks/get with the previously-emitted requestState echoed back';
+      if (!firstToken) {
+        checks.push({
+          id,
+          name,
+          description,
+          status: 'INFO',
+          timestamp: new Date().toISOString(),
+          errorMessage: 'Server did not emit requestState; nothing to echo',
+          specReferences: [SEP_2322_REF]
+        });
+      } else {
+        try {
+          const echoed = await rawRequest(
+            serverUrl,
+            'tasks/get',
+            { taskId, requestState: firstToken },
+            { sessionId }
+          );
+          const errs: string[] = [];
+          if (echoed.taskId !== taskId) {
+            errs.push(
+              `tasks/get with echoed requestState MUST resolve the same taskId; got ${echoed.taskId}`
+            );
+          }
+          checks.push({
+            id,
+            name,
+            description,
+            status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+            timestamp: new Date().toISOString(),
+            errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+            specReferences: [SEP_2322_REF]
+          });
+        } catch (error) {
+          checks.push(
+            failureCheck(id, name, description, error, [SEP_2322_REF])
+          );
+        }
+      }
+    }
+
+    // Check 3: stale-but-valid tolerance.
+    {
+      const id = 'tasks-request-state-stale-tolerance';
+      const name = 'TasksRequestStateStaleTolerance';
+      const description =
+        'After a newer requestState is minted, the earlier (stale-but-still-valid) token MUST still be accepted';
+      if (!firstToken) {
+        checks.push({
+          id,
+          name,
+          description,
+          status: 'INFO',
+          timestamp: new Date().toISOString(),
+          errorMessage:
+            'Server did not emit requestState; stale tolerance is moot',
+          specReferences: [SEP_2663_REF, SEP_2322_REF]
+        });
+      } else {
+        try {
+          // Force a fresh mint by issuing another tasks/get. On servers
+          // that sign tokens with embedded expiry, this likely yields a
+          // newer token; on plaintext-token servers it round-trips the
+          // same value (still valid).
+          const refreshed = await rawRequest(
+            serverUrl,
+            'tasks/get',
+            { taskId, requestState: firstToken },
+            { sessionId }
+          );
+          // Record whether the refresh really produced a different
+          // token, so a reader can tell a genuine stale-token pass from
+          // one where the "stale" token was still the current one.
+          const refreshedToken = refreshed?.requestState;
+          const newerTokenMinted =
+            typeof refreshedToken === 'string' &&
+            refreshedToken.length > 0 &&
+            refreshedToken !== firstToken;
+          // Now re-echo the OLDER token; server MUST accept.
+          const stale = await rawRequest(
+            serverUrl,
+            'tasks/get',
+            { taskId, requestState: firstToken },
+            { sessionId }
+          );
+          const errs: string[] = [];
+          if (stale.taskId !== taskId) {
+            errs.push(
+              `stale-but-valid requestState MUST resolve the same taskId; got ${stale.taskId}`
+            );
+          }
+          checks.push({
+            id,
+            name,
+            description,
+            status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+            timestamp: new Date().toISOString(),
+            errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+            specReferences: [SEP_2663_REF, SEP_2322_REF],
+            details: { newerTokenMinted }
+          });
+        } catch (error) {
+          checks.push(
+            failureCheck(id, name, description, error, [
+              SEP_2663_REF,
+              SEP_2322_REF
+            ])
+          );
+        }
+      }
+    }
+
+    // Best-effort cleanup: cancel the 60-second task so it does not keep
+    // running on the server after the scenario ends. A cancel failure
+    // must not affect the reported checks, so it is deliberately ignored.
+    try {
+      await rawRequest(serverUrl, 'tasks/cancel', { taskId }, { sessionId });
+    } catch {
+      /* swallow */
+    }
+
+    return checks;
+  }
+}
diff --git a/src/scenarios/server/tasks/wire-fields.ts b/src/scenarios/server/tasks/wire-fields.ts
new file mode 100644
index 0000000..3fb377d
--- /dev/null
+++ b/src/scenarios/server/tasks/wire-fields.ts
@@ -0,0 +1,250 @@
+/**
+ * SEP-2663 Tasks Extension — wire-format / TTL conformance.
+ *
+ * Tests the renamed wire fields (ttlSeconds, pollIntervalMilliseconds),
+ * the no-early-TTL-expiry rule, and confirms the v1 `related-task` _meta
+ * key is absent on tasks/get's inlined result (taskId is at root level
+ * already, so the metadata is redundant).
+ *
+ * Required server fixtures:
+ *   - slow_compute — task-supporting, sleeps N seconds
+ */
+
+import {
+  ClientScenario,
+  ConformanceCheck,
+  ScenarioSpecTag,
+  DRAFT_PROTOCOL_VERSION
+} from '../../../types';
+import {
+  TASKS_EXTENSION_ID,
+  SEP_2663_REF,
+  errMsg,
+  failureCheck,
+  skipCheck,
+  initRawSession,
+  rawRequest,
+  waitForTerminal
+} from './helpers';
+
+/**
+ * Conformance scenario for the SEP-2663 wire-format rules on the tasks
+ * surface.
+ *
+ * Runs three checks against short `slow_compute` tasks (`seconds: 1`):
+ * the renamed TTL / poll-interval fields on the create result, continued
+ * accessibility of a finished task via `tasks/get`, and absence of the v1
+ * related-task `_meta` key on the inlined result.
+ */
+export class TasksWireFieldsScenario implements ClientScenario {
+  name = 'tasks-wire-fields';
+  specVersions: ScenarioSpecTag[] = ['extension', DRAFT_PROTOCOL_VERSION];
+  description = `Test SEP-2663 wire-field renames + TTL semantics.
+
+**Server Implementation Requirements:**
+
+**Wire-field renames (SEP-2663):**
+- The TTL field is named \`ttlSeconds\` on the wire (the v1 \`ttl\`
+  key is in milliseconds-by-convention; SEP-2663 puts the unit in the
+  field name).
+- The poll-interval field is named \`pollIntervalMilliseconds\` (v1
+  used \`pollInterval\`).
+- A \`CreateTaskResult\` MUST NOT carry the legacy \`ttl\` or
+  \`pollInterval\` keys — clients keying off v1 names on a v2 server
+  would silently miss the TTL guidance.
+
+**TTL non-expiry (SEP-2663):**
+- A task MUST remain accessible via \`tasks/get\` for the duration of
+  its \`ttlSeconds\`; a server MUST NOT expire it earlier.
+
+**Inlined-result \`_meta\` (SEP-2663):**
+- The v1 \`io.modelcontextprotocol/related-task\` \`_meta\` key MUST NOT
+  appear on tasks/get's inlined \`result\` — the \`taskId\` is already
+  at the root level of the \`tasks/get\` response, so the metadata is
+  redundant.`;
+
+  /**
+   * Runs the wire-field checks against the server at `serverUrl`.
+   *
+   * @param serverUrl - Endpoint of the server under test.
+   * @returns The recorded checks. A failed initialize handshake
+   *   short-circuits with a single FAILURE entry; otherwise one entry
+   *   per check (field renames, no early expiry, no related-task _meta).
+   */
+  async run(serverUrl: string): Promise<ConformanceCheck[]> {
+    const checks: ConformanceCheck[] = [];
+
+    // Bootstrap a session that declares the tasks extension.
+    let sessionId: string;
+    try {
+      ({ sessionId } = await initRawSession(serverUrl, {
+        capabilities: {
+          extensions: { [TASKS_EXTENSION_ID]: {} }
+        }
+      }));
+    } catch (error) {
+      checks.push({
+        id: 'tasks-session-bootstrap',
+        name: 'TasksSessionBootstrap',
+        description:
+          'Initialize handshake declaring io.modelcontextprotocol/tasks extension succeeds',
+        status: 'FAILURE',
+        timestamp: new Date().toISOString(),
+        errorMessage: `Failed to initialize: ${errMsg(error)}`,
+        specReferences: [SEP_2663_REF]
+      });
+      return checks;
+    }
+
+    // Check 1: ttlSeconds + pollIntervalMilliseconds wire shape.
+    // The task created here is reused by Check 2.
+    let createdTaskId: string | undefined;
+    {
+      const id = 'tasks-wire-field-renames';
+      const name = 'TasksWireFieldRenames';
+      const description =
+        'CreateTaskResult uses ttlSeconds + pollIntervalMilliseconds; legacy ttl / pollInterval keys absent';
+      try {
+        const result = await rawRequest(
+          serverUrl,
+          'tools/call',
+          {
+            name: 'slow_compute',
+            arguments: { seconds: 1, label: 'wire-fields' }
+          },
+          { sessionId }
+        );
+        createdTaskId = result.taskId;
+        const errs: string[] = [];
+        // ttlSeconds — required, positive (or null = unlimited; treat
+        // either as well-formed). Legacy `ttl` MUST be absent.
+        if (!('ttlSeconds' in result)) {
+          errs.push(
+            'CreateTaskResult MUST carry ttlSeconds (renamed from v1 `ttl`)'
+          );
+        } else if (
+          result.ttlSeconds !== null &&
+          (typeof result.ttlSeconds !== 'number' || result.ttlSeconds <= 0)
+        ) {
+          errs.push(
+            `ttlSeconds MUST be null or a positive number; got ${JSON.stringify(result.ttlSeconds)}`
+          );
+        }
+        if ('ttl' in result) {
+          errs.push(
+            'CreateTaskResult MUST NOT carry the v1 `ttl` key (use ttlSeconds)'
+          );
+        }
+        // pollIntervalMilliseconds — optional. When present it MUST be
+        // a positive number and the legacy `pollInterval` key MUST NOT
+        // appear.
+        if (
+          result.pollIntervalMilliseconds !== undefined &&
+          (typeof result.pollIntervalMilliseconds !== 'number' ||
+            result.pollIntervalMilliseconds <= 0)
+        ) {
+          errs.push(
+            `pollIntervalMilliseconds MUST be a positive number when present; got ${JSON.stringify(result.pollIntervalMilliseconds)}`
+          );
+        }
+        if ('pollInterval' in result) {
+          errs.push(
+            'CreateTaskResult MUST NOT carry the v1 `pollInterval` key (use pollIntervalMilliseconds)'
+          );
+        }
+        checks.push({
+          id,
+          name,
+          description,
+          status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+          timestamp: new Date().toISOString(),
+          errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+          specReferences: [SEP_2663_REF],
+          details: {
+            ttlSeconds: result.ttlSeconds,
+            pollIntervalMilliseconds: result.pollIntervalMilliseconds,
+            hasLegacyTtl: 'ttl' in result,
+            hasLegacyPollInterval: 'pollInterval' in result
+          }
+        });
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, [SEP_2663_REF]));
+      }
+    }
+
+    // Check 2: task accessible before TTL elapses.
+    {
+      const id = 'tasks-no-early-ttl-expiry';
+      const name = 'TasksNoEarlyTtlExpiry';
+      const description =
+        'Task remains accessible via tasks/get for the duration of its ttlSeconds';
+      if (!createdTaskId) {
+        checks.push(skipCheck(id, name, description, 'no task created'));
+      } else {
+        try {
+          await waitForTerminal(serverUrl, sessionId, createdTaskId);
+          // Sanity probe well before TTL (the unit is seconds; servers
+          // typically pick order-of-minutes defaults).
+          await new Promise((r) => setTimeout(r, 500));
+          const after = await rawRequest(
+            serverUrl,
+            'tasks/get',
+            { taskId: createdTaskId },
+            { sessionId }
+          );
+          const errs: string[] = [];
+          if (after.taskId !== createdTaskId) {
+            errs.push(
+              `task MUST still be accessible before TTL; got taskId=${after.taskId}`
+            );
+          }
+          checks.push({
+            id,
+            name,
+            description,
+            status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+            timestamp: new Date().toISOString(),
+            errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+            specReferences: [SEP_2663_REF]
+          });
+        } catch (error) {
+          checks.push(
+            failureCheck(id, name, description, error, [SEP_2663_REF])
+          );
+        }
+      }
+    }
+
+    // Check 3: no related-task _meta on inlined result.
+    {
+      const id = 'tasks-no-related-task-meta-on-inlined-result';
+      const name = 'TasksNoRelatedTaskMetaOnInlinedResult';
+      const description =
+        'tasks/get inlined result MUST NOT include the v1 io.modelcontextprotocol/related-task _meta key (taskId is at the root)';
+      const relatedTaskKey = 'io.modelcontextprotocol/related-task';
+      try {
+        const created = await rawRequest(
+          serverUrl,
+          'tools/call',
+          {
+            name: 'slow_compute',
+            arguments: { seconds: 1, label: 'wire-fields-meta' }
+          },
+          { sessionId }
+        );
+        const taskId = created.taskId;
+        if (!taskId) {
+          checks.push(skipCheck(id, name, description, 'no task created'));
+        } else {
+          const terminal = await waitForTerminal(serverUrl, sessionId, taskId);
+          const errs: string[] = [];
+          const meta = terminal.result?._meta;
+          // The rule is about key PRESENCE: a key carrying a falsy value
+          // (null, "", false, 0) still violates "MUST NOT appear", so
+          // test with `in` rather than truthiness — the same way the
+          // legacy `ttl` / `pollInterval` keys are detected above.
+          const hasRelatedTask =
+            meta != null && typeof meta === 'object' && relatedTaskKey in meta;
+          if (hasRelatedTask) {
+            errs.push(
+              'related-task _meta MUST NOT appear on tasks/get inlined result'
+            );
+          }
+          checks.push({
+            id,
+            name,
+            description,
+            status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+            timestamp: new Date().toISOString(),
+            errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+            specReferences: [SEP_2663_REF],
+            details: {
+              // false means there was no inlined result to inspect, so
+              // a SUCCESS here is vacuous.
+              hasInlinedResult: terminal.result != null,
+              hasMeta: Boolean(meta),
+              hasRelatedTask
+            }
+          });
+        }
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, [SEP_2663_REF]));
+      }
+    }
+
+    return checks;
+  }
+}