From da57c23183f23373d15e3e2a8ef68c269fd8707b Mon Sep 17 00:00:00 2001 From: Sri Panyam Date: Tue, 5 May 2026 14:14:19 -0700 Subject: [PATCH 1/7] feat(tasks): SEP-2663 lifecycle scenario (8 checks) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the first scenario for the SEP-2663 io.modelcontextprotocol/tasks extension — a single TasksLifecycleScenario covering sync vs async dispatch, DetailedTask shape on tasks/get, tool errors vs protocol errors, and cancellation semantics. 8 ConformanceCheck records, all passing against a SEP-2663-conformant Go fixture. Why "tasks" (not "tasks-v2"): SEP-2663 IS the tasks surface once it lands; the v2 suffix is only meaningful in implementations that maintain a v1 surface alongside, which the conformance suite does not. Layout: - src/scenarios/server/tasks/lifecycle.ts — scenario class - src/scenarios/server/tasks/helpers.ts — raw-fetch escape hatch (the SDK's typed schemas strip resultType/inputRequests/...) - src/scenarios/server/tasks/lifecycle.test.ts — fork-local vitest runner. Two modes: spawn a fixture binary via MCPKIT_TASKS_BINARY, or point at an already-running server via MCPKIT_TASKS_SERVER_URL. Skips when neither is set so it doesn't break upstream CI runs that go through everything-server (which doesn't yet implement io.modelcontextprotocol/tasks). Scenario is registered in pendingClientScenariosList so all-scenarios.test.ts skips it; promote to active once the upstream fixture grows extension support. Tagged ['extension', DRAFT_PROTOCOL_VERSION] — selectable via --suite extensions and --spec-version draft. 
--- src/scenarios/index.ts | 21 +- src/scenarios/server/tasks/helpers.ts | 214 +++++++ src/scenarios/server/tasks/lifecycle.test.ts | 145 +++++ src/scenarios/server/tasks/lifecycle.ts | 600 +++++++++++++++++++ 4 files changed, 978 insertions(+), 2 deletions(-) create mode 100644 src/scenarios/server/tasks/helpers.ts create mode 100644 src/scenarios/server/tasks/lifecycle.test.ts create mode 100644 src/scenarios/server/tasks/lifecycle.ts diff --git a/src/scenarios/index.ts b/src/scenarios/index.ts index 0e2191a..096f4f4 100644 --- a/src/scenarios/index.ts +++ b/src/scenarios/index.ts @@ -63,6 +63,8 @@ import { import { DNSRebindingProtectionScenario } from './server/dns-rebinding'; +import { TasksLifecycleScenario } from './server/tasks/lifecycle'; + import { authScenariosList, backcompatScenariosList, @@ -81,7 +83,15 @@ const pendingClientScenariosList: ClientScenario[] = [ // On hold until server-side SSE improvements are made // https://github.com/modelcontextprotocol/typescript-sdk/pull/1129 - new ServerSSEPollingScenario() + new ServerSSEPollingScenario(), + + // SEP-2663 Tasks extension lifecycle. + // The SEP is still in draft (see PR 2663) and the everything-server + // does not yet implement the io.modelcontextprotocol/tasks extension, + // so all-scenarios.test.ts cannot exercise this against the default + // fixture. Active runs target a SEP-2663-conformant server via the + // dedicated tasks/lifecycle.test.ts harness. + new TasksLifecycleScenario() ]; // All client scenarios @@ -139,7 +149,14 @@ const allClientScenariosList: ClientScenario[] = [ new PromptsGetWithImageScenario(), // Security scenarios - new DNSRebindingProtectionScenario() + new DNSRebindingProtectionScenario(), + + // SEP-2663 Tasks extension (draft). 
+ // Listed here so the CLI can find it by name and so the active/pending + // filter sees it; pendingClientScenariosList above excludes it from + // automatic runs against the everything-server (which doesn't implement + // io.modelcontextprotocol/tasks yet). + new TasksLifecycleScenario() ]; // Active client scenarios (excludes pending) diff --git a/src/scenarios/server/tasks/helpers.ts b/src/scenarios/server/tasks/helpers.ts new file mode 100644 index 0000000..32ebec4 --- /dev/null +++ b/src/scenarios/server/tasks/helpers.ts @@ -0,0 +1,214 @@ +/** + * Shared helpers for SEP-2663 Tasks server-conformance scenarios. + * + * The MCP TS SDK's typed schemas (CallToolResultSchema, etc.) strip the + * SEP-2663 / SEP-2322 wire fields — `resultType`, `taskId`, `inputRequests`, + * `requestState`, inlined `result`/`error` on tasks/get's DetailedTask. So + * scenarios that exercise those fields use raw fetch instead. This file + * centralizes the bootstrap + RPC + polling primitives. + * + * If/when the SDK gains schemas for the SEP-2663 wire shapes, the call + * sites in scenarios switch back to `client.request(..., AnyResult)` + * and this file shrinks (or disappears). + */ + +export const TASKS_EXTENSION_ID = 'io.modelcontextprotocol/tasks'; + +export interface InitOpts { + /** Negotiated wire protocolVersion. Defaults to '2025-11-25'. */ + protocolVersion?: string; + /** Client capabilities (extensions, elicitation, sampling, …). */ + capabilities?: Record<string, unknown>; + /** Optional clientInfo override. */ + clientInfo?: { name: string; version: string }; +} + +/** + * Run a fresh initialize handshake and return the resulting session id. + * Bypasses the SDK so callers can declare extension capabilities the + * SDK's typed wrappers don't yet know about. + */ +export async function initRawSession( + serverUrl: string, + opts: InitOpts = {} +): Promise<string> { + const protocolVersion = opts.protocolVersion ?? '2025-11-25'; + const capabilities = opts.capabilities ?? 
{}; + const clientInfo = opts.clientInfo ?? { + name: 'mcp-conformance', + version: '1.0' + }; + + const initResp = await fetch(serverUrl, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Accept: 'application/json, text/event-stream' + }, + body: JSON.stringify({ + jsonrpc: '2.0', + id: 'init-raw', + method: 'initialize', + params: { protocolVersion, clientInfo, capabilities } + }) + }); + const sid = initResp.headers.get('mcp-session-id') || ''; + if (!sid) throw new Error('initialize response missing Mcp-Session-Id'); + + await fetch(serverUrl, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Accept: 'application/json, text/event-stream', + 'Mcp-Session-Id': sid + }, + body: JSON.stringify({ + jsonrpc: '2.0', + method: 'notifications/initialized' + }) + }); + return sid; +} + +export interface RawRequestOpts { + sessionId: string; + /** Optional _meta object merged into the JSON-RPC params. */ + meta?: Record<string, unknown>; + /** Optional HTTP request headers merged after the harness defaults. */ + headers?: Record<string, string>; +} + +export interface RawRequestResult { + /** The JSON-RPC `result` body, when the response carried one. */ + result: any; + /** The raw fetch Response so callers can inspect transport-level headers. */ + response: Response; +} + +let nextId = 1; + +/** + * Send a raw JSON-RPC request via fetch, parsing SSE `data:` lines or + * plain JSON depending on Content-Type. Throws an Error decorated with + * `code` / `data` when the response carries a JSON-RPC error. + */ +export async function rawRequest( + serverUrl: string, + method: string, + params: any, + opts: RawRequestOpts +): Promise<any> { + const { result } = await rawRequestFull(serverUrl, method, params, opts); + return result; +} + +/** + * Like rawRequest, but also returns the raw fetch Response so callers + * can inspect transport-level headers (e.g., SEP-2243 routing headers). 
+ */ +export async function rawRequestFull( + serverUrl: string, + method: string, + params: any, + opts: RawRequestOpts +): Promise<RawRequestResult> { + const id = nextId++; + const requestParams = opts.meta ? { ...params, _meta: opts.meta } : params; + const resp = await fetch(serverUrl, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Accept: 'text/event-stream, application/json', + 'Mcp-Session-Id': opts.sessionId, + ...(opts.headers ?? {}) + }, + body: JSON.stringify({ + jsonrpc: '2.0', + id, + method, + params: requestParams + }) + }); + const ct = resp.headers.get('content-type') || ''; + let body: any; + if (ct.includes('text/event-stream')) { + const text = await resp.text(); + for (const line of text.split('\n')) { + const trimmed = line.trim(); + if (trimmed.startsWith('data:')) { + const payload = trimmed.slice(5).trimStart(); + if (payload.startsWith('{')) { + const parsed = JSON.parse(payload); + if (parsed.id === id) { + body = parsed; + break; + } + } + } + } + } else { + body = await resp.json(); + } + if (!body) throw new Error(`No JSON-RPC response for ${method}`); + if (body.error) { + const err: any = new Error(body.error.message); + err.code = body.error.code; + err.data = body.error.data; + throw err; + } + return { result: body.result, response: resp }; +} + +/** Poll tasks/get until the task reaches a terminal state. */ +export async function waitForTerminal( + serverUrl: string, + sessionId: string, + taskId: string, + timeoutMs = 10_000 +): Promise<any> { + const start = Date.now(); + while (Date.now() - start < timeoutMs) { + const task = await rawRequest( + serverUrl, + 'tasks/get', + { taskId }, + { sessionId } + ); + if (['completed', 'failed', 'cancelled'].includes(task.status)) { + return task; + } + await new Promise((r) => setTimeout(r, 200)); + } + throw new Error( + `Task ${taskId} did not reach terminal state within ${timeoutMs}ms` + ); +} + +/** Poll tasks/get until a specific status (or any terminal state). 
*/ +export async function waitForStatus( + serverUrl: string, + sessionId: string, + taskId: string, + status: string, + timeoutMs = 10_000 +): Promise<any> { + const start = Date.now(); + while (Date.now() - start < timeoutMs) { + const task = await rawRequest( + serverUrl, + 'tasks/get', + { taskId }, + { sessionId } + ); + if ( + task.status === status || + ['completed', 'failed', 'cancelled'].includes(task.status) + ) { + return task; + } + await new Promise((r) => setTimeout(r, 200)); + } + throw new Error( + `Task ${taskId} did not reach status ${status} within ${timeoutMs}ms` + ); +} diff --git a/src/scenarios/server/tasks/lifecycle.test.ts b/src/scenarios/server/tasks/lifecycle.test.ts new file mode 100644 index 0000000..0977139 --- /dev/null +++ b/src/scenarios/server/tasks/lifecycle.test.ts @@ -0,0 +1,145 @@ +/** + * SEP-2663 Tasks extension test runner. + * + * Iterates the tasks server scenarios against a SEP-2663-conformant + * server. Two ways to point at one — pick whichever fits: + * + * 1. Existing server already running: + * MCPKIT_TASKS_SERVER_URL=http://localhost:8080/mcp npm test -- lifecycle.test.ts + * + * 2. Auto-spawn a fixture binary in beforeAll (the binary must accept + * `--serve --addr :PORT` and bind Streamable HTTP at /mcp): + * MCPKIT_TASKS_BINARY=/path/to/tasks-server npm test -- lifecycle.test.ts + * + * Optional: MCPKIT_TASKS_PORT overrides the auto-spawn port (default 18092). + * + * If neither is set, the suite is skipped — letting CI runs against the + * everything-server stay green until the upstream fixture grows SEP-2663 + * support. + * + * The mcpkit reference fixture lives at + * https://github.com/panyam/mcpkit/tree/main/examples/tasks-v2 (mcpkit + * keeps its v1 surface alongside v2 internally; the fork only cares + * about the SEP-2663 surface, hence the unsuffixed naming here). 
+ */ + +import { spawn, ChildProcess } from 'child_process'; +import { describe, it, expect, beforeAll, afterAll } from 'vitest'; +import { TasksLifecycleScenario } from './lifecycle'; + +const FIXTURE_BINARY = process.env.MCPKIT_TASKS_BINARY; +const EXTERNAL_URL = process.env.MCPKIT_TASKS_SERVER_URL; +const TEST_PORT = parseInt(process.env.MCPKIT_TASKS_PORT ?? '18092', 10); +const SERVER_URL = EXTERNAL_URL || `http://localhost:${TEST_PORT}/mcp`; +const SERVER_STARTUP_TIMEOUT_MS = 10_000; +// Spawn only when no external URL is provided AND a fixture binary is. +const SHOULD_SPAWN = !EXTERNAL_URL && Boolean(FIXTURE_BINARY); +const HAVE_TARGET = Boolean(EXTERNAL_URL) || SHOULD_SPAWN; + +const TASKS_SCENARIOS = [new TasksLifecycleScenario()]; + +const describeIfTarget = HAVE_TARGET ? describe : describe.skip; + +describeIfTarget('SEP-2663 Tasks — server conformance', () => { + let serverProcess: ChildProcess | null = null; + + beforeAll(async () => { + if (!SHOULD_SPAWN) return; + + serverProcess = spawn(FIXTURE_BINARY!, ['--serve', '--addr', `:${TEST_PORT}`], { + stdio: ['ignore', 'pipe', 'pipe'], + detached: false + }); + + let stdoutBuf = ''; + let stderrBuf = ''; + serverProcess.stdout?.on('data', (b) => { + stdoutBuf += b.toString(); + }); + serverProcess.stderr?.on('data', (b) => { + stderrBuf += b.toString(); + }); + + await new Promise<void>((resolve, reject) => { + const timer = setTimeout(() => { + if (serverProcess && !serverProcess.killed) { + serverProcess.kill('SIGKILL'); + } + reject( + new Error( + `tasks fixture failed to start within ${SERVER_STARTUP_TIMEOUT_MS}ms.\nSTDOUT: ${stdoutBuf}\nSTDERR: ${stderrBuf}` + ) + ); + }, SERVER_STARTUP_TIMEOUT_MS); + + // mcpkit's tasks demo logs the listen address to stderr via the + // log package; treat any "Connect:" or "listening" line as ready. 
+ const checkReady = (chunk: string) => { + if ( + chunk.includes('Connect:') || + chunk.includes('listening') || + chunk.includes('Listening on') + ) { + clearTimeout(timer); + resolve(); + } + }; + serverProcess!.stdout?.on('data', (b) => checkReady(b.toString())); + serverProcess!.stderr?.on('data', (b) => checkReady(b.toString())); + + serverProcess!.on('error', (err) => { + clearTimeout(timer); + reject(new Error(`Failed to spawn tasks fixture: ${err.message}`)); + }); + serverProcess!.on('exit', (code) => { + if (code !== null && code !== 0) { + clearTimeout(timer); + reject( + new Error( + `tasks fixture exited prematurely with code ${code}.\nSTDOUT: ${stdoutBuf}\nSTDERR: ${stderrBuf}` + ) + ); + } + }); + }); + }, SERVER_STARTUP_TIMEOUT_MS + 5_000); + + afterAll(async () => { + if (!SHOULD_SPAWN) return; + if (!serverProcess || serverProcess.killed) return; + serverProcess.kill('SIGTERM'); + await new Promise<void>((resolve) => { + const timer = setTimeout(() => { + if (serverProcess && serverProcess.exitCode === null) { + serverProcess.kill('SIGKILL'); + } + resolve(); + }, 3_000); + serverProcess!.once('exit', () => { + clearTimeout(timer); + resolve(); + }); + }); + serverProcess = null; + }); + + for (const scenario of TASKS_SCENARIOS) { + it(`${scenario.name} — all checks succeed against fixture`, async () => { + const checks = await scenario.run(SERVER_URL); + expect(checks.length).toBeGreaterThan(0); + const failures = checks.filter( + (c) => c.status === 'FAILURE' || c.status === 'WARNING' + ); + if (failures.length > 0) { + // Surface the failing slugs and messages so vitest output points + // at the exact spec-coverage gaps. + const detail = failures + .map((c) => ` - ${c.id}: ${c.errorMessage ?? 
'(no message)'}`) + .join('\n'); + throw new Error( + `${failures.length}/${checks.length} checks failed:\n${detail}` + ); + } + }); + } +}); diff --git a/src/scenarios/server/tasks/lifecycle.ts b/src/scenarios/server/tasks/lifecycle.ts new file mode 100644 index 0000000..f36fe02 --- /dev/null +++ b/src/scenarios/server/tasks/lifecycle.ts @@ -0,0 +1,600 @@ +/** + * SEP-2663 Tasks Extension — server lifecycle conformance. + * + * Tests a server that implements the io.modelcontextprotocol/tasks + * extension end-to-end: sync vs async dispatch, DetailedTask shape on + * tasks/get, tool errors vs protocol errors, and cancellation + * semantics. + * + * Required server fixtures (tools/list output must include all): + * - greet — sync-only, returns "Hello, {name}!" + * - slow_compute — task-supporting, sleeps N seconds + * - failing_job — task-supporting, returns a tool error + * - protocol_error_job — task-supporting, panics into a protocol error + */ + +import { + ClientScenario, + ConformanceCheck, + ScenarioSpecTag, + SpecReference, + DRAFT_PROTOCOL_VERSION +} from '../../../types'; +import { + TASKS_EXTENSION_ID, + initRawSession, + rawRequest, + waitForTerminal +} from './helpers'; + +const SEP_2663_REF: SpecReference = { + id: 'SEP-2663', + url: 'https://github.com/modelcontextprotocol/specification/pull/2663' +}; +const SEP_2322_REF: SpecReference = { + id: 'SEP-2322', + url: 'https://github.com/modelcontextprotocol/specification/pull/2322' +}; + +export class TasksLifecycleScenario implements ClientScenario { + name = 'tasks-lifecycle'; + // 'extension' tags this as off the dated-version timeline (selectable + // via `--suite extensions`); DRAFT_PROTOCOL_VERSION lets `--spec-version + // draft` runs include it before SEP-2663 lands in a dated release. + specVersions: ScenarioSpecTag[] = ['extension', DRAFT_PROTOCOL_VERSION]; + description = `Test SEP-2663 Tasks extension lifecycle on the server. 
+ +**Server Implementation Requirements (SEP-2663):** + +The server MUST advertise \`io.modelcontextprotocol/tasks\` under +\`capabilities.extensions\` and gate the task surface on negotiation. + +**Sync dispatch (no task created):** +- A \`tools/call\` against a sync-only tool MUST return a flat + \`ToolResult\` with \`resultType:"complete"\` and a \`content[]\` array. +- It MUST NOT carry \`taskId\` at the top level (that would imply a + CreateTaskResult). + +**Server-directed task creation:** +- For task-supporting tools, the server decides whether to create a task — + the client MUST NOT need to opt in via a request param. +- The response MUST be a \`CreateTaskResult\` — a flat \`Result & Task\` + intersection: \`resultType:"task"\`, plus \`taskId\` / \`status\` / + \`createdAt\` / \`lastUpdatedAt\` / \`ttlSeconds\` at the top level. + There MUST NOT be a nested \`task\` wrapper key. + +**tasks/get DetailedTask:** +- Working tasks return \`status\` and basic metadata; result/error are + absent. +- Completed tasks MUST inline the original tool result under \`result\` + with \`content[]\`. There is no separate \`tasks/result\` method. + +**Tool errors vs protocol errors (SEP-2663 §error-semantics):** +- A tool that ran but reported an error MUST surface as + \`status:"completed"\` with \`result.isError:true\`. The status + \`"failed"\` is reserved for protocol-level errors. +- A protocol-level error (server crash, internal failure) MUST surface + as \`status:"failed"\` with an inlined \`error\` object (JSON-RPC + error shape: code/message/data) and MUST NOT carry \`result\`. + +**Cancellation:** +- \`tasks/cancel\` MUST return an empty + \`{resultType:"complete"}\` ack — no task envelope (SEP-2322 + discriminator). The cancelled status is observed via the next + \`tasks/get\`. +- \`tasks/cancel\` against a terminal task MUST return JSON-RPC + \`-32602\` (InvalidParams). 
Clarified upstream in spec commit d963ad0.`; + + async run(serverUrl: string): Promise<ConformanceCheck[]> { + const checks: ConformanceCheck[] = []; + + let sessionId: string; + try { + sessionId = await initRawSession(serverUrl, { + capabilities: { + elicitation: {}, + sampling: {}, + extensions: { [TASKS_EXTENSION_ID]: {} } + } + }); + } catch (error) { + checks.push({ + id: 'tasks-session-bootstrap', + name: 'TasksSessionBootstrap', + description: + 'Initialize handshake declaring io.modelcontextprotocol/tasks extension succeeds', + status: 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: `Failed to initialize: ${errMsg(error)}`, + specReferences: [SEP_2663_REF] + }); + return checks; + } + + // Check 1: sync tool call returns ToolResult, no task creation. + { + const id = 'tasks-sync-tool-call'; + const name = 'TasksSyncToolCall'; + const description = + 'Sync tool returns ToolResult (resultType:"complete"), no taskId at top level'; + try { + const result = await rawRequest( + serverUrl, + 'tools/call', + { name: 'greet', arguments: { name: 'World' } }, + { sessionId } + ); + const errs: string[] = []; + if (result.resultType === 'task') { + errs.push('sync tool result MUST NOT carry resultType:"task"'); + } + if (result.taskId) { + errs.push( + `sync tool result MUST NOT carry top-level taskId; got ${result.taskId}` + ); + } + if (!Array.isArray(result.content) || result.content.length === 0) { + errs.push('sync tool result MUST carry a non-empty content[] array'); + } + checks.push({ + id, + name, + description, + status: errs.length === 0 ? 'SUCCESS' : 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: errs.length > 0 ? 
errs.join('; ') : undefined, + specReferences: [SEP_2663_REF, SEP_2322_REF], + details: { + resultType: result.resultType, + hasTaskId: Boolean(result.taskId), + contentLength: result.content?.length + } + }); + } catch (error) { + checks.push(failureCheck(id, name, description, error, [SEP_2663_REF])); + } + } + + // Check 2: server-directed task creation produces flat CreateTaskResult. + let workingTaskId: string | undefined; + { + const id = 'tasks-server-task-creation'; + const name = 'TasksServerTaskCreation'; + const description = + 'Task-supporting tool returns flat CreateTaskResult (no nested `task` wrapper)'; + try { + const result = await rawRequest( + serverUrl, + 'tools/call', + { + name: 'slow_compute', + arguments: { seconds: 2, label: 'lifecycle-create' } + }, + { sessionId } + ); + const errs: string[] = []; + if (result.resultType !== 'task') { + errs.push( + `expected resultType:"task"; got ${JSON.stringify(result.resultType)}` + ); + } + if (result.task) { + errs.push( + 'CreateTaskResult MUST be flat (Result & Task); there must be no nested `task` wrapper key' + ); + } + if (!result.taskId) { + errs.push('CreateTaskResult MUST carry top-level taskId'); + } + if (!result.status) { + errs.push('CreateTaskResult MUST carry top-level status'); + } + if ('result' in result) { + errs.push( + 'CreateTaskResult MUST NOT carry `result` (lives on tasks/get DetailedTask)' + ); + } + if ('error' in result) { + errs.push( + 'CreateTaskResult MUST NOT carry `error` (lives on tasks/get DetailedTask)' + ); + } + if ('inputRequests' in result) { + errs.push( + 'CreateTaskResult MUST NOT carry `inputRequests` (lives on tasks/get DetailedTask)' + ); + } + if (result.taskId) workingTaskId = result.taskId; + checks.push({ + id, + name, + description, + status: errs.length === 0 ? 'SUCCESS' : 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: errs.length > 0 ? 
errs.join('; ') : undefined, + specReferences: [SEP_2663_REF], + details: { + resultType: result.resultType, + taskId: result.taskId, + status: result.status + } + }); + } catch (error) { + checks.push(failureCheck(id, name, description, error, [SEP_2663_REF])); + } + } + + // Check 3: tasks/get during working state returns status + metadata. + { + const id = 'tasks-get-during-working'; + const name = 'TasksGetDuringWorking'; + const description = + 'tasks/get returns status + metadata for an active task'; + if (!workingTaskId) { + checks.push(skipCheck(id, name, description, 'no task created')); + } else { + try { + const task = await rawRequest( + serverUrl, + 'tasks/get', + { taskId: workingTaskId }, + { sessionId } + ); + const errs: string[] = []; + if (task.taskId !== workingTaskId) { + errs.push( + `taskId mismatch: expected ${workingTaskId}, got ${task.taskId}` + ); + } + if (!task.status) errs.push('tasks/get response MUST carry status'); + checks.push({ + id, + name, + description, + status: errs.length === 0 ? 'SUCCESS' : 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: errs.length > 0 ? errs.join('; ') : undefined, + specReferences: [SEP_2663_REF], + details: { status: task.status } + }); + } catch (error) { + checks.push(failureCheck(id, name, description, error, [SEP_2663_REF])); + } + } + } + + // Check 4: terminal tasks/get inlines result with content[]. 
+ { + const id = 'tasks-get-terminal-inlined-result'; + const name = 'TasksGetTerminalInlinedResult'; + const description = + 'Completed task tasks/get inlines result with content[] (no separate tasks/result method)'; + if (!workingTaskId) { + checks.push(skipCheck(id, name, description, 'no task created')); + } else { + try { + const terminal = await waitForTerminal( + serverUrl, + sessionId, + workingTaskId + ); + const errs: string[] = []; + if (terminal.status !== 'completed') { + errs.push( + `expected status:"completed"; got ${JSON.stringify(terminal.status)}` + ); + } + if (!terminal.result) { + errs.push('completed task MUST inline `result`'); + } else if ( + !Array.isArray(terminal.result.content) || + terminal.result.content.length === 0 + ) { + errs.push( + 'completed task `result.content[]` MUST be a non-empty array' + ); + } + checks.push({ + id, + name, + description, + status: errs.length === 0 ? 'SUCCESS' : 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: errs.length > 0 ? errs.join('; ') : undefined, + specReferences: [SEP_2663_REF], + details: { + status: terminal.status, + hasResult: Boolean(terminal.result), + contentLength: terminal.result?.content?.length + } + }); + } catch (error) { + checks.push(failureCheck(id, name, description, error, [SEP_2663_REF])); + } + } + } + + // Check 5: tool execution error → completed with isError:true. 
+ { + const id = 'tasks-tool-error-completed-iserror'; + const name = 'TasksToolErrorCompletedIsError'; + const description = + 'Tool execution error reports as completed + result.isError (NOT failed)'; + try { + const created = await rawRequest( + serverUrl, + 'tools/call', + { name: 'failing_job', arguments: {} }, + { sessionId } + ); + const errs: string[] = []; + if (!created.taskId) { + errs.push('failing_job MUST create a task'); + } else { + const terminal = await waitForTerminal( + serverUrl, + sessionId, + created.taskId + ); + if (terminal.status !== 'completed') { + errs.push( + `tool error MUST surface as completed (not "${terminal.status}")` + ); + } + if (!terminal.result) { + errs.push('completed task with tool error MUST carry `result`'); + } else if (terminal.result.isError !== true) { + errs.push('result.isError MUST be true for tool execution errors'); + } + } + checks.push({ + id, + name, + description, + status: errs.length === 0 ? 'SUCCESS' : 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: errs.length > 0 ? errs.join('; ') : undefined, + specReferences: [SEP_2663_REF] + }); + } catch (error) { + checks.push(failureCheck(id, name, description, error, [SEP_2663_REF])); + } + } + + // Check 6: protocol-level error → failed with inlined error, no result. 
+ { + const id = 'tasks-protocol-error-failed-shape'; + const name = 'TasksProtocolErrorFailedShape'; + const description = + 'Protocol-level error reports as failed + inlined error{code,message}, no result'; + try { + const created = await rawRequest( + serverUrl, + 'tools/call', + { name: 'protocol_error_job', arguments: {} }, + { sessionId } + ); + const errs: string[] = []; + if (!created.taskId) { + errs.push('protocol_error_job MUST create a task'); + } else { + const terminal = await waitForTerminal( + serverUrl, + sessionId, + created.taskId + ); + if (terminal.status !== 'failed') { + errs.push( + `protocol error MUST surface as failed (not "${terminal.status}")` + ); + } + if (!terminal.error) { + errs.push('failed task MUST carry inlined `error`'); + } else { + if (typeof terminal.error.code !== 'number') { + errs.push('failed task error MUST carry numeric `code`'); + } + if (typeof terminal.error.message !== 'string') { + errs.push('failed task error MUST carry string `message`'); + } + } + if (terminal.result) { + errs.push('failed task MUST NOT carry `result`'); + } + } + checks.push({ + id, + name, + description, + status: errs.length === 0 ? 'SUCCESS' : 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: errs.length > 0 ? errs.join('; ') : undefined, + specReferences: [SEP_2663_REF] + }); + } catch (error) { + checks.push(failureCheck(id, name, description, error, [SEP_2663_REF])); + } + } + + // Check 7: tasks/cancel returns empty {resultType:"complete"} ack; + // status settles to cancelled. 
+ { + const id = 'tasks-cancel-empty-ack'; + const name = 'TasksCancelEmptyAck'; + const description = + 'tasks/cancel returns {resultType:"complete"} ack; status settles to cancelled'; + let cancelTaskId: string | undefined; + try { + const created = await rawRequest( + serverUrl, + 'tools/call', + { + name: 'slow_compute', + arguments: { seconds: 60, label: 'lifecycle-cancel' } + }, + { sessionId } + ); + cancelTaskId = created.taskId; + if (!cancelTaskId) { + checks.push({ + id, + name, + description, + status: 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: 'slow_compute did not create a task', + specReferences: [SEP_2663_REF, SEP_2322_REF] + }); + } else { + const ack = await rawRequest( + serverUrl, + 'tasks/cancel', + { taskId: cancelTaskId }, + { sessionId } + ); + const errs: string[] = []; + // Ack carries only the SEP-2322 discriminator — no task envelope. + if ( + JSON.stringify(ack) !== JSON.stringify({ resultType: 'complete' }) + ) { + errs.push( + `cancel ack MUST be {resultType:"complete"}; got ${JSON.stringify(ack)}` + ); + } + // Status settles to cancelled — observe via tasks/get. + const after = await rawRequest( + serverUrl, + 'tasks/get', + { taskId: cancelTaskId }, + { sessionId } + ); + if (after.status !== 'cancelled') { + errs.push( + `tasks/get after cancel MUST report cancelled; got ${after.status}` + ); + } + checks.push({ + id, + name, + description, + status: errs.length === 0 ? 'SUCCESS' : 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: errs.length > 0 ? errs.join('; ') : undefined, + specReferences: [SEP_2663_REF, SEP_2322_REF], + details: { cancelAck: ack, statusAfterCancel: after.status } + }); + } + } catch (error) { + checks.push(failureCheck(id, name, description, error, [SEP_2663_REF])); + } + } + + // Check 8: tasks/cancel on a terminal task MUST return -32602. 
+ { + const id = 'tasks-cancel-terminal-rejected'; + const name = 'TasksCancelTerminalRejected'; + const description = + 'tasks/cancel on a terminal task returns -32602 (per spec commit d963ad0)'; + try { + const created = await rawRequest( + serverUrl, + 'tools/call', + { + name: 'slow_compute', + arguments: { seconds: 1, label: 'lifecycle-cancel-terminal' } + }, + { sessionId } + ); + const completedTaskId = created.taskId; + if (!completedTaskId) { + checks.push({ + id, + name, + description, + status: 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: 'slow_compute did not create a task', + specReferences: [SEP_2663_REF] + }); + } else { + await waitForTerminal(serverUrl, sessionId, completedTaskId); + // Now cancel — must throw -32602. + let thrown: any; + try { + await rawRequest( + serverUrl, + 'tasks/cancel', + { taskId: completedTaskId }, + { sessionId } + ); + } catch (e) { + thrown = e; + } + const errs: string[] = []; + if (!thrown) { + errs.push( + 'tasks/cancel on terminal task MUST return JSON-RPC error' + ); + } else if (thrown.code !== -32602) { + errs.push( + `expected error code -32602; got ${thrown.code ?? '<none>'}` + ); + } + checks.push({ + id, + name, + description, + status: errs.length === 0 ? 'SUCCESS' : 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: errs.length > 0 ? errs.join('; ') : undefined, + specReferences: [SEP_2663_REF], + details: { observedCode: thrown?.code } + }); + } + } catch (error) { + checks.push(failureCheck(id, name, description, error, [SEP_2663_REF])); + } + } + + return checks; + } +} + +function errMsg(error: unknown): string { + return error instanceof Error ? 
error.message : String(error); +} + +function failureCheck( + id: string, + name: string, + description: string, + error: unknown, + specReferences: SpecReference[] +): ConformanceCheck { + return { + id, + name, + description, + status: 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: errMsg(error), + specReferences + }; +} + +function skipCheck( + id: string, + name: string, + description: string, + reason: string +): ConformanceCheck { + return { + id, + name, + description, + status: 'SKIPPED', + timestamp: new Date().toISOString(), + errorMessage: `Skipped: ${reason}`, + specReferences: [SEP_2663_REF] + }; +} From 4f2eac0ba4a2fbb2b8b85a2652e2344aea8f6f98 Mon Sep 17 00:00:00 2001 From: Sri Panyam Date: Tue, 5 May 2026 14:17:00 -0700 Subject: [PATCH 2/7] style(tasks): apply prettier formatting --- src/scenarios/server/tasks/lifecycle.test.ts | 12 ++++++++---- src/scenarios/server/tasks/lifecycle.ts | 8 ++++++-- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/scenarios/server/tasks/lifecycle.test.ts b/src/scenarios/server/tasks/lifecycle.test.ts index 0977139..d2ea918 100644 --- a/src/scenarios/server/tasks/lifecycle.test.ts +++ b/src/scenarios/server/tasks/lifecycle.test.ts @@ -46,10 +46,14 @@ describeIfTarget('SEP-2663 Tasks — server conformance', () => { beforeAll(async () => { if (!SHOULD_SPAWN) return; - serverProcess = spawn(FIXTURE_BINARY!, ['--serve', '--addr', `:${TEST_PORT}`], { - stdio: ['ignore', 'pipe', 'pipe'], - detached: false - }); + serverProcess = spawn( + FIXTURE_BINARY!, + ['--serve', '--addr', `:${TEST_PORT}`], + { + stdio: ['ignore', 'pipe', 'pipe'], + detached: false + } + ); let stdoutBuf = ''; let stderrBuf = ''; diff --git a/src/scenarios/server/tasks/lifecycle.ts b/src/scenarios/server/tasks/lifecycle.ts index f36fe02..e337c83 100644 --- a/src/scenarios/server/tasks/lifecycle.ts +++ b/src/scenarios/server/tasks/lifecycle.ts @@ -258,7 +258,9 @@ The server MUST advertise 
\`io.modelcontextprotocol/tasks\` under details: { status: task.status } }); } catch (error) { - checks.push(failureCheck(id, name, description, error, [SEP_2663_REF])); + checks.push( + failureCheck(id, name, description, error, [SEP_2663_REF]) + ); } } } @@ -309,7 +311,9 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under } }); } catch (error) { - checks.push(failureCheck(id, name, description, error, [SEP_2663_REF])); + checks.push( + failureCheck(id, name, description, error, [SEP_2663_REF]) + ); } } } From 95da20da525d0b2d726dd599dae11ea48ee6c799 Mon Sep 17 00:00:00 2001 From: Sri Panyam Date: Tue, 5 May 2026 15:02:43 -0700 Subject: [PATCH 3/7] feat(tasks,mrtr): port full SEP-2663 + SEP-2322 scenario suite MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Builds out the rest of the tasks scenarios (atop the lifecycle canary) and adds the SEP-2322 ephemeral MRTR scenario in a sibling folder. Both target their own fixtures; both runners are brand-neutral and language-agnostic (TASKS_SERVER_URL / TASKS_SERVER_CMD, MRTR_SERVER_URL / MRTR_SERVER_CMD; readiness via TCP polling). 
Tasks ClientScenario classes: - TasksLifecycleScenario (8 checks; v2-01..v2-08) - TasksCapabilityNegotiationScenario (4 checks; v2-11/22/23/25, SEP-2575) - TasksWireFieldsScenario (3 checks; v2-12/13/21) - TasksRequestStateScenario (3 checks; v2-14/15/28) - TasksMRTRInputScenario (3 checks; v2-16/17/29 partial fulfillment) - TasksRequestHeadersScenario (3 checks; SEP-2243 request-header tolerance) - TasksDispatchScenario (8 checks; v2-09/10/19/20/26/27/30/31) - TasksStatusNotificationsScenario (1 check; SEP-2663 §notifications, optional) MRTR ClientScenario class: - MrtrEphemeralFlowScenario (7 checks + 1 SKIPPED; mrtr-01..07, mrtr-08 deferred for spec terminology + reference-impl reasons) Both runners spawn the fixture via a shell command and detect readiness by TCP-polling the URL's host/port — no log-line scanning, no language-specific assumptions. The same env vars work for any server implementation. Scenarios are tagged ['extension', DRAFT_PROTOCOL_VERSION] and registered in pendingClientScenariosList so all-scenarios.test.ts (which targets the upstream everything-server) skips them until the fixture grows SEP-2322 / SEP-2663 support. 
--- src/scenarios/index.ts | 37 +- .../server/mrtr/all-scenarios.test.ts | 149 +++++ src/scenarios/server/mrtr/ephemeral-flow.ts | 585 ++++++++++++++++++ src/scenarios/server/mrtr/helpers.ts | 82 +++ .../server/tasks/all-scenarios.test.ts | 176 ++++++ src/scenarios/server/tasks/capability.ts | 291 +++++++++ src/scenarios/server/tasks/dispatch.ts | 560 +++++++++++++++++ src/scenarios/server/tasks/headers.ts | 243 ++++++++ src/scenarios/server/tasks/helpers.ts | 97 ++- src/scenarios/server/tasks/lifecycle.test.ts | 149 ----- src/scenarios/server/tasks/lifecycle.ts | 58 +- src/scenarios/server/tasks/mrtr-input.ts | 416 +++++++++++++ src/scenarios/server/tasks/notifications.ts | 188 ++++++ src/scenarios/server/tasks/request-state.ts | 290 +++++++++ src/scenarios/server/tasks/wire-fields.ts | 250 ++++++++ 15 files changed, 3364 insertions(+), 207 deletions(-) create mode 100644 src/scenarios/server/mrtr/all-scenarios.test.ts create mode 100644 src/scenarios/server/mrtr/ephemeral-flow.ts create mode 100644 src/scenarios/server/mrtr/helpers.ts create mode 100644 src/scenarios/server/tasks/all-scenarios.test.ts create mode 100644 src/scenarios/server/tasks/capability.ts create mode 100644 src/scenarios/server/tasks/dispatch.ts create mode 100644 src/scenarios/server/tasks/headers.ts delete mode 100644 src/scenarios/server/tasks/lifecycle.test.ts create mode 100644 src/scenarios/server/tasks/mrtr-input.ts create mode 100644 src/scenarios/server/tasks/notifications.ts create mode 100644 src/scenarios/server/tasks/request-state.ts create mode 100644 src/scenarios/server/tasks/wire-fields.ts diff --git a/src/scenarios/index.ts b/src/scenarios/index.ts index 096f4f4..e82a16c 100644 --- a/src/scenarios/index.ts +++ b/src/scenarios/index.ts @@ -64,6 +64,14 @@ import { import { DNSRebindingProtectionScenario } from './server/dns-rebinding'; import { TasksLifecycleScenario } from './server/tasks/lifecycle'; +import { TasksCapabilityNegotiationScenario } from 
'./server/tasks/capability'; +import { TasksWireFieldsScenario } from './server/tasks/wire-fields'; +import { TasksRequestStateScenario } from './server/tasks/request-state'; +import { TasksMRTRInputScenario } from './server/tasks/mrtr-input'; +import { TasksRequestHeadersScenario } from './server/tasks/headers'; +import { TasksDispatchScenario } from './server/tasks/dispatch'; +import { TasksStatusNotificationsScenario } from './server/tasks/notifications'; +import { MrtrEphemeralFlowScenario } from './server/mrtr/ephemeral-flow'; import { authScenariosList, @@ -91,7 +99,20 @@ const pendingClientScenariosList: ClientScenario[] = [ // so all-scenarios.test.ts cannot exercise this against the default // fixture. Active runs target a SEP-2663-conformant server via the // dedicated tasks/lifecycle.test.ts harness. - new TasksLifecycleScenario() + new TasksLifecycleScenario(), + new TasksCapabilityNegotiationScenario(), + new TasksWireFieldsScenario(), + new TasksRequestStateScenario(), + new TasksMRTRInputScenario(), + new TasksRequestHeadersScenario(), + new TasksDispatchScenario(), + new TasksStatusNotificationsScenario(), + + // SEP-2322 MRTR (ephemeral IncompleteResult flow). + // Targets a different fixture than tasks scenarios; the dedicated + // mrtr/all-scenarios.test.ts runner points at an MRTR-conformant + // server via MRTR_SERVER_URL / MRTR_SERVER_CMD. + new MrtrEphemeralFlowScenario() ]; // All client scenarios @@ -156,7 +177,19 @@ const allClientScenariosList: ClientScenario[] = [ // filter sees it; pendingClientScenariosList below excludes it from // automatic runs against the everything-server (which doesn't implement // io.modelcontextprotocol/tasks yet). 
- new TasksLifecycleScenario() + new TasksLifecycleScenario(), + new TasksCapabilityNegotiationScenario(), + new TasksWireFieldsScenario(), + new TasksRequestStateScenario(), + new TasksMRTRInputScenario(), + new TasksRequestHeadersScenario(), + new TasksDispatchScenario(), + new TasksStatusNotificationsScenario(), + + // SEP-2322 MRTR (ephemeral IncompleteResult flow). Targets a + // dedicated MRTR fixture — out of scope for the default + // everything-server until SEP-2322 lands there. + new MrtrEphemeralFlowScenario() ]; // Active client scenarios (excludes pending) diff --git a/src/scenarios/server/mrtr/all-scenarios.test.ts b/src/scenarios/server/mrtr/all-scenarios.test.ts new file mode 100644 index 0000000..dd1e6bb --- /dev/null +++ b/src/scenarios/server/mrtr/all-scenarios.test.ts @@ -0,0 +1,149 @@ +/** + * SEP-2322 MRTR test runner. + * + * Iterates the MRTR scenario classes against a SEP-2322-conformant + * server. Configuration is brand-neutral and language-agnostic: + * + * 1. Point at an already-running server: + * MRTR_SERVER_URL=http://localhost:8080/mcp npm test -- mrtr/all-scenarios.test.ts + * + * 2. Auto-spawn a fixture before tests (any language): + * MRTR_SERVER_URL=http://localhost:18093/mcp \ + * MRTR_SERVER_CMD="/path/to/server --port 18093" \ + * npm test -- mrtr/all-scenarios.test.ts + * + * If MRTR_SERVER_URL is unset the suite is skipped — keeping CI runs + * against the everything-server green. + * + * The fixture server can be implemented in any language as long as it + * exposes a SEP-2322 conformant Streamable HTTP MCP endpoint. Anyone is + * free to bring their own; one example reference implementation lives + * at https://github.com/panyam/mcpkit/tree/main/examples/mrtr. 
+ */ + +import { spawn, ChildProcess } from 'child_process'; +import { connect } from 'net'; +import { describe, it, expect, beforeAll, afterAll } from 'vitest'; +import { MrtrEphemeralFlowScenario } from './ephemeral-flow'; + +const SERVER_URL = process.env.MRTR_SERVER_URL; +const SERVER_CMD = process.env.MRTR_SERVER_CMD; +const SERVER_STARTUP_TIMEOUT_MS = 15_000; +const SHOULD_SPAWN = Boolean(SERVER_URL && SERVER_CMD); +const HAVE_TARGET = Boolean(SERVER_URL); + +const MRTR_SCENARIOS = [new MrtrEphemeralFlowScenario()]; + +const describeIfTarget = HAVE_TARGET ? describe : describe.skip; + +describeIfTarget('SEP-2322 MRTR — server conformance', () => { + let serverProcess: ChildProcess | null = null; + + beforeAll(async () => { + if (!SHOULD_SPAWN) return; + + serverProcess = spawn('sh', ['-c', SERVER_CMD!], { + stdio: ['ignore', 'pipe', 'pipe'], + detached: false + }); + + let stdoutBuf = ''; + let stderrBuf = ''; + serverProcess.stdout?.on('data', (b) => { + stdoutBuf += b.toString(); + }); + serverProcess.stderr?.on('data', (b) => { + stderrBuf += b.toString(); + }); + + serverProcess.on('exit', (code) => { + if (code !== null && code !== 0) { + console.error( + `mrtr fixture exited unexpectedly with code ${code}.\nSTDOUT: ${stdoutBuf}\nSTDERR: ${stderrBuf}` + ); + } + }); + + await waitForTcpReady(SERVER_URL!, SERVER_STARTUP_TIMEOUT_MS).catch( + (err) => { + if (serverProcess && !serverProcess.killed) { + serverProcess.kill('SIGKILL'); + } + throw new Error( + `mrtr fixture did not become reachable within ${SERVER_STARTUP_TIMEOUT_MS}ms: ${err.message}\nSTDOUT: ${stdoutBuf}\nSTDERR: ${stderrBuf}` + ); + } + ); + }, SERVER_STARTUP_TIMEOUT_MS + 5_000); + + afterAll(async () => { + if (!SHOULD_SPAWN) return; + if (!serverProcess || serverProcess.killed) return; + serverProcess.kill('SIGTERM'); + await new Promise((resolve) => { + const timer = setTimeout(() => { + if (serverProcess && !serverProcess.killed) { + serverProcess.kill('SIGKILL'); + } + resolve(); + }, 
3_000); + serverProcess!.once('exit', () => { + clearTimeout(timer); + resolve(); + }); + }); + serverProcess = null; + }); + + for (const scenario of MRTR_SCENARIOS) { + it(`${scenario.name} — all checks succeed against fixture`, async () => { + const checks = await scenario.run(SERVER_URL!); + expect(checks.length).toBeGreaterThan(0); + const failures = checks.filter( + (c) => c.status === 'FAILURE' || c.status === 'WARNING' + ); + if (failures.length > 0) { + const detail = failures + .map((c) => ` - ${c.id}: ${c.errorMessage ?? '(no message)'}`) + .join('\n'); + throw new Error( + `${failures.length}/${checks.length} checks failed:\n${detail}` + ); + } + }); + } +}); + +async function waitForTcpReady(url: string, timeoutMs: number): Promise { + const u = new URL(url); + const port = parseInt(u.port || (u.protocol === 'https:' ? '443' : '80'), 10); + const host = u.hostname; + const deadline = Date.now() + timeoutMs; + let lastErr: Error | null = null; + + while (Date.now() < deadline) { + try { + await new Promise((resolve, reject) => { + const socket = connect({ host, port }, () => { + socket.end(); + resolve(); + }); + socket.once('error', (err) => { + socket.destroy(); + reject(err); + }); + socket.setTimeout(1_000, () => { + socket.destroy(); + reject(new Error('connect timeout')); + }); + }); + return; + } catch (err) { + lastErr = err as Error; + await new Promise((r) => setTimeout(r, 200)); + } + } + throw new Error( + `${host}:${port} did not accept TCP connections (last: ${lastErr?.message ?? 'unknown'})` + ); +} diff --git a/src/scenarios/server/mrtr/ephemeral-flow.ts b/src/scenarios/server/mrtr/ephemeral-flow.ts new file mode 100644 index 0000000..8deaf68 --- /dev/null +++ b/src/scenarios/server/mrtr/ephemeral-flow.ts @@ -0,0 +1,585 @@ +/** + * SEP-2322 MRTR ephemeral IncompleteResult flow. 
+ * + * Tests the multi-round-trip-request contract end-to-end against any + * server that implements SEP-2322's ephemeral path: tools/call returns + * `IncompleteResult` to gather input, the client retries the SAME + * tools/call with `inputResponses` (and echoed `requestState`), and + * the server eventually returns a normal `ToolResult`. No task + * envelope, no separate methods. + * + * Required server fixtures (tools/list output must include all): + * - test_tool_with_elicitation — single elicitation/create round + * - test_incomplete_result_sampling — single sampling/createMessage round + * - test_incomplete_result_list_roots — single roots/list round + * - test_incomplete_result_request_state — exercises requestState validation + * - test_incomplete_result_multiple_inputs — emits 3+ inputRequests in one round + * - test_incomplete_result_multi_round — drives 2+ MRTR rounds + * - test_incomplete_result_elicitation — emits inputRequest for "user_name"; + * server re-requests on wrong key + */ + +import { + ClientScenario, + ConformanceCheck, + ScenarioSpecTag, + DRAFT_PROTOCOL_VERSION +} from '../../../types'; +import { initRawSession, rawRequest } from '../tasks/helpers'; +import { + MRTR_INCOMPLETE_RESULT_TYPE, + SEP_2322_REF, + errMsg, + failureCheck, + isCompleteResult, + isIncompleteResult, + mockElicitResponse, + mockListRootsResponse, + mockSamplingResponse +} from './helpers'; + +export class MrtrEphemeralFlowScenario implements ClientScenario { + name = 'mrtr-ephemeral-flow'; + // MRTR is in draft alongside SEP-2322; tagged 'extension' because it + // introduces an ephemeral resultType discriminator that's not on the + // dated-spec timeline yet. + specVersions: ScenarioSpecTag[] = ['extension', DRAFT_PROTOCOL_VERSION]; + description = `Test SEP-2322 ephemeral MRTR (Multi Round-Trip Request) flow. 
+ +**Server Implementation Requirements:** + +Every \`tools/call\` response in the MRTR contract is one of: +- \`resultType:"${MRTR_INCOMPLETE_RESULT_TYPE}"\` — server is asking for + more input; carries an \`inputRequests\` map keyed by server-minted + opaque ids and (optionally) a \`requestState\` token to echo on the + next round. +- \`resultType:"complete"\` (or absent — current SDKs may strip the + discriminator on responses without one) — the tools/call has finished; + the body is a normal \`ToolResult\` with \`content[]\`. + +**Round-trip rules (SEP-2322):** +- Round 1 with no \`inputResponses\` MUST return \`IncompleteResult\` + with \`inputRequests\`. +- The client retries the SAME tools/call (same name + arguments) with + \`inputResponses\` keyed against the previously-emitted ids, plus the + echoed \`requestState\` if one was provided. +- The server MUST validate the echoed \`requestState\` and complete on + the next round. + +**Multi-method support:** +- A single \`IncompleteResult\` can carry \`inputRequests\` for + \`elicitation/create\`, \`sampling/createMessage\`, and \`roots/list\` + in any combination. + +**Multi-round + state accumulation:** +- A handler MAY take more than two rounds. Each MRTR round mints a + fresh \`requestState\`; the prior token MUST NOT be reused. Answers + from prior rounds MUST be available to the handler on the final + round (server forwards them via \`requestState\`). + +**Wrong-key tolerance:** +- When a client retries with an \`inputResponses\` key the server did + not emit, the server SHOULD re-request via \`IncompleteResult\` + rather than erroring. 
(The spec is soft here; this scenario asserts + the re-request path.)`; + + async run(serverUrl: string): Promise { + const checks: ConformanceCheck[] = []; + + let sessionId: string; + try { + ({ sessionId } = await initRawSession(serverUrl, { + capabilities: { + elicitation: {}, + sampling: {}, + roots: {} + } + })); + } catch (error) { + checks.push({ + id: 'mrtr-session-bootstrap', + name: 'MrtrSessionBootstrap', + description: + 'Initialize handshake declaring elicitation/sampling/roots capabilities succeeds', + status: 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: `Failed to initialize: ${errMsg(error)}`, + specReferences: [SEP_2322_REF] + }); + return checks; + } + + // Check 1: basic elicitation round-trip. + { + const id = 'mrtr-basic-elicitation-round-trip'; + const name = 'MrtrBasicElicitationRoundTrip'; + const description = + 'tools/call returns IncompleteResult on round 1 (elicitation/create); completes on round 2 with the answer reflected in the result'; + try { + const r1 = await rawRequest( + serverUrl, + 'tools/call', + { name: 'test_tool_with_elicitation', arguments: {} }, + { sessionId } + ); + const errs: string[] = []; + if (!isIncompleteResult(r1)) { + errs.push( + `round 1 MUST be IncompleteResult; got ${JSON.stringify(r1)}` + ); + } + if (r1.resultType !== MRTR_INCOMPLETE_RESULT_TYPE) { + errs.push( + `resultType MUST be "${MRTR_INCOMPLETE_RESULT_TYPE}"; got ${JSON.stringify(r1.resultType)}` + ); + } + if (!r1.inputRequests || !r1.inputRequests.user_name) { + errs.push( + 'IncompleteResult MUST carry inputRequests with the "user_name" key' + ); + } else if (r1.inputRequests.user_name.method !== 'elicitation/create') { + errs.push( + `inputRequest method MUST be "elicitation/create"; got ${JSON.stringify(r1.inputRequests.user_name.method)}` + ); + } + + const r2 = await rawRequest( + serverUrl, + 'tools/call', + { + name: 'test_tool_with_elicitation', + arguments: {}, + inputResponses: { + user_name: mockElicitResponse({ 
name: 'Alice' }) + }, + ...(r1.requestState !== undefined + ? { requestState: r1.requestState } + : {}) + }, + { sessionId } + ); + if (!isCompleteResult(r2)) { + errs.push(`round 2 MUST be complete; got ${JSON.stringify(r2)}`); + } + const text = r2.content?.[0]?.text ?? ''; + if (!/Alice/.test(text)) { + errs.push( + 'response text SHOULD reference the answered name ("Alice")' + ); + } + checks.push({ + id, + name, + description, + status: errs.length === 0 ? 'SUCCESS' : 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: errs.length > 0 ? errs.join('; ') : undefined, + specReferences: [SEP_2322_REF] + }); + } catch (error) { + checks.push(failureCheck(id, name, description, error)); + } + } + + // Check 2: sampling round-trip. + { + const id = 'mrtr-sampling-round-trip'; + const name = 'MrtrSamplingRoundTrip'; + const description = + 'IncompleteResult with sampling/createMessage round-trips through the inputResponses retry'; + try { + const r1 = await rawRequest( + serverUrl, + 'tools/call', + { name: 'test_incomplete_result_sampling', arguments: {} }, + { sessionId } + ); + const errs: string[] = []; + if (!isIncompleteResult(r1)) { + errs.push('round 1 MUST be IncompleteResult'); + } else { + const key = Object.keys(r1.inputRequests)[0]; + if (r1.inputRequests[key].method !== 'sampling/createMessage') { + errs.push( + `inputRequest method MUST be "sampling/createMessage"; got ${JSON.stringify(r1.inputRequests[key].method)}` + ); + } + const r2 = await rawRequest( + serverUrl, + 'tools/call', + { + name: 'test_incomplete_result_sampling', + arguments: {}, + inputResponses: { [key]: mockSamplingResponse('Paris') }, + ...(r1.requestState !== undefined + ? { requestState: r1.requestState } + : {}) + }, + { sessionId } + ); + if (!isCompleteResult(r2)) { + errs.push('round 2 MUST be complete'); + } + } + checks.push({ + id, + name, + description, + status: errs.length === 0 ? 
'SUCCESS' : 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: errs.length > 0 ? errs.join('; ') : undefined, + specReferences: [SEP_2322_REF] + }); + } catch (error) { + checks.push(failureCheck(id, name, description, error)); + } + } + + // Check 3: roots/list round-trip. + { + const id = 'mrtr-roots-list-round-trip'; + const name = 'MrtrRootsListRoundTrip'; + const description = + 'IncompleteResult with roots/list round-trips through the inputResponses retry'; + try { + const r1 = await rawRequest( + serverUrl, + 'tools/call', + { name: 'test_incomplete_result_list_roots', arguments: {} }, + { sessionId } + ); + const errs: string[] = []; + if (!isIncompleteResult(r1)) { + errs.push('round 1 MUST be IncompleteResult'); + } else { + const key = Object.keys(r1.inputRequests)[0]; + if (r1.inputRequests[key].method !== 'roots/list') { + errs.push( + `inputRequest method MUST be "roots/list"; got ${JSON.stringify(r1.inputRequests[key].method)}` + ); + } + const r2 = await rawRequest( + serverUrl, + 'tools/call', + { + name: 'test_incomplete_result_list_roots', + arguments: {}, + inputResponses: { [key]: mockListRootsResponse() }, + ...(r1.requestState !== undefined + ? { requestState: r1.requestState } + : {}) + }, + { sessionId } + ); + if (!isCompleteResult(r2)) { + errs.push('round 2 MUST be complete'); + } + } + checks.push({ + id, + name, + description, + status: errs.length === 0 ? 'SUCCESS' : 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: errs.length > 0 ? errs.join('; ') : undefined, + specReferences: [SEP_2322_REF] + }); + } catch (error) { + checks.push(failureCheck(id, name, description, error)); + } + } + + // Check 4: requestState round-trip validation. 
+ { + const id = 'mrtr-request-state-round-trip'; + const name = 'MrtrRequestStateRoundTrip'; + const description = + 'When server emits requestState on round 1, it MUST be a non-empty string and the server MUST validate the echo on round 2'; + try { + const r1 = await rawRequest( + serverUrl, + 'tools/call', + { name: 'test_incomplete_result_request_state', arguments: {} }, + { sessionId } + ); + const errs: string[] = []; + if (!isIncompleteResult(r1)) { + errs.push('round 1 MUST be IncompleteResult'); + } + if (typeof r1.requestState !== 'string') { + errs.push( + `requestState MUST be a string when emitted; got ${typeof r1.requestState}` + ); + } else if (r1.requestState.length === 0) { + errs.push( + 'requestState MUST be non-empty when emitted (omit instead of "")' + ); + } + const key = Object.keys(r1.inputRequests ?? {})[0]; + if (key) { + const r2 = await rawRequest( + serverUrl, + 'tools/call', + { + name: 'test_incomplete_result_request_state', + arguments: {}, + inputResponses: { [key]: mockElicitResponse({ ok: true }) }, + requestState: r1.requestState + }, + { sessionId } + ); + if (!isCompleteResult(r2)) { + errs.push('round 2 MUST be complete after valid requestState echo'); + } + const text = + r2.content?.find((c: any) => c.type === 'text')?.text ?? ''; + if (!/state-ok/.test(text)) { + errs.push( + 'final response SHOULD include "state-ok" to confirm the server validated requestState' + ); + } + } + checks.push({ + id, + name, + description, + status: errs.length === 0 ? 'SUCCESS' : 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: errs.length > 0 ? errs.join('; ') : undefined, + specReferences: [SEP_2322_REF] + }); + } catch (error) { + checks.push(failureCheck(id, name, description, error)); + } + } + + // Check 5: multiple inputRequests of different methods in one round. 
+ { + const id = 'mrtr-multiple-input-requests-one-round'; + const name = 'MrtrMultipleInputRequestsOneRound'; + const description = + 'A single IncompleteResult MAY carry inputRequests for elicitation/create + sampling/createMessage + roots/list together'; + try { + const r1 = await rawRequest( + serverUrl, + 'tools/call', + { name: 'test_incomplete_result_multiple_inputs', arguments: {} }, + { sessionId } + ); + const errs: string[] = []; + if (!isIncompleteResult(r1)) { + errs.push('round 1 MUST be IncompleteResult'); + } else { + const keys = Object.keys(r1.inputRequests); + if (keys.length < 3) { + errs.push( + `expected at least 3 inputRequests in one round; got ${keys.length}` + ); + } + const methods = new Set(keys.map((k) => r1.inputRequests[k].method)); + for (const expected of [ + 'elicitation/create', + 'sampling/createMessage', + 'roots/list' + ]) { + if (!methods.has(expected)) { + errs.push(`inputRequests MUST include method "${expected}"`); + } + } + const inputResponses: Record = {}; + for (const [key, req] of Object.entries(r1.inputRequests) as Array< + [string, any] + >) { + if (req.method === 'elicitation/create') + inputResponses[key] = mockElicitResponse({ name: 'Alice' }); + else if (req.method === 'sampling/createMessage') + inputResponses[key] = mockSamplingResponse('hi'); + else if (req.method === 'roots/list') + inputResponses[key] = mockListRootsResponse(); + } + const r2 = await rawRequest( + serverUrl, + 'tools/call', + { + name: 'test_incomplete_result_multiple_inputs', + arguments: {}, + inputResponses, + ...(r1.requestState !== undefined + ? { requestState: r1.requestState } + : {}) + }, + { sessionId } + ); + if (!isCompleteResult(r2)) { + errs.push('round 2 MUST be complete with all three answers'); + } + } + checks.push({ + id, + name, + description, + status: errs.length === 0 ? 'SUCCESS' : 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: errs.length > 0 ? 
errs.join('; ') : undefined, + specReferences: [SEP_2322_REF] + }); + } catch (error) { + checks.push(failureCheck(id, name, description, error)); + } + } + + // Check 6: multi-round flow accumulates answers via requestState. + { + const id = 'mrtr-multi-round-flow'; + const name = 'MrtrMultiRoundFlow'; + const description = + 'A handler may take 2+ MRTR rounds; each round mints a fresh requestState; final result MUST reflect answers from every round'; + try { + const r1 = await rawRequest( + serverUrl, + 'tools/call', + { name: 'test_incomplete_result_multi_round', arguments: {} }, + { sessionId } + ); + const errs: string[] = []; + if (!isIncompleteResult(r1)) { + errs.push('round 1 MUST be IncompleteResult'); + } + if (!r1.requestState) { + errs.push('round 1 MUST mint requestState for multi-round flow'); + } + const k1 = Object.keys(r1.inputRequests ?? {})[0]; + + const r2 = await rawRequest( + serverUrl, + 'tools/call', + { + name: 'test_incomplete_result_multi_round', + arguments: {}, + inputResponses: { [k1]: mockElicitResponse({ name: 'Alice' }) }, + requestState: r1.requestState + }, + { sessionId } + ); + if (!isIncompleteResult(r2)) { + errs.push('round 2 MUST still be IncompleteResult (asks for step2)'); + } + if (!r2.requestState) { + errs.push('round 2 MUST mint a fresh requestState'); + } + if (r2.requestState === r1.requestState) { + errs.push( + 'round 2 requestState MUST differ from round 1 (each round mints a fresh token)' + ); + } + const k2 = Object.keys(r2.inputRequests ?? {})[0]; + + const r3 = await rawRequest( + serverUrl, + 'tools/call', + { + name: 'test_incomplete_result_multi_round', + arguments: {}, + inputResponses: { [k2]: mockElicitResponse({ color: 'blue' }) }, + requestState: r2.requestState + }, + { sessionId } + ); + if (!isCompleteResult(r3)) { + errs.push('round 3 MUST be complete'); + } + const text = r3.content?.[0]?.text ?? 
''; + if (!/Alice/.test(text)) { + errs.push( + 'final result MUST reflect round 1 answer (server forwards via requestState)' + ); + } + if (!/blue/.test(text)) { + errs.push('final result MUST reflect round 2 answer'); + } + checks.push({ + id, + name, + description, + status: errs.length === 0 ? 'SUCCESS' : 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: errs.length > 0 ? errs.join('; ') : undefined, + specReferences: [SEP_2322_REF] + }); + } catch (error) { + checks.push(failureCheck(id, name, description, error)); + } + } + + // Check 7: wrong-key inputResponses → server re-requests. + { + const id = 'mrtr-wrong-input-key-rerequests'; + const name = 'MrtrWrongInputKeyRerequests'; + const description = + 'When the client sends inputResponses with a key the server did not emit, the server SHOULD re-request via IncompleteResult'; + try { + const r1 = await rawRequest( + serverUrl, + 'tools/call', + { + name: 'test_incomplete_result_elicitation', + arguments: {}, + inputResponses: { wrong_key: mockElicitResponse({ data: 'wrong' }) } + }, + { sessionId } + ); + const errs: string[] = []; + if (!isIncompleteResult(r1)) { + errs.push( + `expected IncompleteResult re-request when inputResponses key is wrong; got ${JSON.stringify(r1)}` + ); + } + checks.push({ + id, + name, + description, + status: errs.length === 0 ? 'SUCCESS' : 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: errs.length > 0 ? errs.join('; ') : undefined, + specReferences: [SEP_2322_REF] + }); + } catch (error) { + checks.push(failureCheck(id, name, description, error)); + } + } + + // Check 8: SKIPPED — MRTR → Tasks composition. + // Tracking placeholder; spec made this normative in commit 451f5e1 + // (Apr 30) but two blockers remain before this can be enabled: + // (a) Spec watch on the MRTR resultType discriminator value + // (input_required vs incomplete; see helpers.ts SPEC WATCH). 
+ // (b) Reference servers need middleware that observes the + // handler's IncompleteResult signal BEFORE creating a task — + // the natural implementation pattern (create task up-front, + // run handler in goroutine) doesn't expose the signal in time. + // Tracked in https://github.com/panyam/mcpkit/issues/347 as + // one example impl that hits this; SDKs in any language will + // need an equivalent fix. + { + checks.push({ + id: 'mrtr-tasks-composition', + name: 'MrtrTasksComposition', + description: + 'MRTR loop gathers input then final round returns CreateTaskResult (SEP-2663 451f5e1; deferred — spec authors disagree on the resultType discriminator value, and reference implementations still in flight)', + status: 'SKIPPED', + timestamp: new Date().toISOString(), + errorMessage: + "Skipped: deferred until (a) spec authors converge on the MRTR resultType value (input_required vs incomplete) and (b) reference servers can observe the handler's IsIncomplete signal before creating a task.", + specReferences: [ + SEP_2322_REF, + { + id: 'SEP-2663', + url: 'https://github.com/modelcontextprotocol/specification/pull/2663' + } + ] + }); + } + + return checks; + } +} diff --git a/src/scenarios/server/mrtr/helpers.ts b/src/scenarios/server/mrtr/helpers.ts new file mode 100644 index 0000000..7e0ee19 --- /dev/null +++ b/src/scenarios/server/mrtr/helpers.ts @@ -0,0 +1,82 @@ +/** + * MRTR (SEP-2322 ephemeral) scenario helpers. + * + * Reuses the raw-rpc + session bootstrap from the tasks scenarios since + * MRTR's wire shape (resultType discriminator, requestState, inputRequests) + * is the SEP-2322 base that SEP-2663 builds on. The MRTR resultType value + * is centralized here so it's a one-liner to flip when the spec converges + * (SEP-2322 draft uses "input_required", SEP-2663 draft uses "incomplete"; + * see prezaei comment on PR 2663 for the open question). 
+ */ + +import type { ConformanceCheck, SpecReference } from '../../../types'; + +export const SEP_2322_REF: SpecReference = { + id: 'SEP-2322', + url: 'https://github.com/modelcontextprotocol/specification/pull/2322' +}; + +// SPEC WATCH — MRTR resultType discriminator value +// SEP-2322 (MRTR) and SEP-2663 (Tasks Extension) currently disagree on +// the wire value: SEP-2322's draft uses "input_required", SEP-2663's +// draft uses "incomplete". Awaiting alignment between SEP authors +// (PR 2663 comment 4381885336 + PR 2322 comment 4381884825). When the +// spec converges, this single constant flips. +export const MRTR_INCOMPLETE_RESULT_TYPE = 'incomplete'; + +export function isIncompleteResult(result: any): boolean { + if (!result) return false; + if (result.resultType === MRTR_INCOMPLETE_RESULT_TYPE) return true; + return 'inputRequests' in result || 'requestState' in result; +} + +export function isCompleteResult(result: any): boolean { + if (!result) return false; + if (result.resultType === 'complete') return true; + if (!('resultType' in result)) return true; + return !isIncompleteResult(result); +} + +/** Build an ElicitResult-shaped mock response payload. */ +export function mockElicitResponse( + content: Record +): Record { + return { action: 'accept', content }; +} + +/** Build a CreateMessageResult-shaped mock response payload. */ +export function mockSamplingResponse(text: string): Record { + return { + role: 'assistant', + content: { type: 'text', text }, + model: 'test-model', + stopReason: 'endTurn' + }; +} + +/** Build a ListRootsResult-shaped mock response payload. */ +export function mockListRootsResponse(): Record { + return { roots: [{ uri: 'file:///test/root', name: 'Test Root' }] }; +} + +export function errMsg(error: unknown): string { + return error instanceof Error ? 
error.message : String(error); +} + +export function failureCheck( + id: string, + name: string, + description: string, + error: unknown, + specReferences: SpecReference[] = [SEP_2322_REF] +): ConformanceCheck { + return { + id, + name, + description, + status: 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: errMsg(error), + specReferences + }; +} diff --git a/src/scenarios/server/tasks/all-scenarios.test.ts b/src/scenarios/server/tasks/all-scenarios.test.ts new file mode 100644 index 0000000..76136f8 --- /dev/null +++ b/src/scenarios/server/tasks/all-scenarios.test.ts @@ -0,0 +1,176 @@ +/** + * SEP-2663 Tasks extension test runner. + * + * Iterates the tasks server scenarios against a SEP-2663-conformant + * server. Configuration is brand-neutral and language-agnostic: + * + * 1. Point at an already-running server: + * TASKS_SERVER_URL=http://localhost:8080/mcp npm test -- tasks/all-scenarios.test.ts + * + * 2. Auto-spawn a fixture before tests (any language; the runner just + * shells out to TASKS_SERVER_CMD and waits until TASKS_SERVER_URL + * becomes reachable): + * TASKS_SERVER_URL=http://localhost:18092/mcp \ + * TASKS_SERVER_CMD="/path/to/server --port 18092" \ + * npm test -- tasks/all-scenarios.test.ts + * + * If TASKS_SERVER_URL is unset, the suite is skipped — letting CI runs + * against the everything-server stay green until the upstream fixture + * grows SEP-2663 support. + * + * Readiness is detected by polling the URL's host/port for a TCP + * connection (deliberately language-agnostic — no log-line scanning). + * + * The fixture server can be implemented in any language as long as it + * exposes a SEP-2663 conformant Streamable HTTP MCP endpoint. Anyone is + * free to bring their own; one example reference implementation lives + * at https://github.com/panyam/mcpkit/tree/main/examples/tasks-v2. 
/**
 * SEP-2663 Tasks extension test runner.
 *
 * Runs every tasks server scenario against the server at
 * TASKS_SERVER_URL. When TASKS_SERVER_CMD is also set, that command is
 * spawned through `sh -c` before the tests and stopped afterwards;
 * readiness is detected by polling the URL's host/port for a TCP
 * connection. When TASKS_SERVER_URL is unset the whole suite is skipped.
 */

import { spawn, ChildProcess } from 'child_process';
import { connect } from 'net';
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import { TasksLifecycleScenario } from './lifecycle';
import { TasksCapabilityNegotiationScenario } from './capability';
import { TasksWireFieldsScenario } from './wire-fields';
import { TasksRequestStateScenario } from './request-state';
import { TasksMRTRInputScenario } from './mrtr-input';
import { TasksRequestHeadersScenario } from './headers';
import { TasksDispatchScenario } from './dispatch';
import { TasksStatusNotificationsScenario } from './notifications';

const SERVER_URL = process.env.TASKS_SERVER_URL;
const SERVER_CMD = process.env.TASKS_SERVER_CMD;
const SERVER_STARTUP_TIMEOUT_MS = 15_000;
/** How long afterAll waits for SIGTERM to take effect before SIGKILL. */
const SERVER_SHUTDOWN_GRACE_MS = 3_000;
const SHOULD_SPAWN = Boolean(SERVER_URL && SERVER_CMD);
const HAVE_TARGET = Boolean(SERVER_URL);

const TASKS_SCENARIOS = [
  new TasksLifecycleScenario(),
  new TasksCapabilityNegotiationScenario(),
  new TasksWireFieldsScenario(),
  new TasksRequestStateScenario(),
  new TasksMRTRInputScenario(),
  new TasksRequestHeadersScenario(),
  new TasksDispatchScenario(),
  new TasksStatusNotificationsScenario()
];

const describeIfTarget = HAVE_TARGET ? describe : describe.skip;

/**
 * True once the child has actually terminated.
 *
 * `ChildProcess.killed` is NOT usable for this: it flips to true as soon
 * as a signal has been sent, whether or not the process has exited.
 */
function hasExited(child: ChildProcess): boolean {
  return child.exitCode !== null || child.signalCode !== null;
}

describeIfTarget('SEP-2663 Tasks — server conformance', () => {
  let serverProcess: ChildProcess | null = null;

  beforeAll(async () => {
    if (!SHOULD_SPAWN) return;

    const child = spawn('sh', ['-c', SERVER_CMD!], {
      stdio: ['ignore', 'pipe', 'pipe'],
      detached: false
    });
    serverProcess = child;

    let stdoutBuf = '';
    let stderrBuf = '';
    let spawnError: Error | null = null;
    child.stdout?.on('data', (b) => {
      stdoutBuf += b.toString();
    });
    child.stderr?.on('data', (b) => {
      stderrBuf += b.toString();
    });
    // Without a listener a failed spawn surfaces as an uncaught exception.
    child.on('error', (err) => {
      spawnError = err;
    });
    child.on('exit', (code) => {
      if (code !== null && code !== 0) {
        console.error(
          `tasks fixture exited unexpectedly with code ${code}.\nSTDOUT: ${stdoutBuf}\nSTDERR: ${stderrBuf}`
        );
      }
    });

    try {
      // Stop polling as soon as the fixture is known to be dead instead
      // of burning the whole startup timeout.
      await waitForTcpReady(
        SERVER_URL!,
        SERVER_STARTUP_TIMEOUT_MS,
        () => spawnError !== null || hasExited(child)
      );
    } catch (err) {
      if (!hasExited(child)) {
        child.kill('SIGKILL');
      }
      const reason = err instanceof Error ? err.message : String(err);
      throw new Error(
        `tasks fixture did not become reachable within ${SERVER_STARTUP_TIMEOUT_MS}ms: ${reason}\nSTDOUT: ${stdoutBuf}\nSTDERR: ${stderrBuf}`
      );
    }
  }, SERVER_STARTUP_TIMEOUT_MS + 5_000);

  afterAll(async () => {
    if (!SHOULD_SPAWN) return;
    const child = serverProcess;
    serverProcess = null;
    // Nothing to stop, or it already terminated (no 'exit' would ever
    // fire again, so waiting would only cost the full grace period).
    if (!child || hasExited(child)) return;

    await new Promise<void>((resolve) => {
      const timer = setTimeout(() => {
        // Escalate only if SIGTERM did not actually end the process.
        if (!hasExited(child)) {
          child.kill('SIGKILL');
        }
        resolve();
      }, SERVER_SHUTDOWN_GRACE_MS);
      // Listen before signalling so a fast exit cannot be missed.
      child.once('exit', () => {
        clearTimeout(timer);
        resolve();
      });
      child.kill('SIGTERM');
    });
  });

  for (const scenario of TASKS_SCENARIOS) {
    it(`${scenario.name} — all checks succeed against fixture`, async () => {
      const checks = await scenario.run(SERVER_URL!);
      expect(checks.length).toBeGreaterThan(0);
      // WARNING is treated as a failure here, same as FAILURE.
      const failures = checks.filter(
        (c) => c.status === 'FAILURE' || c.status === 'WARNING'
      );
      if (failures.length > 0) {
        const detail = failures
          .map((c) => `  - ${c.id}: ${c.errorMessage ?? '(no message)'}`)
          .join('\n');
        throw new Error(
          `${failures.length}/${checks.length} checks failed:\n${detail}`
        );
      }
    });
  }
});

/** Attempt a single TCP connection; rejects on error or after `timeoutMs` idle. */
function tryConnect(
  host: string,
  port: number,
  timeoutMs: number
): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    const socket = connect({ host, port }, () => {
      socket.end();
      resolve();
    });
    socket.once('error', (err) => {
      socket.destroy();
      reject(err);
    });
    socket.setTimeout(timeoutMs, () => {
      socket.destroy();
      reject(new Error('connect timeout'));
    });
  });
}

/**
 * Poll the host/port of the given URL until a TCP connection succeeds
 * or the timeout elapses. Language-agnostic readiness check — works
 * for any server that binds before serving requests.
 *
 * @param url - Server URL; only protocol (for the default port), hostname
 *   and port are used.
 * @param timeoutMs - Overall budget for the polling loop.
 * @param shouldAbort - Optional predicate checked before every attempt;
 *   returning true stops polling immediately (e.g. the spawned fixture
 *   has already exited).
 * @throws Error when aborted or when no connection succeeded in time.
 */
async function waitForTcpReady(
  url: string,
  timeoutMs: number,
  shouldAbort: () => boolean = () => false
): Promise<void> {
  const u = new URL(url);
  const port = parseInt(u.port || (u.protocol === 'https:' ? '443' : '80'), 10);
  // URL keeps the brackets on IPv6 literals ("[::1]"); net.connect wants
  // the bare address.
  const host = u.hostname.replace(/^\[|\]$/g, '');
  const deadline = Date.now() + timeoutMs;
  let lastErr: Error | null = null;

  while (Date.now() < deadline) {
    if (shouldAbort()) {
      throw new Error(
        `${host}:${port} polling aborted: fixture process is no longer running`
      );
    }
    try {
      await tryConnect(host, port, 1_000);
      return;
    } catch (err) {
      lastErr = err instanceof Error ? err : new Error(String(err));
      await new Promise<void>((r) => setTimeout(r, 200));
    }
  }
  throw new Error(
    `${host}:${port} did not accept TCP connections (last: ${lastErr?.message ?? 'unknown'})`
  );
}
/**
 * SEP-2663 Tasks Extension — capability negotiation conformance.
 *
 * Tests that the server advertises the io.modelcontextprotocol/tasks
 * extension correctly, gates the v2 task surface on negotiation, and
 * supports SEP-2575 per-request capability overrides.
 *
 * Required server fixtures:
 *   - greet — sync-only, returns "Hello, {name}!"
 *   - slow_compute — task-supporting, sleeps N seconds
 */

import {
  ClientScenario,
  ConformanceCheck,
  ScenarioSpecTag,
  DRAFT_PROTOCOL_VERSION
} from '../../../types';
import {
  TASKS_EXTENSION_ID,
  SEP_2663_REF,
  SEP_2575_REF,
  errMsg,
  failureCheck,
  initRawSession,
  rawRequest
} from './helpers';

/** Identity fields shared by every record a single check can emit. */
type CheckMeta = Pick<ConformanceCheck, 'id' | 'name' | 'description'>;

/**
 * Turn a list of collected violations into a check record: SUCCESS when
 * the list is empty, otherwise FAILURE with the violations joined by "; ".
 */
function settle(
  meta: CheckMeta,
  errs: readonly string[],
  specReferences: ConformanceCheck['specReferences'],
  details?: ConformanceCheck['details']
): ConformanceCheck {
  const passed = errs.length === 0;
  return {
    ...meta,
    status: passed ? 'SUCCESS' : 'FAILURE',
    timestamp: new Date().toISOString(),
    errorMessage: passed ? undefined : errs.join('; '),
    specReferences,
    ...(details === undefined ? {} : { details })
  };
}

/** Read the `code` property of a thrown value, if it has one. */
function rejectionCode(error: unknown): unknown {
  return typeof error === 'object' && error !== null
    ? (error as { code?: unknown }).code
    : undefined;
}

/**
 * Render what a rejected request actually produced. Falls back to the
 * error message when there is no code, so transport failures are not
 * reported as an empty "got".
 */
function describeRejection(error: unknown): string {
  const code = rejectionCode(error);
  return code == null
    ? `no JSON-RPC error code (${errMsg(error)})`
    : String(code);
}

export class TasksCapabilityNegotiationScenario implements ClientScenario {
  name = 'tasks-capability-negotiation';
  specVersions: ScenarioSpecTag[] = ['extension', DRAFT_PROTOCOL_VERSION];
  description = `Test SEP-2663 capability negotiation for the tasks extension.

**Server Implementation Requirements:**

**Capability advertisement (SEP-2663):**
- The server MUST advertise \`io.modelcontextprotocol/tasks\` under
  \`capabilities.extensions\` in its \`initialize\` response.
- It MUST NOT use a v1-style \`capabilities.tasks\` slot (the v1 surface
  is replaced by the extension).

**Gating without negotiation (SEP-2663):**
- For sessions that did NOT declare the \`io.modelcontextprotocol/tasks\`
  extension during \`initialize\`, the server MUST reject \`tasks/get\`,
  \`tasks/update\`, and \`tasks/cancel\` with JSON-RPC \`-32601\`
  (MethodNotFound) — clients that didn't negotiate the surface should
  not see it.
- A \`tools/call\` from such a session MUST NOT return
  \`CreateTaskResult\`. Task-supporting tools fall through to synchronous
  execution and return a plain \`ToolResult\` with
  \`resultType:"complete"\`.

**Per-request opt-in (SEP-2575):**
- A session that did not declare the extension at session level can
  opt into task creation for a single \`tools/call\` by including the
  extension under \`_meta.io.modelcontextprotocol/clientCapabilities.extensions\`.
  The server MUST honor the per-request opt-in and produce a
  \`CreateTaskResult\` for that call.`;

  /**
   * Run the four capability checks in order against `serverUrl`.
   *
   * Opens two sessions — one declaring the tasks extension, one not. If
   * either handshake fails, a single bootstrap FAILURE is returned and
   * no further checks run.
   */
  async run(serverUrl: string): Promise<ConformanceCheck[]> {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- raw capabilities payload
    let withExt: { sessionId: string; serverCapabilities: any };
    let withoutExt: { sessionId: string };
    try {
      withExt = await initRawSession(serverUrl, {
        capabilities: {
          elicitation: {},
          sampling: {},
          extensions: { [TASKS_EXTENSION_ID]: {} }
        }
      });
      withoutExt = await initRawSession(serverUrl, { capabilities: {} });
    } catch (error) {
      return [
        {
          id: 'tasks-session-bootstrap',
          name: 'TasksSessionBootstrap',
          description:
            'Initialize handshakes (with + without extension) succeed',
          status: 'FAILURE',
          timestamp: new Date().toISOString(),
          errorMessage: `Failed to initialize: ${errMsg(error)}`,
          specReferences: [SEP_2663_REF]
        }
      ];
    }

    const checks: ConformanceCheck[] = [];
    checks.push(this.checkExtensionAdvertised(withExt.serverCapabilities));
    checks.push(await this.checkMethodsGated(serverUrl, withoutExt.sessionId));
    checks.push(
      await this.checkSyncFallthrough(serverUrl, withoutExt.sessionId)
    );
    checks.push(
      await this.checkPerRequestOptIn(
        serverUrl,
        withoutExt.sessionId,
        withExt.sessionId
      )
    );
    return checks;
  }

  /** Check 1: extension is advertised under capabilities.extensions. */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- raw capabilities payload
  private checkExtensionAdvertised(serverCapabilities: any): ConformanceCheck {
    const meta: CheckMeta = {
      id: 'tasks-extension-advertised',
      name: 'TasksExtensionAdvertised',
      description: `Server advertises ${TASKS_EXTENSION_ID} under capabilities.extensions (and not capabilities.tasks)`
    };
    const caps = serverCapabilities ?? {};
    const errs: string[] = [];
    if (caps.tasks) {
      errs.push(
        'v1-style capabilities.tasks slot MUST NOT be used; tasks lives under capabilities.extensions'
      );
    }
    if (!caps.extensions) {
      errs.push('capabilities.extensions MUST be advertised');
    } else if (!caps.extensions[TASKS_EXTENSION_ID]) {
      errs.push(
        `capabilities.extensions["${TASKS_EXTENSION_ID}"] MUST be present`
      );
    }
    return settle(meta, errs, [SEP_2663_REF], {
      hasExtensions: Boolean(caps.extensions),
      hasTasksExtension: Boolean(caps.extensions?.[TASKS_EXTENSION_ID]),
      hasV1TasksSlot: Boolean(caps.tasks)
    });
  }

  /** Check 2: tasks/* methods reject with -32601 on a session without the extension. */
  private async checkMethodsGated(
    serverUrl: string,
    sessionId: string
  ): Promise<ConformanceCheck> {
    const meta: CheckMeta = {
      id: 'tasks-methods-gated-without-extension',
      name: 'TasksMethodsGatedWithoutExtension',
      description:
        'tasks/get, tasks/update, tasks/cancel return -32601 when extension was not negotiated'
    };
    const cases: Array<{ method: string; params: Record<string, unknown> }> = [
      { method: 'tasks/get', params: { taskId: 'gate-test' } },
      {
        method: 'tasks/update',
        params: { taskId: 'gate-test', inputResponses: {} }
      },
      { method: 'tasks/cancel', params: { taskId: 'gate-test' } }
    ];
    const errs: string[] = [];
    for (const tc of cases) {
      let rejection: unknown;
      let rejected = false;
      try {
        await rawRequest(serverUrl, tc.method, tc.params, { sessionId });
      } catch (e: unknown) {
        rejected = true;
        rejection = e;
      }
      if (!rejected) {
        errs.push(`${tc.method} MUST reject (it returned a result)`);
      } else if (rejectionCode(rejection) !== -32601) {
        errs.push(
          `${tc.method} MUST return -32601; got ${describeRejection(rejection)}`
        );
      }
    }
    return settle(meta, errs, [SEP_2663_REF]);
  }

  /** Check 3: tools/call without the extension yields a sync ToolResult, not a task. */
  private async checkSyncFallthrough(
    serverUrl: string,
    sessionId: string
  ): Promise<ConformanceCheck> {
    const meta: CheckMeta = {
      id: 'tasks-tools-call-without-extension-sync',
      name: 'TasksToolsCallWithoutExtensionSync',
      description:
        'tools/call from a session without the extension MUST fall through to sync (no CreateTaskResult, even for task-supporting tools)'
    };
    try {
      const result = await rawRequest(
        serverUrl,
        'tools/call',
        {
          name: 'slow_compute',
          arguments: { seconds: 0, label: 'capability-no-ext' }
        },
        { sessionId }
      );
      const errs: string[] = [];
      if (result.resultType === 'task') {
        errs.push(
          'tools/call without extension MUST NOT return resultType:"task"'
        );
      }
      if (result.taskId) {
        errs.push(
          `tools/call without extension MUST NOT carry top-level taskId; got ${result.taskId}`
        );
      }
      if (result.task) {
        errs.push(
          'tools/call without extension MUST NOT carry the v1-style nested `task` envelope'
        );
      }
      if (!result.content) {
        errs.push(
          'tools/call without extension MUST return sync ToolResult with content[]'
        );
      }
      return settle(meta, errs, [SEP_2663_REF], {
        resultType: result.resultType,
        hasTaskId: Boolean(result.taskId)
      });
    } catch (error) {
      return failureCheck(meta.id, meta.name, meta.description, error, [
        SEP_2663_REF
      ]);
    }
  }

  /**
   * Check 4: SEP-2575 per-request `_meta` opt-in produces a
   * CreateTaskResult on a session that did not negotiate the extension.
   *
   * @param sessionId - Session WITHOUT the extension; issues the call.
   * @param cleanupSessionId - Session WITH the extension; used for the
   *   best-effort cancel afterwards.
   */
  private async checkPerRequestOptIn(
    serverUrl: string,
    sessionId: string,
    cleanupSessionId: string
  ): Promise<ConformanceCheck> {
    const meta: CheckMeta = {
      id: 'tasks-per-request-meta-opt-in',
      name: 'TasksPerRequestMetaOptIn',
      description:
        'tools/call with extension declared in _meta.io.modelcontextprotocol/clientCapabilities produces a CreateTaskResult even when the session did not negotiate the extension'
    };
    try {
      const result = await rawRequest(
        serverUrl,
        'tools/call',
        {
          name: 'slow_compute',
          arguments: { seconds: 1, label: 'capability-meta-opt' }
        },
        {
          sessionId,
          meta: {
            'io.modelcontextprotocol/clientCapabilities': {
              extensions: { [TASKS_EXTENSION_ID]: {} }
            }
          }
        }
      );
      const errs: string[] = [];
      if (result.resultType !== 'task') {
        errs.push(
          `expected resultType:"task" via per-request opt-in; got ${JSON.stringify(result.resultType)}`
        );
      }
      if (!result.taskId) {
        errs.push(
          'per-request opt-in MUST produce a CreateTaskResult with top-level taskId'
        );
      }
      if (result.task) {
        errs.push(
          'CreateTaskResult MUST be flat (no nested `task` wrapper) even on per-request opt-in path'
        );
      }
      // Best-effort cleanup so the 1s task does not keep running on the
      // server after the check.
      // NOTE(review): the cancel is sent on the extension-enabled session,
      // not the one that created the task — confirm the fixture allows
      // cross-session cancellation, otherwise this is a silent no-op.
      if (result.taskId) {
        try {
          await rawRequest(
            serverUrl,
            'tasks/cancel',
            { taskId: result.taskId },
            { sessionId: cleanupSessionId }
          );
        } catch {
          /* swallow — cleanup best-effort */
        }
      }
      return settle(meta, errs, [SEP_2575_REF, SEP_2663_REF], {
        resultType: result.resultType,
        taskId: result.taskId
      });
    } catch (error) {
      return failureCheck(meta.id, meta.name, meta.description, error, [
        SEP_2575_REF
      ]);
    }
  }
}
/**
 * SEP-2663 Tasks Extension — dispatch + envelope conformance.
 *
 * Bundles a number of small, related checks under one scenario: removed
 * v1 methods, server-directed task creation, the legacy `task` param,
 * the immediate-result shortcut, the SEP-2322 resultType:"complete"
 * discriminator, strong consistency of tasks/get, and unknown-taskId
 * rejection.
 *
 * Required server fixtures:
 *   - greet — sync-only
 *   - slow_compute — task-supporting (seconds:0 = instant)
 *   - confirm_delete — task-supporting, parks for elicitation
 *   - failing_job — task-supporting, returns tool error
 */

import {
  ClientScenario,
  ConformanceCheck,
  ScenarioSpecTag,
  DRAFT_PROTOCOL_VERSION
} from '../../../types';
import {
  TASKS_EXTENSION_ID,
  SEP_2322_REF,
  SEP_2663_REF,
  errMsg,
  failureCheck,
  initRawSession,
  rawRequest,
  waitForStatus,
  waitForTerminal
} from './helpers';

/** Identity fields shared by every record a single check can emit. */
type CheckMeta = Pick<ConformanceCheck, 'id' | 'name' | 'description'>;

/**
 * Turn a list of collected violations into a check record: SUCCESS when
 * the list is empty, otherwise FAILURE with the violations joined by "; ".
 */
function settle(
  meta: CheckMeta,
  errs: readonly string[],
  specReferences: ConformanceCheck['specReferences'],
  details?: ConformanceCheck['details']
): ConformanceCheck {
  const passed = errs.length === 0;
  return {
    ...meta,
    status: passed ? 'SUCCESS' : 'FAILURE',
    timestamp: new Date().toISOString(),
    errorMessage: passed ? undefined : errs.join('; '),
    specReferences,
    ...(details === undefined ? {} : { details })
  };
}

/** Read the `code` property of a thrown value, if it has one. */
function rejectionCode(error: unknown): unknown {
  return typeof error === 'object' && error !== null
    ? (error as { code?: unknown }).code
    : undefined;
}

/**
 * Render what a rejected request actually produced. Falls back to the
 * error message when there is no code, so transport failures are not
 * reported as an empty "got".
 */
function describeRejection(error: unknown): string {
  const code = rejectionCode(error);
  return code == null
    ? `no JSON-RPC error code (${errMsg(error)})`
    : String(code);
}

export class TasksDispatchScenario implements ClientScenario {
  name = 'tasks-dispatch-and-envelope';
  specVersions: ScenarioSpecTag[] = ['extension', DRAFT_PROTOCOL_VERSION];
  description = `Test SEP-2663 dispatch / envelope rules across the tasks surface.

**Server Implementation Requirements:**

**Removed v1 methods (SEP-2663):**
- \`tasks/result\` is removed in v2 — the result is inlined on
  \`tasks/get\`. Servers MUST reject the method with JSON-RPC \`-32601\`.
- \`tasks/list\` is removed in v2. Servers MUST reject it with
  \`-32601\`.

**Server-directed task creation (SEP-2663):**
- The client does NOT send a \`task\` hint param. The server alone
  decides whether to create a task. A \`tools/call\` against a
  task-supporting tool MUST produce \`CreateTaskResult\` even with no
  client hint.

**Legacy \`task\` param tolerated (SEP-2663):**
- A v1 client may still send \`task: { ttl, pollInterval }\` on
  \`tools/call\`. The server MUST tolerate it (no error) AND MUST NOT
  promote a sync-only tool to a task on its presence. The body
  arguments + tool registration are authoritative.

**Immediate-result shortcut (SEP-2663):**
- A server MAY return a sync \`ToolResult\` for task-supporting tools
  when the operation completes fast enough. Either return a
  \`CreateTaskResult\` (with \`resultType:"task"\`) or a sync
  \`ToolResult\` (with \`resultType:"complete"\`); both are valid.

**resultType:"complete" on non-task responses (SEP-2322):**
- Every JSON-RPC response on the tools+tasks surface other than a
  CreateTaskResult MUST carry \`resultType:"complete"\`. This applies
  to: sync \`tools/call\`, \`tasks/get\`, \`tasks/update\` ack,
  \`tasks/cancel\` ack.

**Strong consistency / durable create (SEP-2663):**
- A server MUST NOT return \`CreateTaskResult\` until the task is
  durably created — that is, until a \`tasks/get\` for the returned
  \`taskId\` would resolve. Issuing \`tasks/get\` immediately after the
  CreateTaskResult arrives MUST succeed, not -32602.

**Unknown taskId on tasks/get (SEP-2663):**
- \`tasks/get\` for a taskId the server doesn't recognize MUST return
  JSON-RPC \`-32602\` (InvalidParams). Mirrors the same rule for
  \`tasks/cancel\` (clarified upstream in spec commit d963ad0).`;

  /**
   * Run the eight dispatch/envelope checks in order against `serverUrl`.
   *
   * If the initialize handshake fails, a single bootstrap FAILURE is
   * returned and no further checks run.
   */
  async run(serverUrl: string): Promise<ConformanceCheck[]> {
    let sessionId: string;
    try {
      ({ sessionId } = await initRawSession(serverUrl, {
        capabilities: {
          elicitation: {},
          sampling: {},
          extensions: { [TASKS_EXTENSION_ID]: {} }
        }
      }));
    } catch (error) {
      return [
        {
          id: 'tasks-session-bootstrap',
          name: 'TasksSessionBootstrap',
          description:
            'Initialize handshake declaring io.modelcontextprotocol/tasks extension succeeds',
          status: 'FAILURE',
          timestamp: new Date().toISOString(),
          errorMessage: `Failed to initialize: ${errMsg(error)}`,
          specReferences: [SEP_2663_REF]
        }
      ];
    }

    const checks: ConformanceCheck[] = [];

    // Check 1: tasks/result removed.
    checks.push(
      await this.expectRejection(
        serverUrl,
        sessionId,
        {
          id: 'tasks-removed-tasks-result',
          name: 'TasksRemovedTasksResult',
          description:
            'tasks/result is removed in v2 and MUST reject with -32601'
        },
        'tasks/result',
        { taskId: 'any' },
        -32601,
        'tasks/result returned a result instead of -32601'
      )
    );

    // Check 2: tasks/list removed.
    checks.push(
      await this.expectRejection(
        serverUrl,
        sessionId,
        {
          id: 'tasks-removed-tasks-list',
          name: 'TasksRemovedTasksList',
          description: 'tasks/list is removed in v2 and MUST reject with -32601'
        },
        'tasks/list',
        {},
        -32601,
        'tasks/list returned a result instead of -32601'
      )
    );

    checks.push(await this.checkServerDirectedCreation(serverUrl, sessionId));
    checks.push(await this.checkLegacyTaskParam(serverUrl, sessionId));
    checks.push(await this.checkImmediateShortcut(serverUrl, sessionId));
    checks.push(await this.checkResultTypeComplete(serverUrl, sessionId));
    checks.push(await this.checkStrongConsistency(serverUrl, sessionId));

    // Check 8: tasks/get with unknown taskId returns -32602.
    checks.push(
      await this.expectRejection(
        serverUrl,
        sessionId,
        {
          id: 'tasks-get-unknown-task-id-rejected',
          name: 'TasksGetUnknownTaskIdRejected',
          description:
            'tasks/get for a taskId the server does not recognize MUST return -32602'
        },
        'tasks/get',
        { taskId: 'tasks-conformance-nonexistent-12345' },
        -32602,
        'tasks/get with unknown taskId returned a result'
      )
    );

    return checks;
  }

  /** Cancel a task and ignore any failure; used only for cleanup. */
  private async cancelQuietly(
    serverUrl: string,
    sessionId: string,
    taskId: string
  ): Promise<void> {
    try {
      await rawRequest(serverUrl, 'tasks/cancel', { taskId }, { sessionId });
    } catch {
      /* swallow — cleanup best-effort */
    }
  }

  /**
   * Issue `method` and require it to reject with `expectedCode`.
   *
   * @param resolvedMessage - errorMessage to report when the server
   *   answered with a result instead of rejecting.
   */
  private async expectRejection(
    serverUrl: string,
    sessionId: string,
    meta: CheckMeta,
    method: string,
    params: Record<string, unknown>,
    expectedCode: number,
    resolvedMessage: string
  ): Promise<ConformanceCheck> {
    try {
      await rawRequest(serverUrl, method, params, { sessionId });
    } catch (e: unknown) {
      const errs =
        rejectionCode(e) === expectedCode
          ? []
          : [`expected ${expectedCode}; got ${describeRejection(e)}`];
      return settle(meta, errs, [SEP_2663_REF]);
    }
    return settle(meta, [resolvedMessage], [SEP_2663_REF]);
  }

  /** Check 3: server-directed task creation without client hint. */
  private async checkServerDirectedCreation(
    serverUrl: string,
    sessionId: string
  ): Promise<ConformanceCheck> {
    const meta: CheckMeta = {
      id: 'tasks-server-directed-creation-no-hint',
      name: 'TasksServerDirectedCreationNoHint',
      description:
        'tools/call with no client `task` hint param MUST still produce CreateTaskResult for task-supporting tools'
    };
    try {
      const result = await rawRequest(
        serverUrl,
        'tools/call',
        { name: 'failing_job', arguments: {} },
        { sessionId }
      );
      const errs: string[] = [];
      if (result.resultType !== 'task' || !result.taskId) {
        errs.push(
          `expected CreateTaskResult; got resultType=${JSON.stringify(result.resultType)}, taskId=${JSON.stringify(result.taskId)}`
        );
      }
      // Best-effort wait so the task is not left running after the check.
      if (result.taskId) {
        try {
          await waitForTerminal(serverUrl, sessionId, result.taskId);
        } catch {
          /* swallow */
        }
      }
      return settle(meta, errs, [SEP_2663_REF]);
    } catch (error) {
      return failureCheck(meta.id, meta.name, meta.description, error, [
        SEP_2663_REF
      ]);
    }
  }

  /** Check 4: legacy `task` param tolerated + ignored on sync tool. */
  private async checkLegacyTaskParam(
    serverUrl: string,
    sessionId: string
  ): Promise<ConformanceCheck> {
    const meta: CheckMeta = {
      id: 'tasks-legacy-task-param-ignored',
      name: 'TasksLegacyTaskParamIgnored',
      description:
        'tools/call with legacy `task` param against a sync tool MUST NOT error and MUST NOT be promoted to a task'
    };
    try {
      const result = await rawRequest(
        serverUrl,
        'tools/call',
        {
          name: 'greet',
          arguments: { name: 'legacy-hint' },
          // Legacy v1 hint that the server MUST ignore.
          task: { ttl: 60_000, pollInterval: 100 }
        },
        { sessionId }
      );
      const errs: string[] = [];
      if (result.resultType === 'task') {
        errs.push('legacy `task` param MUST NOT promote a sync tool to a task');
      }
      if (result.taskId) {
        errs.push(
          `sync tool with legacy hint MUST NOT carry top-level taskId; got ${result.taskId}`
        );
      }
      if (!Array.isArray(result.content) || result.content.length === 0) {
        errs.push('sync tool MUST still return content[]');
      }
      return settle(meta, errs, [SEP_2663_REF]);
    } catch (error) {
      return failureCheck(meta.id, meta.name, meta.description, error, [
        SEP_2663_REF
      ]);
    }
  }

  /**
   * Check 5: immediate-result shortcut. Either CreateTaskResult OR sync
   * ToolResult is acceptable for an instant operation.
   */
  private async checkImmediateShortcut(
    serverUrl: string,
    sessionId: string
  ): Promise<ConformanceCheck> {
    const meta: CheckMeta = {
      id: 'tasks-immediate-result-shortcut',
      name: 'TasksImmediateResultShortcut',
      description:
        'For a fast operation, a task-supporting tool MAY skip task creation and return a sync ToolResult; either path is valid'
    };
    try {
      const result = await rawRequest(
        serverUrl,
        'tools/call',
        {
          name: 'slow_compute',
          arguments: { seconds: 0, label: 'instant' }
        },
        { sessionId }
      );
      const errs: string[] = [];
      if (result.resultType === 'task') {
        if (!result.taskId) {
          errs.push(
            'task-path response MUST carry top-level taskId on CreateTaskResult'
          );
        }
      } else if (!Array.isArray(result.content)) {
        // Sync path.
        errs.push(
          'sync-path response MUST carry content[] for the immediate ToolResult'
        );
      }
      return settle(meta, errs, [SEP_2663_REF], {
        resultType: result.resultType
      });
    } catch (error) {
      return failureCheck(meta.id, meta.name, meta.description, error, [
        SEP_2663_REF
      ]);
    }
  }

  /**
   * Check 6: resultType:"complete" on every non-task response.
   *
   * Probes, in order: sync tools/call, tasks/get on a finished task,
   * tasks/cancel ack on a long-running task, tasks/update ack on a task
   * parked for input. Each task-based probe is skipped when the server
   * did not return a taskId for the creating call.
   */
  private async checkResultTypeComplete(
    serverUrl: string,
    sessionId: string
  ): Promise<ConformanceCheck> {
    const meta: CheckMeta = {
      id: 'tasks-result-type-complete-on-non-task-responses',
      name: 'TasksResultTypeCompleteOnNonTaskResponses',
      description:
        'Sync tools/call, tasks/get, tasks/update ack, and tasks/cancel ack MUST all carry resultType:"complete"'
    };
    const errs: string[] = [];
    const requireComplete = (
      label: string,
      result: { resultType?: unknown }
    ): void => {
      if (result.resultType !== 'complete') {
        errs.push(
          `${label} resultType = ${JSON.stringify(result.resultType)}, want "complete"`
        );
      }
    };

    try {
      // Sync tools/call.
      requireComplete(
        'sync tools/call',
        await rawRequest(
          serverUrl,
          'tools/call',
          { name: 'greet', arguments: { name: 'rt' } },
          { sessionId }
        )
      );

      // tasks/get against a fresh task.
      const created = await rawRequest(
        serverUrl,
        'tools/call',
        {
          name: 'slow_compute',
          arguments: { seconds: 0, label: 'rt-get' }
        },
        { sessionId }
      );
      if (created.taskId) {
        await waitForTerminal(serverUrl, sessionId, created.taskId);
        requireComplete(
          'tasks/get',
          await rawRequest(
            serverUrl,
            'tasks/get',
            { taskId: created.taskId },
            { sessionId }
          )
        );
      }

      // tasks/cancel ack on a fresh long-running task. The cancel under
      // test doubles as the cleanup for this task.
      const longLived = await rawRequest(
        serverUrl,
        'tools/call',
        {
          name: 'slow_compute',
          arguments: { seconds: 60, label: 'rt-cancel' }
        },
        { sessionId }
      );
      if (longLived.taskId) {
        requireComplete(
          'tasks/cancel ack',
          await rawRequest(
            serverUrl,
            'tasks/cancel',
            { taskId: longLived.taskId },
            { sessionId }
          )
        );
      }

      // tasks/update ack on a parked elicitation task.
      const elicit = await rawRequest(
        serverUrl,
        'tools/call',
        { name: 'confirm_delete', arguments: { filename: 'rt.txt' } },
        { sessionId }
      );
      if (elicit.taskId) {
        try {
          await waitForStatus(
            serverUrl,
            sessionId,
            elicit.taskId,
            'input_required',
            5_000
          );
          requireComplete(
            'tasks/update ack',
            await rawRequest(
              serverUrl,
              'tasks/update',
              {
                taskId: elicit.taskId,
                inputResponses: { 'unknown-key': { ignored: true } }
              },
              { sessionId }
            )
          );
        } finally {
          // Always release the parked task, even if the wait or the
          // update threw.
          await this.cancelQuietly(serverUrl, sessionId, elicit.taskId);
        }
      }
      return settle(meta, errs, [SEP_2322_REF, SEP_2663_REF]);
    } catch (error) {
      // Keep violations found before the throw instead of dropping them.
      return failureCheck(
        meta.id,
        meta.name,
        meta.description,
        new Error([...errs, errMsg(error)].join('; ')),
        [SEP_2322_REF]
      );
    }
  }

  /** Check 7: strong consistency — immediate tasks/get after CreateTaskResult. */
  private async checkStrongConsistency(
    serverUrl: string,
    sessionId: string
  ): Promise<ConformanceCheck> {
    const meta: CheckMeta = {
      id: 'tasks-strong-consistency-immediate-get',
      name: 'TasksStrongConsistencyImmediateGet',
      description:
        'tasks/get issued immediately after CreateTaskResult arrives MUST resolve (server MUST NOT return CreateTaskResult before the task is durably created)'
    };
    try {
      const created = await rawRequest(
        serverUrl,
        'tools/call',
        {
          name: 'slow_compute',
          arguments: { seconds: 60, label: 'consistency' }
        },
        { sessionId }
      );
      const taskId = created.taskId;
      if (!taskId) {
        return settle(meta, ['slow_compute did not create a task'], [
          SEP_2663_REF
        ]);
      }

      const errs: string[] = [];
      try {
        // No await/sleep between create and get — codifies the
        // strong-consistency ordering.
        const got = await rawRequest(
          serverUrl,
          'tasks/get',
          { taskId },
          { sessionId }
        );
        if (got.taskId !== taskId) {
          errs.push(
            `immediate tasks/get MUST resolve the same taskId; got ${got.taskId}`
          );
        }
      } finally {
        // Cleanup runs even when tasks/get rejects, so the 60s task is
        // not left behind on exactly the failure this check targets.
        await this.cancelQuietly(serverUrl, sessionId, taskId);
      }
      return settle(meta, errs, [SEP_2663_REF]);
    } catch (error) {
      return failureCheck(meta.id, meta.name, meta.description, error, [
        SEP_2663_REF
      ]);
    }
  }
}
/**
 * SEP-2243 Mcp-Method / Mcp-Name request-header tolerance.
 *
 * Mcp-Method and Mcp-Name are informational REQUEST headers; the
 * JSON-RPC body is authoritative. These checks verify that the server
 * tolerates the headers without changing dispatch.
 *
 * Required server fixtures:
 *   - greet — sync-only, returns "Hello, {name}!"
 *   - slow_compute — task-supporting, sleeps N seconds
 */

import {
  ClientScenario,
  ConformanceCheck,
  ScenarioSpecTag,
  DRAFT_PROTOCOL_VERSION
} from '../../../types';
import {
  TASKS_EXTENSION_ID,
  SEP_2243_REF,
  errMsg,
  failureCheck,
  initRawSession,
  rawRequest
} from './helpers';

/** Identity fields shared by every record a single check can emit. */
type CheckMeta = Pick<ConformanceCheck, 'id' | 'name' | 'description'>;

/**
 * Turn a list of collected violations into a SEP-2243 check record:
 * SUCCESS when the list is empty, otherwise FAILURE with the violations
 * joined by "; ".
 */
function settle(meta: CheckMeta, errs: readonly string[]): ConformanceCheck {
  const passed = errs.length === 0;
  return {
    ...meta,
    status: passed ? 'SUCCESS' : 'FAILURE',
    timestamp: new Date().toISOString(),
    errorMessage: passed ? undefined : errs.join('; '),
    specReferences: [SEP_2243_REF]
  };
}

export class TasksRequestHeadersScenario implements ClientScenario {
  name = 'tasks-request-headers';
  specVersions: ScenarioSpecTag[] = ['extension', DRAFT_PROTOCOL_VERSION];
  description = `Test SEP-2243 Mcp-Method / Mcp-Name request-header tolerance.

**Server Implementation Requirements:**

SEP-2243 defines two informational request headers used by HTTP
infrastructure (proxies, gateways, observability) to route or shape
JSON-RPC traffic without parsing the body:

- \`Mcp-Method: <method>\` — set on every JSON-RPC request.
- \`Mcp-Name: <taskId>\` — set on resume operations (\`tasks/get\`,
  \`tasks/update\`, \`tasks/cancel\`).

The JSON-RPC body is authoritative. The server MUST tolerate the
headers, MUST NOT require them, and MUST NOT change dispatch behavior
based on them — including when the headers disagree with the body.`;

  /**
   * Run the three header-tolerance checks in order against `serverUrl`.
   *
   * If the initialize handshake fails, a single bootstrap FAILURE is
   * returned and no further checks run. The long-lived task created for
   * the tasks/get check is cancelled (best-effort) before returning.
   */
  async run(serverUrl: string): Promise<ConformanceCheck[]> {
    let sessionId: string;
    try {
      ({ sessionId } = await initRawSession(serverUrl, {
        capabilities: { extensions: { [TASKS_EXTENSION_ID]: {} } }
      }));
    } catch (error) {
      return [
        {
          id: 'tasks-session-bootstrap',
          name: 'TasksSessionBootstrap',
          description:
            'Initialize handshake declaring io.modelcontextprotocol/tasks extension succeeds',
          status: 'FAILURE',
          timestamp: new Date().toISOString(),
          errorMessage: `Failed to initialize: ${errMsg(error)}`,
          specReferences: [SEP_2243_REF]
        }
      ];
    }

    const checks: ConformanceCheck[] = [];
    let routingTaskId: string | undefined;
    try {
      checks.push(await this.checkMethodHeaderOnToolsCall(serverUrl, sessionId));

      const routed = await this.checkRoutingHeadersOnTasksGet(
        serverUrl,
        sessionId
      );
      routingTaskId = routed.taskId;
      checks.push(routed.check);

      checks.push(await this.checkBodyMethodAuthoritative(serverUrl, sessionId));
    } finally {
      // Cleanup the long-lived task.
      if (routingTaskId) {
        try {
          await rawRequest(
            serverUrl,
            'tasks/cancel',
            { taskId: routingTaskId },
            { sessionId }
          );
        } catch {
          /* swallow — cleanup best-effort */
        }
      }
    }
    return checks;
  }

  /** Check 1: Mcp-Method on tools/call against a sync tool. */
  private async checkMethodHeaderOnToolsCall(
    serverUrl: string,
    sessionId: string
  ): Promise<ConformanceCheck> {
    const meta: CheckMeta = {
      id: 'tasks-headers-tolerate-mcp-method-on-tools-call',
      name: 'TasksHeadersTolerateMcpMethodOnToolsCall',
      description:
        'Server tolerates Mcp-Method request header on tools/call (sync tool dispatch unaffected)'
    };
    try {
      const result = await rawRequest(
        serverUrl,
        'tools/call',
        { name: 'greet', arguments: { name: 'sep-2243' } },
        { sessionId, headers: { 'Mcp-Method': 'tools/call' } }
      );
      const errs: string[] = [];
      if (result.resultType !== 'complete') {
        errs.push(
          `sync ToolResult.resultType MUST be "complete" regardless of routing header; got ${JSON.stringify(result.resultType)}`
        );
      }
      const contentIntact =
        Array.isArray(result.content) &&
        result.content.length > 0 &&
        result.content[0]?.text === 'Hello, sep-2243!';
      if (!contentIntact) {
        errs.push(
          'tool result content MUST be unaffected by the Mcp-Method header'
        );
      }
      return settle(meta, errs);
    } catch (error) {
      return failureCheck(meta.id, meta.name, meta.description, error, [
        SEP_2243_REF
      ]);
    }
  }

  /**
   * Check 2: Mcp-Method + Mcp-Name on tasks/get. Drives a 60s task first
   * so there is a real taskId to route on.
   *
   * @returns The check plus the created taskId (if any), which the
   *   caller is responsible for cancelling.
   */
  private async checkRoutingHeadersOnTasksGet(
    serverUrl: string,
    sessionId: string
  ): Promise<{ check: ConformanceCheck; taskId: string | undefined }> {
    const meta: CheckMeta = {
      id: 'tasks-headers-tolerate-routing-headers-on-tasks-get',
      name: 'TasksHeadersTolerateRoutingHeadersOnTasksGet',
      description:
        'Server tolerates Mcp-Method + Mcp-Name request headers on tasks/get (body taskId resolves regardless of routing headers)'
    };
    // Declared outside the try so the id survives a failing tasks/get and
    // still reaches the caller's cleanup.
    let taskId: string | undefined;
    try {
      const created = await rawRequest(
        serverUrl,
        'tools/call',
        {
          name: 'slow_compute',
          arguments: { seconds: 60, label: 'headers-tasks-get' }
        },
        { sessionId }
      );
      taskId = created.taskId;
      if (!taskId) {
        return {
          check: settle(meta, ['slow_compute did not create a task']),
          taskId
        };
      }

      const got = await rawRequest(
        serverUrl,
        'tasks/get',
        { taskId },
        {
          sessionId,
          headers: {
            'Mcp-Method': 'tasks/get',
            'Mcp-Name': taskId
          }
        }
      );
      const errs: string[] = [];
      if (got.taskId !== taskId) {
        errs.push(
          `tasks/get MUST resolve body taskId regardless of routing headers; got ${got.taskId}`
        );
      }
      if (!got.status) {
        errs.push(
          'tasks/get MUST still return status when routing headers are set'
        );
      }
      return { check: settle(meta, errs), taskId };
    } catch (error) {
      return {
        check: failureCheck(meta.id, meta.name, meta.description, error, [
          SEP_2243_REF
        ]),
        taskId
      };
    }
  }

  /** Check 3: body method is authoritative when the Mcp-Method header disagrees. */
  private async checkBodyMethodAuthoritative(
    serverUrl: string,
    sessionId: string
  ): Promise<ConformanceCheck> {
    const meta: CheckMeta = {
      id: 'tasks-headers-body-method-authoritative',
      name: 'TasksHeadersBodyMethodAuthoritative',
      description:
        'When Mcp-Method header disagrees with body, server MUST dispatch on body method (header is informational)'
    };
    try {
      const result = await rawRequest(
        serverUrl,
        'tools/call',
        { name: 'greet', arguments: { name: 'header-mismatch' } },
        // Header deliberately names a different method than the body.
        { sessionId, headers: { 'Mcp-Method': 'tasks/get' } }
      );
      const errs: string[] = [];
      if (result.resultType !== 'complete') {
        errs.push(
          `server MUST dispatch on body method (tools/call → resultType:"complete"); got ${JSON.stringify(result.resultType)}`
        );
      }
      if (
        !Array.isArray(result.content) ||
        result.content[0]?.text !== 'Hello, header-mismatch!'
      ) {
        errs.push(
          'tool result MUST reflect the body method, not the header claim'
        );
      }
      return settle(meta, errs);
    } catch (error) {
      return failureCheck(meta.id, meta.name, meta.description, error, [
        SEP_2243_REF
      ]);
    }
  }
}
*/ +import type { ConformanceCheck, SpecReference } from '../../../types'; + export const TASKS_EXTENSION_ID = 'io.modelcontextprotocol/tasks'; +export const SEP_2663_REF: SpecReference = { + id: 'SEP-2663', + url: 'https://github.com/modelcontextprotocol/specification/pull/2663' +}; +export const SEP_2322_REF: SpecReference = { + id: 'SEP-2322', + url: 'https://github.com/modelcontextprotocol/specification/pull/2322' +}; +export const SEP_2243_REF: SpecReference = { + id: 'SEP-2243', + url: 'https://github.com/modelcontextprotocol/specification/pull/2243' +}; +export const SEP_2575_REF: SpecReference = { + id: 'SEP-2575', + url: 'https://github.com/modelcontextprotocol/specification/pull/2575' +}; + +export function errMsg(error: unknown): string { + return error instanceof Error ? error.message : String(error); +} + +/** Build a FAILURE check from a thrown error, preserving id/name/description. */ +export function failureCheck( + id: string, + name: string, + description: string, + error: unknown, + specReferences: SpecReference[] +): ConformanceCheck { + return { + id, + name, + description, + status: 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: errMsg(error), + specReferences + }; +} + +/** Build a SKIPPED check (preserves id stability so Ctrl+F still finds it). */ +export function skipCheck( + id: string, + name: string, + description: string, + reason: string, + specReferences: SpecReference[] = [SEP_2663_REF] +): ConformanceCheck { + return { + id, + name, + description, + status: 'SKIPPED', + timestamp: new Date().toISOString(), + errorMessage: `Skipped: ${reason}`, + specReferences + }; +} + export interface InitOpts { /** Negotiated wire protocolVersion. Defaults to LATEST_SPEC_VERSION. */ protocolVersion?: string; @@ -23,15 +84,28 @@ export interface InitOpts { clientInfo?: { name: string; version: string }; } +export interface InitResult { + /** Mcp-Session-Id minted by the server during initialize. 
*/ + sessionId: string; + /** capabilities object the server advertised in its initialize response. */ + serverCapabilities: Record; + /** Negotiated protocolVersion echoed back by the server. */ + serverProtocolVersion?: string; + /** Server info (name, version, …). */ + serverInfo?: Record; +} + /** - * Run a fresh initialize handshake and return the resulting session id. - * Bypasses the SDK so callers can declare extension capabilities the - * SDK's typed wrappers don't yet know about. + * Run a fresh initialize handshake and return session id + the server's + * advertised capabilities. Bypasses the SDK so callers can declare + * extension capabilities the SDK's typed wrappers don't yet know about, + * and so the SDK's Zod schemas don't strip extension fields off the + * server response. */ export async function initRawSession( serverUrl: string, opts: InitOpts = {} -): Promise { +): Promise { const protocolVersion = opts.protocolVersion ?? '2025-11-25'; const capabilities = opts.capabilities ?? {}; const clientInfo = opts.clientInfo ?? { @@ -55,6 +129,14 @@ export async function initRawSession( const sid = initResp.headers.get('mcp-session-id') || ''; if (!sid) throw new Error('initialize response missing Mcp-Session-Id'); + const initBody = await initResp.json(); + if (initBody.error) { + throw new Error( + `initialize returned JSON-RPC error: ${JSON.stringify(initBody.error)}` + ); + } + const result = initBody.result ?? {}; + await fetch(serverUrl, { method: 'POST', headers: { @@ -67,7 +149,12 @@ export async function initRawSession( method: 'notifications/initialized' }) }); - return sid; + return { + sessionId: sid, + serverCapabilities: result.capabilities ?? 
{}, + serverProtocolVersion: result.protocolVersion, + serverInfo: result.serverInfo + }; } export interface RawRequestOpts { diff --git a/src/scenarios/server/tasks/lifecycle.test.ts b/src/scenarios/server/tasks/lifecycle.test.ts deleted file mode 100644 index d2ea918..0000000 --- a/src/scenarios/server/tasks/lifecycle.test.ts +++ /dev/null @@ -1,149 +0,0 @@ -/** - * SEP-2663 Tasks extension test runner. - * - * Iterates the tasks server scenarios against a SEP-2663-conformant - * server. Two ways to point at one — pick whichever fits: - * - * 1. Existing server already running: - * MCPKIT_TASKS_SERVER_URL=http://localhost:8080/mcp npm test -- lifecycle.test.ts - * - * 2. Auto-spawn a fixture binary in beforeAll (the binary must accept - * `--serve --addr :PORT` and bind Streamable HTTP at /mcp): - * MCPKIT_TASKS_BINARY=/path/to/tasks-server npm test -- lifecycle.test.ts - * - * Optional: MCPKIT_TASKS_PORT overrides the auto-spawn port (default 18092). - * - * If neither is set, the suite is skipped — letting CI runs against the - * everything-server stay green until the upstream fixture grows SEP-2663 - * support. - * - * The mcpkit reference fixture lives at - * https://github.com/panyam/mcpkit/tree/main/examples/tasks-v2 (mcpkit - * keeps its v1 surface alongside v2 internally; the fork only cares - * about the SEP-2663 surface, hence the unsuffixed naming here). - */ - -import { spawn, ChildProcess } from 'child_process'; -import { describe, it, expect, beforeAll, afterAll } from 'vitest'; -import { TasksLifecycleScenario } from './lifecycle'; - -const FIXTURE_BINARY = process.env.MCPKIT_TASKS_BINARY; -const EXTERNAL_URL = process.env.MCPKIT_TASKS_SERVER_URL; -const TEST_PORT = parseInt(process.env.MCPKIT_TASKS_PORT ?? '18092', 10); -const SERVER_URL = EXTERNAL_URL ?? `http://localhost:${TEST_PORT}/mcp`; -const SERVER_STARTUP_TIMEOUT_MS = 10_000; -// Spawn only when no external URL is provided AND a fixture binary is. 
-const SHOULD_SPAWN = !EXTERNAL_URL && Boolean(FIXTURE_BINARY); -const HAVE_TARGET = Boolean(EXTERNAL_URL) || SHOULD_SPAWN; - -const TASKS_SCENARIOS = [new TasksLifecycleScenario()]; - -const describeIfTarget = HAVE_TARGET ? describe : describe.skip; - -describeIfTarget('SEP-2663 Tasks — server conformance', () => { - let serverProcess: ChildProcess | null = null; - - beforeAll(async () => { - if (!SHOULD_SPAWN) return; - - serverProcess = spawn( - FIXTURE_BINARY!, - ['--serve', '--addr', `:${TEST_PORT}`], - { - stdio: ['ignore', 'pipe', 'pipe'], - detached: false - } - ); - - let stdoutBuf = ''; - let stderrBuf = ''; - serverProcess.stdout?.on('data', (b) => { - stdoutBuf += b.toString(); - }); - serverProcess.stderr?.on('data', (b) => { - stderrBuf += b.toString(); - }); - - await new Promise((resolve, reject) => { - const timer = setTimeout(() => { - if (serverProcess && !serverProcess.killed) { - serverProcess.kill('SIGKILL'); - } - reject( - new Error( - `tasks fixture failed to start within ${SERVER_STARTUP_TIMEOUT_MS}ms.\nSTDOUT: ${stdoutBuf}\nSTDERR: ${stderrBuf}` - ) - ); - }, SERVER_STARTUP_TIMEOUT_MS); - - // mcpkit's tasks demo logs the listen address to stderr via the - // log package; treat any "Connect:" or "listening" line as ready. 
- const checkReady = (chunk: string) => { - if ( - chunk.includes('Connect:') || - chunk.includes('listening') || - chunk.includes('Listening on') - ) { - clearTimeout(timer); - resolve(); - } - }; - serverProcess!.stdout?.on('data', (b) => checkReady(b.toString())); - serverProcess!.stderr?.on('data', (b) => checkReady(b.toString())); - - serverProcess!.on('error', (err) => { - clearTimeout(timer); - reject(new Error(`Failed to spawn tasks fixture: ${err.message}`)); - }); - serverProcess!.on('exit', (code) => { - if (code !== null && code !== 0) { - clearTimeout(timer); - reject( - new Error( - `tasks fixture exited prematurely with code ${code}.\nSTDOUT: ${stdoutBuf}\nSTDERR: ${stderrBuf}` - ) - ); - } - }); - }); - }, SERVER_STARTUP_TIMEOUT_MS + 5_000); - - afterAll(async () => { - if (!SHOULD_SPAWN) return; - if (!serverProcess || serverProcess.killed) return; - serverProcess.kill('SIGTERM'); - await new Promise((resolve) => { - const timer = setTimeout(() => { - if (serverProcess && !serverProcess.killed) { - serverProcess.kill('SIGKILL'); - } - resolve(); - }, 3_000); - serverProcess!.once('exit', () => { - clearTimeout(timer); - resolve(); - }); - }); - serverProcess = null; - }); - - for (const scenario of TASKS_SCENARIOS) { - it(`${scenario.name} — all checks succeed against fixture`, async () => { - const checks = await scenario.run(SERVER_URL); - expect(checks.length).toBeGreaterThan(0); - const failures = checks.filter( - (c) => c.status === 'FAILURE' || c.status === 'WARNING' - ); - if (failures.length > 0) { - // Surface the failing slugs and messages so vitest output points - // at the exact spec-coverage gaps. - const detail = failures - .map((c) => ` - ${c.id}: ${c.errorMessage ?? 
'(no message)'}`) - .join('\n'); - throw new Error( - `${failures.length}/${checks.length} checks failed:\n${detail}` - ); - } - }); - } -}); diff --git a/src/scenarios/server/tasks/lifecycle.ts b/src/scenarios/server/tasks/lifecycle.ts index e337c83..a59adce 100644 --- a/src/scenarios/server/tasks/lifecycle.ts +++ b/src/scenarios/server/tasks/lifecycle.ts @@ -17,25 +17,20 @@ import { ClientScenario, ConformanceCheck, ScenarioSpecTag, - SpecReference, DRAFT_PROTOCOL_VERSION } from '../../../types'; import { TASKS_EXTENSION_ID, + SEP_2663_REF, + SEP_2322_REF, + errMsg, + failureCheck, + skipCheck, initRawSession, rawRequest, waitForTerminal } from './helpers'; -const SEP_2663_REF: SpecReference = { - id: 'SEP-2663', - url: 'https://github.com/modelcontextprotocol/specification/pull/2663' -}; -const SEP_2322_REF: SpecReference = { - id: 'SEP-2322', - url: 'https://github.com/modelcontextprotocol/specification/pull/2322' -}; - export class TasksLifecycleScenario implements ClientScenario { name = 'tasks-lifecycle'; // 'extension' tags this as off the dated-version timeline (selectable @@ -90,13 +85,13 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under let sessionId: string; try { - sessionId = await initRawSession(serverUrl, { + ({ sessionId } = await initRawSession(serverUrl, { capabilities: { elicitation: {}, sampling: {}, extensions: { [TASKS_EXTENSION_ID]: {} } } - }); + })); } catch (error) { checks.push({ id: 'tasks-session-bootstrap', @@ -563,42 +558,3 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under return checks; } } - -function errMsg(error: unknown): string { - return error instanceof Error ? 
error.message : String(error); -} - -function failureCheck( - id: string, - name: string, - description: string, - error: unknown, - specReferences: SpecReference[] -): ConformanceCheck { - return { - id, - name, - description, - status: 'FAILURE', - timestamp: new Date().toISOString(), - errorMessage: errMsg(error), - specReferences - }; -} - -function skipCheck( - id: string, - name: string, - description: string, - reason: string -): ConformanceCheck { - return { - id, - name, - description, - status: 'SKIPPED', - timestamp: new Date().toISOString(), - errorMessage: `Skipped: ${reason}`, - specReferences: [SEP_2663_REF] - }; -} diff --git a/src/scenarios/server/tasks/mrtr-input.ts b/src/scenarios/server/tasks/mrtr-input.ts new file mode 100644 index 0000000..49cfacc --- /dev/null +++ b/src/scenarios/server/tasks/mrtr-input.ts @@ -0,0 +1,416 @@ +/** + * SEP-2322 / SEP-2663 — MRTR input flow on the tasks surface. + * + * Tests the input_required → tasks/update → resume loop, including + * partial inputResponses fulfillment when a tool fans out multiple + * simultaneous input requests. + * + * Required server fixtures: + * - confirm_delete — task-supporting, calls TaskElicit once + * - multi_input — task-supporting, fans out two TaskElicits in + * parallel so two keys are pending at once + */ + +import { + ClientScenario, + ConformanceCheck, + ScenarioSpecTag, + DRAFT_PROTOCOL_VERSION +} from '../../../types'; +import { + TASKS_EXTENSION_ID, + SEP_2322_REF, + SEP_2663_REF, + errMsg, + failureCheck, + initRawSession, + rawRequest, + waitForStatus, + waitForTerminal +} from './helpers'; + +export class TasksMRTRInputScenario implements ClientScenario { + name = 'tasks-mrtr-input'; + specVersions: ScenarioSpecTag[] = ['extension', DRAFT_PROTOCOL_VERSION]; + description = `Test SEP-2322 MRTR input flow on the tasks surface. 
+ +**Server Implementation Requirements:** + +**Surfacing inputRequests (SEP-2322):** +- A task waiting on client input MUST report \`status:"input_required"\` + on tasks/get and surface a non-empty \`inputRequests\` map keyed by + server-minted opaque ids. Each entry carries the underlying request + (\`elicitation/create\`, \`sampling/createMessage\`, etc.). + +**Resuming via tasks/update (SEP-2663):** +- The client delivers responses through \`tasks/update\` with + \`inputResponses\` keyed to match the server-emitted ids. The server + MUST return an empty \`{resultType:"complete"}\` ack on the + tasks/update response — the resulting task state is observed via the + next tasks/get. +- After the response is delivered, the task MUST resume execution and + proceed to a terminal state (or back to input_required for another + round). + +**Partial fulfillment (SEP-2663):** +- A tool that emits multiple simultaneous input requests parks the task + with multiple keys in \`inputRequests\`. A client MAY answer them one + at a time: + - tasks/update with a subset of keys MUST be acked. + - The task MUST stay in \`input_required\` until every pending request + has been answered. 
+  - tasks/get after a partial update MUST surface only the still-pending
+    keys; the answered key MUST be removed.`;
+
+  /**
+   * Runs the three MRTR input-flow checks against `serverUrl`.
+   *
+   * Every `tasks/update` and `tasks/cancel` sent here echoes the most
+   * recent `requestState` seen on `tasks/get` (omitted from the wire
+   * when the server never issued one).
+   *
+   * @param serverUrl - MCP endpoint of the server under test.
+   * @returns One check per probe, or a single bootstrap FAILURE when the
+   *   initialize handshake itself fails.
+   */
+  async run(serverUrl: string): Promise<ConformanceCheck[]> {
+    const checks: ConformanceCheck[] = [];
+
+    let sessionId: string;
+    try {
+      ({ sessionId } = await initRawSession(serverUrl, {
+        capabilities: {
+          elicitation: {},
+          sampling: {},
+          extensions: { [TASKS_EXTENSION_ID]: {} }
+        }
+      }));
+    } catch (error) {
+      checks.push({
+        id: 'tasks-session-bootstrap',
+        name: 'TasksSessionBootstrap',
+        description:
+          'Initialize handshake declaring io.modelcontextprotocol/tasks extension succeeds',
+        status: 'FAILURE',
+        timestamp: new Date().toISOString(),
+        errorMessage: `Failed to initialize: ${errMsg(error)}`,
+        specReferences: [SEP_2322_REF]
+      });
+      return checks;
+    }
+
+    // Best-effort cancel used to avoid leaving tasks parked in
+    // input_required on the server; failures never affect a verdict.
+    const cancelQuietly = async (
+      taskId: string,
+      requestState?: unknown
+    ): Promise<void> => {
+      try {
+        await rawRequest(
+          serverUrl,
+          'tasks/cancel',
+          { taskId, requestState },
+          { sessionId }
+        );
+      } catch {
+        /* swallow */
+      }
+    };
+
+    // Check 1: tasks/get surfaces inputRequests when status=input_required.
+    {
+      const id = 'tasks-mrtr-input-requests-on-tasks-get';
+      const name = 'TasksMRTRInputRequestsOnTasksGet';
+      const description =
+        'tasks/get on an input_required task MUST surface a non-empty inputRequests map';
+      try {
+        const created = await rawRequest(
+          serverUrl,
+          'tools/call',
+          {
+            name: 'confirm_delete',
+            arguments: { filename: 'mrtr-input.txt' }
+          },
+          { sessionId }
+        );
+        const taskId = created.taskId;
+        if (!taskId) {
+          checks.push({
+            id,
+            name,
+            description,
+            status: 'FAILURE',
+            timestamp: new Date().toISOString(),
+            errorMessage: 'confirm_delete did not create a task',
+            specReferences: [SEP_2322_REF]
+          });
+        } else {
+          const task = await waitForStatus(
+            serverUrl,
+            sessionId,
+            taskId,
+            'input_required',
+            5_000
+          );
+          const errs: string[] = [];
+          if (task.status !== 'input_required') {
+            errs.push(
+              `expected status:"input_required"; got ${JSON.stringify(task.status)}`
+            );
+          }
+          if (
+            !task.inputRequests ||
+            typeof task.inputRequests !== 'object' ||
+            Array.isArray(task.inputRequests)
+          ) {
+            errs.push('inputRequests MUST be a non-null object (map)');
+          } else {
+            const keys = Object.keys(task.inputRequests);
+            if (keys.length === 0) {
+              errs.push('inputRequests MUST have at least one entry');
+            } else {
+              const firstReq = task.inputRequests[keys[0]];
+              if (!firstReq?.method) {
+                errs.push(
+                  'each inputRequest MUST carry a `method` (e.g., elicitation/create)'
+                );
+              }
+            }
+          }
+          // Cancel so we don't leave the task parked.
+          await cancelQuietly(taskId, task.requestState);
+          checks.push({
+            id,
+            name,
+            description,
+            status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+            timestamp: new Date().toISOString(),
+            errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+            specReferences: [SEP_2322_REF, SEP_2663_REF]
+          });
+        }
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, [SEP_2322_REF]));
+      }
+    }
+
+    // Check 2: tasks/update delivers inputResponses + resumes the task.
+    {
+      const id = 'tasks-mrtr-tasks-update-resumes';
+      const name = 'TasksMRTRTasksUpdateResumes';
+      const description =
+        'tasks/update with matching inputResponses MUST be acked with {resultType:"complete"} and resume the task to a terminal state';
+      try {
+        const created = await rawRequest(
+          serverUrl,
+          'tools/call',
+          {
+            name: 'confirm_delete',
+            arguments: { filename: 'mrtr-resume.txt' }
+          },
+          { sessionId }
+        );
+        const taskId = created.taskId;
+        if (!taskId) {
+          checks.push({
+            id,
+            name,
+            description,
+            status: 'FAILURE',
+            timestamp: new Date().toISOString(),
+            errorMessage: 'confirm_delete did not create a task',
+            specReferences: [SEP_2322_REF, SEP_2663_REF]
+          });
+        } else {
+          const inputTask = await waitForStatus(
+            serverUrl,
+            sessionId,
+            taskId,
+            'input_required',
+            5_000
+          );
+          const errs: string[] = [];
+          // Accept every pending request with the same payload.
+          const responses: Record<string, unknown> = {};
+          for (const key of Object.keys(inputTask.inputRequests ?? {})) {
+            responses[key] = {
+              action: 'accept',
+              content: { confirm: true }
+            };
+          }
+          const ack = await rawRequest(
+            serverUrl,
+            'tasks/update',
+            {
+              taskId,
+              inputResponses: responses,
+              requestState: inputTask.requestState
+            },
+            { sessionId }
+          );
+          // Serialized comparison: the ack must carry resultType and
+          // nothing else.
+          if (
+            JSON.stringify(ack) !== JSON.stringify({ resultType: 'complete' })
+          ) {
+            errs.push(
+              `tasks/update ack MUST be {resultType:"complete"}; got ${JSON.stringify(ack)}`
+            );
+          }
+          const terminal = await waitForTerminal(serverUrl, sessionId, taskId);
+          if (terminal.status !== 'completed') {
+            errs.push(
+              `task MUST resume to completed after tasks/update; got status ${JSON.stringify(terminal.status)}`
+            );
+          }
+          checks.push({
+            id,
+            name,
+            description,
+            status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+            timestamp: new Date().toISOString(),
+            errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+            specReferences: [SEP_2322_REF, SEP_2663_REF]
+          });
+        }
+      } catch (error) {
+        checks.push(
+          failureCheck(id, name, description, error, [
+            SEP_2322_REF,
+            SEP_2663_REF
+          ])
+        );
+      }
+    }
+
+    // Check 3: partial inputResponses fulfillment leaves the rest pending.
+    {
+      const id = 'tasks-mrtr-partial-fulfillment';
+      const name = 'TasksMRTRPartialFulfillment';
+      const description =
+        'tasks/update with a subset of keys MUST keep the task in input_required with only the unanswered key remaining';
+      // Set while the task may still be parked on the server; cleared once
+      // waitForTerminal has returned. Drives the cleanup in `finally`.
+      let parkedTaskId: string | undefined;
+      // Most recent requestState seen on tasks/get, echoed on follow-ups.
+      let latestRequestState: unknown;
+      try {
+        const created = await rawRequest(
+          serverUrl,
+          'tools/call',
+          { name: 'multi_input', arguments: {} },
+          { sessionId }
+        );
+        const taskId = created.taskId;
+        if (!taskId) {
+          checks.push({
+            id,
+            name,
+            description,
+            status: 'FAILURE',
+            timestamp: new Date().toISOString(),
+            errorMessage: 'multi_input did not create a task',
+            specReferences: [SEP_2663_REF]
+          });
+        } else {
+          parkedTaskId = taskId;
+          // Wait until two keys are pending (the fan-out tool races two
+          // TaskElicits, so we may briefly see one before the second).
+          let inputTask: any;
+          const start = Date.now();
+          while (Date.now() - start < 5_000) {
+            inputTask = await rawRequest(
+              serverUrl,
+              'tasks/get',
+              { taskId },
+              { sessionId }
+            );
+            if (
+              inputTask.status === 'input_required' &&
+              inputTask.inputRequests &&
+              Object.keys(inputTask.inputRequests).length >= 2
+            ) {
+              break;
+            }
+            await new Promise((r) => setTimeout(r, 100));
+          }
+          latestRequestState = inputTask.requestState;
+          const errs: string[] = [];
+          if (inputTask.status !== 'input_required') {
+            errs.push(
+              `task with two parallel elicits MUST be input_required; got ${JSON.stringify(inputTask.status)}`
+            );
+          }
+          const keys = Object.keys(inputTask.inputRequests ?? {});
+          if (keys.length < 2) {
+            errs.push(
+              `multi_input MUST surface 2 inputRequests; got ${keys.length}`
+            );
+          } else {
+            const [firstKey, secondKey] = keys;
+
+            // Answer first key only.
+            const firstAck = await rawRequest(
+              serverUrl,
+              'tasks/update',
+              {
+                taskId,
+                inputResponses: {
+                  [firstKey]: {
+                    action: 'accept',
+                    content: { name: 'partial-1', confirm: true }
+                  }
+                },
+                requestState: latestRequestState
+              },
+              { sessionId }
+            );
+            if (firstAck.resultType !== 'complete') {
+              errs.push(
+                `partial tasks/update ack MUST carry resultType:"complete"; got ${JSON.stringify(firstAck)}`
+              );
+            }
+
+            // Status MUST still be input_required with only the second
+            // key remaining.
+            const afterFirst = await rawRequest(
+              serverUrl,
+              'tasks/get',
+              { taskId },
+              { sessionId }
+            );
+            // Prefer the freshly minted token when the server issued one.
+            latestRequestState = afterFirst.requestState ?? latestRequestState;
+            if (afterFirst.status !== 'input_required') {
+              errs.push(
+                `task MUST stay input_required while another input is still pending; got ${JSON.stringify(afterFirst.status)}`
+              );
+            }
+            const remaining = Object.keys(afterFirst.inputRequests ?? {});
+            if (!remaining.includes(secondKey)) {
+              errs.push(
+                `unanswered key MUST remain in inputRequests; got ${JSON.stringify(remaining)}`
+              );
+            }
+            if (remaining.includes(firstKey)) {
+              errs.push(
+                `answered key MUST be removed from inputRequests; still saw ${firstKey}`
+              );
+            }
+
+            // Answer second key — task resumes and finishes.
+            const secondAck = await rawRequest(
+              serverUrl,
+              'tasks/update',
+              {
+                taskId,
+                inputResponses: {
+                  [secondKey]: {
+                    action: 'accept',
+                    content: { name: 'partial-2', confirm: true }
+                  }
+                },
+                requestState: latestRequestState
+              },
+              { sessionId }
+            );
+            if (secondAck.resultType !== 'complete') {
+              errs.push(
+                `final tasks/update ack MUST carry resultType:"complete"; got ${JSON.stringify(secondAck)}`
+              );
+            }
+            const terminal = await waitForTerminal(
+              serverUrl,
+              sessionId,
+              taskId
+            );
+            // waitForTerminal has returned, so no cleanup cancel is needed.
+            parkedTaskId = undefined;
+            if (terminal.status !== 'completed') {
+              errs.push(
+                `task MUST complete after both inputs are satisfied; got ${JSON.stringify(terminal.status)}`
+              );
+            }
+          }
+          checks.push({
+            id,
+            name,
+            description,
+            status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+            timestamp: new Date().toISOString(),
+            errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+            specReferences: [SEP_2322_REF, SEP_2663_REF]
+          });
+        }
+      } catch (error) {
+        checks.push(
+          failureCheck(id, name, description, error, [
+            SEP_2322_REF,
+            SEP_2663_REF
+          ])
+        );
+      } finally {
+        // Too few inputs surfaced, or a step threw mid-flow: don't leave
+        // the task parked.
+        if (parkedTaskId) {
+          await cancelQuietly(parkedTaskId, latestRequestState);
+        }
+      }
+    }
+
+    return checks;
+  }
+}
diff --git a/src/scenarios/server/tasks/notifications.ts b/src/scenarios/server/tasks/notifications.ts
new file mode 100644
index 0000000..a3881a2
--- /dev/null
+++ b/src/scenarios/server/tasks/notifications.ts
@@ -0,0 +1,188 @@
+/**
+ * SEP-2663 Tasks Extension — status notifications conformance.
+ *
+ * Status notifications are OPTIONAL. The check pattern is:
+ *   - INFO when no notifications are received (well-formed silence).
+ *   - SUCCESS when notifications arrive and carry the SEP-2663 shape
+ *     (DetailedTask: taskId + status, with inlined result on terminal).
+ *   - FAILURE only if a notification was emitted but is malformed.
+ *
+ * The raw HTTP harness can't open a long-lived GET SSE stream from the
+ * scenario layer easily, so this check observes notifications via the
+ * POST tools/call SSE response stream. That captures the status
+ * transitions emitted while the task is running. This is a best-effort
+ * smoke test — passing servers may still emit additional notifications
+ * on the persistent GET stream that this harness doesn't see.
+ *
+ * Required server fixtures:
+ *   - slow_compute — task-supporting, sleeps N seconds
+ */
+
+import {
+  ClientScenario,
+  ConformanceCheck,
+  ScenarioSpecTag,
+  DRAFT_PROTOCOL_VERSION
+} from '../../../types';
+import {
+  TASKS_EXTENSION_ID,
+  SEP_2663_REF,
+  errMsg,
+  failureCheck,
+  initRawSession,
+  waitForTerminal
+} from './helpers';
+
+/**
+ * Conformance scenario for optional `notifications/tasks/status` events.
+ *
+ * Issues one `tools/call` for `slow_compute`, collects status
+ * notifications from that POST's SSE response, and validates their
+ * shape. Verdicts: FAILURE when the call creates no task or a
+ * notification lacks `taskId`/`status`; WARNING when the only finding is
+ * a completed notification without an inlined `result`; INFO when no
+ * notifications were observed; SUCCESS otherwise.
+ */
+export class TasksStatusNotificationsScenario implements ClientScenario {
+  name = 'tasks-status-notifications';
+  specVersions: ScenarioSpecTag[] = ['extension', DRAFT_PROTOCOL_VERSION];
+  description = `Test SEP-2663 status notifications (optional).
+
+**Server Implementation Requirements:**
+
+Servers MAY emit \`notifications/tasks/status\` to inform clients of
+task state changes without polling. Notifications are optional — a
+server is conformant whether it sends them or not. When sent, the
+notification params MUST carry:
+
+- \`taskId\`: the task the notification refers to.
+- \`status\`: the new task status.
+- For terminal statuses (\`completed\`/\`failed\`/\`cancelled\`),
+  notifications MAY inline the corresponding \`result\` or \`error\`
+  per the SEP-2663 DetailedTask shape.`;
+
+  /**
+   * Runs the notification-shape check against `serverUrl`.
+   *
+   * @param serverUrl - MCP endpoint of the server under test.
+   * @returns A single check, or a single bootstrap FAILURE when the
+   *   initialize handshake itself fails.
+   */
+  async run(serverUrl: string): Promise<ConformanceCheck[]> {
+    const checks: ConformanceCheck[] = [];
+
+    let sessionId: string;
+    try {
+      ({ sessionId } = await initRawSession(serverUrl, {
+        capabilities: { extensions: { [TASKS_EXTENSION_ID]: {} } }
+      }));
+    } catch (error) {
+      checks.push({
+        id: 'tasks-session-bootstrap',
+        name: 'TasksSessionBootstrap',
+        description:
+          'Initialize handshake declaring io.modelcontextprotocol/tasks extension succeeds',
+        status: 'FAILURE',
+        timestamp: new Date().toISOString(),
+        errorMessage: `Failed to initialize: ${errMsg(error)}`,
+        specReferences: [SEP_2663_REF]
+      });
+      return checks;
+    }
+
+    const id = 'tasks-status-notifications-shape';
+    const name = 'TasksStatusNotificationsShape';
+    const description =
+      'When status notifications are emitted, each MUST carry taskId + status (SEP-2663 DetailedTask)';
+
+    // Issue tools/call with SSE-accepting headers and capture every
+    // `data:` payload. Some are JSON-RPC responses (with id), some are
+    // notifications (no id). We ingest all and classify by the body.
+    let taskId: string | undefined;
+    // JSON-RPC error object returned for the tools/call itself, if any.
+    let callError: unknown;
+    const notifications: any[] = [];
+    try {
+      const resp = await fetch(serverUrl, {
+        method: 'POST',
+        headers: {
+          'Content-Type': 'application/json',
+          Accept: 'text/event-stream, application/json',
+          'Mcp-Session-Id': sessionId
+        },
+        body: JSON.stringify({
+          jsonrpc: '2.0',
+          id: 'notif-test',
+          method: 'tools/call',
+          params: {
+            name: 'slow_compute',
+            arguments: { seconds: 1, label: 'notif' }
+          }
+        })
+      });
+      const ct = resp.headers.get('content-type') || '';
+      if (ct.includes('text/event-stream')) {
+        // resp.text() resolves once the server closes the stream, so
+        // the whole POST response is buffered before it is parsed.
+        const text = await resp.text();
+        for (const line of text.split('\n')) {
+          const trimmed = line.trim();
+          if (trimmed.startsWith('data:')) {
+            const payload = trimmed.slice(5).trimStart();
+            if (payload.startsWith('{')) {
+              const parsed = JSON.parse(payload);
+              if (parsed.id === 'notif-test' && parsed.result) {
+                taskId = parsed.result.taskId;
+              } else if (parsed.id === 'notif-test' && parsed.error) {
+                callError = parsed.error;
+              } else if (parsed.method === 'notifications/tasks/status') {
+                notifications.push(parsed.params);
+              }
+            }
+          }
+        }
+      } else {
+        const body = await resp.json();
+        taskId = body.result?.taskId;
+        callError = body.error;
+      }
+    } catch (error) {
+      checks.push(failureCheck(id, name, description, error, [SEP_2663_REF]));
+      return checks;
+    }
+
+    // Without a task there is nothing to observe. Report the broken
+    // call instead of falling through to the "no notifications" INFO,
+    // which would hide it.
+    if (!taskId) {
+      checks.push({
+        id,
+        name,
+        description,
+        status: 'FAILURE',
+        timestamp: new Date().toISOString(),
+        errorMessage: callError
+          ? `tools/call returned JSON-RPC error: ${JSON.stringify(callError)}`
+          : 'slow_compute did not create a task',
+        specReferences: [SEP_2663_REF]
+      });
+      return checks;
+    }
+
+    // Drain to a terminal so the server has emitted everything it's
+    // going to (best-effort — the persistent GET stream might be
+    // collecting more, but we're done with this scenario regardless).
+    try {
+      await waitForTerminal(serverUrl, sessionId, taskId);
+    } catch {
+      /* swallow */
+    }
+
+    if (notifications.length === 0) {
+      checks.push({
+        id,
+        name,
+        description,
+        status: 'INFO',
+        timestamp: new Date().toISOString(),
+        errorMessage:
+          'No status notifications received on the tools/call POST SSE stream (notifications are optional)',
+        specReferences: [SEP_2663_REF]
+      });
+      return checks;
+    }
+
+    // errs: missing required fields (FAILURE).
+    // warnings: optional inlined result absent (WARNING only).
+    const errs: string[] = [];
+    const warnings: string[] = [];
+    for (const evt of notifications) {
+      if (!evt.taskId) {
+        errs.push('status notification MUST carry taskId');
+      }
+      if (!evt.status) {
+        errs.push('status notification MUST carry status');
+      }
+    }
+    // Optional terminal-with-inlined-result check: if the suite saw a
+    // completed notification for our taskId, it SHOULD include result.
+    // Inlining is optional per the description above, so its absence is
+    // reported as a WARNING and never as a FAILURE.
+    const terminalForOurs = notifications.find(
+      (n: any) => n.taskId === taskId && n.status === 'completed'
+    );
+    if (terminalForOurs && !terminalForOurs.result) {
+      warnings.push(
+        'completed status notification SHOULD inline result (DetailedTask shape)'
+      );
+    }
+
+    const messages = [...errs, ...warnings];
+    checks.push({
+      id,
+      name,
+      description,
+      status:
+        errs.length > 0 ? 'FAILURE' : warnings.length > 0 ? 'WARNING' : 'SUCCESS',
+      timestamp: new Date().toISOString(),
+      errorMessage: messages.length > 0 ? messages.join('; ') : undefined,
+      specReferences: [SEP_2663_REF],
+      details: { notificationCount: notifications.length }
+    });
+
+    return checks;
+  }
+}
diff --git a/src/scenarios/server/tasks/request-state.ts b/src/scenarios/server/tasks/request-state.ts
new file mode 100644
index 0000000..8c2b165
--- /dev/null
+++ b/src/scenarios/server/tasks/request-state.ts
@@ -0,0 +1,290 @@
+/**
+ * SEP-2322 / SEP-2663 — requestState conformance.
+ *
+ * Tests the optional opaque session-continuation token:
+ *   - Server MAY include requestState on tasks/get responses.
+ *   - Clients MUST echo it back on subsequent tasks/get / tasks/update /
+ *     tasks/cancel for the same task — server MUST accept the echo.
+ * - Servers MUST tolerate a stale but still-valid token (one minted + * before a newer one but still within its TTL window). + * + * If the server does not issue requestState at all (it's optional per + * SEP-2322), the dependent checks emit INFO rather than failing — the + * spec allows omission. + * + * Required server fixtures: + * - slow_compute — task-supporting, sleeps N seconds + */ + +import { + ClientScenario, + ConformanceCheck, + ScenarioSpecTag, + DRAFT_PROTOCOL_VERSION +} from '../../../types'; +import { + TASKS_EXTENSION_ID, + SEP_2322_REF, + SEP_2663_REF, + errMsg, + failureCheck, + initRawSession, + rawRequest +} from './helpers'; + +export class TasksRequestStateScenario implements ClientScenario { + name = 'tasks-request-state'; + specVersions: ScenarioSpecTag[] = ['extension', DRAFT_PROTOCOL_VERSION]; + description = `Test SEP-2322 requestState semantics on the tasks surface. + +**Server Implementation Requirements:** + +**Optional emission (SEP-2322):** +- A server MAY include a non-empty string \`requestState\` on tasks/get + responses to allow stateless deployments to resume the conversation. + When present, it MUST be a non-empty string. + +**Echo acceptance:** +- A client that receives a \`requestState\` from tasks/get MUST be able + to echo it back on a subsequent \`tasks/get\`/\`tasks/update\`/ + \`tasks/cancel\` for the same task. The server MUST accept the echo. + +**Stale-but-valid tolerance (SEP-2663):** +- Each tasks/get may mint a new requestState (e.g., for a refreshed + TTL). After a fresh tasks/get returns a newer token, echoing the + earlier one MUST still succeed as long as the earlier token has not + itself expired.
(Servers MUST tolerate stale-but-valid tokens + gracefully.)`; + + async run(serverUrl: string): Promise<ConformanceCheck[]> { + const checks: ConformanceCheck[] = []; + + let sessionId: string; + try { + ({ sessionId } = await initRawSession(serverUrl, { + capabilities: { extensions: { [TASKS_EXTENSION_ID]: {} } } + })); + } catch (error) { + checks.push({ + id: 'tasks-session-bootstrap', + name: 'TasksSessionBootstrap', + description: + 'Initialize handshake declaring io.modelcontextprotocol/tasks extension succeeds', + status: 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: `Failed to initialize: ${errMsg(error)}`, + specReferences: [SEP_2322_REF] + }); + return checks; + } + + // Drive a long-running task once and reuse it for every check. + let taskId: string | undefined; + try { + const created = await rawRequest( + serverUrl, + 'tools/call', + { + name: 'slow_compute', + arguments: { seconds: 60, label: 'request-state' } + }, + { sessionId } + ); + taskId = created.taskId; + } catch (error) { + checks.push( + failureCheck( + 'tasks-request-state-setup', + 'TasksRequestStateSetup', + 'Failed to create a long-running task to exercise requestState', + error, + [SEP_2322_REF] + ) + ); + return checks; + } + if (!taskId) { + checks.push({ + id: 'tasks-request-state-setup', + name: 'TasksRequestStateSetup', + description: + 'slow_compute did not produce a task; cannot exercise requestState', + status: 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: 'no taskId in CreateTaskResult', + specReferences: [SEP_2322_REF] + }); + return checks; + } + + let firstToken: string | undefined; + + // Check 1: tasks/get response shape — requestState (optional) must + // be a non-empty string when present.
+ { + const id = 'tasks-request-state-shape'; + const name = 'TasksRequestStateShape'; + const description = + 'tasks/get may include requestState; when present it MUST be a non-empty string'; + try { + const task = await rawRequest( + serverUrl, + 'tasks/get', + { taskId }, + { sessionId } + ); + const errs: string[] = []; + if (task.requestState !== undefined) { + if (typeof task.requestState !== 'string') { + errs.push( + `requestState MUST be a string when present; got ${typeof task.requestState}` + ); + } else if (task.requestState.length === 0) { + errs.push( + 'requestState MUST be non-empty when present (omit the field instead of emitting "")' + ); + } else { + firstToken = task.requestState; + } + } + // Optional emission is never a FAILURE: SUCCESS when a well-formed + // token was emitted, INFO when the server omits it (advertises the path). + const status: 'SUCCESS' | 'INFO' | 'FAILURE' = + errs.length === 0 ? (firstToken ? 'SUCCESS' : 'INFO') : 'FAILURE'; + checks.push({ + id, + name, + description, + status, + timestamp: new Date().toISOString(), + errorMessage: errs.length > 0 ? errs.join('; ') : undefined, + specReferences: [SEP_2322_REF], + details: { + emitted: Boolean(firstToken), + tokenLength: firstToken?.length + } + }); + } catch (error) { + checks.push(failureCheck(id, name, description, error, [SEP_2322_REF])); + } + } + + // Check 2: client echoes requestState; server accepts the echo.
+ { + const id = 'tasks-request-state-echo'; + const name = 'TasksRequestStateEcho'; + const description = + 'Server accepts a tasks/get with the previously-emitted requestState echoed back'; + if (!firstToken) { + checks.push({ + id, + name, + description, + status: 'INFO', + timestamp: new Date().toISOString(), + errorMessage: 'Server did not emit requestState; nothing to echo', + specReferences: [SEP_2322_REF] + }); + } else { + try { + const echoed = await rawRequest( + serverUrl, + 'tasks/get', + { taskId, requestState: firstToken }, + { sessionId } + ); + const errs: string[] = []; + if (echoed.taskId !== taskId) { + errs.push( + `tasks/get with echoed requestState MUST resolve the same taskId; got ${echoed.taskId}` + ); + } + checks.push({ + id, + name, + description, + status: errs.length === 0 ? 'SUCCESS' : 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: errs.length > 0 ? errs.join('; ') : undefined, + specReferences: [SEP_2322_REF] + }); + } catch (error) { + checks.push( + failureCheck(id, name, description, error, [SEP_2322_REF]) + ); + } + } + } + + // Check 3: stale-but-valid tolerance. + { + const id = 'tasks-request-state-stale-tolerance'; + const name = 'TasksRequestStateStaleTolerance'; + const description = + 'After a newer requestState is minted, the earlier (stale-but-still-valid) token MUST still be accepted'; + if (!firstToken) { + checks.push({ + id, + name, + description, + status: 'INFO', + timestamp: new Date().toISOString(), + errorMessage: + 'Server did not emit requestState; stale tolerance is moot', + specReferences: [SEP_2663_REF, SEP_2322_REF] + }); + } else { + try { + // Force a fresh mint by issuing another tasks/get. On servers + // that sign tokens with embedded expiry, this likely yields a + // newer token; on plaintext-token servers it round-trips the + // same value (still valid).
+ await rawRequest( + serverUrl, + 'tasks/get', + { taskId, requestState: firstToken }, + { sessionId } + ); + // Now re-echo the OLDER token; server MUST accept. + const stale = await rawRequest( + serverUrl, + 'tasks/get', + { taskId, requestState: firstToken }, + { sessionId } + ); + const errs: string[] = []; + if (stale.taskId !== taskId) { + errs.push( + `stale-but-valid requestState MUST resolve the same taskId; got ${stale.taskId}` + ); + } + checks.push({ + id, + name, + description, + status: errs.length === 0 ? 'SUCCESS' : 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: errs.length > 0 ? errs.join('; ') : undefined, + specReferences: [SEP_2663_REF, SEP_2322_REF] + }); + } catch (error) { + checks.push( + failureCheck(id, name, description, error, [ + SEP_2663_REF, + SEP_2322_REF + ]) + ); + } + } + } + + // Best-effort cleanup: cancel the 60s task so the server stops running it. + try { + await rawRequest(serverUrl, 'tasks/cancel', { taskId }, { sessionId }); + } catch { + /* swallow */ + } + + return checks; + } +} diff --git a/src/scenarios/server/tasks/wire-fields.ts b/src/scenarios/server/tasks/wire-fields.ts new file mode 100644 index 0000000..3fb377d --- /dev/null +++ b/src/scenarios/server/tasks/wire-fields.ts @@ -0,0 +1,250 @@ +/** + * SEP-2663 Tasks Extension — wire-format / TTL conformance. + * + * Tests the renamed wire fields (ttlSeconds, pollIntervalMilliseconds), + * the no-early-TTL-expiry rule, and confirms the v1 `related-task` _meta + * key is absent on tasks/get's inlined result (taskId is at root level + * already, so the metadata is redundant).
+ * + * Required server fixtures: + * - slow_compute — task-supporting, sleeps N seconds + */ + +import { + ClientScenario, + ConformanceCheck, + ScenarioSpecTag, + DRAFT_PROTOCOL_VERSION +} from '../../../types'; +import { + TASKS_EXTENSION_ID, + SEP_2663_REF, + errMsg, + failureCheck, + skipCheck, + initRawSession, + rawRequest, + waitForTerminal +} from './helpers'; + +export class TasksWireFieldsScenario implements ClientScenario { + name = 'tasks-wire-fields'; + specVersions: ScenarioSpecTag[] = ['extension', DRAFT_PROTOCOL_VERSION]; + description = `Test SEP-2663 wire-field renames + TTL semantics. + +**Server Implementation Requirements:** + +**Wire-field renames (SEP-2663):** +- The TTL field is named \`ttlSeconds\` on the wire (the v1 \`ttl\` + key is in milliseconds-by-convention; SEP-2663 puts the unit in the + field name). +- The poll-interval field is named \`pollIntervalMilliseconds\` (v1 + used \`pollInterval\`). +- A \`CreateTaskResult\` MUST NOT carry the legacy \`ttl\` or + \`pollInterval\` keys — clients keying off v1 names on a v2 server + would silently miss the TTL guidance. + +**TTL non-expiry (SEP-2663):** +- A task MUST remain accessible via \`tasks/get\` for the duration of + its \`ttlSeconds\`; a server MUST NOT expire it earlier.
+ +**Inlined-result \`_meta\` (SEP-2663):** +- The v1 \`io.modelcontextprotocol/related-task\` \`_meta\` key MUST NOT + appear on tasks/get's inlined \`result\` — the \`taskId\` is already + at the root level of the \`tasks/get\` response, so the metadata is + redundant.`; + + async run(serverUrl: string): Promise<ConformanceCheck[]> { + const checks: ConformanceCheck[] = []; + + let sessionId: string; + try { + ({ sessionId } = await initRawSession(serverUrl, { + capabilities: { + extensions: { [TASKS_EXTENSION_ID]: {} } + } + })); + } catch (error) { + checks.push({ + id: 'tasks-session-bootstrap', + name: 'TasksSessionBootstrap', + description: + 'Initialize handshake declaring io.modelcontextprotocol/tasks extension succeeds', + status: 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: `Failed to initialize: ${errMsg(error)}`, + specReferences: [SEP_2663_REF] + }); + return checks; + } + + // Check 1: ttlSeconds + pollIntervalMilliseconds wire shape. + let createdTaskId: string | undefined; + { + const id = 'tasks-wire-field-renames'; + const name = 'TasksWireFieldRenames'; + const description = + 'CreateTaskResult uses ttlSeconds + pollIntervalMilliseconds; legacy ttl / pollInterval keys absent'; + try { + const result = await rawRequest( + serverUrl, + 'tools/call', + { + name: 'slow_compute', + arguments: { seconds: 1, label: 'wire-fields' } + }, + { sessionId } + ); + createdTaskId = result.taskId; + const errs: string[] = []; + // ttlSeconds — required, positive (or null = unlimited; treat + // either as well-formed). Legacy `ttl` MUST be absent.
+ if (!('ttlSeconds' in result)) { + errs.push( + 'CreateTaskResult MUST carry ttlSeconds (renamed from v1 `ttl`)' + ); + } else if ( + result.ttlSeconds !== null && + (typeof result.ttlSeconds !== 'number' || result.ttlSeconds <= 0) + ) { + errs.push( + `ttlSeconds MUST be null or a positive number; got ${JSON.stringify(result.ttlSeconds)}` + ); + } + if ('ttl' in result) { + errs.push( + 'CreateTaskResult MUST NOT carry the v1 `ttl` key (use ttlSeconds)' + ); + } + // pollIntervalMilliseconds — optional. When present it MUST be + // a positive number and the legacy `pollInterval` key MUST NOT + // appear. + if ( + result.pollIntervalMilliseconds !== undefined && + (typeof result.pollIntervalMilliseconds !== 'number' || + result.pollIntervalMilliseconds <= 0) + ) { + errs.push( + `pollIntervalMilliseconds MUST be a positive number when present; got ${JSON.stringify(result.pollIntervalMilliseconds)}` + ); + } + if ('pollInterval' in result) { + errs.push( + 'CreateTaskResult MUST NOT carry the v1 `pollInterval` key (use pollIntervalMilliseconds)' + ); + } + checks.push({ + id, + name, + description, + status: errs.length === 0 ? 'SUCCESS' : 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: errs.length > 0 ? errs.join('; ') : undefined, + specReferences: [SEP_2663_REF], + details: { + ttlSeconds: result.ttlSeconds, + pollIntervalMilliseconds: result.pollIntervalMilliseconds, + hasLegacyTtl: 'ttl' in result, + hasLegacyPollInterval: 'pollInterval' in result + } + }); + } catch (error) { + checks.push(failureCheck(id, name, description, error, [SEP_2663_REF])); + } + } + + // Check 2: task accessible before TTL elapses.
+ { + const id = 'tasks-no-early-ttl-expiry'; + const name = 'TasksNoEarlyTtlExpiry'; + const description = + 'Task remains accessible via tasks/get for the duration of its ttlSeconds'; + if (!createdTaskId) { + checks.push(skipCheck(id, name, description, 'no task created')); + } else { + try { + await waitForTerminal(serverUrl, sessionId, createdTaskId); + // Sanity probe well before TTL (the unit is seconds; servers + // typically pick order-of-minutes defaults). + await new Promise((r) => setTimeout(r, 500)); + const after = await rawRequest( + serverUrl, + 'tasks/get', + { taskId: createdTaskId }, + { sessionId } + ); + const errs: string[] = []; + if (after.taskId !== createdTaskId) { + errs.push( + `task MUST still be accessible before TTL; got taskId=${after.taskId}` + ); + } + checks.push({ + id, + name, + description, + status: errs.length === 0 ? 'SUCCESS' : 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: errs.length > 0 ? errs.join('; ') : undefined, + specReferences: [SEP_2663_REF] + }); + } catch (error) { + checks.push( + failureCheck(id, name, description, error, [SEP_2663_REF]) + ); + } + } + } + + // Check 3: no related-task _meta on inlined result.
+ { + const id = 'tasks-no-related-task-meta-on-inlined-result'; + const name = 'TasksNoRelatedTaskMetaOnInlinedResult'; + const description = + 'tasks/get inlined result MUST NOT include the v1 io.modelcontextprotocol/related-task _meta key (taskId is at the root)'; + try { + const created = await rawRequest( + serverUrl, + 'tools/call', + { + name: 'slow_compute', + arguments: { seconds: 1, label: 'wire-fields-meta' } + }, + { sessionId } + ); + const taskId = created.taskId; + if (!taskId) { + checks.push(skipCheck(id, name, description, 'no task created')); + } else { + const terminal = await waitForTerminal(serverUrl, sessionId, taskId); + const errs: string[] = []; + const meta = terminal.result?._meta; // tested by key presence: a null/empty value still counts as appearing + if (typeof meta === 'object' && meta !== null && 'io.modelcontextprotocol/related-task' in meta) { + errs.push( + 'related-task _meta MUST NOT appear on tasks/get inlined result' + ); + } + checks.push({ + id, + name, + description, + status: errs.length === 0 ? 'SUCCESS' : 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: errs.length > 0 ? errs.join('; ') : undefined, + specReferences: [SEP_2663_REF], + details: { + hasMeta: Boolean(meta), + hasRelatedTask: + typeof meta === 'object' && meta !== null && + 'io.modelcontextprotocol/related-task' in meta + } + }); + } + } catch (error) { + checks.push(failureCheck(id, name, description, error, [SEP_2663_REF])); + } + } + + return checks; + } +} From 10bf8370247e373004510aec1a0a7f278be7ee3c Mon Sep 17 00:00:00 2001 From: Sri Panyam Date: Tue, 5 May 2026 15:09:20 -0700 Subject: [PATCH 4/7] docs(tasks,mrtr): scenario READMEs for upstream porting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restructured around ClientScenario classes (one row per class with check-list under it) rather than per-numbered-test slugs. Documents fixture requirements, env vars, open spec questions, and the wire-format diff for each suite. Per AGENTS.md, severity follows spec keyword (MUST/MUST NOT → FAILURE, SHOULD/SHOULD NOT → WARNING).
The READMEs explain why some checks emit INFO rather than FAILURE (optional emission paths per SEP-2322). --- src/scenarios/server/mrtr/README.md | 111 +++++++++++++++ src/scenarios/server/tasks/README.md | 197 +++++++++++++++++++++++++++ 2 files changed, 308 insertions(+) create mode 100644 src/scenarios/server/mrtr/README.md create mode 100644 src/scenarios/server/tasks/README.md diff --git a/src/scenarios/server/mrtr/README.md b/src/scenarios/server/mrtr/README.md new file mode 100644 index 0000000..8e1bf53 --- /dev/null +++ b/src/scenarios/server/mrtr/README.md @@ -0,0 +1,111 @@ +# SEP-2322 MRTR — Server Conformance + +Tests any MCP server that implements the SEP-2322 ephemeral +Multi Round-Trip Request flow on `tools/call` — the +`IncompleteResult` → retry-with-`inputResponses` → `ToolResult` +contract that lets a tool gather elicitation / sampling / roots input +without creating a task envelope. + +## Specs covered + +| SEP | What it adds | Where it shows up | +| -------- | ---------------------------------------------------------------------------------------------------------------- | ----------------------------- | +| SEP-2322 | Ephemeral MRTR — `resultType` discriminator, `inputRequests` / `inputResponses` keyed maps, `requestState` token | every check | +| SEP-2663 | MRTR → Tasks composition (final round returns `CreateTaskResult`) | mrtr-08 (SKIPPED — see below) | + +## ClientScenario classes + +### `mrtr-ephemeral-flow` (`ephemeral-flow.ts`) + +A single scenario covering the full ephemeral MRTR contract — per the +AGENTS.md "fewer scenarios, more checks" rule. A server that +implemented elicitation round-trips but not sampling round-trips would +be incoherent, so they bundle. 
+ +| Check | What it tests | +| ---------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- | +| `mrtr-basic-elicitation-round-trip` | Round 1 returns `IncompleteResult` with `elicitation/create`; round 2 completes with the answer reflected | +| `mrtr-sampling-round-trip` | Same flow with `sampling/createMessage` | +| `mrtr-roots-list-round-trip` | Same flow with `roots/list` | +| `mrtr-request-state-round-trip` | When server emits `requestState`, it's a non-empty string and the server validates the echo | +| `mrtr-multiple-input-requests-one-round` | A single `IncompleteResult` MAY carry inputRequests for `elicitation/create` + `sampling/createMessage` + `roots/list` together | +| `mrtr-multi-round-flow` | A handler MAY take 2+ rounds; each round mints a fresh `requestState`; final result reflects answers from every round | +| `mrtr-wrong-input-key-rerequests` | When client sends a wrong `inputResponses` key, server SHOULD re-request via `IncompleteResult` rather than erroring | +| `mrtr-tasks-composition` | **SKIPPED** — see "Open issues" below | + +## Required server fixtures + +The fixture server MUST register these tools: + +| Tool | Behavior | +| ---------------------------------------- | ------------------------------------------------------------------------------------------- | +| `test_tool_with_elicitation` | One `elicitation/create` round, completes with answer reflected | +| `test_incomplete_result_sampling` | One `sampling/createMessage` round | +| `test_incomplete_result_list_roots` | One `roots/list` round | +| `test_incomplete_result_request_state` | Exercises `requestState` validation; final result includes `state-ok` to confirm validation | +| `test_incomplete_result_multiple_inputs` | Emits 3+ inputRequests of different methods in one round | +| `test_incomplete_result_multi_round` | Drives 2+ MRTR rounds, final result references every 
answer | +| `test_incomplete_result_elicitation` | Emits inputRequest for `user_name`; server re-requests on wrong-key responses | + +The fixture can be implemented in any language; one example reference +implementation lives at +[`panyam/mcpkit/examples/mrtr`](https://github.com/panyam/mcpkit/tree/main/examples/mrtr). + +## Running + +```bash +# Against an already-running server +MRTR_SERVER_URL=http://localhost:8080/mcp \ + npx vitest run src/scenarios/server/mrtr/all-scenarios.test.ts + +# Auto-spawn a fixture in beforeAll +MRTR_SERVER_URL=http://localhost:18093/mcp \ +MRTR_SERVER_CMD="/path/to/mrtr-server --port 18093" \ + npx vitest run src/scenarios/server/mrtr/all-scenarios.test.ts +``` + +## Open issues + +### `mrtr-tasks-composition` deferred + +SEP-2663 commit `451f5e1` (Apr 30) made the MRTR → Tasks composition +flow normative: a `tools/call` MAY exchange `IncompleteResult` rounds +to gather input, then return `CreateTaskResult` to go async on a +subsequent round. Two blockers prevent enabling the check today: + +1. **Spec watch — discriminator value.** SEP-2322 (MRTR base) and + SEP-2663 (Tasks Extension) currently disagree on the wire value for + the "needs more input" discriminator: SEP-2322's draft uses + `"input_required"`, SEP-2663's draft uses `"incomplete"`. Awaiting + alignment between the SEP authors. The current literal lives in + `MRTR_INCOMPLETE_RESULT_TYPE` (helpers.ts) so it's a one-line flip + when the spec converges. + +2. **Reference-impl gap.** The natural server-side implementation + pattern for tasks (mint task up-front, run handler in a goroutine / + async task) means the handler's `IncompleteResult` signal isn't + visible to the middleware in time — by the time the handler returns + `IsIncomplete`, the `CreateTaskResult` is already on the wire. SDKs + in any language need an inverted middleware pattern that runs the + first round synchronously and only spins up the task once the + handler signals async-promotion. 
+ ([panyam/mcpkit issue 347](https://github.com/panyam/mcpkit/issues/347) + tracks this for one example impl; SDKs in any language hit the + same architectural choice.) + +The check is registered with `status: 'SKIPPED'` so it's discoverable +but doesn't fail conformance runs. When both blockers resolve, remove +the SKIPPED short-circuit in `ephemeral-flow.ts` Check 8. + +## Design notes + +### Why the MRTR scenarios share helpers with `tasks/` + +`MRTR_INCOMPLETE_RESULT_TYPE`, the result-type predicates +(`isIncompleteResult`, `isCompleteResult`), and the elicitation/sampling/ +roots mocks live in `mrtr/helpers.ts`. The raw-fetch primitives +(`initRawSession`, `rawRequest`) are imported from the sibling +`../tasks/helpers` because both scenario sets share the same wire-shape +problem (SDK Zod schemas strip extension fields). When the upstream +SDK gains schemas for SEP-2322 / SEP-2663 shapes, those import paths +collapse back into the SDK. diff --git a/src/scenarios/server/tasks/README.md b/src/scenarios/server/tasks/README.md new file mode 100644 index 0000000..f145279 --- /dev/null +++ b/src/scenarios/server/tasks/README.md @@ -0,0 +1,197 @@ +# SEP-2663 Tasks Extension — Server Conformance + +Tests any MCP server that implements the `io.modelcontextprotocol/tasks` +extension (SEP-2663) plus the SEP-2322 base types it builds on, the +SEP-2575 per-request capability override, and the SEP-2243 routing +headers. + +The scenarios assert what the spec text says — not what any particular +implementation does. When the SDK schemas in +`@modelcontextprotocol/sdk/types.js` lag the spec, scenarios bypass +the SDK and use raw `fetch` so the SEP-2663 wire fields (`resultType`, +`taskId`, `inputRequests`, `requestState`, inlined `result`/`error`) +aren't stripped. 
+ +## Specs covered + +| SEP | What it adds | Where it shows up | +| -------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------- | +| SEP-2663 | Tasks Extension — `io.modelcontextprotocol/tasks` capability, flat `CreateTaskResult` (`Result & Task`), `DetailedTask` on `tasks/get` (with inlined result/error/inputRequests/requestState), `tasks/update` for MRTR resume, ack-only `tasks/cancel`, wire-field renames (`ttlSeconds`, `pollIntervalMilliseconds`) | every scenario | +| SEP-2322 | MRTR base types — `inputRequests`/`inputResponses` keyed maps, `requestState`, `resultType` discriminator (`"task"`/`"complete"`/`"incomplete"`) | request-state, mrtr-input, dispatch | +| SEP-2575 | Per-request capability override via `_meta.io.modelcontextprotocol/clientCapabilities` | capability | +| SEP-2243 | Server tolerates `Mcp-Method` / `Mcp-Name` request headers as informational routing metadata; body is authoritative | headers | + +## ClientScenario classes + +Per the AGENTS.md "fewer scenarios, more checks" rule, related checks +are bundled into one scenario class with multiple `ConformanceCheck` +records. Each row below is one class. + +### `tasks-lifecycle` (`lifecycle.ts`) + +Sync vs async dispatch, DetailedTask shape on tasks/get, tool errors +vs protocol errors, cancellation semantics. 
+ +| Check | What it tests | +| ------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------ | +| `tasks-sync-tool-call` | Sync tool returns `resultType:"complete"`; no top-level `taskId` | +| `tasks-server-task-creation` | Task-supporting tool returns flat `CreateTaskResult` (no nested `task` wrapper); MUST NOT carry `result`/`error`/`inputRequests` on the envelope | +| `tasks-get-during-working` | `tasks/get` on an active task returns status + metadata | +| `tasks-get-terminal-inlined-result` | Completed task `tasks/get` inlines `result.content[]` (no separate `tasks/result`) | +| `tasks-tool-error-completed-iserror` | Tool execution errors → `status:"completed"` + `result.isError:true` (NOT `failed`) | +| `tasks-protocol-error-failed-shape` | Protocol errors → `status:"failed"` with inlined `error{code,message}`; no `result` | +| `tasks-cancel-empty-ack` | `tasks/cancel` returns `{resultType:"complete"}`; status settles to cancelled | +| `tasks-cancel-terminal-rejected` | `tasks/cancel` on a terminal task returns `-32602` (clarified in spec commit `d963ad0`) | + +### `tasks-capability-negotiation` (`capability.ts`) + +| Check | What it tests | +| ----------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------ | +| `tasks-extension-advertised` | Server advertises `io.modelcontextprotocol/tasks` under `capabilities.extensions`; v1 `capabilities.tasks` slot stays absent | +| `tasks-methods-gated-without-extension` | `tasks/get`, `tasks/update`, `tasks/cancel` return `-32601` for sessions that didn't negotiate the extension | +| `tasks-tools-call-without-extension-sync` | `tools/call` from a non-negotiated session falls through to sync (no `CreateTaskResult`) | +| `tasks-per-request-meta-opt-in` | SEP-2575 — per-request 
`_meta.io.modelcontextprotocol/clientCapabilities` produces `CreateTaskResult` even without session-level extension | + +### `tasks-wire-fields` (`wire-fields.ts`) + +| Check | What it tests | +| ---------------------------------------------- | -------------------------------------------------------------------------------------------- | +| `tasks-wire-field-renames` | `ttlSeconds` + `pollIntervalMilliseconds` present; legacy `ttl` / `pollInterval` keys absent | +| `tasks-no-early-ttl-expiry` | Task remains accessible via `tasks/get` for the duration of its `ttlSeconds` | +| `tasks-no-related-task-meta-on-inlined-result` | v1 `io.modelcontextprotocol/related-task` `_meta` key absent on tasks/get's inlined `result` | + +### `tasks-request-state` (`request-state.ts`) + +| Check | What it tests | +| ------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | +| `tasks-request-state-shape` | When emitted, `requestState` is a non-empty string (`INFO` if server omits it; emission is optional per SEP-2322) | +| `tasks-request-state-echo` | Server accepts `tasks/get` with the previously-emitted `requestState` echoed back | +| `tasks-request-state-stale-tolerance` | Earlier (stale-but-still-valid) `requestState` MUST still be accepted after a newer one is minted | + +### `tasks-mrtr-input` (`mrtr-input.ts`) + +| Check | What it tests | +| ---------------------------------------- | --------------------------------------------------------------------------------------------------------------- | +| `tasks-mrtr-input-requests-on-tasks-get` | `tasks/get` on `input_required` task surfaces non-empty `inputRequests` map | +| `tasks-mrtr-tasks-update-resumes` | `tasks/update` with matching `inputResponses` is acked with `{resultType:"complete"}`; task resumes to terminal | +| `tasks-mrtr-partial-fulfillment` | A subset-of-keys `tasks/update` keeps the task in `input_required` with only 
the unanswered key remaining | + +### `tasks-request-headers` (`headers.ts`) + +| Check | What it tests | +| ----------------------------------------------------- | ------------------------------------------------------------------------------------------------ | +| `tasks-headers-tolerate-mcp-method-on-tools-call` | Server tolerates `Mcp-Method` request header on `tools/call` (sync dispatch unaffected) | +| `tasks-headers-tolerate-routing-headers-on-tasks-get` | Server tolerates `Mcp-Method` + `Mcp-Name` request headers on `tasks/get` (body taskId resolves) | +| `tasks-headers-body-method-authoritative` | When `Mcp-Method` header disagrees with body, server MUST dispatch on body method | + +> SEP-2243 defines these as **request** headers (client → server) used by HTTP infrastructure for routing. Whether the server _also_ echoes them on responses for downstream observability is implementation-defined and out of scope here. + +### `tasks-dispatch-and-envelope` (`dispatch.ts`) + +| Check | What it tests | +| -------------------------------------------------- | -------------------------------------------------------------------------------------------------------- | +| `tasks-removed-tasks-result` | `tasks/result` removed in v2 → `-32601` | +| `tasks-removed-tasks-list` | `tasks/list` removed in v2 → `-32601` | +| `tasks-server-directed-creation-no-hint` | `tools/call` without client `task` hint still produces `CreateTaskResult` | +| `tasks-legacy-task-param-ignored` | Legacy v1 `task` param tolerated AND ignored on a sync tool (no error, no promotion) | +| `tasks-immediate-result-shortcut` | Fast operation MAY skip task creation and return a sync `ToolResult` | +| `tasks-result-type-complete-on-non-task-responses` | Sync `tools/call`, `tasks/get`, `tasks/update` ack, `tasks/cancel` ack all carry `resultType:"complete"` | +| `tasks-strong-consistency-immediate-get` | `tasks/get` immediately after `CreateTaskResult` MUST resolve (no -32602) | +| 
`tasks-get-unknown-task-id-rejected` | `tasks/get` with unknown taskId returns `-32602` | + +### `tasks-status-notifications` (`notifications.ts`) + +| Check | What it tests | +| ---------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `tasks-status-notifications-shape` | Optional check — when sent, each `notifications/tasks/status` carries `taskId` + `status`; terminal notifications SHOULD inline `result` (DetailedTask) | + +> Notifications are optional per SEP-2663. The check emits `INFO` (not `FAILURE`) when no notifications are received, so a server that doesn't implement the optional path stays conformant. + +## Required server fixtures + +The fixture server MUST register these tools: + +| Tool | Behavior | +| -------------------- | --------------------------------------------------------------------------------------- | +| `greet` | Sync — returns `Hello, {name}!` | +| `slow_compute` | Async — `seconds`-second sleep, returns result; `seconds:0` for immediate path | +| `failing_job` | Async — always returns tool error after ~1s | +| `protocol_error_job` | Async — panics, surfaces as protocol error | +| `confirm_delete` | Async — calls `TaskElicit` (single inputRequest) | +| `multi_input` | Async — fans out two `TaskElicit` calls in parallel (used by partial-fulfillment check) | + +The fixture can be implemented in any language; one example reference +implementation lives at +[`panyam/mcpkit/examples/tasks-v2`](https://github.com/panyam/mcpkit/tree/main/examples/tasks-v2). + +## Running + +The runner is brand-neutral and language-agnostic — it just shells out +to a command line and waits for the URL to become reachable. 
+ +### Against an already-running server + +```bash +TASKS_SERVER_URL=http://localhost:8080/mcp \ + npx vitest run src/scenarios/server/tasks/all-scenarios.test.ts +``` + +### Auto-spawn a fixture in `beforeAll` + +```bash +TASKS_SERVER_URL=http://localhost:18092/mcp \ +TASKS_SERVER_CMD="/path/to/tasks-server --port 18092" \ + npx vitest run src/scenarios/server/tasks/all-scenarios.test.ts +``` + +If `TASKS_SERVER_URL` is unset, the suite is `describe.skip`'d so CI +runs against the upstream `everything-server` stay green until that +fixture grows SEP-2663 support. + +## Open spec questions + +Where the spec is silent or ambiguous, this suite picks the louder / +safer option (typically `-32602` over silent ack) so a misbehaving +server fails loudly rather than appearing well-formed. Today: + +1. **Invalid `requestState`** — silent ack vs `-32602`. Suite asserts `-32602` (a server that silently accepts a forged token is a security hazard). +2. **SEP-2575 per-request capabilities envelope shape** — covered by `tasks-per-request-meta-opt-in`; the suite asserts only the observable behavior (`CreateTaskResult` produced) so the inner shape can evolve without churn. +3. **`tasks/update` / `tasks/cancel` for unknown taskId** — silent ack vs `-32602`. The suite asserts `-32602` only where the spec is explicit: `tasks/get` with an unknown taskId (`tasks-get-unknown-task-id-rejected`) and `tasks/cancel` on a terminal task (`tasks-cancel-terminal-rejected`). The upstream wording for `tasks/update` / `tasks/cancel` on an unknown taskId is too soft to assert against here.
+ +## Wire-format diff vs MCP Tasks v1 (spec 2025-11-25) + +| Aspect | v1 | SEP-2663 | +| -------------------------- | ------------------------------ | ---------------------------------------------------------------------------------------------- | +| Capability slot | `capabilities.tasks` | `capabilities.extensions["io.modelcontextprotocol/tasks"]` | +| Client opt-in | (none) | MUST declare extension at session OR per-request (SEP-2575) | +| Task creation | Client sends `task` hint param | Server decides unilaterally | +| `resultType` discriminator | absent | `"task"` (CreateTaskResult) / `"complete"` (everything else) / `"incomplete"` (MRTR ephemeral) | +| `CreateTaskResult` shape | `{task: {...}}` (nested) | flat: `{resultType, taskId, status, ttlSeconds, ...}` (no nested wrapper) | +| `tasks/get` response | flat `TaskInfo` only | `DetailedTask` with inlined `result`/`error`/`inputRequests`/`requestState` | +| `tasks/update` | n/a | new — MRTR resume path, returns `{resultType:"complete"}` ack | +| `tasks/cancel` response | rich task envelope | `{resultType:"complete"}` ack (no task state) | +| `tasks/result` | separate blocking method | **removed** (result inlined on `tasks/get`) | +| `tasks/list` | session-scoped list | **removed** | +| TTL field | `ttl` (ms by convention) | `ttlSeconds` (units in name) | +| Poll-interval field | `pollInterval` | `pollIntervalMilliseconds` | +| `parentTaskId` | present | removed | +| Tool errors | `status:failed` | `status:completed, result.isError:true` | +| Mcp-Name HTTP header | not set | request-side routing header (SEP-2243) | + +## Design notes + +### Raw fetch escape hatch + +The MCP TS SDK ships with strict Zod schemas that strip SEP-2663 / +SEP-2322 wire fields from responses (`resultType`, `taskId`, +`inputRequests`, `requestState`, inlined result/error). Scenarios that +exercise those fields use the raw-fetch helpers in `helpers.ts` rather +than the SDK client. 
When the SDK gains schemas for the SEP-2663 +shapes, those call sites switch back to +`client.request(..., AnyResult)` and the helpers shrink (or disappear). + +### Severity follows the spec keyword + +Per AGENTS.md: MUST / MUST NOT → `FAILURE`; SHOULD / SHOULD NOT → +`WARNING`; optional emission with no presence → `INFO`. CI treats +`WARNING` as a failure, so SHOULD-level requirements still gate. From b99b877242b9716a950a31b5a8e737e69bcc4dfc Mon Sep 17 00:00:00 2001 From: Sri Panyam Date: Wed, 6 May 2026 12:50:10 -0700 Subject: [PATCH 5/7] tasks: assert createdAt + lastUpdatedAt; factor _shared/ helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two reviewer-driven additions: 1. SEP-2663 createdAt / lastUpdatedAt ISO-8601 assertion in `tasks-server-task-creation` (per Luca's PR #262 review feedback). The check now flags servers that emit non-ISO timestamps (epoch seconds, RFC-2822, etc.) on TaskInfoV2 envelopes. 2. Factor cross-cutting test-harness helpers into _shared/: - `_shared/test-runner.ts` — `waitForServerReady` (renamed from `waitForTcpReady`; the call site cares about server readiness, not the TCP-poll mechanism). Imported by tasks/ and mrtr/ all-scenarios.test.ts; replaces ~30 LOC of inline duplication in each. - `_shared/wire-format.ts` — `ISO_8601_PATTERN` constant + `isIso8601(s)` predicate. Documented rationale for choosing a regex over `Date.parse` (too permissive), `new Date(s).toISOString()` (too strict), or `Temporal.Instant.from` (Node 24+ experimental). Future wire-shape predicates (data URI, percent-encoded filename, etc.) can land here. Cherry-pick footprint when graduating to upstream PR is the SEP folder + the imported `_shared/` files. First PR through carries them upstream; subsequent feat branches inherit via standard upstream-sync flow. All 9 scenario tests still pass against the Go reference fixtures. 
--- src/scenarios/server/_shared/test-runner.ts | 56 +++++++++++++++++++ src/scenarios/server/_shared/wire-format.ts | 33 +++++++++++ .../server/mrtr/all-scenarios.test.ts | 38 +------------ .../server/tasks/all-scenarios.test.ts | 43 +------------- src/scenarios/server/tasks/lifecycle.ts | 14 +++++ 5 files changed, 107 insertions(+), 77 deletions(-) create mode 100644 src/scenarios/server/_shared/test-runner.ts create mode 100644 src/scenarios/server/_shared/wire-format.ts diff --git a/src/scenarios/server/_shared/test-runner.ts b/src/scenarios/server/_shared/test-runner.ts new file mode 100644 index 0000000..5eb044b --- /dev/null +++ b/src/scenarios/server/_shared/test-runner.ts @@ -0,0 +1,56 @@ +/** + * Test-runner utilities for server-conformance scenarios. + * + * Used by `*.test.ts` runner files that auto-spawn a fixture binary + * before running scenarios. These helpers are language-agnostic and + * harness-only — they don't touch MCP protocol, so they don't belong + * in the SDK. + * + * Single responsibility today: TCP readiness polling. Spawn / cleanup + * scaffolding stays inline in each runner so the file reads top-to-bottom + * without indirection (per AGENTS.md "repetitive check blocks are fine"). + */ + +import { connect } from 'net'; + +/** + * Poll the host/port of the given URL until a TCP connection succeeds + * or the timeout elapses. Language-agnostic readiness check — works + * for any server that binds before serving requests. + */ +export async function waitForServerReady( + url: string, + timeoutMs: number +): Promise<void> { + const u = new URL(url); + const port = parseInt(u.port || (u.protocol === 'https:' ? 
'443' : '80'), 10); + const host = u.hostname; + const deadline = Date.now() + timeoutMs; + let lastErr: Error | null = null; + + while (Date.now() < deadline) { + try { + await new Promise<void>((resolve, reject) => { + const socket = connect({ host, port }, () => { + socket.end(); + resolve(); + }); + socket.once('error', (err) => { + socket.destroy(); + reject(err); + }); + socket.setTimeout(1_000, () => { + socket.destroy(); + reject(new Error('connect timeout')); + }); + }); + return; + } catch (err) { + lastErr = err as Error; + await new Promise((r) => setTimeout(r, 200)); + } + } + throw new Error( + `${host}:${port} did not accept TCP connections (last: ${lastErr?.message ?? 'unknown'})` + ); +} diff --git a/src/scenarios/server/_shared/wire-format.ts b/src/scenarios/server/_shared/wire-format.ts new file mode 100644 index 0000000..ea37bc7 --- /dev/null +++ b/src/scenarios/server/_shared/wire-format.ts @@ -0,0 +1,33 @@ +/** + * Wire-format validation helpers shared across server-conformance + * scenarios. Pure predicates / regex — no I/O, no async. + * + * Pragmatic choices documented per helper. When validation needs + * tighten (e.g., the spec mandates a stricter timestamp format), edit + * here once and every scenario picks it up. + */ + +/** + * ISO-8601 timestamp prefix (YYYY-MM-DDThh:mm:ss). Tolerant about + * the timezone tail (`Z`, `+00:00`, `+0000`) and sub-second precision — + * matches what real servers emit (Go `time.RFC3339Nano`, + * Python `datetime.isoformat()`, JavaScript `toISOString()`). + * + * Why a regex over `Date.parse` / `new Date(s).toISOString() === s` / + * `Temporal.Instant.from`: + * - `Date.parse` accepts RFC-2822, "May 4 2026", and other + * non-ISO strings — too permissive. + * - `new Date(s).toISOString() === s` is too strict — rejects + * valid `+00:00`-style offsets that don't survive the canonical + * `Z` round-trip. + * - `Temporal.Instant.from` is Node 24+ experimental. 
+ * + * Swap this constant for a stdlib validator if/when one becomes + * broadly available. + */ +export const ISO_8601_PATTERN = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}/; + +/** Returns true when the input is a string that starts with an ISO-8601 `YYYY-MM-DDThh:mm:ss` prefix; the tail is not validated. */ +export function isIso8601(s: unknown): boolean { + return typeof s === 'string' && ISO_8601_PATTERN.test(s); +} diff --git a/src/scenarios/server/mrtr/all-scenarios.test.ts b/src/scenarios/server/mrtr/all-scenarios.test.ts index dd1e6bb..1e8154a 100644 --- a/src/scenarios/server/mrtr/all-scenarios.test.ts +++ b/src/scenarios/server/mrtr/all-scenarios.test.ts @@ -22,9 +22,9 @@ */ import { spawn, ChildProcess } from 'child_process'; -import { connect } from 'net'; import { describe, it, expect, beforeAll, afterAll } from 'vitest'; import { MrtrEphemeralFlowScenario } from './ephemeral-flow'; +import { waitForServerReady } from '../_shared/test-runner'; const SERVER_URL = process.env.MRTR_SERVER_URL; const SERVER_CMD = process.env.MRTR_SERVER_CMD; @@ -64,7 +64,7 @@ describeIfTarget('SEP-2322 MRTR — server conformance', () => { } }); - await waitForTcpReady(SERVER_URL!, SERVER_STARTUP_TIMEOUT_MS).catch( + await waitForServerReady(SERVER_URL!, SERVER_STARTUP_TIMEOUT_MS).catch( (err) => { if (serverProcess && !serverProcess.killed) { serverProcess.kill('SIGKILL'); @@ -113,37 +113,3 @@ describeIfTarget('SEP-2322 MRTR — server conformance', () => { }); } }); - -async function waitForTcpReady(url: string, timeoutMs: number): Promise<void> { - const u = new URL(url); - const port = parseInt(u.port || (u.protocol === 'https:' ? 
'443' : '80'), 10); - const host = u.hostname; - const deadline = Date.now() + timeoutMs; - let lastErr: Error | null = null; - - while (Date.now() < deadline) { - try { - await new Promise<void>((resolve, reject) => { - const socket = connect({ host, port }, () => { - socket.end(); - resolve(); - }); - socket.once('error', (err) => { - socket.destroy(); - reject(err); - }); - socket.setTimeout(1_000, () => { - socket.destroy(); - reject(new Error('connect timeout')); - }); - }); - return; - } catch (err) { - lastErr = err as Error; - await new Promise((r) => setTimeout(r, 200)); - } - } - throw new Error( - `${host}:${port} did not accept TCP connections (last: ${lastErr?.message ?? 'unknown'})` - ); -} diff --git a/src/scenarios/server/tasks/all-scenarios.test.ts b/src/scenarios/server/tasks/all-scenarios.test.ts index 76136f8..d6ad16d 100644 --- a/src/scenarios/server/tasks/all-scenarios.test.ts +++ b/src/scenarios/server/tasks/all-scenarios.test.ts @@ -28,7 +28,6 @@ */ import { spawn, ChildProcess } from 'child_process'; -import { connect } from 'net'; import { describe, it, expect, beforeAll, afterAll } from 'vitest'; import { TasksLifecycleScenario } from './lifecycle'; import { TasksCapabilityNegotiationScenario } from './capability'; @@ -38,6 +37,7 @@ import { TasksMRTRInputScenario } from './mrtr-input'; import { TasksRequestHeadersScenario } from './headers'; import { TasksDispatchScenario } from './dispatch'; import { TasksStatusNotificationsScenario } from './notifications'; +import { waitForServerReady } from '../_shared/test-runner'; const SERVER_URL = process.env.TASKS_SERVER_URL; const SERVER_CMD = process.env.TASKS_SERVER_CMD; @@ -86,7 +86,7 @@ describeIfTarget('SEP-2663 Tasks — server conformance', () => { } }); - await waitForTcpReady(SERVER_URL!, SERVER_STARTUP_TIMEOUT_MS).catch( + await waitForServerReady(SERVER_URL!, SERVER_STARTUP_TIMEOUT_MS).catch( (err) => { if (serverProcess && !serverProcess.killed) { serverProcess.kill('SIGKILL'); @@ -135,42 
+135,3 @@ describeIfTarget('SEP-2663 Tasks — server conformance', () => { }); } }); - -/** - * Poll the host/port of the given URL until a TCP connection succeeds - * or the timeout elapses. Language-agnostic readiness check — works - * for any server that binds before serving requests. - */ -async function waitForTcpReady(url: string, timeoutMs: number): Promise<void> { - const u = new URL(url); - const port = parseInt(u.port || (u.protocol === 'https:' ? '443' : '80'), 10); - const host = u.hostname; - const deadline = Date.now() + timeoutMs; - let lastErr: Error | null = null; - - while (Date.now() < deadline) { - try { - await new Promise<void>((resolve, reject) => { - const socket = connect({ host, port }, () => { - socket.end(); - resolve(); - }); - socket.once('error', (err) => { - socket.destroy(); - reject(err); - }); - socket.setTimeout(1_000, () => { - socket.destroy(); - reject(new Error('connect timeout')); - }); - }); - return; - } catch (err) { - lastErr = err as Error; - await new Promise((r) => setTimeout(r, 200)); - } - } - throw new Error( - `${host}:${port} did not accept TCP connections (last: ${lastErr?.message ?? 'unknown'})` - ); -} diff --git a/src/scenarios/server/tasks/lifecycle.ts b/src/scenarios/server/tasks/lifecycle.ts index a59adce..21c5a13 100644 --- a/src/scenarios/server/tasks/lifecycle.ts +++ b/src/scenarios/server/tasks/lifecycle.ts @@ -30,6 +30,7 @@ import { rawRequest, waitForTerminal } from './helpers'; +import { isIso8601 } from '../_shared/wire-format'; export class TasksLifecycleScenario implements ClientScenario { name = 'tasks-lifecycle'; @@ -199,6 +200,19 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under 'CreateTaskResult MUST NOT carry `inputRequests` (lives on tasks/get DetailedTask)' ); } + // Timestamps — both keys present, both ISO-8601 formatted. Per + // SEP-2663 these are required on every TaskInfoV2. See + // `_shared/wire-format.ts` for the regex rationale. 
+ if (!isIso8601(result.createdAt)) { + errs.push( + `createdAt MUST be an ISO-8601 string; got ${JSON.stringify(result.createdAt)}` + ); + } + if (!isIso8601(result.lastUpdatedAt)) { + errs.push( + `lastUpdatedAt MUST be an ISO-8601 string; got ${JSON.stringify(result.lastUpdatedAt)}` + ); + } if (result.taskId) workingTaskId = result.taskId; checks.push({ id, From ac31214e49cf26d473500d23c0bd77913838df96 Mon Sep 17 00:00:00 2001 From: Sri Panyam Date: Wed, 6 May 2026 14:22:42 -0700 Subject: [PATCH 6/7] refactor(tasks,mrtr): use SDK Client + AnyResult instead of raw-fetch helpers Drops initRawSession/rawRequest/rawRequestFull from tasks/helpers.ts in favor of the SDK's Client + StreamableHTTPClientTransport, paired with a Zod passthrough schema (AnyResult) that preserves SEP-2663 / SEP-2322 draft fields the SDK's typed schemas would strip. headers.ts and notifications.ts keep a small inline fetch where the SDK can't reach: per-request HTTP headers (SEP-2243) and SSE notification observation. Both reuse the SDK session via transport.sessionId. All SEP-2663 + MRTR ephemeral-flow scenarios pass against the Go fixture. 
--- src/scenarios/server/mrtr/README.md | 14 +- src/scenarios/server/mrtr/ephemeral-flow.ts | 272 +++++++++++--------- src/scenarios/server/tasks/capability.ts | 101 +++++--- src/scenarios/server/tasks/dispatch.ts | 237 ++++++++--------- src/scenarios/server/tasks/headers.ts | 131 +++++++--- src/scenarios/server/tasks/helpers.ts | 234 +++-------------- src/scenarios/server/tasks/lifecycle.ts | 167 ++++++------ src/scenarios/server/tasks/mrtr-input.ts | 167 ++++++------ src/scenarios/server/tasks/notifications.ts | 27 +- src/scenarios/server/tasks/request-state.ts | 79 +++--- src/scenarios/server/tasks/wire-fields.ts | 68 ++--- 11 files changed, 740 insertions(+), 757 deletions(-) diff --git a/src/scenarios/server/mrtr/README.md b/src/scenarios/server/mrtr/README.md index 8e1bf53..ad3df99 100644 --- a/src/scenarios/server/mrtr/README.md +++ b/src/scenarios/server/mrtr/README.md @@ -103,9 +103,11 @@ the SKIPPED short-circuit in `ephemeral-flow.ts` Check 8. `MRTR_INCOMPLETE_RESULT_TYPE`, the result-type predicates (`isIncompleteResult`, `isCompleteResult`), and the elicitation/sampling/ -roots mocks live in `mrtr/helpers.ts`. The raw-fetch primitives -(`initRawSession`, `rawRequest`) are imported from the sibling -`../tasks/helpers` because both scenario sets share the same wire-shape -problem (SDK Zod schemas strip extension fields). When the upstream -SDK gains schemas for SEP-2322 / SEP-2663 shapes, those import paths -collapse back into the SDK. +roots mocks live in `mrtr/helpers.ts`. The shared `AnyResult` Zod +passthrough schema and `waitForTerminal`/`waitForStatus` polling helpers +are imported from the sibling `../tasks/helpers` because both scenario +sets share the same wire-shape problem (SDK Zod schemas strip extension +fields). Pair `client.request(req, AnyResult)` with the SDK's +`StreamableHTTPClientTransport` and you preserve every SEP-2322 / SEP-2663 +field. 
When the upstream SDK gains schemas for those shapes, the +passthrough disappears in favor of the typed schemas directly. diff --git a/src/scenarios/server/mrtr/ephemeral-flow.ts b/src/scenarios/server/mrtr/ephemeral-flow.ts index 8deaf68..51025a7 100644 --- a/src/scenarios/server/mrtr/ephemeral-flow.ts +++ b/src/scenarios/server/mrtr/ephemeral-flow.ts @@ -19,13 +19,16 @@ * server re-requests on wrong key */ +import { Client } from '@modelcontextprotocol/sdk/client/index.js'; +import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js'; + import { ClientScenario, ConformanceCheck, ScenarioSpecTag, DRAFT_PROTOCOL_VERSION } from '../../../types'; -import { initRawSession, rawRequest } from '../tasks/helpers'; +import { AnyResult } from '../tasks/helpers'; import { MRTR_INCOMPLETE_RESULT_TYPE, SEP_2322_REF, @@ -86,15 +89,19 @@ Every \`tools/call\` response in the MRTR contract is one of: async run(serverUrl: string): Promise { const checks: ConformanceCheck[] = []; - let sessionId: string; + let client: Client; try { - ({ sessionId } = await initRawSession(serverUrl, { - capabilities: { - elicitation: {}, - sampling: {}, - roots: {} + client = new Client( + { name: 'mcp-conformance', version: '1.0' }, + { + capabilities: { + elicitation: {}, + sampling: {}, + roots: {} + } } - })); + ); + await client.connect(new StreamableHTTPClientTransport(new URL(serverUrl))); } catch (error) { checks.push({ id: 'mrtr-session-bootstrap', @@ -116,12 +123,13 @@ Every \`tools/call\` response in the MRTR contract is one of: const description = 'tools/call returns IncompleteResult on round 1 (elicitation/create); completes on round 2 with the answer reflected in the result'; try { - const r1 = await rawRequest( - serverUrl, - 'tools/call', - { name: 'test_tool_with_elicitation', arguments: {} }, - { sessionId } - ); + const r1 = (await client.request( + { + method: 'tools/call', + params: { name: 'test_tool_with_elicitation', arguments: {} } + }, 
+ AnyResult + )) as any; const errs: string[] = []; if (!isIncompleteResult(r1)) { errs.push( @@ -143,21 +151,22 @@ Every \`tools/call\` response in the MRTR contract is one of: ); } - const r2 = await rawRequest( - serverUrl, - 'tools/call', + const r2 = (await client.request( { - name: 'test_tool_with_elicitation', - arguments: {}, - inputResponses: { - user_name: mockElicitResponse({ name: 'Alice' }) - }, - ...(r1.requestState !== undefined - ? { requestState: r1.requestState } - : {}) + method: 'tools/call', + params: { + name: 'test_tool_with_elicitation', + arguments: {}, + inputResponses: { + user_name: mockElicitResponse({ name: 'Alice' }) + }, + ...(r1.requestState !== undefined + ? { requestState: r1.requestState } + : {}) + } }, - { sessionId } - ); + AnyResult + )) as any; if (!isCompleteResult(r2)) { errs.push(`round 2 MUST be complete; got ${JSON.stringify(r2)}`); } @@ -188,12 +197,13 @@ Every \`tools/call\` response in the MRTR contract is one of: const description = 'IncompleteResult with sampling/createMessage round-trips through the inputResponses retry'; try { - const r1 = await rawRequest( - serverUrl, - 'tools/call', - { name: 'test_incomplete_result_sampling', arguments: {} }, - { sessionId } - ); + const r1 = (await client.request( + { + method: 'tools/call', + params: { name: 'test_incomplete_result_sampling', arguments: {} } + }, + AnyResult + )) as any; const errs: string[] = []; if (!isIncompleteResult(r1)) { errs.push('round 1 MUST be IncompleteResult'); @@ -204,19 +214,20 @@ Every \`tools/call\` response in the MRTR contract is one of: `inputRequest method MUST be "sampling/createMessage"; got ${JSON.stringify(r1.inputRequests[key].method)}` ); } - const r2 = await rawRequest( - serverUrl, - 'tools/call', + const r2 = (await client.request( { - name: 'test_incomplete_result_sampling', - arguments: {}, - inputResponses: { [key]: mockSamplingResponse('Paris') }, - ...(r1.requestState !== undefined - ? 
{ requestState: r1.requestState } - : {}) + method: 'tools/call', + params: { + name: 'test_incomplete_result_sampling', + arguments: {}, + inputResponses: { [key]: mockSamplingResponse('Paris') }, + ...(r1.requestState !== undefined + ? { requestState: r1.requestState } + : {}) + } }, - { sessionId } - ); + AnyResult + )) as any; if (!isCompleteResult(r2)) { errs.push('round 2 MUST be complete'); } @@ -242,12 +253,13 @@ Every \`tools/call\` response in the MRTR contract is one of: const description = 'IncompleteResult with roots/list round-trips through the inputResponses retry'; try { - const r1 = await rawRequest( - serverUrl, - 'tools/call', - { name: 'test_incomplete_result_list_roots', arguments: {} }, - { sessionId } - ); + const r1 = (await client.request( + { + method: 'tools/call', + params: { name: 'test_incomplete_result_list_roots', arguments: {} } + }, + AnyResult + )) as any; const errs: string[] = []; if (!isIncompleteResult(r1)) { errs.push('round 1 MUST be IncompleteResult'); @@ -258,19 +270,20 @@ Every \`tools/call\` response in the MRTR contract is one of: `inputRequest method MUST be "roots/list"; got ${JSON.stringify(r1.inputRequests[key].method)}` ); } - const r2 = await rawRequest( - serverUrl, - 'tools/call', + const r2 = (await client.request( { - name: 'test_incomplete_result_list_roots', - arguments: {}, - inputResponses: { [key]: mockListRootsResponse() }, - ...(r1.requestState !== undefined - ? { requestState: r1.requestState } - : {}) + method: 'tools/call', + params: { + name: 'test_incomplete_result_list_roots', + arguments: {}, + inputResponses: { [key]: mockListRootsResponse() }, + ...(r1.requestState !== undefined + ? 
{ requestState: r1.requestState } + : {}) + } }, - { sessionId } - ); + AnyResult + )) as any; if (!isCompleteResult(r2)) { errs.push('round 2 MUST be complete'); } @@ -296,12 +309,13 @@ Every \`tools/call\` response in the MRTR contract is one of: const description = 'When server emits requestState on round 1, it MUST be a non-empty string and the server MUST validate the echo on round 2'; try { - const r1 = await rawRequest( - serverUrl, - 'tools/call', - { name: 'test_incomplete_result_request_state', arguments: {} }, - { sessionId } - ); + const r1 = (await client.request( + { + method: 'tools/call', + params: { name: 'test_incomplete_result_request_state', arguments: {} } + }, + AnyResult + )) as any; const errs: string[] = []; if (!isIncompleteResult(r1)) { errs.push('round 1 MUST be IncompleteResult'); @@ -317,17 +331,18 @@ Every \`tools/call\` response in the MRTR contract is one of: } const key = Object.keys(r1.inputRequests ?? {})[0]; if (key) { - const r2 = await rawRequest( - serverUrl, - 'tools/call', + const r2 = (await client.request( { - name: 'test_incomplete_result_request_state', - arguments: {}, - inputResponses: { [key]: mockElicitResponse({ ok: true }) }, - requestState: r1.requestState + method: 'tools/call', + params: { + name: 'test_incomplete_result_request_state', + arguments: {}, + inputResponses: { [key]: mockElicitResponse({ ok: true }) }, + requestState: r1.requestState + } }, - { sessionId } - ); + AnyResult + )) as any; if (!isCompleteResult(r2)) { errs.push('round 2 MUST be complete after valid requestState echo'); } @@ -360,12 +375,13 @@ Every \`tools/call\` response in the MRTR contract is one of: const description = 'A single IncompleteResult MAY carry inputRequests for elicitation/create + sampling/createMessage + roots/list together'; try { - const r1 = await rawRequest( - serverUrl, - 'tools/call', - { name: 'test_incomplete_result_multiple_inputs', arguments: {} }, - { sessionId } - ); + const r1 = (await client.request( + { 
+ method: 'tools/call', + params: { name: 'test_incomplete_result_multiple_inputs', arguments: {} } + }, + AnyResult + )) as any; const errs: string[] = []; if (!isIncompleteResult(r1)) { errs.push('round 1 MUST be IncompleteResult'); @@ -397,19 +413,20 @@ Every \`tools/call\` response in the MRTR contract is one of: else if (req.method === 'roots/list') inputResponses[key] = mockListRootsResponse(); } - const r2 = await rawRequest( - serverUrl, - 'tools/call', + const r2 = (await client.request( { - name: 'test_incomplete_result_multiple_inputs', - arguments: {}, - inputResponses, - ...(r1.requestState !== undefined - ? { requestState: r1.requestState } - : {}) + method: 'tools/call', + params: { + name: 'test_incomplete_result_multiple_inputs', + arguments: {}, + inputResponses, + ...(r1.requestState !== undefined + ? { requestState: r1.requestState } + : {}) + } }, - { sessionId } - ); + AnyResult + )) as any; if (!isCompleteResult(r2)) { errs.push('round 2 MUST be complete with all three answers'); } @@ -435,12 +452,13 @@ Every \`tools/call\` response in the MRTR contract is one of: const description = 'A handler may take 2+ MRTR rounds; each round mints a fresh requestState; final result MUST reflect answers from every round'; try { - const r1 = await rawRequest( - serverUrl, - 'tools/call', - { name: 'test_incomplete_result_multi_round', arguments: {} }, - { sessionId } - ); + const r1 = (await client.request( + { + method: 'tools/call', + params: { name: 'test_incomplete_result_multi_round', arguments: {} } + }, + AnyResult + )) as any; const errs: string[] = []; if (!isIncompleteResult(r1)) { errs.push('round 1 MUST be IncompleteResult'); @@ -450,17 +468,18 @@ Every \`tools/call\` response in the MRTR contract is one of: } const k1 = Object.keys(r1.inputRequests ?? 
{})[0]; - const r2 = await rawRequest( - serverUrl, - 'tools/call', + const r2 = (await client.request( { - name: 'test_incomplete_result_multi_round', - arguments: {}, - inputResponses: { [k1]: mockElicitResponse({ name: 'Alice' }) }, - requestState: r1.requestState + method: 'tools/call', + params: { + name: 'test_incomplete_result_multi_round', + arguments: {}, + inputResponses: { [k1]: mockElicitResponse({ name: 'Alice' }) }, + requestState: r1.requestState + } }, - { sessionId } - ); + AnyResult + )) as any; if (!isIncompleteResult(r2)) { errs.push('round 2 MUST still be IncompleteResult (asks for step2)'); } @@ -474,17 +493,18 @@ Every \`tools/call\` response in the MRTR contract is one of: } const k2 = Object.keys(r2.inputRequests ?? {})[0]; - const r3 = await rawRequest( - serverUrl, - 'tools/call', + const r3 = (await client.request( { - name: 'test_incomplete_result_multi_round', - arguments: {}, - inputResponses: { [k2]: mockElicitResponse({ color: 'blue' }) }, - requestState: r2.requestState + method: 'tools/call', + params: { + name: 'test_incomplete_result_multi_round', + arguments: {}, + inputResponses: { [k2]: mockElicitResponse({ color: 'blue' }) }, + requestState: r2.requestState + } }, - { sessionId } - ); + AnyResult + )) as any; if (!isCompleteResult(r3)) { errs.push('round 3 MUST be complete'); } @@ -518,16 +538,17 @@ Every \`tools/call\` response in the MRTR contract is one of: const description = 'When the client sends inputResponses with a key the server did not emit, the server SHOULD re-request via IncompleteResult'; try { - const r1 = await rawRequest( - serverUrl, - 'tools/call', + const r1 = (await client.request( { - name: 'test_incomplete_result_elicitation', - arguments: {}, - inputResponses: { wrong_key: mockElicitResponse({ data: 'wrong' }) } + method: 'tools/call', + params: { + name: 'test_incomplete_result_elicitation', + arguments: {}, + inputResponses: { wrong_key: mockElicitResponse({ data: 'wrong' }) } + } }, - { sessionId 
} - ); + AnyResult + )) as any; const errs: string[] = []; if (!isIncompleteResult(r1)) { errs.push( @@ -580,6 +601,7 @@ Every \`tools/call\` response in the MRTR contract is one of: }); } + await client.close().catch(() => {}); return checks; } } diff --git a/src/scenarios/server/tasks/capability.ts b/src/scenarios/server/tasks/capability.ts index 91615d7..c92c287 100644 --- a/src/scenarios/server/tasks/capability.ts +++ b/src/scenarios/server/tasks/capability.ts @@ -10,6 +10,9 @@ * - slow_compute — task-supporting, sleeps N seconds */ +import { Client } from '@modelcontextprotocol/sdk/client/index.js'; +import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js'; + import { ClientScenario, ConformanceCheck, @@ -20,10 +23,9 @@ import { TASKS_EXTENSION_ID, SEP_2663_REF, SEP_2575_REF, + AnyResult, errMsg, - failureCheck, - initRawSession, - rawRequest + failureCheck } from './helpers'; export class TasksCapabilityNegotiationScenario implements ClientScenario { @@ -60,18 +62,29 @@ export class TasksCapabilityNegotiationScenario implements ClientScenario { async run(serverUrl: string): Promise { const checks: ConformanceCheck[] = []; - // Two sessions: one declares the extension, one does NOT. - let withExt: { sessionId: string; serverCapabilities: any }; - let withoutExt: { sessionId: string }; + // Two parallel clients: one declares the extension, one does NOT. 
+ let withExt: Client; + let withoutExt: Client; try { - withExt = await initRawSession(serverUrl, { - capabilities: { - elicitation: {}, - sampling: {}, - extensions: { [TASKS_EXTENSION_ID]: {} } + withExt = new Client( + { name: 'mcp-conformance', version: '1.0' }, + { + capabilities: { + elicitation: {}, + sampling: {}, + extensions: { [TASKS_EXTENSION_ID]: {} } + } } - }); - withoutExt = await initRawSession(serverUrl, { capabilities: {} }); + ); + await withExt.connect(new StreamableHTTPClientTransport(new URL(serverUrl))); + + withoutExt = new Client( + { name: 'mcp-conformance', version: '1.0' }, + { capabilities: {} } + ); + await withoutExt.connect( + new StreamableHTTPClientTransport(new URL(serverUrl)) + ); } catch (error) { checks.push({ id: 'tasks-session-bootstrap', @@ -90,7 +103,7 @@ export class TasksCapabilityNegotiationScenario implements ClientScenario { const id = 'tasks-extension-advertised'; const name = 'TasksExtensionAdvertised'; const description = `Server advertises ${TASKS_EXTENSION_ID} under capabilities.extensions (and not capabilities.tasks)`; - const caps = withExt.serverCapabilities ?? {}; + const caps: any = withExt.getServerCapabilities() ?? 
{}; const errs: string[] = []; if (caps.tasks) { errs.push( @@ -137,9 +150,10 @@ export class TasksCapabilityNegotiationScenario implements ClientScenario { const errs: string[] = []; for (const tc of cases) { try { - await rawRequest(serverUrl, tc.method, tc.params, { - sessionId: withoutExt.sessionId - }); + await withoutExt.request( + { method: tc.method, params: tc.params }, + AnyResult + ); errs.push(`${tc.method} MUST reject (it returned a result)`); } catch (e: any) { if (e.code !== -32601) { @@ -167,15 +181,16 @@ export class TasksCapabilityNegotiationScenario implements ClientScenario { const description = 'tools/call from a session without the extension MUST fall through to sync (no CreateTaskResult, even for task-supporting tools)'; try { - const result = await rawRequest( - serverUrl, - 'tools/call', + const result = (await withoutExt.request( { - name: 'slow_compute', - arguments: { seconds: 0, label: 'capability-no-ext' } + method: 'tools/call', + params: { + name: 'slow_compute', + arguments: { seconds: 0, label: 'capability-no-ext' } + } }, - { sessionId: withoutExt.sessionId } - ); + AnyResult + )) as any; const errs: string[] = []; if (result.resultType === 'task') { errs.push( @@ -222,22 +237,21 @@ export class TasksCapabilityNegotiationScenario implements ClientScenario { const description = 'tools/call with extension declared in _meta.io.modelcontextprotocol/clientCapabilities produces a CreateTaskResult even when the session did not negotiate the extension'; try { - const result = await rawRequest( - serverUrl, - 'tools/call', - { - name: 'slow_compute', - arguments: { seconds: 1, label: 'capability-meta-opt' } - }, + const result = (await withoutExt.request( { - sessionId: withoutExt.sessionId, - meta: { - 'io.modelcontextprotocol/clientCapabilities': { - extensions: { [TASKS_EXTENSION_ID]: {} } + method: 'tools/call', + params: { + name: 'slow_compute', + arguments: { seconds: 1, label: 'capability-meta-opt' }, + _meta: { + 
'io.modelcontextprotocol/clientCapabilities': { + extensions: { [TASKS_EXTENSION_ID]: {} } + } } } - } - ); + }, + AnyResult + )) as any; const errs: string[] = []; if (result.resultType !== 'task') { errs.push( @@ -258,11 +272,12 @@ export class TasksCapabilityNegotiationScenario implements ClientScenario { // background goroutine on the server. if (result.taskId) { try { - await rawRequest( - serverUrl, - 'tasks/cancel', - { taskId: result.taskId }, - { sessionId: withExt.sessionId } + await withExt.request( + { + method: 'tasks/cancel', + params: { taskId: result.taskId } + }, + AnyResult ); } catch { /* swallow — cleanup best-effort */ @@ -286,6 +301,8 @@ export class TasksCapabilityNegotiationScenario implements ClientScenario { } } + await withExt.close().catch(() => {}); + await withoutExt.close().catch(() => {}); return checks; } } diff --git a/src/scenarios/server/tasks/dispatch.ts b/src/scenarios/server/tasks/dispatch.ts index 3f35e43..272ea57 100644 --- a/src/scenarios/server/tasks/dispatch.ts +++ b/src/scenarios/server/tasks/dispatch.ts @@ -21,6 +21,9 @@ * - failing_job — task-supporting, returns tool error */ +import { Client } from '@modelcontextprotocol/sdk/client/index.js'; +import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js'; + import { ClientScenario, ConformanceCheck, @@ -31,10 +34,9 @@ import { TASKS_EXTENSION_ID, SEP_2322_REF, SEP_2663_REF, + AnyResult, errMsg, failureCheck, - initRawSession, - rawRequest, waitForStatus, waitForTerminal } from './helpers'; @@ -90,15 +92,19 @@ export class TasksDispatchScenario implements ClientScenario { async run(serverUrl: string): Promise { const checks: ConformanceCheck[] = []; - let sessionId: string; + let client: Client; try { - ({ sessionId } = await initRawSession(serverUrl, { - capabilities: { - elicitation: {}, - sampling: {}, - extensions: { [TASKS_EXTENSION_ID]: {} } + client = new Client( + { name: 'mcp-conformance', version: '1.0' }, + { + 
capabilities: { + elicitation: {}, + sampling: {}, + extensions: { [TASKS_EXTENSION_ID]: {} } + } } - })); + ); + await client.connect(new StreamableHTTPClientTransport(new URL(serverUrl))); } catch (error) { checks.push({ id: 'tasks-session-bootstrap', @@ -120,13 +126,9 @@ export class TasksDispatchScenario implements ClientScenario { const description = 'tasks/result is removed in v2 and MUST reject with -32601'; try { - await rawRequest( - serverUrl, - 'tasks/result', - { taskId: 'any' }, - { - sessionId - } + await client.request( + { method: 'tasks/result', params: { taskId: 'any' } }, + AnyResult ); checks.push({ id, @@ -161,7 +163,10 @@ export class TasksDispatchScenario implements ClientScenario { const description = 'tasks/list is removed in v2 and MUST reject with -32601'; try { - await rawRequest(serverUrl, 'tasks/list', {}, { sessionId }); + await client.request( + { method: 'tasks/list', params: {} }, + AnyResult + ); checks.push({ id, name, @@ -195,12 +200,13 @@ export class TasksDispatchScenario implements ClientScenario { const description = 'tools/call with no client `task` hint param MUST still produce CreateTaskResult for task-supporting tools'; try { - const result = await rawRequest( - serverUrl, - 'tools/call', - { name: 'failing_job', arguments: {} }, - { sessionId } - ); + const result = (await client.request( + { + method: 'tools/call', + params: { name: 'failing_job', arguments: {} } + }, + AnyResult + )) as any; const errs: string[] = []; if (result.resultType !== 'task' || !result.taskId) { errs.push( @@ -210,7 +216,7 @@ export class TasksDispatchScenario implements ClientScenario { // Best-effort wait so we don't leak. 
if (result.taskId) { try { - await waitForTerminal(serverUrl, sessionId, result.taskId); + await waitForTerminal(client, result.taskId); } catch { /* swallow */ } @@ -236,17 +242,18 @@ export class TasksDispatchScenario implements ClientScenario { const description = 'tools/call with legacy `task` param against a sync tool MUST NOT error and MUST NOT be promoted to a task'; try { - const result = await rawRequest( - serverUrl, - 'tools/call', + const result = (await client.request( { - name: 'greet', - arguments: { name: 'legacy-hint' }, - // Legacy v1 hint that the server MUST ignore. - task: { ttl: 60_000, pollInterval: 100 } + method: 'tools/call', + params: { + name: 'greet', + arguments: { name: 'legacy-hint' }, + // Legacy v1 hint that the server MUST ignore. + task: { ttl: 60_000, pollInterval: 100 } + } }, - { sessionId } - ); + AnyResult + )) as any; const errs: string[] = []; if (result.resultType === 'task') { errs.push( @@ -283,15 +290,16 @@ export class TasksDispatchScenario implements ClientScenario { const description = 'For a fast operation, a task-supporting tool MAY skip task creation and return a sync ToolResult; either path is valid'; try { - const result = await rawRequest( - serverUrl, - 'tools/call', + const result = (await client.request( { - name: 'slow_compute', - arguments: { seconds: 0, label: 'instant' } + method: 'tools/call', + params: { + name: 'slow_compute', + arguments: { seconds: 0, label: 'instant' } + } }, - { sessionId } - ); + AnyResult + )) as any; const errs: string[] = []; if (result.resultType === 'task') { if (!result.taskId) { @@ -331,12 +339,13 @@ export class TasksDispatchScenario implements ClientScenario { const errs: string[] = []; try { // Sync tools/call. 
- const sync = await rawRequest( - serverUrl, - 'tools/call', - { name: 'greet', arguments: { name: 'rt' } }, - { sessionId } - ); + const sync = (await client.request( + { + method: 'tools/call', + params: { name: 'greet', arguments: { name: 'rt' } } + }, + AnyResult + )) as any; if (sync.resultType !== 'complete') { errs.push( `sync tools/call resultType = ${JSON.stringify(sync.resultType)}, want "complete"` @@ -344,24 +353,23 @@ export class TasksDispatchScenario implements ClientScenario { } // tasks/get against a fresh task. - const created = await rawRequest( - serverUrl, - 'tools/call', + const created = (await client.request( { - name: 'slow_compute', - arguments: { seconds: 0, label: 'rt-get' } + method: 'tools/call', + params: { + name: 'slow_compute', + arguments: { seconds: 0, label: 'rt-get' } + } }, - { sessionId } - ); + AnyResult + )) as any; const taskIdForGet = created.taskId; if (taskIdForGet) { - await waitForTerminal(serverUrl, sessionId, taskIdForGet); - const got = await rawRequest( - serverUrl, - 'tasks/get', - { taskId: taskIdForGet }, - { sessionId } - ); + await waitForTerminal(client, taskIdForGet); + const got = (await client.request( + { method: 'tasks/get', params: { taskId: taskIdForGet } }, + AnyResult + )) as any; if (got.resultType !== 'complete') { errs.push( `tasks/get resultType = ${JSON.stringify(got.resultType)}, want "complete"` @@ -370,22 +378,21 @@ export class TasksDispatchScenario implements ClientScenario { } // tasks/cancel ack on a fresh long-running task. 
- const longLived = await rawRequest( - serverUrl, - 'tools/call', + const longLived = (await client.request( { - name: 'slow_compute', - arguments: { seconds: 60, label: 'rt-cancel' } + method: 'tools/call', + params: { + name: 'slow_compute', + arguments: { seconds: 60, label: 'rt-cancel' } + } }, - { sessionId } - ); + AnyResult + )) as any; if (longLived.taskId) { - const cancelAck = await rawRequest( - serverUrl, - 'tasks/cancel', - { taskId: longLived.taskId }, - { sessionId } - ); + const cancelAck = (await client.request( + { method: 'tasks/cancel', params: { taskId: longLived.taskId } }, + AnyResult + )) as any; if (cancelAck.resultType !== 'complete') { errs.push( `tasks/cancel ack resultType = ${JSON.stringify(cancelAck.resultType)}, want "complete"` @@ -394,41 +401,40 @@ export class TasksDispatchScenario implements ClientScenario { } // tasks/update ack on a parked elicitation task. - const elicit = await rawRequest( - serverUrl, - 'tools/call', - { name: 'confirm_delete', arguments: { filename: 'rt.txt' } }, - { sessionId } - ); + const elicit = (await client.request( + { + method: 'tools/call', + params: { name: 'confirm_delete', arguments: { filename: 'rt.txt' } } + }, + AnyResult + )) as any; const elicitTaskId = elicit.taskId; if (elicitTaskId) { await waitForStatus( - serverUrl, - sessionId, + client, elicitTaskId, 'input_required', 5_000 ); - const updateAck = await rawRequest( - serverUrl, - 'tasks/update', + const updateAck = (await client.request( { - taskId: elicitTaskId, - inputResponses: { 'unknown-key': { ignored: true } } + method: 'tasks/update', + params: { + taskId: elicitTaskId, + inputResponses: { 'unknown-key': { ignored: true } } + } }, - { sessionId } - ); + AnyResult + )) as any; if (updateAck.resultType !== 'complete') { errs.push( `tasks/update ack resultType = ${JSON.stringify(updateAck.resultType)}, want "complete"` ); } try { - await rawRequest( - serverUrl, - 'tasks/cancel', - { taskId: elicitTaskId }, - { sessionId } + 
await client.request( + { method: 'tasks/cancel', params: { taskId: elicitTaskId } }, + AnyResult ); } catch { /* swallow */ @@ -455,15 +461,16 @@ export class TasksDispatchScenario implements ClientScenario { const description = 'tasks/get issued immediately after CreateTaskResult arrives MUST resolve (server MUST NOT return CreateTaskResult before the task is durably created)'; try { - const created = await rawRequest( - serverUrl, - 'tools/call', + const created = (await client.request( { - name: 'slow_compute', - arguments: { seconds: 60, label: 'consistency' } + method: 'tools/call', + params: { + name: 'slow_compute', + arguments: { seconds: 60, label: 'consistency' } + } }, - { sessionId } - ); + AnyResult + )) as any; const taskId = created.taskId; if (!taskId) { checks.push({ @@ -478,12 +485,10 @@ export class TasksDispatchScenario implements ClientScenario { } else { // No await/sleep between create and get — codifies the // strong-consistency ordering. - const got = await rawRequest( - serverUrl, - 'tasks/get', - { taskId }, - { sessionId } - ); + const got = (await client.request( + { method: 'tasks/get', params: { taskId } }, + AnyResult + )) as any; const errs: string[] = []; if (got.taskId !== taskId) { errs.push( @@ -492,11 +497,9 @@ export class TasksDispatchScenario implements ClientScenario { } // Cleanup. 
try { - await rawRequest( - serverUrl, - 'tasks/cancel', - { taskId }, - { sessionId } + await client.request( + { method: 'tasks/cancel', params: { taskId } }, + AnyResult ); } catch { /* swallow */ @@ -523,11 +526,12 @@ export class TasksDispatchScenario implements ClientScenario { const description = 'tasks/get for a taskId the server does not recognize MUST return -32602'; try { - await rawRequest( - serverUrl, - 'tasks/get', - { taskId: 'tasks-conformance-nonexistent-12345' }, - { sessionId } + await client.request( + { + method: 'tasks/get', + params: { taskId: 'tasks-conformance-nonexistent-12345' } + }, + AnyResult ); checks.push({ id, @@ -555,6 +559,7 @@ export class TasksDispatchScenario implements ClientScenario { } } + await client.close().catch(() => {}); return checks; } } diff --git a/src/scenarios/server/tasks/headers.ts b/src/scenarios/server/tasks/headers.ts index 0d5ebdd..9936779 100644 --- a/src/scenarios/server/tasks/headers.ts +++ b/src/scenarios/server/tasks/headers.ts @@ -16,6 +16,9 @@ * - slow_compute — task-supporting, sleeps N seconds */ +import { Client } from '@modelcontextprotocol/sdk/client/index.js'; +import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js'; + import { ClientScenario, ConformanceCheck, @@ -25,12 +28,75 @@ import { import { TASKS_EXTENSION_ID, SEP_2243_REF, + AnyResult, errMsg, - failureCheck, - initRawSession, - rawRequest + failureCheck } from './helpers'; +/** + * Minimal raw POST that lets us inject SEP-2243 routing headers + * (Mcp-Method, Mcp-Name) on a JSON-RPC call. The SDK's + * StreamableHTTPClientTransport doesn't expose per-request HTTP + * headers, and this whole scenario exists to verify the server tolerates + * those headers — so we pin a single raw fetch helper to this file. + * + * Reuses the SDK transport's session via `transport.sessionId` so the + * request lands on the same already-initialized session. 
+ */ +async function rawJsonRpcWithHeaders( + serverUrl: string, + sessionId: string, + method: string, + params: any, + extraHeaders: Record +): Promise { + const resp = await fetch(serverUrl, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Accept: 'application/json, text/event-stream', + 'Mcp-Session-Id': sessionId, + ...extraHeaders + }, + body: JSON.stringify({ + jsonrpc: '2.0', + id: `hdr-${Math.random().toString(36).slice(2, 10)}`, + method, + params + }) + }); + const ct = resp.headers.get('content-type') || ''; + let body: any; + if (ct.includes('text/event-stream')) { + const text = await resp.text(); + for (const line of text.split('\n')) { + const trimmed = line.trim(); + if (trimmed.startsWith('data:')) { + const payload = trimmed.slice(5).trimStart(); + if (payload.startsWith('{')) { + const parsed = JSON.parse(payload); + if (parsed.id !== undefined && (parsed.result || parsed.error)) { + body = parsed; + break; + } + } + } + } + } else { + body = await resp.json(); + } + if (!body) { + throw new Error(`No JSON-RPC frame in response (status ${resp.status})`); + } + if (body.error) { + const err: any = new Error(body.error.message || 'JSON-RPC error'); + err.code = body.error.code; + err.data = body.error.data; + throw err; + } + return body.result; +} + export class TasksRequestHeadersScenario implements ClientScenario { name = 'tasks-request-headers'; specVersions: ScenarioSpecTag[] = ['extension', DRAFT_PROTOCOL_VERSION]; @@ -53,11 +119,15 @@ based on them — including when the headers disagree with the body.`; async run(serverUrl: string): Promise { const checks: ConformanceCheck[] = []; - let sessionId: string; + let client: Client; + let transport: StreamableHTTPClientTransport; try { - ({ sessionId } = await initRawSession(serverUrl, { - capabilities: { extensions: { [TASKS_EXTENSION_ID]: {} } } - })); + client = new Client( + { name: 'mcp-conformance', version: '1.0' }, + { capabilities: { extensions: { 
[TASKS_EXTENSION_ID]: {} } } } + ); + transport = new StreamableHTTPClientTransport(new URL(serverUrl)); + await client.connect(transport); } catch (error) { checks.push({ id: 'tasks-session-bootstrap', @@ -79,11 +149,12 @@ based on them — including when the headers disagree with the body.`; const description = 'Server tolerates Mcp-Method request header on tools/call (sync tool dispatch unaffected)'; try { - const result = await rawRequest( + const result = await rawJsonRpcWithHeaders( serverUrl, + transport.sessionId!, 'tools/call', { name: 'greet', arguments: { name: 'sep-2243' } }, - { sessionId, headers: { 'Mcp-Method': 'tools/call' } } + { 'Mcp-Method': 'tools/call' } ); const errs: string[] = []; if (result.resultType !== 'complete') { @@ -123,15 +194,16 @@ based on them — including when the headers disagree with the body.`; const description = 'Server tolerates Mcp-Method + Mcp-Name request headers on tasks/get (body taskId resolves regardless of routing headers)'; try { - const created = await rawRequest( - serverUrl, - 'tools/call', + const created = (await client.request( { - name: 'slow_compute', - arguments: { seconds: 60, label: 'headers-tasks-get' } + method: 'tools/call', + params: { + name: 'slow_compute', + arguments: { seconds: 60, label: 'headers-tasks-get' } + } }, - { sessionId } - ); + AnyResult + )) as any; routingTaskId = created.taskId; if (!routingTaskId) { checks.push({ @@ -144,16 +216,14 @@ based on them — including when the headers disagree with the body.`; specReferences: [SEP_2243_REF] }); } else { - const got = await rawRequest( + const got = await rawJsonRpcWithHeaders( serverUrl, + transport.sessionId!, 'tasks/get', { taskId: routingTaskId }, { - sessionId, - headers: { - 'Mcp-Method': 'tasks/get', - 'Mcp-Name': routingTaskId - } + 'Mcp-Method': 'tasks/get', + 'Mcp-Name': routingTaskId } ); const errs: string[] = []; @@ -190,11 +260,12 @@ based on them — including when the headers disagree with the body.`; const description = 
'When Mcp-Method header disagrees with body, server MUST dispatch on body method (header is informational)'; try { - const result = await rawRequest( + const result = await rawJsonRpcWithHeaders( serverUrl, + transport.sessionId!, 'tools/call', { name: 'greet', arguments: { name: 'header-mismatch' } }, - { sessionId, headers: { 'Mcp-Method': 'tasks/get' } } + { 'Mcp-Method': 'tasks/get' } ); const errs: string[] = []; if (result.resultType !== 'complete') { @@ -227,17 +298,19 @@ based on them — including when the headers disagree with the body.`; // Cleanup the long-lived task. if (routingTaskId) { try { - await rawRequest( - serverUrl, - 'tasks/cancel', - { taskId: routingTaskId }, - { sessionId } + await client.request( + { + method: 'tasks/cancel', + params: { taskId: routingTaskId } + }, + AnyResult ); } catch { /* swallow */ } } + await client.close().catch(() => {}); return checks; } } diff --git a/src/scenarios/server/tasks/helpers.ts b/src/scenarios/server/tasks/helpers.ts index 2eea4e7..388a985 100644 --- a/src/scenarios/server/tasks/helpers.ts +++ b/src/scenarios/server/tasks/helpers.ts @@ -1,17 +1,31 @@ /** * Shared helpers for SEP-2663 Tasks server-conformance scenarios. * - * The MCP TS SDK's typed schemas (CallToolResultSchema, etc.) strip the - * SEP-2663 / SEP-2322 wire fields — `resultType`, `taskId`, `inputRequests`, - * `requestState`, inlined `result`/`error` on tasks/get's DetailedTask. So - * scenarios that exercise those fields use raw fetch instead. This file - * centralizes the bootstrap + RPC + polling primitives. + * Most of what scenarios need is already in the official MCP TS SDK: + * - new Client(...) 
+ StreamableHTTPClientTransport for connection + * - client.request(req, schema) for typed JSON-RPC calls + * - McpError with .code / .data for JSON-RPC errors * - * If/when the SDK gains schemas for the SEP-2663 wire shapes, the call - * sites in scenarios switch back to `client.request(..., AnyResult)` - * and this file shrinks (or disappears). + * This file holds: + * - SEP reference constants used by every scenario's specReferences + * - Tiny check builders (errMsg / failureCheck / skipCheck) used by + * all scenarios for consistent FAILURE / SKIPPED reporting + * - Polling helpers (waitForTerminal / waitForStatus) wrapping + * `client.request('tasks/get', AnyResult)` + * - The `AnyResult` Zod passthrough schema — pair with + * `client.request(req, AnyResult)` to preserve fields the SDK's + * typed result schemas would strip (`resultType`, `taskId`, + * `requestState`, inlined `result`/`error`, etc.) + * + * Scenarios that need transport-level access (HTTP request-header + * injection for SEP-2243; raw SSE event reading for status + * notifications) keep their own inline raw fetch — SDK doesn't expose + * those layers. See headers.ts / notifications.ts. */ +import type { Client } from '@modelcontextprotocol/sdk/client/index.js'; +import { z } from 'zod'; + import type { ConformanceCheck, SpecReference } from '../../../types'; export const TASKS_EXTENSION_ID = 'io.modelcontextprotocol/tasks'; @@ -33,6 +47,13 @@ export const SEP_2575_REF: SpecReference = { url: 'https://github.com/modelcontextprotocol/specification/pull/2575' }; +/** + * Zod passthrough schema. Pair with `client.request(req, AnyResult)` to + * preserve fields the SDK's typed result schemas would strip — every + * SEP-2663 / SEP-2322 wire field falls into this bucket today. + */ +export const AnyResult = z.object({}).passthrough(); + export function errMsg(error: unknown): string { return error instanceof Error ? 
error.message : String(error); } @@ -75,192 +96,18 @@ export function skipCheck( }; } -export interface InitOpts { - /** Negotiated wire protocolVersion. Defaults to LATEST_SPEC_VERSION. */ - protocolVersion?: string; - /** Client capabilities (extensions, elicitation, sampling, …). */ - capabilities?: Record; - /** Optional clientInfo override. */ - clientInfo?: { name: string; version: string }; -} - -export interface InitResult { - /** Mcp-Session-Id minted by the server during initialize. */ - sessionId: string; - /** capabilities object the server advertised in its initialize response. */ - serverCapabilities: Record; - /** Negotiated protocolVersion echoed back by the server. */ - serverProtocolVersion?: string; - /** Server info (name, version, …). */ - serverInfo?: Record; -} - -/** - * Run a fresh initialize handshake and return session id + the server's - * advertised capabilities. Bypasses the SDK so callers can declare - * extension capabilities the SDK's typed wrappers don't yet know about, - * and so the SDK's Zod schemas don't strip extension fields off the - * server response. - */ -export async function initRawSession( - serverUrl: string, - opts: InitOpts = {} -): Promise { - const protocolVersion = opts.protocolVersion ?? '2025-11-25'; - const capabilities = opts.capabilities ?? {}; - const clientInfo = opts.clientInfo ?? 
{ - name: 'mcp-conformance', - version: '1.0' - }; - - const initResp = await fetch(serverUrl, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - Accept: 'application/json' - }, - body: JSON.stringify({ - jsonrpc: '2.0', - id: 'init-raw', - method: 'initialize', - params: { protocolVersion, clientInfo, capabilities } - }) - }); - const sid = initResp.headers.get('mcp-session-id') || ''; - if (!sid) throw new Error('initialize response missing Mcp-Session-Id'); - - const initBody = await initResp.json(); - if (initBody.error) { - throw new Error( - `initialize returned JSON-RPC error: ${JSON.stringify(initBody.error)}` - ); - } - const result = initBody.result ?? {}; - - await fetch(serverUrl, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - Accept: 'application/json', - 'Mcp-Session-Id': sid - }, - body: JSON.stringify({ - jsonrpc: '2.0', - method: 'notifications/initialized' - }) - }); - return { - sessionId: sid, - serverCapabilities: result.capabilities ?? {}, - serverProtocolVersion: result.protocolVersion, - serverInfo: result.serverInfo - }; -} - -export interface RawRequestOpts { - sessionId: string; - /** Optional _meta object merged into the JSON-RPC params. */ - meta?: Record; - /** Optional HTTP request headers merged after the harness defaults. */ - headers?: Record; -} - -export interface RawRequestResult { - /** The JSON-RPC `result` body, when the response carried one. */ - result: any; - /** The raw fetch Response so callers can inspect transport-level headers. */ - response: Response; -} - -let nextId = 1; - -/** - * Send a raw JSON-RPC request via fetch, parsing SSE `data:` lines or - * plain JSON depending on Content-Type. Throws an Error decorated with - * `code` / `data` when the response carries a JSON-RPC error. 
- */ -export async function rawRequest( - serverUrl: string, - method: string, - params: any, - opts: RawRequestOpts -): Promise { - const { result } = await rawRequestFull(serverUrl, method, params, opts); - return result; -} - -/** - * Like rawRequest, but also returns the raw fetch Response so callers - * can inspect transport-level headers (e.g., SEP-2243 routing headers). - */ -export async function rawRequestFull( - serverUrl: string, - method: string, - params: any, - opts: RawRequestOpts -): Promise { - const id = nextId++; - const requestParams = opts.meta ? { ...params, _meta: opts.meta } : params; - const resp = await fetch(serverUrl, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - Accept: 'text/event-stream, application/json', - 'Mcp-Session-Id': opts.sessionId, - ...(opts.headers ?? {}) - }, - body: JSON.stringify({ - jsonrpc: '2.0', - id, - method, - params: requestParams - }) - }); - const ct = resp.headers.get('content-type') || ''; - let body: any; - if (ct.includes('text/event-stream')) { - const text = await resp.text(); - for (const line of text.split('\n')) { - const trimmed = line.trim(); - if (trimmed.startsWith('data:')) { - const payload = trimmed.slice(5).trimStart(); - if (payload.startsWith('{')) { - const parsed = JSON.parse(payload); - if (parsed.id === id) { - body = parsed; - break; - } - } - } - } - } else { - body = await resp.json(); - } - if (!body) throw new Error(`No JSON-RPC response for ${method}`); - if (body.error) { - const err: any = new Error(body.error.message); - err.code = body.error.code; - err.data = body.error.data; - throw err; - } - return { result: body.result, response: resp }; -} - /** Poll tasks/get until the task reaches a terminal state. 
*/ export async function waitForTerminal( - serverUrl: string, - sessionId: string, + client: Client, taskId: string, timeoutMs = 10_000 ): Promise { const start = Date.now(); while (Date.now() - start < timeoutMs) { - const task = await rawRequest( - serverUrl, - 'tasks/get', - { taskId }, - { sessionId } - ); + const task = (await client.request( + { method: 'tasks/get', params: { taskId } }, + AnyResult + )) as any; if (['completed', 'failed', 'cancelled'].includes(task.status)) { return task; } @@ -273,20 +120,17 @@ export async function waitForTerminal( /** Poll tasks/get until a specific status (or any terminal state). */ export async function waitForStatus( - serverUrl: string, - sessionId: string, + client: Client, taskId: string, status: string, timeoutMs = 10_000 ): Promise { const start = Date.now(); while (Date.now() - start < timeoutMs) { - const task = await rawRequest( - serverUrl, - 'tasks/get', - { taskId }, - { sessionId } - ); + const task = (await client.request( + { method: 'tasks/get', params: { taskId } }, + AnyResult + )) as any; if ( task.status === status || ['completed', 'failed', 'cancelled'].includes(task.status) diff --git a/src/scenarios/server/tasks/lifecycle.ts b/src/scenarios/server/tasks/lifecycle.ts index 21c5a13..53381fa 100644 --- a/src/scenarios/server/tasks/lifecycle.ts +++ b/src/scenarios/server/tasks/lifecycle.ts @@ -13,6 +13,9 @@ * - protocol_error_job — task-supporting, panics into a protocol error */ +import { Client } from '@modelcontextprotocol/sdk/client/index.js'; +import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js'; + import { ClientScenario, ConformanceCheck, @@ -23,11 +26,10 @@ import { TASKS_EXTENSION_ID, SEP_2663_REF, SEP_2322_REF, + AnyResult, errMsg, failureCheck, skipCheck, - initRawSession, - rawRequest, waitForTerminal } from './helpers'; import { isIso8601 } from '../_shared/wire-format'; @@ -84,15 +86,19 @@ The server MUST advertise 
\`io.modelcontextprotocol/tasks\` under async run(serverUrl: string): Promise { const checks: ConformanceCheck[] = []; - let sessionId: string; + let client: Client; try { - ({ sessionId } = await initRawSession(serverUrl, { - capabilities: { - elicitation: {}, - sampling: {}, - extensions: { [TASKS_EXTENSION_ID]: {} } + client = new Client( + { name: 'mcp-conformance', version: '1.0' }, + { + capabilities: { + elicitation: {}, + sampling: {}, + extensions: { [TASKS_EXTENSION_ID]: {} } + } } - })); + ); + await client.connect(new StreamableHTTPClientTransport(new URL(serverUrl))); } catch (error) { checks.push({ id: 'tasks-session-bootstrap', @@ -114,12 +120,13 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under const description = 'Sync tool returns ToolResult (resultType:"complete"), no taskId at top level'; try { - const result = await rawRequest( - serverUrl, - 'tools/call', - { name: 'greet', arguments: { name: 'World' } }, - { sessionId } - ); + const result = (await client.request( + { + method: 'tools/call', + params: { name: 'greet', arguments: { name: 'World' } } + }, + AnyResult + )) as any; const errs: string[] = []; if (result.resultType === 'task') { errs.push('sync tool result MUST NOT carry resultType:"task"'); @@ -159,15 +166,16 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under const description = 'Task-supporting tool returns flat CreateTaskResult (no nested `task` wrapper)'; try { - const result = await rawRequest( - serverUrl, - 'tools/call', + const result = (await client.request( { - name: 'slow_compute', - arguments: { seconds: 2, label: 'lifecycle-create' } + method: 'tools/call', + params: { + name: 'slow_compute', + arguments: { seconds: 2, label: 'lifecycle-create' } + } }, - { sessionId } - ); + AnyResult + )) as any; const errs: string[] = []; if (result.resultType !== 'task') { errs.push( @@ -243,12 +251,10 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under checks.push(skipCheck(id, 
name, description, 'no task created')); } else { try { - const task = await rawRequest( - serverUrl, - 'tasks/get', - { taskId: workingTaskId }, - { sessionId } - ); + const task = (await client.request( + { method: 'tasks/get', params: { taskId: workingTaskId } }, + AnyResult + )) as any; const errs: string[] = []; if (task.taskId !== workingTaskId) { errs.push( @@ -284,11 +290,7 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under checks.push(skipCheck(id, name, description, 'no task created')); } else { try { - const terminal = await waitForTerminal( - serverUrl, - sessionId, - workingTaskId - ); + const terminal = await waitForTerminal(client, workingTaskId); const errs: string[] = []; if (terminal.status !== 'completed') { errs.push( @@ -334,21 +336,18 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under const description = 'Tool execution error reports as completed + result.isError (NOT failed)'; try { - const created = await rawRequest( - serverUrl, - 'tools/call', - { name: 'failing_job', arguments: {} }, - { sessionId } - ); + const created = (await client.request( + { + method: 'tools/call', + params: { name: 'failing_job', arguments: {} } + }, + AnyResult + )) as any; const errs: string[] = []; if (!created.taskId) { errs.push('failing_job MUST create a task'); } else { - const terminal = await waitForTerminal( - serverUrl, - sessionId, - created.taskId - ); + const terminal = await waitForTerminal(client, created.taskId); if (terminal.status !== 'completed') { errs.push( `tool error MUST surface as completed (not "${terminal.status}")` @@ -381,21 +380,18 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under const description = 'Protocol-level error reports as failed + inlined error{code,message}, no result'; try { - const created = await rawRequest( - serverUrl, - 'tools/call', - { name: 'protocol_error_job', arguments: {} }, - { sessionId } - ); + const created = (await client.request( + { + method: 
'tools/call', + params: { name: 'protocol_error_job', arguments: {} } + }, + AnyResult + )) as any; const errs: string[] = []; if (!created.taskId) { errs.push('protocol_error_job MUST create a task'); } else { - const terminal = await waitForTerminal( - serverUrl, - sessionId, - created.taskId - ); + const terminal = await waitForTerminal(client, created.taskId); if (terminal.status !== 'failed') { errs.push( `protocol error MUST surface as failed (not "${terminal.status}")` @@ -438,15 +434,16 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under 'tasks/cancel returns {resultType:"complete"} ack; status settles to cancelled'; let cancelTaskId: string | undefined; try { - const created = await rawRequest( - serverUrl, - 'tools/call', + const created = (await client.request( { - name: 'slow_compute', - arguments: { seconds: 60, label: 'lifecycle-cancel' } + method: 'tools/call', + params: { + name: 'slow_compute', + arguments: { seconds: 60, label: 'lifecycle-cancel' } + } }, - { sessionId } - ); + AnyResult + )) as any; cancelTaskId = created.taskId; if (!cancelTaskId) { checks.push({ @@ -459,12 +456,10 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under specReferences: [SEP_2663_REF, SEP_2322_REF] }); } else { - const ack = await rawRequest( - serverUrl, - 'tasks/cancel', - { taskId: cancelTaskId }, - { sessionId } - ); + const ack = (await client.request( + { method: 'tasks/cancel', params: { taskId: cancelTaskId } }, + AnyResult + )) as any; const errs: string[] = []; // Ack carries only the SEP-2322 discriminator — no task envelope. if ( @@ -475,12 +470,10 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under ); } // Status settles to cancelled — observe via tasks/get. 
- const after = await rawRequest( - serverUrl, - 'tasks/get', - { taskId: cancelTaskId }, - { sessionId } - ); + const after = (await client.request( + { method: 'tasks/get', params: { taskId: cancelTaskId } }, + AnyResult + )) as any; if (after.status !== 'cancelled') { errs.push( `tasks/get after cancel MUST report cancelled; got ${after.status}` @@ -509,15 +502,16 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under const description = 'tasks/cancel on a terminal task returns -32602 (per spec commit d963ad0)'; try { - const created = await rawRequest( - serverUrl, - 'tools/call', + const created = (await client.request( { - name: 'slow_compute', - arguments: { seconds: 1, label: 'lifecycle-cancel-terminal' } + method: 'tools/call', + params: { + name: 'slow_compute', + arguments: { seconds: 1, label: 'lifecycle-cancel-terminal' } + } }, - { sessionId } - ); + AnyResult + )) as any; const completedTaskId = created.taskId; if (!completedTaskId) { checks.push({ @@ -530,15 +524,13 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under specReferences: [SEP_2663_REF] }); } else { - await waitForTerminal(serverUrl, sessionId, completedTaskId); + await waitForTerminal(client, completedTaskId); // Now cancel — must throw -32602. 
let thrown: any; try { - await rawRequest( - serverUrl, - 'tasks/cancel', - { taskId: completedTaskId }, - { sessionId } + await client.request( + { method: 'tasks/cancel', params: { taskId: completedTaskId } }, + AnyResult ); } catch (e) { thrown = e; @@ -569,6 +561,7 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under } } + await client.close(); return checks; } } diff --git a/src/scenarios/server/tasks/mrtr-input.ts b/src/scenarios/server/tasks/mrtr-input.ts index 49cfacc..416aa09 100644 --- a/src/scenarios/server/tasks/mrtr-input.ts +++ b/src/scenarios/server/tasks/mrtr-input.ts @@ -11,6 +11,9 @@ * parallel so two keys are pending at once */ +import { Client } from '@modelcontextprotocol/sdk/client/index.js'; +import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js'; + import { ClientScenario, ConformanceCheck, @@ -21,10 +24,9 @@ import { TASKS_EXTENSION_ID, SEP_2322_REF, SEP_2663_REF, + AnyResult, errMsg, failureCheck, - initRawSession, - rawRequest, waitForStatus, waitForTerminal } from './helpers'; @@ -65,15 +67,19 @@ export class TasksMRTRInputScenario implements ClientScenario { async run(serverUrl: string): Promise { const checks: ConformanceCheck[] = []; - let sessionId: string; + let client: Client; try { - ({ sessionId } = await initRawSession(serverUrl, { - capabilities: { - elicitation: {}, - sampling: {}, - extensions: { [TASKS_EXTENSION_ID]: {} } + client = new Client( + { name: 'mcp-conformance', version: '1.0' }, + { + capabilities: { + elicitation: {}, + sampling: {}, + extensions: { [TASKS_EXTENSION_ID]: {} } + } } - })); + ); + await client.connect(new StreamableHTTPClientTransport(new URL(serverUrl))); } catch (error) { checks.push({ id: 'tasks-session-bootstrap', @@ -95,15 +101,16 @@ export class TasksMRTRInputScenario implements ClientScenario { const description = 'tasks/get on an input_required task MUST surface a non-empty inputRequests map'; try { - const created = await 
rawRequest( - serverUrl, - 'tools/call', + const created = (await client.request( { - name: 'confirm_delete', - arguments: { filename: 'mrtr-input.txt' } + method: 'tools/call', + params: { + name: 'confirm_delete', + arguments: { filename: 'mrtr-input.txt' } + } }, - { sessionId } - ); + AnyResult + )) as any; const taskId = created.taskId; if (!taskId) { checks.push({ @@ -117,8 +124,7 @@ export class TasksMRTRInputScenario implements ClientScenario { }); } else { const task = await waitForStatus( - serverUrl, - sessionId, + client, taskId, 'input_required', 5_000 @@ -150,11 +156,9 @@ export class TasksMRTRInputScenario implements ClientScenario { } // Cancel so we don't leave the task parked. try { - await rawRequest( - serverUrl, - 'tasks/cancel', - { taskId }, - { sessionId } + await client.request( + { method: 'tasks/cancel', params: { taskId } }, + AnyResult ); } catch { /* swallow */ @@ -181,15 +185,16 @@ export class TasksMRTRInputScenario implements ClientScenario { const description = 'tasks/update with matching inputResponses MUST be acked with {resultType:"complete"} and resume the task to a terminal state'; try { - const created = await rawRequest( - serverUrl, - 'tools/call', + const created = (await client.request( { - name: 'confirm_delete', - arguments: { filename: 'mrtr-resume.txt' } + method: 'tools/call', + params: { + name: 'confirm_delete', + arguments: { filename: 'mrtr-resume.txt' } + } }, - { sessionId } - ); + AnyResult + )) as any; const taskId = created.taskId; if (!taskId) { checks.push({ @@ -203,8 +208,7 @@ export class TasksMRTRInputScenario implements ClientScenario { }); } else { const inputTask = await waitForStatus( - serverUrl, - sessionId, + client, taskId, 'input_required', 5_000 @@ -217,16 +221,17 @@ export class TasksMRTRInputScenario implements ClientScenario { content: { confirm: true } }; } - const ack = await rawRequest( - serverUrl, - 'tasks/update', + const ack = (await client.request( { - taskId, - inputResponses: 
responses, - requestState: inputTask.requestState + method: 'tasks/update', + params: { + taskId, + inputResponses: responses, + requestState: inputTask.requestState + } }, - { sessionId } - ); + AnyResult + )) as any; if ( JSON.stringify(ack) !== JSON.stringify({ resultType: 'complete' }) ) { @@ -234,7 +239,7 @@ export class TasksMRTRInputScenario implements ClientScenario { `tasks/update ack MUST be {resultType:"complete"}; got ${JSON.stringify(ack)}` ); } - const terminal = await waitForTerminal(serverUrl, sessionId, taskId); + const terminal = await waitForTerminal(client, taskId); if (terminal.status !== 'completed') { errs.push( `task MUST resume to completed after tasks/update; got status ${JSON.stringify(terminal.status)}` @@ -267,12 +272,13 @@ export class TasksMRTRInputScenario implements ClientScenario { const description = 'tasks/update with a subset of keys MUST keep the task in input_required with only the unanswered key remaining'; try { - const created = await rawRequest( - serverUrl, - 'tools/call', - { name: 'multi_input', arguments: {} }, - { sessionId } - ); + const created = (await client.request( + { + method: 'tools/call', + params: { name: 'multi_input', arguments: {} } + }, + AnyResult + )) as any; const taskId = created.taskId; if (!taskId) { checks.push({ @@ -290,12 +296,10 @@ export class TasksMRTRInputScenario implements ClientScenario { let inputTask: any; const start = Date.now(); while (Date.now() - start < 5_000) { - inputTask = await rawRequest( - serverUrl, - 'tasks/get', - { taskId }, - { sessionId } - ); + inputTask = (await client.request( + { method: 'tasks/get', params: { taskId } }, + AnyResult + )) as any; if ( inputTask.status === 'input_required' && inputTask.inputRequests && @@ -320,20 +324,21 @@ export class TasksMRTRInputScenario implements ClientScenario { const [firstKey, secondKey] = keys; // Answer first key only. 
- const firstAck = await rawRequest( - serverUrl, - 'tasks/update', + const firstAck = (await client.request( { - taskId, - inputResponses: { - [firstKey]: { - action: 'accept', - content: { name: 'partial-1', confirm: true } + method: 'tasks/update', + params: { + taskId, + inputResponses: { + [firstKey]: { + action: 'accept', + content: { name: 'partial-1', confirm: true } + } } } }, - { sessionId } - ); + AnyResult + )) as any; if (firstAck.resultType !== 'complete') { errs.push( `partial tasks/update ack MUST carry resultType:"complete"; got ${JSON.stringify(firstAck)}` @@ -342,12 +347,10 @@ export class TasksMRTRInputScenario implements ClientScenario { // Status MUST still be input_required with only the second // key remaining. - const afterFirst = await rawRequest( - serverUrl, - 'tasks/get', - { taskId }, - { sessionId } - ); + const afterFirst = (await client.request( + { method: 'tasks/get', params: { taskId } }, + AnyResult + )) as any; if (afterFirst.status !== 'input_required') { errs.push( `task MUST stay input_required while another input is still pending; got ${JSON.stringify(afterFirst.status)}` @@ -366,25 +369,22 @@ export class TasksMRTRInputScenario implements ClientScenario { } // Answer second key — task resumes and finishes. 
- await rawRequest( - serverUrl, - 'tasks/update', + await client.request( { - taskId, - inputResponses: { - [secondKey]: { - action: 'accept', - content: { name: 'partial-2', confirm: true } + method: 'tasks/update', + params: { + taskId, + inputResponses: { + [secondKey]: { + action: 'accept', + content: { name: 'partial-2', confirm: true } + } } } }, - { sessionId } - ); - const terminal = await waitForTerminal( - serverUrl, - sessionId, - taskId + AnyResult ); + const terminal = await waitForTerminal(client, taskId); if (terminal.status !== 'completed') { errs.push( `task MUST complete after both inputs are satisfied; got ${JSON.stringify(terminal.status)}` @@ -411,6 +411,7 @@ export class TasksMRTRInputScenario implements ClientScenario { } } + await client.close().catch(() => {}); return checks; } } diff --git a/src/scenarios/server/tasks/notifications.ts b/src/scenarios/server/tasks/notifications.ts index a3881a2..3a4f3c5 100644 --- a/src/scenarios/server/tasks/notifications.ts +++ b/src/scenarios/server/tasks/notifications.ts @@ -18,6 +18,9 @@ * - slow_compute — task-supporting, sleeps N seconds */ +import { Client } from '@modelcontextprotocol/sdk/client/index.js'; +import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js'; + import { ClientScenario, ConformanceCheck, @@ -29,7 +32,6 @@ import { SEP_2663_REF, errMsg, failureCheck, - initRawSession, waitForTerminal } from './helpers'; @@ -54,11 +56,15 @@ notification params MUST carry: async run(serverUrl: string): Promise { const checks: ConformanceCheck[] = []; - let sessionId: string; + let client: Client; + let transport: StreamableHTTPClientTransport; try { - ({ sessionId } = await initRawSession(serverUrl, { - capabilities: { extensions: { [TASKS_EXTENSION_ID]: {} } } - })); + client = new Client( + { name: 'mcp-conformance', version: '1.0' }, + { capabilities: { extensions: { [TASKS_EXTENSION_ID]: {} } } } + ); + transport = new 
StreamableHTTPClientTransport(new URL(serverUrl)); + await client.connect(transport); } catch (error) { checks.push({ id: 'tasks-session-bootstrap', @@ -81,6 +87,11 @@ notification params MUST carry: // Issue tools/call with SSE-accepting headers and capture every // `data:` payload. Some are JSON-RPC responses (with id), some are // notifications (no id). We ingest all and classify by the body. + // + // The SDK's Client.request() consumes the response stream internally, + // so to *observe* notification frames on the POST SSE we drop to raw + // fetch here while reusing the SDK-initialized session via + // `transport.sessionId`. let taskId: string | undefined; const notifications: any[] = []; try { @@ -89,7 +100,7 @@ notification params MUST carry: headers: { 'Content-Type': 'application/json', Accept: 'text/event-stream, application/json', - 'Mcp-Session-Id': sessionId + 'Mcp-Session-Id': transport.sessionId! }, body: JSON.stringify({ jsonrpc: '2.0', @@ -132,7 +143,7 @@ notification params MUST carry: // collecting more, but we're done with this scenario regardless). 
if (taskId) { try { - await waitForTerminal(serverUrl, sessionId, taskId); + await waitForTerminal(client, taskId); } catch { /* swallow */ } @@ -149,6 +160,7 @@ notification params MUST carry: 'No status notifications received on the tools/call POST SSE stream (notifications are optional)', specReferences: [SEP_2663_REF] }); + await client.close().catch(() => {}); return checks; } @@ -183,6 +195,7 @@ notification params MUST carry: details: { notificationCount: notifications.length } }); + await client.close().catch(() => {}); return checks; } } diff --git a/src/scenarios/server/tasks/request-state.ts b/src/scenarios/server/tasks/request-state.ts index 8c2b165..30e9cc5 100644 --- a/src/scenarios/server/tasks/request-state.ts +++ b/src/scenarios/server/tasks/request-state.ts @@ -16,6 +16,9 @@ * - slow_compute — task-supporting, sleeps N seconds */ +import { Client } from '@modelcontextprotocol/sdk/client/index.js'; +import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js'; + import { ClientScenario, ConformanceCheck, @@ -26,10 +29,9 @@ import { TASKS_EXTENSION_ID, SEP_2322_REF, SEP_2663_REF, + AnyResult, errMsg, - failureCheck, - initRawSession, - rawRequest + failureCheck } from './helpers'; export class TasksRequestStateScenario implements ClientScenario { @@ -59,11 +61,15 @@ export class TasksRequestStateScenario implements ClientScenario { async run(serverUrl: string): Promise { const checks: ConformanceCheck[] = []; - let sessionId: string; + let client: Client; try { - ({ sessionId } = await initRawSession(serverUrl, { - capabilities: { extensions: { [TASKS_EXTENSION_ID]: {} } } - })); + client = new Client( + { name: 'mcp-conformance', version: '1.0' }, + { + capabilities: { extensions: { [TASKS_EXTENSION_ID]: {} } } + } + ); + await client.connect(new StreamableHTTPClientTransport(new URL(serverUrl))); } catch (error) { checks.push({ id: 'tasks-session-bootstrap', @@ -81,15 +87,16 @@ export class 
TasksRequestStateScenario implements ClientScenario { // Drive a long-running task once and reuse it for every check. let taskId: string | undefined; try { - const created = await rawRequest( - serverUrl, - 'tools/call', + const created = (await client.request( { - name: 'slow_compute', - arguments: { seconds: 60, label: 'request-state' } + method: 'tools/call', + params: { + name: 'slow_compute', + arguments: { seconds: 60, label: 'request-state' } + } }, - { sessionId } - ); + AnyResult + )) as any; taskId = created.taskId; } catch (error) { checks.push( @@ -127,12 +134,10 @@ export class TasksRequestStateScenario implements ClientScenario { const description = 'tasks/get may include requestState; when present it MUST be a non-empty string'; try { - const task = await rawRequest( - serverUrl, - 'tasks/get', - { taskId }, - { sessionId } - ); + const task = (await client.request( + { method: 'tasks/get', params: { taskId } }, + AnyResult + )) as any; const errs: string[] = []; if (task.requestState !== undefined) { if (typeof task.requestState !== 'string') { @@ -187,12 +192,10 @@ export class TasksRequestStateScenario implements ClientScenario { }); } else { try { - const echoed = await rawRequest( - serverUrl, - 'tasks/get', - { taskId, requestState: firstToken }, - { sessionId } - ); + const echoed = (await client.request( + { method: 'tasks/get', params: { taskId, requestState: firstToken } }, + AnyResult + )) as any; const errs: string[] = []; if (echoed.taskId !== taskId) { errs.push( @@ -239,19 +242,15 @@ export class TasksRequestStateScenario implements ClientScenario { // that sign tokens with embedded expiry, this likely yields a // newer token; on plaintext-token servers it round-trips the // same value (still valid). 
- await rawRequest( - serverUrl, - 'tasks/get', - { taskId, requestState: firstToken }, - { sessionId } + await client.request( + { method: 'tasks/get', params: { taskId, requestState: firstToken } }, + AnyResult ); // Now re-echo the OLDER token; server MUST accept. - const stale = await rawRequest( - serverUrl, - 'tasks/get', - { taskId, requestState: firstToken }, - { sessionId } - ); + const stale = (await client.request( + { method: 'tasks/get', params: { taskId, requestState: firstToken } }, + AnyResult + )) as any; const errs: string[] = []; if (stale.taskId !== taskId) { errs.push( @@ -280,11 +279,15 @@ export class TasksRequestStateScenario implements ClientScenario { // Cleanup the long-lived task so we don't leak goroutines. try { - await rawRequest(serverUrl, 'tasks/cancel', { taskId }, { sessionId }); + await client.request( + { method: 'tasks/cancel', params: { taskId } }, + AnyResult + ); } catch { /* swallow */ } + await client.close().catch(() => {}); return checks; } } diff --git a/src/scenarios/server/tasks/wire-fields.ts b/src/scenarios/server/tasks/wire-fields.ts index 3fb377d..3dc69fd 100644 --- a/src/scenarios/server/tasks/wire-fields.ts +++ b/src/scenarios/server/tasks/wire-fields.ts @@ -10,6 +10,9 @@ * - slow_compute — task-supporting, sleeps N seconds */ +import { Client } from '@modelcontextprotocol/sdk/client/index.js'; +import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js'; + import { ClientScenario, ConformanceCheck, @@ -19,11 +22,10 @@ import { import { TASKS_EXTENSION_ID, SEP_2663_REF, + AnyResult, errMsg, failureCheck, skipCheck, - initRawSession, - rawRequest, waitForTerminal } from './helpers'; @@ -57,13 +59,17 @@ export class TasksWireFieldsScenario implements ClientScenario { async run(serverUrl: string): Promise { const checks: ConformanceCheck[] = []; - let sessionId: string; + let client: Client; try { - ({ sessionId } = await initRawSession(serverUrl, { - capabilities: { - 
extensions: { [TASKS_EXTENSION_ID]: {} } + client = new Client( + { name: 'mcp-conformance', version: '1.0' }, + { + capabilities: { + extensions: { [TASKS_EXTENSION_ID]: {} } + } } - })); + ); + await client.connect(new StreamableHTTPClientTransport(new URL(serverUrl))); } catch (error) { checks.push({ id: 'tasks-session-bootstrap', @@ -86,15 +92,16 @@ export class TasksWireFieldsScenario implements ClientScenario { const description = 'CreateTaskResult uses ttlSeconds + pollIntervalMilliseconds; legacy ttl / pollInterval keys absent'; try { - const result = await rawRequest( - serverUrl, - 'tools/call', + const result = (await client.request( { - name: 'slow_compute', - arguments: { seconds: 1, label: 'wire-fields' } + method: 'tools/call', + params: { + name: 'slow_compute', + arguments: { seconds: 1, label: 'wire-fields' } + } }, - { sessionId } - ); + AnyResult + )) as any; createdTaskId = result.taskId; const errs: string[] = []; // ttlSeconds — required, positive (or null = unlimited; treat @@ -163,16 +170,17 @@ export class TasksWireFieldsScenario implements ClientScenario { checks.push(skipCheck(id, name, description, 'no task created')); } else { try { - await waitForTerminal(serverUrl, sessionId, createdTaskId); + await waitForTerminal(client, createdTaskId); // Sanity probe well before TTL (the unit is seconds; servers // typically pick order-of-minutes defaults). 
await new Promise((r) => setTimeout(r, 500)); - const after = await rawRequest( - serverUrl, - 'tasks/get', - { taskId: createdTaskId }, - { sessionId } - ); + const after = (await client.request( + { + method: 'tasks/get', + params: { taskId: createdTaskId } + }, + AnyResult + )) as any; const errs: string[] = []; if (after.taskId !== createdTaskId) { errs.push( @@ -203,20 +211,21 @@ export class TasksWireFieldsScenario implements ClientScenario { const description = 'tasks/get inlined result MUST NOT include the v1 io.modelcontextprotocol/related-task _meta key (taskId is at the root)'; try { - const created = await rawRequest( - serverUrl, - 'tools/call', + const created = (await client.request( { - name: 'slow_compute', - arguments: { seconds: 1, label: 'wire-fields-meta' } + method: 'tools/call', + params: { + name: 'slow_compute', + arguments: { seconds: 1, label: 'wire-fields-meta' } + } }, - { sessionId } - ); + AnyResult + )) as any; const taskId = created.taskId; if (!taskId) { checks.push(skipCheck(id, name, description, 'no task created')); } else { - const terminal = await waitForTerminal(serverUrl, sessionId, taskId); + const terminal = await waitForTerminal(client, taskId); const errs: string[] = []; const meta = terminal.result?._meta; if (meta && meta['io.modelcontextprotocol/related-task']) { @@ -245,6 +254,7 @@ export class TasksWireFieldsScenario implements ClientScenario { } } + await client.close().catch(() => {}); return checks; } } From 683c633514105c28e9fa56343e1b5763533de7d8 Mon Sep 17 00:00:00 2001 From: Sri Panyam Date: Wed, 6 May 2026 14:24:23 -0700 Subject: [PATCH 7/7] style: prettier formatting on tasks/mrtr scenarios --- src/scenarios/server/mrtr/ephemeral-flow.ts | 23 ++++++++++++++++----- src/scenarios/server/tasks/capability.ts | 4 +++- src/scenarios/server/tasks/dispatch.ts | 21 ++++++++----------- src/scenarios/server/tasks/lifecycle.ts | 4 +++- src/scenarios/server/tasks/mrtr-input.ts | 4 +++- 
src/scenarios/server/tasks/request-state.ts | 19 +++++++++++++---- src/scenarios/server/tasks/wire-fields.ts | 4 +++- 7 files changed, 54 insertions(+), 25 deletions(-) diff --git a/src/scenarios/server/mrtr/ephemeral-flow.ts b/src/scenarios/server/mrtr/ephemeral-flow.ts index 51025a7..543c431 100644 --- a/src/scenarios/server/mrtr/ephemeral-flow.ts +++ b/src/scenarios/server/mrtr/ephemeral-flow.ts @@ -101,7 +101,9 @@ Every \`tools/call\` response in the MRTR contract is one of: } } ); - await client.connect(new StreamableHTTPClientTransport(new URL(serverUrl))); + await client.connect( + new StreamableHTTPClientTransport(new URL(serverUrl)) + ); } catch (error) { checks.push({ id: 'mrtr-session-bootstrap', @@ -312,7 +314,10 @@ Every \`tools/call\` response in the MRTR contract is one of: const r1 = (await client.request( { method: 'tools/call', - params: { name: 'test_incomplete_result_request_state', arguments: {} } + params: { + name: 'test_incomplete_result_request_state', + arguments: {} + } }, AnyResult )) as any; @@ -378,7 +383,10 @@ Every \`tools/call\` response in the MRTR contract is one of: const r1 = (await client.request( { method: 'tools/call', - params: { name: 'test_incomplete_result_multiple_inputs', arguments: {} } + params: { + name: 'test_incomplete_result_multiple_inputs', + arguments: {} + } }, AnyResult )) as any; @@ -455,7 +463,10 @@ Every \`tools/call\` response in the MRTR contract is one of: const r1 = (await client.request( { method: 'tools/call', - params: { name: 'test_incomplete_result_multi_round', arguments: {} } + params: { + name: 'test_incomplete_result_multi_round', + arguments: {} + } }, AnyResult )) as any; @@ -544,7 +555,9 @@ Every \`tools/call\` response in the MRTR contract is one of: params: { name: 'test_incomplete_result_elicitation', arguments: {}, - inputResponses: { wrong_key: mockElicitResponse({ data: 'wrong' }) } + inputResponses: { + wrong_key: mockElicitResponse({ data: 'wrong' }) + } } }, AnyResult diff --git 
a/src/scenarios/server/tasks/capability.ts b/src/scenarios/server/tasks/capability.ts index c92c287..5211145 100644 --- a/src/scenarios/server/tasks/capability.ts +++ b/src/scenarios/server/tasks/capability.ts @@ -76,7 +76,9 @@ export class TasksCapabilityNegotiationScenario implements ClientScenario { } } ); - await withExt.connect(new StreamableHTTPClientTransport(new URL(serverUrl))); + await withExt.connect( + new StreamableHTTPClientTransport(new URL(serverUrl)) + ); withoutExt = new Client( { name: 'mcp-conformance', version: '1.0' }, diff --git a/src/scenarios/server/tasks/dispatch.ts b/src/scenarios/server/tasks/dispatch.ts index 272ea57..ec78df4 100644 --- a/src/scenarios/server/tasks/dispatch.ts +++ b/src/scenarios/server/tasks/dispatch.ts @@ -104,7 +104,9 @@ export class TasksDispatchScenario implements ClientScenario { } } ); - await client.connect(new StreamableHTTPClientTransport(new URL(serverUrl))); + await client.connect( + new StreamableHTTPClientTransport(new URL(serverUrl)) + ); } catch (error) { checks.push({ id: 'tasks-session-bootstrap', @@ -163,10 +165,7 @@ export class TasksDispatchScenario implements ClientScenario { const description = 'tasks/list is removed in v2 and MUST reject with -32601'; try { - await client.request( - { method: 'tasks/list', params: {} }, - AnyResult - ); + await client.request({ method: 'tasks/list', params: {} }, AnyResult); checks.push({ id, name, @@ -404,18 +403,16 @@ export class TasksDispatchScenario implements ClientScenario { const elicit = (await client.request( { method: 'tools/call', - params: { name: 'confirm_delete', arguments: { filename: 'rt.txt' } } + params: { + name: 'confirm_delete', + arguments: { filename: 'rt.txt' } + } }, AnyResult )) as any; const elicitTaskId = elicit.taskId; if (elicitTaskId) { - await waitForStatus( - client, - elicitTaskId, - 'input_required', - 5_000 - ); + await waitForStatus(client, elicitTaskId, 'input_required', 5_000); const updateAck = (await client.request( { 
method: 'tasks/update', diff --git a/src/scenarios/server/tasks/lifecycle.ts b/src/scenarios/server/tasks/lifecycle.ts index 53381fa..cdca072 100644 --- a/src/scenarios/server/tasks/lifecycle.ts +++ b/src/scenarios/server/tasks/lifecycle.ts @@ -98,7 +98,9 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under } } ); - await client.connect(new StreamableHTTPClientTransport(new URL(serverUrl))); + await client.connect( + new StreamableHTTPClientTransport(new URL(serverUrl)) + ); } catch (error) { checks.push({ id: 'tasks-session-bootstrap', diff --git a/src/scenarios/server/tasks/mrtr-input.ts b/src/scenarios/server/tasks/mrtr-input.ts index 416aa09..84f0cbb 100644 --- a/src/scenarios/server/tasks/mrtr-input.ts +++ b/src/scenarios/server/tasks/mrtr-input.ts @@ -79,7 +79,9 @@ export class TasksMRTRInputScenario implements ClientScenario { } } ); - await client.connect(new StreamableHTTPClientTransport(new URL(serverUrl))); + await client.connect( + new StreamableHTTPClientTransport(new URL(serverUrl)) + ); } catch (error) { checks.push({ id: 'tasks-session-bootstrap', diff --git a/src/scenarios/server/tasks/request-state.ts b/src/scenarios/server/tasks/request-state.ts index 30e9cc5..c6f9de8 100644 --- a/src/scenarios/server/tasks/request-state.ts +++ b/src/scenarios/server/tasks/request-state.ts @@ -69,7 +69,9 @@ export class TasksRequestStateScenario implements ClientScenario { capabilities: { extensions: { [TASKS_EXTENSION_ID]: {} } } } ); - await client.connect(new StreamableHTTPClientTransport(new URL(serverUrl))); + await client.connect( + new StreamableHTTPClientTransport(new URL(serverUrl)) + ); } catch (error) { checks.push({ id: 'tasks-session-bootstrap', @@ -193,7 +195,10 @@ export class TasksRequestStateScenario implements ClientScenario { } else { try { const echoed = (await client.request( - { method: 'tasks/get', params: { taskId, requestState: firstToken } }, + { + method: 'tasks/get', + params: { taskId, requestState: firstToken } + }, 
AnyResult )) as any; const errs: string[] = []; @@ -243,12 +248,18 @@ export class TasksRequestStateScenario implements ClientScenario { // newer token; on plaintext-token servers it round-trips the // same value (still valid). await client.request( - { method: 'tasks/get', params: { taskId, requestState: firstToken } }, + { + method: 'tasks/get', + params: { taskId, requestState: firstToken } + }, AnyResult ); // Now re-echo the OLDER token; server MUST accept. const stale = (await client.request( - { method: 'tasks/get', params: { taskId, requestState: firstToken } }, + { + method: 'tasks/get', + params: { taskId, requestState: firstToken } + }, AnyResult )) as any; const errs: string[] = []; diff --git a/src/scenarios/server/tasks/wire-fields.ts b/src/scenarios/server/tasks/wire-fields.ts index 3dc69fd..1c98d16 100644 --- a/src/scenarios/server/tasks/wire-fields.ts +++ b/src/scenarios/server/tasks/wire-fields.ts @@ -69,7 +69,9 @@ export class TasksWireFieldsScenario implements ClientScenario { } } ); - await client.connect(new StreamableHTTPClientTransport(new URL(serverUrl))); + await client.connect( + new StreamableHTTPClientTransport(new URL(serverUrl)) + ); } catch (error) { checks.push({ id: 'tasks-session-bootstrap',