modelcontextprotocol · panyam · May 5, 2026 · May 5, 2026 · May 5, 2026 · May 5, 2026
diff --git a/src/scenarios/index.ts b/src/scenarios/index.ts
@@ -63,6 +63,16 @@ import {
 
 import { DNSRebindingProtectionScenario } from './server/dns-rebinding';
 
+import { TasksLifecycleScenario } from './server/tasks/lifecycle';
+import { TasksCapabilityNegotiationScenario } from './server/tasks/capability';
+import { TasksWireFieldsScenario } from './server/tasks/wire-fields';
+import { TasksRequestStateScenario } from './server/tasks/request-state';
+import { TasksMRTRInputScenario } from './server/tasks/mrtr-input';
+import { TasksRequestHeadersScenario } from './server/tasks/headers';
+import { TasksDispatchScenario } from './server/tasks/dispatch';
+import { TasksStatusNotificationsScenario } from './server/tasks/notifications';
+import { MrtrEphemeralFlowScenario } from './server/mrtr/ephemeral-flow';
+
 import {
   authScenariosList,
   backcompatScenariosList,
@@ -81,7 +91,28 @@ const pendingClientScenariosList: ClientScenario[] = [
 
   // On hold until server-side SSE improvements are made
   // https://github.com/modelcontextprotocol/typescript-sdk/pull/1129
-  new ServerSSEPollingScenario()
+  new ServerSSEPollingScenario(),
+
+  // SEP-2663 Tasks extension lifecycle.
+  // The SEP is still in draft (see PR 2663) and the everything-server
+  // does not yet implement the io.modelcontextprotocol/tasks extension,
+  // so all-scenarios.test.ts cannot exercise this against the default
+  // fixture. Active runs target a SEP-2663-conformant server via the
+  // dedicated tasks/lifecycle.test.ts harness.
+  new TasksLifecycleScenario(),
+  new TasksCapabilityNegotiationScenario(),
+  new TasksWireFieldsScenario(),
+  new TasksRequestStateScenario(),
+  new TasksMRTRInputScenario(),
+  new TasksRequestHeadersScenario(),
+  new TasksDispatchScenario(),
+  new TasksStatusNotificationsScenario(),
+
+  // SEP-2322 MRTR (ephemeral IncompleteResult flow).
+  // Targets a different fixture than tasks scenarios; the dedicated
+  // mrtr/all-scenarios.test.ts runner points at an MRTR-conformant
+  // server via MRTR_SERVER_URL / MRTR_SERVER_CMD.
+  new MrtrEphemeralFlowScenario()
 ];
 
 // All client scenarios
@@ -139,7 +170,26 @@ const allClientScenariosList: ClientScenario[] = [
   new PromptsGetWithImageScenario(),
 
   // Security scenarios
-  new DNSRebindingProtectionScenario()
+  new DNSRebindingProtectionScenario(),
+
+  // SEP-2663 Tasks extension (draft).
+  // Listed here so the CLI can find them by name and so the active/pending
+  // filter sees them; pendingClientScenariosList above excludes them from
+  // automatic runs against the everything-server (which doesn't implement
+  // io.modelcontextprotocol/tasks yet).
+  new TasksLifecycleScenario(),
+  new TasksCapabilityNegotiationScenario(),
+  new TasksWireFieldsScenario(),
+  new TasksRequestStateScenario(),
+  new TasksMRTRInputScenario(),
+  new TasksRequestHeadersScenario(),
+  new TasksDispatchScenario(),
+  new TasksStatusNotificationsScenario(),
+
+  // SEP-2322 MRTR (ephemeral IncompleteResult flow). Targets a
+  // dedicated MRTR fixture — out of scope for the default
+  // everything-server until SEP-2322 lands there.
+  new MrtrEphemeralFlowScenario()
 ];
 
 // Active client scenarios (excludes pending)

diff --git a/src/scenarios/server/_shared/test-runner.ts b/src/scenarios/server/_shared/test-runner.ts
@@ -0,0 +1,56 @@
+/**
+ * Test-runner utilities for server-conformance scenarios.
+ *
+ * Used by `*.test.ts` runner files that auto-spawn a fixture binary
+ * before running scenarios. These helpers are language-agnostic and
+ * harness-only — they don't touch MCP protocol, so they don't belong
+ * in the SDK.
+ *
+ * Single responsibility today: TCP readiness polling. Spawn / cleanup
+ * scaffolding stays inline in each runner so the file reads top-to-bottom
+ * without indirection (per AGENTS.md "repetitive check blocks are fine").
+ */
+
+import { connect } from 'net';
+
+/**
+ * Poll the host/port of `url` until a TCP connection succeeds, or throw
+ * with the last connect error once `timeoutMs` elapses. Only proves the
+ * port is bound. Bracketed IPv6 hosts (`http://[::1]:8080`) are supported.
+ */
+export async function waitForServerReady(
+  url: string,
+  timeoutMs: number
+): Promise<void> {
+  const u = new URL(url);
+  const port = parseInt(u.port || (u.protocol === 'https:' ? '443' : '80'), 10);
+  const host = u.hostname.replace(/^\[(.*)\]$/, '$1'); // "[::1]" -> "::1"
+  const deadline = Date.now() + timeoutMs;
+  let lastErr: Error | null = null;
+  // Retry every 200 ms; each attempt is capped at 1 s by the socket timeout.
+  while (Date.now() < deadline) {
+    try {
+      await new Promise<void>((resolve, reject) => {
+        const socket = connect({ host, port }, () => {
+          socket.end();
+          resolve();
+        });
+        socket.once('error', (err) => {
+          socket.destroy();
+          reject(err);
+        });
+        socket.setTimeout(1_000, () => {
+          socket.destroy();
+          reject(new Error('connect timeout'));
+        });
+      });
+      return;
+    } catch (err) {
+      lastErr = err as Error;
+      await new Promise((r) => setTimeout(r, 200));
+    }
+  }
+  throw new Error(
+    `${host}:${port} did not accept TCP connections (last: ${lastErr?.message ?? 'unknown'})`
+  );
+}
diff --git a/src/scenarios/server/_shared/wire-format.ts b/src/scenarios/server/_shared/wire-format.ts
@@ -0,0 +1,33 @@
+/**
+ * Wire-format validation helpers shared across server-conformance
+ * scenarios. Pure predicates / regex — no I/O, no async.
+ *
+ * Pragmatic choices are documented per helper. When validation
+ * requirements tighten (e.g., the spec mandates a stricter timestamp
+ * format), edit here once and every scenario picks it up.
+ */
+
+/**
+ * ISO-8601 timestamp prefix (YYYY-MM-DDThh:mm:ss). Only the prefix is
+ * matched: anything after the seconds (fraction, `Z`, `+00:00`, `+0000`,
+ * or arbitrary text) passes, and field ranges are not checked. Covers Go
+ * `time.RFC3339Nano`, Python `datetime.isoformat()`, JS `toISOString()`.
+ *
+ * Why a regex over `Date.parse` / `new Date(s).toISOString() === s` /
+ * `Temporal.Instant.from`:
+ *   - `Date.parse` accepts RFC-2822, "May 4 2026", and other
+ *     non-ISO strings — too permissive.
+ *   - `new Date(s).toISOString() === s` is too strict — rejects
+ *     valid `+00:00`-style offsets that don't survive the canonical
+ *     `Z` round-trip.
+ *   - `Temporal.Instant.from` is Node 24+ experimental.
+ *
+ * Swap this constant for a stdlib validator if/when one becomes
+ * broadly available.
+ */
+export const ISO_8601_PATTERN = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}/;
+
+/** True only for string inputs that begin with an ISO-8601 date-time prefix. */
+export function isIso8601(s: unknown): boolean {
+  return typeof s !== 'string' ? false : ISO_8601_PATTERN.test(s);
+}
diff --git a/src/scenarios/server/mrtr/README.md b/src/scenarios/server/mrtr/README.md
@@ -0,0 +1,113 @@
+# SEP-2322 MRTR — Server Conformance
+
+Tests any MCP server that implements the SEP-2322 ephemeral
+Multi Round-Trip Request flow on `tools/call` — the
+`IncompleteResult` → retry-with-`inputResponses` → `ToolResult`
+contract that lets a tool gather elicitation / sampling / roots input
+without creating a task envelope.
+
+## Specs covered
+
+| SEP      | What it adds                                                                                                     | Where it shows up             |
+| -------- | ---------------------------------------------------------------------------------------------------------------- | ----------------------------- |
+| SEP-2322 | Ephemeral MRTR — `resultType` discriminator, `inputRequests` / `inputResponses` keyed maps, `requestState` token | every check                   |
+| SEP-2663 | MRTR → Tasks composition (final round returns `CreateTaskResult`)                                                | mrtr-08 (SKIPPED — see below) |
+
+## ClientScenario classes
+
+### `mrtr-ephemeral-flow` (`ephemeral-flow.ts`)
+
+A single scenario covers the full ephemeral MRTR contract, per the
+AGENTS.md "fewer scenarios, more checks" rule. A server that
+implemented elicitation round-trips but not sampling round-trips would
+be incoherent, so all checks are bundled into this one scenario.
+
+| Check                                    | What it tests                                                                                                                   |
+| ---------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- |
+| `mrtr-basic-elicitation-round-trip`      | Round 1 returns `IncompleteResult` with `elicitation/create`; round 2 completes with the answer reflected                       |
+| `mrtr-sampling-round-trip`               | Same flow with `sampling/createMessage`                                                                                         |
+| `mrtr-roots-list-round-trip`             | Same flow with `roots/list`                                                                                                     |
+| `mrtr-request-state-round-trip`          | When server emits `requestState`, it's a non-empty string and the server validates the echo                                     |
+| `mrtr-multiple-input-requests-one-round` | A single `IncompleteResult` MAY carry inputRequests for `elicitation/create` + `sampling/createMessage` + `roots/list` together |
+| `mrtr-multi-round-flow`                  | A handler MAY take 2+ rounds; each round mints a fresh `requestState`; final result reflects answers from every round           |
+| `mrtr-wrong-input-key-rerequests`        | When client sends a wrong `inputResponses` key, server SHOULD re-request via `IncompleteResult` rather than erroring            |
+| `mrtr-tasks-composition`                 | **SKIPPED** — see "Open issues" below                                                                                           |
+
+## Required server fixtures
+
+The fixture server MUST register these tools:
+
+| Tool                                     | Behavior                                                                                    |
+| ---------------------------------------- | ------------------------------------------------------------------------------------------- |
+| `test_tool_with_elicitation`             | One `elicitation/create` round, completes with answer reflected                             |
+| `test_incomplete_result_sampling`        | One `sampling/createMessage` round                                                          |
+| `test_incomplete_result_list_roots`      | One `roots/list` round                                                                      |
+| `test_incomplete_result_request_state`   | Exercises `requestState` validation; final result includes `state-ok` to confirm validation |
+| `test_incomplete_result_multiple_inputs` | Emits 3+ inputRequests of different methods in one round                                    |
+| `test_incomplete_result_multi_round`     | Drives 2+ MRTR rounds, final result references every answer                                 |
+| `test_incomplete_result_elicitation`     | Emits inputRequest for `user_name`; server re-requests on wrong-key responses               |
+
+The fixture can be implemented in any language; one example reference
+implementation lives at
+[`panyam/mcpkit/examples/mrtr`](https://github.com/panyam/mcpkit/tree/main/examples/mrtr).
+
+## Running
+
+```bash
+# Against an already-running server
+MRTR_SERVER_URL=http://localhost:8080/mcp \
+  npx vitest run src/scenarios/server/mrtr/all-scenarios.test.ts
+
+# Auto-spawn a fixture in beforeAll
+MRTR_SERVER_URL=http://localhost:18093/mcp \
+MRTR_SERVER_CMD="/path/to/mrtr-server --port 18093" \
+  npx vitest run src/scenarios/server/mrtr/all-scenarios.test.ts
+```
+
+## Open issues
+
+### `mrtr-tasks-composition` deferred
+
+SEP-2663 commit `451f5e1` (Apr 30) made the MRTR → Tasks composition
+flow normative: a `tools/call` MAY exchange `IncompleteResult` rounds
+to gather input, then return `CreateTaskResult` to go async on a
+subsequent round. Two blockers prevent enabling the check today:
+
+1. **Spec watch — discriminator value.** SEP-2322 (MRTR base) and
+   SEP-2663 (Tasks Extension) currently disagree on the wire value for
+   the "needs more input" discriminator: SEP-2322's draft uses
+   `"input_required"`, SEP-2663's draft uses `"incomplete"`. Awaiting
+   alignment between the SEP authors. The current literal lives in
+   `MRTR_INCOMPLETE_RESULT_TYPE` (helpers.ts) so it's a one-line flip
+   when the spec converges.
+
+2. **Reference-impl gap.** The natural server-side implementation
+   pattern for tasks (mint task up-front, run handler in a goroutine /
+   async task) means the handler's `IncompleteResult` signal isn't
+   visible to the middleware in time — by the time the handler returns
+   `IsIncomplete`, the `CreateTaskResult` is already on the wire. SDKs
+   in any language need an inverted middleware pattern that runs the
+   first round synchronously and only spins up the task once the
+   handler signals async-promotion.
+   ([panyam/mcpkit issue 347](https://github.com/panyam/mcpkit/issues/347)
+   tracks this for one example impl; SDKs in any language hit the
+   same architectural choice.)
+
+The check is registered with `status: 'SKIPPED'` so it's discoverable
+but doesn't fail conformance runs. When both blockers resolve, remove
+the SKIPPED short-circuit in `ephemeral-flow.ts` Check 8.
+
+## Design notes
+
+### Why the MRTR scenarios share helpers with `tasks/`
+
+`MRTR_INCOMPLETE_RESULT_TYPE`, the result-type predicates
+(`isIncompleteResult`, `isCompleteResult`), and the elicitation/sampling/
+roots mocks live in `mrtr/helpers.ts`. The shared `AnyResult` Zod
+passthrough schema and `waitForTerminal`/`waitForStatus` polling helpers
+are imported from the sibling `../tasks/helpers` because both scenario
+sets share the same wire-shape problem (SDK Zod schemas strip extension
+fields). Pair `client.request(req, AnyResult)` with the SDK's
+`StreamableHTTPClientTransport` and you preserve every SEP-2322 / SEP-2663
+field. When the upstream SDK gains schemas for those shapes, the
+passthrough disappears in favor of the typed schemas directly.
diff --git a/src/scenarios/server/mrtr/all-scenarios.test.ts b/src/scenarios/server/mrtr/all-scenarios.test.ts
@@ -0,0 +1,115 @@
+/**
+ * SEP-2322 MRTR test runner.
+ *
+ * Iterates the MRTR scenario classes against a SEP-2322-conformant
+ * server. Configuration is brand-neutral and language-agnostic:
+ *
+ *   1. Point at an already-running server:
+ *        MRTR_SERVER_URL=http://localhost:8080/mcp npm test -- mrtr/all-scenarios.test.ts
+ *
+ *   2. Auto-spawn a fixture before tests (any language):
+ *        MRTR_SERVER_URL=http://localhost:18093/mcp \
+ *        MRTR_SERVER_CMD="/path/to/server --port 18093" \
+ *          npm test -- mrtr/all-scenarios.test.ts
+ *
+ * If MRTR_SERVER_URL is unset the suite is skipped — keeping CI runs
+ * against the everything-server green.
+ *
+ * The fixture server can be implemented in any language as long as it
+ * exposes a SEP-2322 conformant Streamable HTTP MCP endpoint. Anyone is
+ * free to bring their own; one example reference implementation lives
+ * at https://github.com/panyam/mcpkit/tree/main/examples/mrtr.
+ */
+
+import { spawn, ChildProcess } from 'child_process';
+import { describe, it, expect, beforeAll, afterAll } from 'vitest';
+import { MrtrEphemeralFlowScenario } from './ephemeral-flow';
+import { waitForServerReady } from '../_shared/test-runner';
+
+// Target endpoint; the whole suite is skipped when this is unset.
+const SERVER_URL = process.env.MRTR_SERVER_URL;
+// Optional shell command that launches the fixture before the tests run.
+const SERVER_CMD = process.env.MRTR_SERVER_CMD;
+const SERVER_STARTUP_TIMEOUT_MS = 15_000;
+const HAVE_TARGET = !!SERVER_URL;
+const SHOULD_SPAWN = HAVE_TARGET && !!SERVER_CMD;
+const MRTR_SCENARIOS = [new MrtrEphemeralFlowScenario()];
+const describeIfTarget = HAVE_TARGET ? describe : describe.skip;
+
+describeIfTarget('SEP-2322 MRTR — server conformance', () => {
+  let serverProcess: ChildProcess | null = null;
+  // `.killed` only means kill() sent a signal — not that the process exited —
+  // so liveness (and the SIGTERM -> SIGKILL escalation) uses the exit state.
+  const alive = (p: ChildProcess) => (p.exitCode ?? p.signalCode) === null;
+
+  beforeAll(async () => {
+    if (!SHOULD_SPAWN) return;
+
+    serverProcess = spawn('sh', ['-c', SERVER_CMD!], {
+      stdio: ['ignore', 'pipe', 'pipe'],
+      detached: false
+    });
+    // Capture fixture output so startup failures can be reported with it.
+    let stdoutBuf = '';
+    let stderrBuf = '';
+    serverProcess.stdout?.on('data', (b) => {
+      stdoutBuf += b.toString();
+    });
+    serverProcess.stderr?.on('data', (b) => {
+      stderrBuf += b.toString();
+    });
+    serverProcess.on('exit', (code) => {
+      if (code !== null && code !== 0) {
+        console.error(
+          `mrtr fixture exited unexpectedly with code ${code}.\nSTDOUT: ${stdoutBuf}\nSTDERR: ${stderrBuf}`
+        );
+      }
+    });
+
+    await waitForServerReady(SERVER_URL!, SERVER_STARTUP_TIMEOUT_MS).catch(
+      (err) => {
+        if (serverProcess && alive(serverProcess)) {
+          serverProcess.kill('SIGKILL');
+        }
+        throw new Error(
+          `mrtr fixture did not become reachable within ${SERVER_STARTUP_TIMEOUT_MS}ms: ${err.message}\nSTDOUT: ${stdoutBuf}\nSTDERR: ${stderrBuf}`
+        );
+      }
+    );
+  }, SERVER_STARTUP_TIMEOUT_MS + 5_000);
+
+  afterAll(async () => {
+    const child = serverProcess;
+    serverProcess = null;
+    if (!child || !alive(child)) return;
+    await new Promise<void>((resolve) => {
+      const timer = setTimeout(() => {
+        if (alive(child)) child.kill('SIGKILL');
+        resolve();
+      }, 3_000);
+      child.once('exit', () => {
+        clearTimeout(timer);
+        resolve();
+      });
+      child.kill('SIGTERM');
+    });
+  });
+
+  for (const scenario of MRTR_SCENARIOS) {
+    it(`${scenario.name} — all checks succeed against fixture`, async () => {
+      const checks = await scenario.run(SERVER_URL!);
+      expect(checks.length).toBeGreaterThan(0);
+      const failures = checks.filter(
+        (c) => c.status === 'FAILURE' || c.status === 'WARNING'
+      );
+      if (failures.length > 0) {
+        const detail = failures
+          .map((c) => `  - ${c.id}: ${c.errorMessage ?? '(no message)'}`)
+          .join('\n');
+        throw new Error(
+          `${failures.length}/${checks.length} checks failed:\n${detail}`
+        );
+      }
+    });
+  }
+});