From da57c23183f23373d15e3e2a8ef68c269fd8707b Mon Sep 17 00:00:00 2001
From: Sri Panyam <sri.panyam@gmail.com>
Date: Tue, 5 May 2026 14:14:19 -0700
Subject: [PATCH 1/7] feat(tasks): SEP-2663 lifecycle scenario (8 checks)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds the first scenario for the SEP-2663 io.modelcontextprotocol/tasks
extension — a single TasksLifecycleScenario covering sync vs async
dispatch, DetailedTask shape on tasks/get, tool errors vs protocol
errors, and cancellation semantics. 8 ConformanceCheck records, all
passing against a SEP-2663-conformant Go fixture.

Why "tasks" (not "tasks-v2"): SEP-2663 IS the tasks surface once it
lands; the v2 suffix is only meaningful in implementations that
maintain a v1 surface alongside, which the conformance suite does not.

Layout:
- src/scenarios/server/tasks/lifecycle.ts — scenario class
- src/scenarios/server/tasks/helpers.ts — raw-fetch escape hatch
  (the SDK's typed schemas strip resultType/inputRequests/...)
- src/scenarios/server/tasks/lifecycle.test.ts — fork-local vitest
  runner. Two modes: spawn a fixture binary via MCPKIT_TASKS_BINARY,
  or point at an already-running server via MCPKIT_TASKS_SERVER_URL.
  Skips when neither is set so it doesn't break upstream CI runs that
  go through everything-server (which doesn't yet implement
  io.modelcontextprotocol/tasks).

Scenario is registered in allClientScenariosList (so the CLI can find
it by name) and in pendingClientScenariosList so all-scenarios.test.ts
skips it; promote to active once the upstream fixture grows extension
support.

Tagged ['extension', DRAFT_PROTOCOL_VERSION] — selectable via
--suite extensions and --spec-version draft.
---
 src/scenarios/index.ts                       |  21 +-
 src/scenarios/server/tasks/helpers.ts        | 214 +++++++
 src/scenarios/server/tasks/lifecycle.test.ts | 145 +++++
 src/scenarios/server/tasks/lifecycle.ts      | 600 +++++++++++++++++++
 4 files changed, 978 insertions(+), 2 deletions(-)
 create mode 100644 src/scenarios/server/tasks/helpers.ts
 create mode 100644 src/scenarios/server/tasks/lifecycle.test.ts
 create mode 100644 src/scenarios/server/tasks/lifecycle.ts

diff --git a/src/scenarios/index.ts b/src/scenarios/index.ts
index 0e2191a..096f4f4 100644
--- a/src/scenarios/index.ts
+++ b/src/scenarios/index.ts
@@ -63,6 +63,8 @@ import {
 
 import { DNSRebindingProtectionScenario } from './server/dns-rebinding';
 
+import { TasksLifecycleScenario } from './server/tasks/lifecycle';
+
 import {
   authScenariosList,
   backcompatScenariosList,
@@ -81,7 +83,15 @@ const pendingClientScenariosList: ClientScenario[] = [
 
   // On hold until server-side SSE improvements are made
   // https://github.com/modelcontextprotocol/typescript-sdk/pull/1129
-  new ServerSSEPollingScenario()
+  new ServerSSEPollingScenario(),
+
+  // SEP-2663 Tasks extension lifecycle.
+  // The SEP is still in draft (see PR 2663) and the everything-server
+  // does not yet implement the io.modelcontextprotocol/tasks extension,
+  // so all-scenarios.test.ts cannot exercise this against the default
+  // fixture. Active runs target a SEP-2663-conformant server via the
+  // dedicated tasks/lifecycle.test.ts harness.
+  new TasksLifecycleScenario()
 ];
 
 // All client scenarios
@@ -139,7 +149,14 @@ const allClientScenariosList: ClientScenario[] = [
   new PromptsGetWithImageScenario(),
 
   // Security scenarios
-  new DNSRebindingProtectionScenario()
+  new DNSRebindingProtectionScenario(),
+
+  // SEP-2663 Tasks extension (draft).
+  // Listed here so the CLI can find it by name and so the active/pending
+  // filter sees it; its pendingClientScenariosList entry (above) excludes
+  // it from automatic runs against the everything-server (which doesn't
+  // implement io.modelcontextprotocol/tasks yet).
+  new TasksLifecycleScenario()
 ];
 
 // Active client scenarios (excludes pending)
diff --git a/src/scenarios/server/tasks/helpers.ts b/src/scenarios/server/tasks/helpers.ts
new file mode 100644
index 0000000..32ebec4
--- /dev/null
+++ b/src/scenarios/server/tasks/helpers.ts
@@ -0,0 +1,214 @@
+/**
+ * Shared helpers for SEP-2663 Tasks server-conformance scenarios.
+ *
+ * The MCP TS SDK's typed schemas (CallToolResultSchema, etc.) strip the
+ * SEP-2663 / SEP-2322 wire fields — `resultType`, `taskId`, `inputRequests`,
+ * `requestState`, inlined `result`/`error` on tasks/get's DetailedTask. So
+ * scenarios that exercise those fields use raw fetch instead. This file
+ * centralizes the bootstrap + RPC + polling primitives.
+ *
+ * If/when the SDK gains schemas for the SEP-2663 wire shapes, the call
+ * sites in scenarios switch back to `client.request(..., AnyResult)`
+ * and this file shrinks (or disappears).
+ */
+
+export const TASKS_EXTENSION_ID = 'io.modelcontextprotocol/tasks';
+
+export interface InitOpts {
+  /** Negotiated wire protocolVersion. Defaults to '2025-11-25'. */
+  protocolVersion?: string;
+  /** Client capabilities (extensions, elicitation, sampling, …). */
+  capabilities?: Record<string, unknown>;
+  /** Optional clientInfo override. */
+  clientInfo?: { name: string; version: string };
+}
+
+/**
+ * Run a fresh initialize handshake, bypassing the SDK so callers can declare
+ * extension capabilities its typed wrappers lack; returns the session id.
+ * Throws if the initialize response carries no Mcp-Session-Id header.
+ */
+export async function initRawSession(
+  serverUrl: string,
+  opts: InitOpts = {}
+): Promise<string> {
+  const protocolVersion = opts.protocolVersion ?? '2025-11-25';
+  const capabilities = opts.capabilities ?? {};
+  const clientInfo = opts.clientInfo ?? {
+    name: 'mcp-conformance',
+    version: '1.0'
+  };
+
+  const initResp = await fetch(serverUrl, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      Accept: 'text/event-stream, application/json' // spec: list both
+    },
+    body: JSON.stringify({
+      jsonrpc: '2.0',
+      id: 'init-raw',
+      method: 'initialize',
+      params: { protocolVersion, clientInfo, capabilities }
+    })
+  });
+  const sid = initResp.headers.get('mcp-session-id') || '';
+  if (!sid) throw new Error('initialize response missing Mcp-Session-Id');
+
+  await fetch(serverUrl, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      Accept: 'text/event-stream, application/json',
+      'Mcp-Session-Id': sid
+    },
+    body: JSON.stringify({
+      jsonrpc: '2.0',
+      method: 'notifications/initialized'
+    })
+  });
+  return sid;
+}
+
+export interface RawRequestOpts {
+  sessionId: string;
+  /** Optional _meta object merged into the JSON-RPC params. */
+  meta?: Record<string, unknown>;
+  /** Optional HTTP request headers merged after the harness defaults. */
+  headers?: Record<string, string>;
+}
+
+export interface RawRequestResult {
+  /** The JSON-RPC `result` body, when the response carried one. */
+  result: any;
+  /** The raw fetch Response so callers can inspect transport-level headers. */
+  response: Response;
+}
+
+let nextId = 1;
+
+/**
+ * Send a raw JSON-RPC request via fetch, parsing SSE `data:` lines or
+ * plain JSON depending on Content-Type. Throws an Error decorated with
+ * `code` / `data` when the response carries a JSON-RPC error.
+ */
+export async function rawRequest(
+  serverUrl: string,
+  method: string,
+  params: any,
+  opts: RawRequestOpts
+): Promise<any> {
+  const { result } = await rawRequestFull(serverUrl, method, params, opts);
+  return result;
+}
+
+/**
+ * Like rawRequest, but also returns the raw fetch Response so callers
+ * can inspect transport-level headers (e.g., SEP-2243 routing headers).
+ */
+export async function rawRequestFull(
+  serverUrl: string,
+  method: string,
+  params: any,
+  opts: RawRequestOpts
+): Promise<RawRequestResult> {
+  const id = nextId++;
+  const requestParams = opts.meta ? { ...params, _meta: opts.meta } : params;
+  const resp = await fetch(serverUrl, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      Accept: 'text/event-stream, application/json',
+      'Mcp-Session-Id': opts.sessionId,
+      ...(opts.headers ?? {})
+    },
+    body: JSON.stringify({
+      jsonrpc: '2.0',
+      id,
+      method,
+      params: requestParams
+    })
+  });
+  const ct = resp.headers.get('content-type') || '';
+  let body: any;
+  if (ct.includes('text/event-stream')) {
+    const text = await resp.text();
+    for (const line of text.split('\n')) {
+      const trimmed = line.trim();
+      if (trimmed.startsWith('data:')) {
+        const payload = trimmed.slice(5).trimStart();
+        if (payload.startsWith('{')) {
+          const parsed = JSON.parse(payload);
+          if (parsed.id === id) {
+            body = parsed;
+            break;
+          }
+        }
+      }
+    }
+  } else {
+    body = await resp.json();
+  }
+  if (!body) throw new Error(`No JSON-RPC response for ${method}`);
+  if (body.error) {
+    const err: any = new Error(body.error.message);
+    err.code = body.error.code;
+    err.data = body.error.data;
+    throw err;
+  }
+  return { result: body.result, response: resp };
+}
+
+/** Poll tasks/get until the task reaches a terminal state. */
+export async function waitForTerminal(
+  serverUrl: string,
+  sessionId: string,
+  taskId: string,
+  timeoutMs = 10_000
+): Promise<any> {
+  const start = Date.now();
+  while (Date.now() - start < timeoutMs) {
+    const task = await rawRequest(
+      serverUrl,
+      'tasks/get',
+      { taskId },
+      { sessionId }
+    );
+    if (['completed', 'failed', 'cancelled'].includes(task.status)) {
+      return task;
+    }
+    await new Promise((r) => setTimeout(r, 200));
+  }
+  throw new Error(
+    `Task ${taskId} did not reach terminal state within ${timeoutMs}ms`
+  );
+}
+
+/** Poll tasks/get until a specific status (or any terminal state). */
+export async function waitForStatus(
+  serverUrl: string,
+  sessionId: string,
+  taskId: string,
+  status: string,
+  timeoutMs = 10_000
+): Promise<any> {
+  const start = Date.now();
+  while (Date.now() - start < timeoutMs) {
+    const task = await rawRequest(
+      serverUrl,
+      'tasks/get',
+      { taskId },
+      { sessionId }
+    );
+    if (
+      task.status === status ||
+      ['completed', 'failed', 'cancelled'].includes(task.status)
+    ) {
+      return task;
+    }
+    await new Promise((r) => setTimeout(r, 200));
+  }
+  throw new Error(
+    `Task ${taskId} did not reach status ${status} within ${timeoutMs}ms`
+  );
+}
diff --git a/src/scenarios/server/tasks/lifecycle.test.ts b/src/scenarios/server/tasks/lifecycle.test.ts
new file mode 100644
index 0000000..0977139
--- /dev/null
+++ b/src/scenarios/server/tasks/lifecycle.test.ts
@@ -0,0 +1,145 @@
+/**
+ * SEP-2663 Tasks extension test runner.
+ *
+ * Iterates the tasks server scenarios against a SEP-2663-conformant
+ * server. Two ways to point at one — pick whichever fits:
+ *
+ *   1. Existing server already running:
+ *        MCPKIT_TASKS_SERVER_URL=http://localhost:8080/mcp npm test -- lifecycle.test.ts
+ *
+ *   2. Auto-spawn a fixture binary in beforeAll (the binary must accept
+ *      `--serve --addr :PORT` and bind Streamable HTTP at /mcp):
+ *        MCPKIT_TASKS_BINARY=/path/to/tasks-server npm test -- lifecycle.test.ts
+ *
+ *   Optional: MCPKIT_TASKS_PORT overrides the auto-spawn port (default 18092).
+ *
+ * If neither is set, the suite is skipped — letting CI runs against the
+ * everything-server stay green until the upstream fixture grows SEP-2663
+ * support.
+ *
+ * The mcpkit reference fixture lives at
+ * https://github.com/panyam/mcpkit/tree/main/examples/tasks-v2 (mcpkit
+ * keeps its v1 surface alongside v2 internally; the fork only cares
+ * about the SEP-2663 surface, hence the unsuffixed naming here).
+ */
+
+import { spawn, ChildProcess } from 'child_process';
+import { describe, it, expect, beforeAll, afterAll } from 'vitest';
+import { TasksLifecycleScenario } from './lifecycle';
+
+const FIXTURE_BINARY = process.env.MCPKIT_TASKS_BINARY;
+const EXTERNAL_URL = process.env.MCPKIT_TASKS_SERVER_URL;
+const TEST_PORT = parseInt(process.env.MCPKIT_TASKS_PORT ?? '18092', 10);
+const SERVER_URL = EXTERNAL_URL ?? `http://localhost:${TEST_PORT}/mcp`;
+const SERVER_STARTUP_TIMEOUT_MS = 10_000;
+// Spawn only when no external URL is provided AND a fixture binary is.
+const SHOULD_SPAWN = !EXTERNAL_URL && Boolean(FIXTURE_BINARY);
+const HAVE_TARGET = Boolean(EXTERNAL_URL) || SHOULD_SPAWN;
+
+const TASKS_SCENARIOS = [new TasksLifecycleScenario()];
+
+const describeIfTarget = HAVE_TARGET ? describe : describe.skip;
+
+describeIfTarget('SEP-2663 Tasks — server conformance', () => {
+  let serverProcess: ChildProcess | null = null;
+
+  beforeAll(async () => {
+    if (!SHOULD_SPAWN) return;
+
+    serverProcess = spawn(FIXTURE_BINARY!, ['--serve', '--addr', `:${TEST_PORT}`], {
+      stdio: ['ignore', 'pipe', 'pipe'],
+      detached: false
+    });
+
+    let stdoutBuf = '';
+    let stderrBuf = '';
+    serverProcess.stdout?.on('data', (b) => {
+      stdoutBuf += b.toString();
+    });
+    serverProcess.stderr?.on('data', (b) => {
+      stderrBuf += b.toString();
+    });
+
+    await new Promise<void>((resolve, reject) => {
+      const timer = setTimeout(() => {
+        if (serverProcess && !serverProcess.killed) {
+          serverProcess.kill('SIGKILL');
+        }
+        reject(
+          new Error(
+            `tasks fixture failed to start within ${SERVER_STARTUP_TIMEOUT_MS}ms.\nSTDOUT: ${stdoutBuf}\nSTDERR: ${stderrBuf}`
+          )
+        );
+      }, SERVER_STARTUP_TIMEOUT_MS);
+
+      // mcpkit's tasks demo logs the listen address to stderr via the
+      // log package; treat any "Connect:" or "listening" line as ready.
+      const checkReady = (chunk: string) => {
+        if (
+          chunk.includes('Connect:') ||
+          chunk.includes('listening') ||
+          chunk.includes('Listening on')
+        ) {
+          clearTimeout(timer);
+          resolve();
+        }
+      };
+      serverProcess!.stdout?.on('data', (b) => checkReady(b.toString()));
+      serverProcess!.stderr?.on('data', (b) => checkReady(b.toString()));
+
+      serverProcess!.on('error', (err) => {
+        clearTimeout(timer);
+        reject(new Error(`Failed to spawn tasks fixture: ${err.message}`));
+      });
+      serverProcess!.on('exit', (code) => {
+        if (code !== null && code !== 0) {
+          clearTimeout(timer);
+          reject(
+            new Error(
+              `tasks fixture exited prematurely with code ${code}.\nSTDOUT: ${stdoutBuf}\nSTDERR: ${stderrBuf}`
+            )
+          );
+        }
+      });
+    });
+  }, SERVER_STARTUP_TIMEOUT_MS + 5_000);
+
+  afterAll(async () => {
+    if (!SHOULD_SPAWN) return;
+    if (!serverProcess || serverProcess.killed) return;
+    serverProcess.kill('SIGTERM');
+    await new Promise<void>((resolve) => {
+      const timer = setTimeout(() => {
+        if (serverProcess && !serverProcess.killed) {
+          serverProcess.kill('SIGKILL');
+        }
+        resolve();
+      }, 3_000);
+      serverProcess!.once('exit', () => {
+        clearTimeout(timer);
+        resolve();
+      });
+    });
+    serverProcess = null;
+  });
+
+  for (const scenario of TASKS_SCENARIOS) {
+    it(`${scenario.name} — all checks succeed against fixture`, async () => {
+      const checks = await scenario.run(SERVER_URL);
+      expect(checks.length).toBeGreaterThan(0);
+      const failures = checks.filter(
+        (c) => c.status === 'FAILURE' || c.status === 'WARNING'
+      );
+      if (failures.length > 0) {
+        // Surface the failing slugs and messages so vitest output points
+        // at the exact spec-coverage gaps.
+        const detail = failures
+          .map((c) => `  - ${c.id}: ${c.errorMessage ?? '(no message)'}`)
+          .join('\n');
+        throw new Error(
+          `${failures.length}/${checks.length} checks failed:\n${detail}`
+        );
+      }
+    });
+  }
+});
diff --git a/src/scenarios/server/tasks/lifecycle.ts b/src/scenarios/server/tasks/lifecycle.ts
new file mode 100644
index 0000000..f36fe02
--- /dev/null
+++ b/src/scenarios/server/tasks/lifecycle.ts
@@ -0,0 +1,600 @@
+/**
+ * SEP-2663 Tasks Extension — server lifecycle conformance.
+ *
+ * Tests a server that implements the io.modelcontextprotocol/tasks
+ * extension end-to-end: sync vs async dispatch, DetailedTask shape on
+ * tasks/get, tool errors vs protocol errors, and cancellation
+ * semantics.
+ *
+ * Required server fixtures (tools/list output must include all):
+ *   - greet              — sync-only, returns "Hello, {name}!"
+ *   - slow_compute       — task-supporting, sleeps N seconds
+ *   - failing_job        — task-supporting, returns a tool error
+ *   - protocol_error_job — task-supporting, panics into a protocol error
+ */
+
+import {
+  ClientScenario,
+  ConformanceCheck,
+  ScenarioSpecTag,
+  SpecReference,
+  DRAFT_PROTOCOL_VERSION
+} from '../../../types';
+import {
+  TASKS_EXTENSION_ID,
+  initRawSession,
+  rawRequest,
+  waitForTerminal
+} from './helpers';
+
+const SEP_2663_REF: SpecReference = {
+  id: 'SEP-2663',
+  url: 'https://github.com/modelcontextprotocol/specification/pull/2663'
+};
+const SEP_2322_REF: SpecReference = {
+  id: 'SEP-2322',
+  url: 'https://github.com/modelcontextprotocol/specification/pull/2322'
+};
+
+export class TasksLifecycleScenario implements ClientScenario {
+  name = 'tasks-lifecycle';
+  // 'extension' tags this as off the dated-version timeline (selectable
+  // via `--suite extensions`); DRAFT_PROTOCOL_VERSION lets `--spec-version
+  // draft` runs include it before SEP-2663 lands in a dated release.
+  specVersions: ScenarioSpecTag[] = ['extension', DRAFT_PROTOCOL_VERSION];
+  description = `Test SEP-2663 Tasks extension lifecycle on the server.
+
+**Server Implementation Requirements (SEP-2663):**
+
+The server MUST advertise \`io.modelcontextprotocol/tasks\` under
+\`capabilities.extensions\` and gate the task surface on negotiation.
+
+**Sync dispatch (no task created):**
+- A \`tools/call\` against a sync-only tool MUST return a flat
+  \`ToolResult\` with \`resultType:"complete"\` and a \`content[]\` array.
+- It MUST NOT carry \`taskId\` at the top level (that would imply a
+  CreateTaskResult).
+
+**Server-directed task creation:**
+- For task-supporting tools, the server decides whether to create a task —
+  the client MUST NOT need to opt in via a request param.
+- The response MUST be a \`CreateTaskResult\` — a flat \`Result & Task\`
+  intersection: \`resultType:"task"\`, plus \`taskId\` / \`status\` /
+  \`createdAt\` / \`lastUpdatedAt\` / \`ttlSeconds\` at the top level.
+  There MUST NOT be a nested \`task\` wrapper key.
+
+**tasks/get DetailedTask:**
+- Working tasks return \`status\` and basic metadata; result/error are
+  absent.
+- Completed tasks MUST inline the original tool result under \`result\`
+  with \`content[]\`. There is no separate \`tasks/result\` method.
+
+**Tool errors vs protocol errors (SEP-2663 §error-semantics):**
+- A tool that ran but reported an error MUST surface as
+  \`status:"completed"\` with \`result.isError:true\`. The status
+  \`"failed"\` is reserved for protocol-level errors.
+- A protocol-level error (server crash, internal failure) MUST surface
+  as \`status:"failed"\` with an inlined \`error\` object (JSON-RPC
+  error shape: code/message/data) and MUST NOT carry \`result\`.
+
+**Cancellation:**
+- \`tasks/cancel\` MUST return an empty
+  \`{resultType:"complete"}\` ack — no task envelope (SEP-2322
+  discriminator). The cancelled status is observed via the next
+  \`tasks/get\`.
+- \`tasks/cancel\` against a terminal task MUST return JSON-RPC
+  \`-32602\` (InvalidParams). Clarified upstream in spec commit d963ad0.`;
+
+  async run(serverUrl: string): Promise<ConformanceCheck[]> {
+    const checks: ConformanceCheck[] = [];
+
+    let sessionId: string;
+    try {
+      sessionId = await initRawSession(serverUrl, {
+        capabilities: {
+          elicitation: {},
+          sampling: {},
+          extensions: { [TASKS_EXTENSION_ID]: {} }
+        }
+      });
+    } catch (error) {
+      checks.push({
+        id: 'tasks-session-bootstrap',
+        name: 'TasksSessionBootstrap',
+        description:
+          'Initialize handshake declaring io.modelcontextprotocol/tasks extension succeeds',
+        status: 'FAILURE',
+        timestamp: new Date().toISOString(),
+        errorMessage: `Failed to initialize: ${errMsg(error)}`,
+        specReferences: [SEP_2663_REF]
+      });
+      return checks;
+    }
+
+    // Check 1: sync tool call returns ToolResult, no task creation.
+    {
+      const id = 'tasks-sync-tool-call';
+      const name = 'TasksSyncToolCall';
+      const description =
+        'Sync tool returns ToolResult (resultType:"complete"), no taskId at top level';
+      try {
+        const result = await rawRequest(
+          serverUrl,
+          'tools/call',
+          { name: 'greet', arguments: { name: 'World' } },
+          { sessionId }
+        );
+        const errs: string[] = [];
+        if (result.resultType === 'task') {
+          errs.push('sync tool result MUST NOT carry resultType:"task"');
+        }
+        if (result.taskId) {
+          errs.push(
+            `sync tool result MUST NOT carry top-level taskId; got ${result.taskId}`
+          );
+        }
+        if (!Array.isArray(result.content) || result.content.length === 0) {
+          errs.push('sync tool result MUST carry a non-empty content[] array');
+        }
+        checks.push({
+          id,
+          name,
+          description,
+          status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+          timestamp: new Date().toISOString(),
+          errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+          specReferences: [SEP_2663_REF, SEP_2322_REF],
+          details: {
+            resultType: result.resultType,
+            hasTaskId: Boolean(result.taskId),
+            contentLength: result.content?.length
+          }
+        });
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, [SEP_2663_REF]));
+      }
+    }
+
+    // Check 2: server-directed task creation produces flat CreateTaskResult.
+    let workingTaskId: string | undefined;
+    {
+      const id = 'tasks-server-task-creation';
+      const name = 'TasksServerTaskCreation';
+      const description =
+        'Task-supporting tool returns flat CreateTaskResult (no nested `task` wrapper)';
+      try {
+        const result = await rawRequest(
+          serverUrl,
+          'tools/call',
+          {
+            name: 'slow_compute',
+            arguments: { seconds: 2, label: 'lifecycle-create' }
+          },
+          { sessionId }
+        );
+        const errs: string[] = [];
+        if (result.resultType !== 'task') {
+          errs.push(
+            `expected resultType:"task"; got ${JSON.stringify(result.resultType)}`
+          );
+        }
+        if (result.task) {
+          errs.push(
+            'CreateTaskResult MUST be flat (Result & Task); there must be no nested `task` wrapper key'
+          );
+        }
+        if (!result.taskId) {
+          errs.push('CreateTaskResult MUST carry top-level taskId');
+        }
+        if (!result.status) {
+          errs.push('CreateTaskResult MUST carry top-level status');
+        }
+        if ('result' in result) {
+          errs.push(
+            'CreateTaskResult MUST NOT carry `result` (lives on tasks/get DetailedTask)'
+          );
+        }
+        if ('error' in result) {
+          errs.push(
+            'CreateTaskResult MUST NOT carry `error` (lives on tasks/get DetailedTask)'
+          );
+        }
+        if ('inputRequests' in result) {
+          errs.push(
+            'CreateTaskResult MUST NOT carry `inputRequests` (lives on tasks/get DetailedTask)'
+          );
+        }
+        if (result.taskId) workingTaskId = result.taskId;
+        checks.push({
+          id,
+          name,
+          description,
+          status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+          timestamp: new Date().toISOString(),
+          errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+          specReferences: [SEP_2663_REF],
+          details: {
+            resultType: result.resultType,
+            taskId: result.taskId,
+            status: result.status
+          }
+        });
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, [SEP_2663_REF]));
+      }
+    }
+
+    // Check 3: tasks/get during working state returns status + metadata.
+    {
+      const id = 'tasks-get-during-working';
+      const name = 'TasksGetDuringWorking';
+      const description =
+        'tasks/get returns status + metadata for an active task';
+      if (!workingTaskId) {
+        checks.push(skipCheck(id, name, description, 'no task created'));
+      } else {
+        try {
+          const task = await rawRequest(
+            serverUrl,
+            'tasks/get',
+            { taskId: workingTaskId },
+            { sessionId }
+          );
+          const errs: string[] = [];
+          if (task.taskId !== workingTaskId) {
+            errs.push(
+              `taskId mismatch: expected ${workingTaskId}, got ${task.taskId}`
+            );
+          }
+          if (!task.status) errs.push('tasks/get response MUST carry status');
+          checks.push({
+            id,
+            name,
+            description,
+            status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+            timestamp: new Date().toISOString(),
+            errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+            specReferences: [SEP_2663_REF],
+            details: { status: task.status }
+          });
+        } catch (error) {
+          checks.push(failureCheck(id, name, description, error, [SEP_2663_REF]));
+        }
+      }
+    }
+
+    // Check 4: terminal tasks/get inlines result with content[].
+    {
+      const id = 'tasks-get-terminal-inlined-result';
+      const name = 'TasksGetTerminalInlinedResult';
+      const description =
+        'Completed task tasks/get inlines result with content[] (no separate tasks/result method)';
+      if (!workingTaskId) {
+        checks.push(skipCheck(id, name, description, 'no task created'));
+      } else {
+        try {
+          const terminal = await waitForTerminal(
+            serverUrl,
+            sessionId,
+            workingTaskId
+          );
+          const errs: string[] = [];
+          if (terminal.status !== 'completed') {
+            errs.push(
+              `expected status:"completed"; got ${JSON.stringify(terminal.status)}`
+            );
+          }
+          if (!terminal.result) {
+            errs.push('completed task MUST inline `result`');
+          } else if (
+            !Array.isArray(terminal.result.content) ||
+            terminal.result.content.length === 0
+          ) {
+            errs.push(
+              'completed task `result.content[]` MUST be a non-empty array'
+            );
+          }
+          checks.push({
+            id,
+            name,
+            description,
+            status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+            timestamp: new Date().toISOString(),
+            errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+            specReferences: [SEP_2663_REF],
+            details: {
+              status: terminal.status,
+              hasResult: Boolean(terminal.result),
+              contentLength: terminal.result?.content?.length
+            }
+          });
+        } catch (error) {
+          checks.push(failureCheck(id, name, description, error, [SEP_2663_REF]));
+        }
+      }
+    }
+
+    // Check 5: tool execution error → completed with isError:true.
+    {
+      const id = 'tasks-tool-error-completed-iserror';
+      const name = 'TasksToolErrorCompletedIsError';
+      const description =
+        'Tool execution error reports as completed + result.isError (NOT failed)';
+      try {
+        const created = await rawRequest(
+          serverUrl,
+          'tools/call',
+          { name: 'failing_job', arguments: {} },
+          { sessionId }
+        );
+        const errs: string[] = [];
+        if (!created.taskId) {
+          errs.push('failing_job MUST create a task');
+        } else {
+          const terminal = await waitForTerminal(
+            serverUrl,
+            sessionId,
+            created.taskId
+          );
+          if (terminal.status !== 'completed') {
+            errs.push(
+              `tool error MUST surface as completed (not "${terminal.status}")`
+            );
+          }
+          if (!terminal.result) {
+            errs.push('completed task with tool error MUST carry `result`');
+          } else if (terminal.result.isError !== true) {
+            errs.push('result.isError MUST be true for tool execution errors');
+          }
+        }
+        checks.push({
+          id,
+          name,
+          description,
+          status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+          timestamp: new Date().toISOString(),
+          errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+          specReferences: [SEP_2663_REF]
+        });
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, [SEP_2663_REF]));
+      }
+    }
+
+    // Check 6: protocol-level error → failed with inlined error, no result.
+    {
+      const id = 'tasks-protocol-error-failed-shape';
+      const name = 'TasksProtocolErrorFailedShape';
+      const description =
+        'Protocol-level error reports as failed + inlined error{code,message}, no result';
+      try {
+        const created = await rawRequest(
+          serverUrl,
+          'tools/call',
+          { name: 'protocol_error_job', arguments: {} },
+          { sessionId }
+        );
+        const errs: string[] = [];
+        if (!created.taskId) {
+          errs.push('protocol_error_job MUST create a task');
+        } else {
+          const terminal = await waitForTerminal(
+            serverUrl,
+            sessionId,
+            created.taskId
+          );
+          if (terminal.status !== 'failed') {
+            errs.push(
+              `protocol error MUST surface as failed (not "${terminal.status}")`
+            );
+          }
+          if (!terminal.error) {
+            errs.push('failed task MUST carry inlined `error`');
+          } else {
+            if (typeof terminal.error.code !== 'number') {
+              errs.push('failed task error MUST carry numeric `code`');
+            }
+            if (typeof terminal.error.message !== 'string') {
+              errs.push('failed task error MUST carry string `message`');
+            }
+          }
+          if (terminal.result) {
+            errs.push('failed task MUST NOT carry `result`');
+          }
+        }
+        checks.push({
+          id,
+          name,
+          description,
+          status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+          timestamp: new Date().toISOString(),
+          errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+          specReferences: [SEP_2663_REF]
+        });
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, [SEP_2663_REF]));
+      }
+    }
+
+    // Check 7: tasks/cancel returns empty {resultType:"complete"} ack;
+    // status settles to cancelled.
+    {
+      const id = 'tasks-cancel-empty-ack';
+      const name = 'TasksCancelEmptyAck';
+      const description =
+        'tasks/cancel returns {resultType:"complete"} ack; status settles to cancelled';
+      let cancelTaskId: string | undefined;
+      try {
+        const created = await rawRequest(
+          serverUrl,
+          'tools/call',
+          {
+            name: 'slow_compute',
+            arguments: { seconds: 60, label: 'lifecycle-cancel' }
+          },
+          { sessionId }
+        );
+        cancelTaskId = created.taskId;
+        if (!cancelTaskId) {
+          checks.push({
+            id,
+            name,
+            description,
+            status: 'FAILURE',
+            timestamp: new Date().toISOString(),
+            errorMessage: 'slow_compute did not create a task',
+            specReferences: [SEP_2663_REF, SEP_2322_REF]
+          });
+        } else {
+          const ack = await rawRequest(
+            serverUrl,
+            'tasks/cancel',
+            { taskId: cancelTaskId },
+            { sessionId }
+          );
+          const errs: string[] = [];
+          // Ack carries only the SEP-2322 discriminator — no task envelope.
+          if (
+            JSON.stringify(ack) !== JSON.stringify({ resultType: 'complete' })
+          ) {
+            errs.push(
+              `cancel ack MUST be {resultType:"complete"}; got ${JSON.stringify(ack)}`
+            );
+          }
+          // Status settles to cancelled — observe via tasks/get.
+          const after = await rawRequest(
+            serverUrl,
+            'tasks/get',
+            { taskId: cancelTaskId },
+            { sessionId }
+          );
+          if (after.status !== 'cancelled') {
+            errs.push(
+              `tasks/get after cancel MUST report cancelled; got ${after.status}`
+            );
+          }
+          checks.push({
+            id,
+            name,
+            description,
+            status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+            timestamp: new Date().toISOString(),
+            errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+            specReferences: [SEP_2663_REF, SEP_2322_REF],
+            details: { cancelAck: ack, statusAfterCancel: after.status }
+          });
+        }
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, [SEP_2663_REF]));
+      }
+    }
+
+    // Check 8: tasks/cancel on a terminal task MUST return -32602.
+    {
+      const id = 'tasks-cancel-terminal-rejected';
+      const name = 'TasksCancelTerminalRejected';
+      const description =
+        'tasks/cancel on a terminal task returns -32602 (per spec commit d963ad0)';
+      try {
+        const created = await rawRequest(
+          serverUrl,
+          'tools/call',
+          {
+            name: 'slow_compute',
+            arguments: { seconds: 1, label: 'lifecycle-cancel-terminal' }
+          },
+          { sessionId }
+        );
+        const completedTaskId = created.taskId;
+        if (!completedTaskId) {
+          checks.push({
+            id,
+            name,
+            description,
+            status: 'FAILURE',
+            timestamp: new Date().toISOString(),
+            errorMessage: 'slow_compute did not create a task',
+            specReferences: [SEP_2663_REF]
+          });
+        } else {
+          await waitForTerminal(serverUrl, sessionId, completedTaskId);
+          // Now cancel — must throw -32602.
+          let thrown: any;
+          try {
+            await rawRequest(
+              serverUrl,
+              'tasks/cancel',
+              { taskId: completedTaskId },
+              { sessionId }
+            );
+          } catch (e) {
+            thrown = e;
+          }
+          const errs: string[] = [];
+          if (!thrown) {
+            errs.push(
+              'tasks/cancel on terminal task MUST return JSON-RPC error'
+            );
+          } else if (thrown.code !== -32602) {
+            errs.push(
+              `expected error code -32602; got ${thrown.code ?? '<missing>'}`
+            );
+          }
+          checks.push({
+            id,
+            name,
+            description,
+            status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+            timestamp: new Date().toISOString(),
+            errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+            specReferences: [SEP_2663_REF],
+            details: { observedCode: thrown?.code }
+          });
+        }
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, [SEP_2663_REF]));
+      }
+    }
+
+    return checks;
+  }
+}
+
+function errMsg(error: unknown): string {
+  return error instanceof Error ? error.message : String(error);
+}
+
+function failureCheck(
+  id: string,
+  name: string,
+  description: string,
+  error: unknown,
+  specReferences: SpecReference[]
+): ConformanceCheck {
+  return {
+    id,
+    name,
+    description,
+    status: 'FAILURE',
+    timestamp: new Date().toISOString(),
+    errorMessage: errMsg(error),
+    specReferences
+  };
+}
+
+function skipCheck(
+  id: string,
+  name: string,
+  description: string,
+  reason: string
+): ConformanceCheck {
+  return {
+    id,
+    name,
+    description,
+    status: 'SKIPPED',
+    timestamp: new Date().toISOString(),
+    errorMessage: `Skipped: ${reason}`,
+    specReferences: [SEP_2663_REF]
+  };
+}

From 4f2eac0ba4a2fbb2b8b85a2652e2344aea8f6f98 Mon Sep 17 00:00:00 2001
From: Sri Panyam <sri.panyam@gmail.com>
Date: Tue, 5 May 2026 14:17:00 -0700
Subject: [PATCH 2/7] style(tasks): apply prettier formatting

---
 src/scenarios/server/tasks/lifecycle.test.ts | 12 ++++++++----
 src/scenarios/server/tasks/lifecycle.ts      |  8 ++++++--
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/src/scenarios/server/tasks/lifecycle.test.ts b/src/scenarios/server/tasks/lifecycle.test.ts
index 0977139..d2ea918 100644
--- a/src/scenarios/server/tasks/lifecycle.test.ts
+++ b/src/scenarios/server/tasks/lifecycle.test.ts
@@ -46,10 +46,14 @@ describeIfTarget('SEP-2663 Tasks — server conformance', () => {
   beforeAll(async () => {
     if (!SHOULD_SPAWN) return;
 
-    serverProcess = spawn(FIXTURE_BINARY!, ['--serve', '--addr', `:${TEST_PORT}`], {
-      stdio: ['ignore', 'pipe', 'pipe'],
-      detached: false
-    });
+    serverProcess = spawn(
+      FIXTURE_BINARY!,
+      ['--serve', '--addr', `:${TEST_PORT}`],
+      {
+        stdio: ['ignore', 'pipe', 'pipe'],
+        detached: false
+      }
+    );
 
     let stdoutBuf = '';
     let stderrBuf = '';
diff --git a/src/scenarios/server/tasks/lifecycle.ts b/src/scenarios/server/tasks/lifecycle.ts
index f36fe02..e337c83 100644
--- a/src/scenarios/server/tasks/lifecycle.ts
+++ b/src/scenarios/server/tasks/lifecycle.ts
@@ -258,7 +258,9 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under
             details: { status: task.status }
           });
         } catch (error) {
-          checks.push(failureCheck(id, name, description, error, [SEP_2663_REF]));
+          checks.push(
+            failureCheck(id, name, description, error, [SEP_2663_REF])
+          );
         }
       }
     }
@@ -309,7 +311,9 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under
             }
           });
         } catch (error) {
-          checks.push(failureCheck(id, name, description, error, [SEP_2663_REF]));
+          checks.push(
+            failureCheck(id, name, description, error, [SEP_2663_REF])
+          );
         }
       }
     }

From 95da20da525d0b2d726dd599dae11ea48ee6c799 Mon Sep 17 00:00:00 2001
From: Sri Panyam <sri.panyam@gmail.com>
Date: Tue, 5 May 2026 15:02:43 -0700
Subject: [PATCH 3/7] feat(tasks,mrtr): port full SEP-2663 + SEP-2322 scenario
 suite
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Builds out the rest of the tasks scenarios (atop the lifecycle canary)
and adds the SEP-2322 ephemeral MRTR scenario in a sibling folder.
Both target their own fixtures; both runners are brand-neutral and
language-agnostic (TASKS_SERVER_URL / TASKS_SERVER_CMD,
MRTR_SERVER_URL / MRTR_SERVER_CMD; readiness via TCP polling).

Tasks ClientScenario classes:
- TasksLifecycleScenario          (8 checks; v2-01..v2-08)
- TasksCapabilityNegotiationScenario (4 checks; v2-11/22/23/25, SEP-2575)
- TasksWireFieldsScenario         (3 checks; v2-12/13/21)
- TasksRequestStateScenario       (3 checks; v2-14/15/28)
- TasksMRTRInputScenario          (3 checks; v2-16/17/29 partial fulfillment)
- TasksRequestHeadersScenario     (3 checks; SEP-2243 request-header tolerance)
- TasksDispatchScenario           (8 checks; v2-09/10/19/20/26/27/30/31)
- TasksStatusNotificationsScenario (1 check; SEP-2663 §notifications, optional)

MRTR ClientScenario class:
- MrtrEphemeralFlowScenario       (7 checks + 1 SKIPPED; mrtr-01..07,
                                   mrtr-08 deferred for spec terminology +
                                   reference-impl reasons)

Both runners spawn the fixture via a shell command and detect readiness
by TCP-polling the URL's host/port — no log-line scanning, no
language-specific assumptions. The same env vars work for any server
implementation.

Scenarios are tagged ['extension', DRAFT_PROTOCOL_VERSION] and registered
in pendingClientScenariosList so all-scenarios.test.ts (which targets
the upstream everything-server) skips them until the fixture grows
SEP-2322 / SEP-2663 support.
---
 src/scenarios/index.ts                        |  37 +-
 .../server/mrtr/all-scenarios.test.ts         | 149 +++++
 src/scenarios/server/mrtr/ephemeral-flow.ts   | 585 ++++++++++++++++++
 src/scenarios/server/mrtr/helpers.ts          |  82 +++
 .../server/tasks/all-scenarios.test.ts        | 176 ++++++
 src/scenarios/server/tasks/capability.ts      | 291 +++++++++
 src/scenarios/server/tasks/dispatch.ts        | 560 +++++++++++++++++
 src/scenarios/server/tasks/headers.ts         | 243 ++++++++
 src/scenarios/server/tasks/helpers.ts         |  97 ++-
 src/scenarios/server/tasks/lifecycle.test.ts  | 149 -----
 src/scenarios/server/tasks/lifecycle.ts       |  58 +-
 src/scenarios/server/tasks/mrtr-input.ts      | 416 +++++++++++++
 src/scenarios/server/tasks/notifications.ts   | 188 ++++++
 src/scenarios/server/tasks/request-state.ts   | 290 +++++++++
 src/scenarios/server/tasks/wire-fields.ts     | 250 ++++++++
 15 files changed, 3364 insertions(+), 207 deletions(-)
 create mode 100644 src/scenarios/server/mrtr/all-scenarios.test.ts
 create mode 100644 src/scenarios/server/mrtr/ephemeral-flow.ts
 create mode 100644 src/scenarios/server/mrtr/helpers.ts
 create mode 100644 src/scenarios/server/tasks/all-scenarios.test.ts
 create mode 100644 src/scenarios/server/tasks/capability.ts
 create mode 100644 src/scenarios/server/tasks/dispatch.ts
 create mode 100644 src/scenarios/server/tasks/headers.ts
 delete mode 100644 src/scenarios/server/tasks/lifecycle.test.ts
 create mode 100644 src/scenarios/server/tasks/mrtr-input.ts
 create mode 100644 src/scenarios/server/tasks/notifications.ts
 create mode 100644 src/scenarios/server/tasks/request-state.ts
 create mode 100644 src/scenarios/server/tasks/wire-fields.ts

diff --git a/src/scenarios/index.ts b/src/scenarios/index.ts
index 096f4f4..e82a16c 100644
--- a/src/scenarios/index.ts
+++ b/src/scenarios/index.ts
@@ -64,6 +64,14 @@ import {
 import { DNSRebindingProtectionScenario } from './server/dns-rebinding';
 
 import { TasksLifecycleScenario } from './server/tasks/lifecycle';
+import { TasksCapabilityNegotiationScenario } from './server/tasks/capability';
+import { TasksWireFieldsScenario } from './server/tasks/wire-fields';
+import { TasksRequestStateScenario } from './server/tasks/request-state';
+import { TasksMRTRInputScenario } from './server/tasks/mrtr-input';
+import { TasksRequestHeadersScenario } from './server/tasks/headers';
+import { TasksDispatchScenario } from './server/tasks/dispatch';
+import { TasksStatusNotificationsScenario } from './server/tasks/notifications';
+import { MrtrEphemeralFlowScenario } from './server/mrtr/ephemeral-flow';
 
 import {
   authScenariosList,
@@ -91,7 +99,20 @@ const pendingClientScenariosList: ClientScenario[] = [
   // so all-scenarios.test.ts cannot exercise this against the default
   // fixture. Active runs target a SEP-2663-conformant server via the
   // dedicated tasks/lifecycle.test.ts harness.
-  new TasksLifecycleScenario()
+  new TasksLifecycleScenario(),
+  new TasksCapabilityNegotiationScenario(),
+  new TasksWireFieldsScenario(),
+  new TasksRequestStateScenario(),
+  new TasksMRTRInputScenario(),
+  new TasksRequestHeadersScenario(),
+  new TasksDispatchScenario(),
+  new TasksStatusNotificationsScenario(),
+
+  // SEP-2322 MRTR (ephemeral IncompleteResult flow).
+  // Targets a different fixture than tasks scenarios; the dedicated
+  // mrtr/all-scenarios.test.ts runner points at an MRTR-conformant
+  // server via MRTR_SERVER_URL / MRTR_SERVER_CMD.
+  new MrtrEphemeralFlowScenario()
 ];
 
 // All client scenarios
@@ -156,7 +177,19 @@ const allClientScenariosList: ClientScenario[] = [
   // filter sees it; pendingClientScenariosList below excludes it from
   // automatic runs against the everything-server (which doesn't implement
   // io.modelcontextprotocol/tasks yet).
-  new TasksLifecycleScenario()
+  new TasksLifecycleScenario(),
+  new TasksCapabilityNegotiationScenario(),
+  new TasksWireFieldsScenario(),
+  new TasksRequestStateScenario(),
+  new TasksMRTRInputScenario(),
+  new TasksRequestHeadersScenario(),
+  new TasksDispatchScenario(),
+  new TasksStatusNotificationsScenario(),
+
+  // SEP-2322 MRTR (ephemeral IncompleteResult flow). Targets a
+  // dedicated MRTR fixture — out of scope for the default
+  // everything-server until SEP-2322 lands there.
+  new MrtrEphemeralFlowScenario()
 ];
 
 // Active client scenarios (excludes pending)
diff --git a/src/scenarios/server/mrtr/all-scenarios.test.ts b/src/scenarios/server/mrtr/all-scenarios.test.ts
new file mode 100644
index 0000000..dd1e6bb
--- /dev/null
+++ b/src/scenarios/server/mrtr/all-scenarios.test.ts
@@ -0,0 +1,149 @@
+/**
+ * SEP-2322 MRTR test runner.
+ *
+ * Iterates the MRTR scenario classes against a SEP-2322-conformant
+ * server. Configuration is brand-neutral and language-agnostic:
+ *
+ *   1. Point at an already-running server:
+ *        MRTR_SERVER_URL=http://localhost:8080/mcp npm test -- mrtr/all-scenarios.test.ts
+ *
+ *   2. Auto-spawn a fixture before tests (any language):
+ *        MRTR_SERVER_URL=http://localhost:18093/mcp \
+ *        MRTR_SERVER_CMD="/path/to/server --port 18093" \
+ *          npm test -- mrtr/all-scenarios.test.ts
+ *
+ * If MRTR_SERVER_URL is unset the suite is skipped — keeping CI runs
+ * against the everything-server green.
+ *
+ * The fixture server can be implemented in any language as long as it
+ * exposes a SEP-2322 conformant Streamable HTTP MCP endpoint. Anyone is
+ * free to bring their own; one example reference implementation lives
+ * at https://github.com/panyam/mcpkit/tree/main/examples/mrtr.
+ */
+
+import { spawn, ChildProcess } from 'child_process';
+import { connect } from 'net';
+import { describe, it, expect, beforeAll, afterAll } from 'vitest';
+import { MrtrEphemeralFlowScenario } from './ephemeral-flow';
+
+const SERVER_URL = process.env.MRTR_SERVER_URL;
+const SERVER_CMD = process.env.MRTR_SERVER_CMD;
+const SERVER_STARTUP_TIMEOUT_MS = 15_000;
+const SHOULD_SPAWN = Boolean(SERVER_URL && SERVER_CMD);
+const HAVE_TARGET = Boolean(SERVER_URL);
+
+const MRTR_SCENARIOS = [new MrtrEphemeralFlowScenario()];
+
+const describeIfTarget = HAVE_TARGET ? describe : describe.skip;
+
+describeIfTarget('SEP-2322 MRTR — server conformance', () => {
+  let serverProcess: ChildProcess | null = null;
+
+  beforeAll(async () => {
+    if (!SHOULD_SPAWN) return;
+
+    serverProcess = spawn('sh', ['-c', SERVER_CMD!], {
+      stdio: ['ignore', 'pipe', 'pipe'],
+      detached: false
+    });
+
+    let stdoutBuf = '';
+    let stderrBuf = '';
+    serverProcess.stdout?.on('data', (b) => {
+      stdoutBuf += b.toString();
+    });
+    serverProcess.stderr?.on('data', (b) => {
+      stderrBuf += b.toString();
+    });
+
+    serverProcess.on('exit', (code) => {
+      if (code !== null && code !== 0) {
+        console.error(
+          `mrtr fixture exited unexpectedly with code ${code}.\nSTDOUT: ${stdoutBuf}\nSTDERR: ${stderrBuf}`
+        );
+      }
+    });
+
+    await waitForTcpReady(SERVER_URL!, SERVER_STARTUP_TIMEOUT_MS).catch(
+      (err) => {
+        if (serverProcess && !serverProcess.killed) {
+          serverProcess.kill('SIGKILL');
+        }
+        throw new Error(
+          `mrtr fixture did not become reachable within ${SERVER_STARTUP_TIMEOUT_MS}ms: ${err.message}\nSTDOUT: ${stdoutBuf}\nSTDERR: ${stderrBuf}`
+        );
+      }
+    );
+  }, SERVER_STARTUP_TIMEOUT_MS + 5_000);
+
+  afterAll(async () => {
+    const child = serverProcess;
+    // `killed` only means a signal was sent; exitCode/signalCode show liveness.
+    const alive = () => child?.exitCode === null && child?.signalCode === null;
+    if (!SHOULD_SPAWN || !child || !alive()) return;
+    await new Promise<void>((resolve) => {
+      const timer = setTimeout(() => {
+        if (alive()) child.kill('SIGKILL');
+        resolve();
+      }, 3_000);
+      child.once('exit', () => {
+        clearTimeout(timer);
+        resolve();
+      });
+      child.kill('SIGTERM');
+    });
+    serverProcess = null;
+  });
+
+  for (const scenario of MRTR_SCENARIOS) {
+    it(`${scenario.name} — all checks succeed against fixture`, async () => {
+      const checks = await scenario.run(SERVER_URL!);
+      expect(checks.length).toBeGreaterThan(0);
+      const failures = checks.filter(
+        (c) => c.status === 'FAILURE' || c.status === 'WARNING'
+      );
+      if (failures.length > 0) {
+        const detail = failures
+          .map((c) => `  - ${c.id}: ${c.errorMessage ?? '(no message)'}`)
+          .join('\n');
+        throw new Error(
+          `${failures.length}/${checks.length} checks failed:\n${detail}`
+        );
+      }
+    });
+  }
+});
+
+async function waitForTcpReady(url: string, timeoutMs: number): Promise<void> {
+  const u = new URL(url);
+  const port = parseInt(u.port || (u.protocol === 'https:' ? '443' : '80'), 10);
+  // URL.hostname keeps IPv6 brackets ("[::1]"); net.connect needs them removed.
+  const host = u.hostname.replace(/^\[|\]$/g, '');
+  const deadline = Date.now() + timeoutMs;
+  let lastErr: Error | null = null;
+  while (Date.now() < deadline) {
+    try {
+      await new Promise<void>((resolve, reject) => {
+        const socket = connect({ host, port }, () => {
+          socket.end();
+          resolve();
+        });
+        socket.once('error', (err) => {
+          socket.destroy();
+          reject(err);
+        });
+        socket.setTimeout(1_000, () => {
+          socket.destroy();
+          reject(new Error('connect timeout'));
+        });
+      });
+      return;
+    } catch (err) {
+      lastErr = err as Error;
+      await new Promise((r) => setTimeout(r, 200));
+    }
+  }
+  throw new Error(
+    `${host}:${port} did not accept TCP connections (last: ${lastErr?.message ?? 'unknown'})`
+  );
+}
diff --git a/src/scenarios/server/mrtr/ephemeral-flow.ts b/src/scenarios/server/mrtr/ephemeral-flow.ts
new file mode 100644
index 0000000..8deaf68
--- /dev/null
+++ b/src/scenarios/server/mrtr/ephemeral-flow.ts
@@ -0,0 +1,585 @@
+/**
+ * SEP-2322 MRTR ephemeral IncompleteResult flow.
+ *
+ * Tests the multi-round-trip-request contract end-to-end against any
+ * server that implements SEP-2322's ephemeral path: tools/call returns
+ * `IncompleteResult` to gather input, the client retries the SAME
+ * tools/call with `inputResponses` (and echoed `requestState`), and
+ * the server eventually returns a normal `ToolResult`. No task
+ * envelope, no separate methods.
+ *
+ * Required server fixtures (tools/list output must include all):
+ *   - test_tool_with_elicitation              — single elicitation/create round
+ *   - test_incomplete_result_sampling         — single sampling/createMessage round
+ *   - test_incomplete_result_list_roots       — single roots/list round
+ *   - test_incomplete_result_request_state    — exercises requestState validation
+ *   - test_incomplete_result_multiple_inputs  — emits 3+ inputRequests in one round
+ *   - test_incomplete_result_multi_round      — drives 2+ MRTR rounds
+ *   - test_incomplete_result_elicitation      — emits inputRequest for "user_name";
+ *                                               server re-requests on wrong key
+ */
+
+import {
+  ClientScenario,
+  ConformanceCheck,
+  ScenarioSpecTag,
+  DRAFT_PROTOCOL_VERSION
+} from '../../../types';
+import { initRawSession, rawRequest } from '../tasks/helpers';
+import {
+  MRTR_INCOMPLETE_RESULT_TYPE,
+  SEP_2322_REF,
+  errMsg,
+  failureCheck,
+  isCompleteResult,
+  isIncompleteResult,
+  mockElicitResponse,
+  mockListRootsResponse,
+  mockSamplingResponse
+} from './helpers';
+
+export class MrtrEphemeralFlowScenario implements ClientScenario {
+  name = 'mrtr-ephemeral-flow';
+  // MRTR is in draft alongside SEP-2322; tagged 'extension' because it
+  // introduces an ephemeral resultType discriminator that's not on the
+  // dated-spec timeline yet.
+  specVersions: ScenarioSpecTag[] = ['extension', DRAFT_PROTOCOL_VERSION];
+  description = `Test SEP-2322 ephemeral MRTR (Multi Round-Trip Request) flow.
+
+**Server Implementation Requirements:**
+
+Every \`tools/call\` response in the MRTR contract is one of:
+- \`resultType:"${MRTR_INCOMPLETE_RESULT_TYPE}"\` — server is asking for
+  more input; carries an \`inputRequests\` map keyed by server-minted
+  opaque ids and (optionally) a \`requestState\` token to echo on the
+  next round.
+- \`resultType:"complete"\` (or absent — current SDKs may strip the
+  discriminator on responses without one) — the tools/call has finished;
+  the body is a normal \`ToolResult\` with \`content[]\`.
+
+**Round-trip rules (SEP-2322):**
+- Round 1 with no \`inputResponses\` MUST return \`IncompleteResult\`
+  with \`inputRequests\`.
+- The client retries the SAME tools/call (same name + arguments) with
+  \`inputResponses\` keyed against the previously-emitted ids, plus the
+  echoed \`requestState\` if one was provided.
+- The server MUST validate the echoed \`requestState\` and complete on
+  the next round.
+
+**Multi-method support:**
+- A single \`IncompleteResult\` can carry \`inputRequests\` for
+  \`elicitation/create\`, \`sampling/createMessage\`, and \`roots/list\`
+  in any combination.
+
+**Multi-round + state accumulation:**
+- A handler MAY take more than two rounds. Each MRTR round mints a
+  fresh \`requestState\`; the prior token MUST NOT be reused. Answers
+  from prior rounds MUST be available to the handler on the final
+  round (server forwards them via \`requestState\`).
+
+**Wrong-key tolerance:**
+- When a client retries with an \`inputResponses\` key the server did
+  not emit, the server SHOULD re-request via \`IncompleteResult\`
+  rather than erroring. (The spec is soft here; this scenario asserts
+  the re-request path.)`;
+
+  async run(serverUrl: string): Promise<ConformanceCheck[]> {
+    const checks: ConformanceCheck[] = [];
+
+    let sessionId: string;
+    try {
+      ({ sessionId } = await initRawSession(serverUrl, {
+        capabilities: {
+          elicitation: {},
+          sampling: {},
+          roots: {}
+        }
+      }));
+    } catch (error) {
+      checks.push({
+        id: 'mrtr-session-bootstrap',
+        name: 'MrtrSessionBootstrap',
+        description:
+          'Initialize handshake declaring elicitation/sampling/roots capabilities succeeds',
+        status: 'FAILURE',
+        timestamp: new Date().toISOString(),
+        errorMessage: `Failed to initialize: ${errMsg(error)}`,
+        specReferences: [SEP_2322_REF]
+      });
+      return checks;
+    }
+
+    // Check 1: basic elicitation round-trip.
+    {
+      const id = 'mrtr-basic-elicitation-round-trip';
+      const name = 'MrtrBasicElicitationRoundTrip';
+      const description =
+        'tools/call returns IncompleteResult on round 1 (elicitation/create); completes on round 2 with the answer reflected in the result';
+      try {
+        const r1 = await rawRequest(
+          serverUrl,
+          'tools/call',
+          { name: 'test_tool_with_elicitation', arguments: {} },
+          { sessionId }
+        );
+        const errs: string[] = [];
+        if (!isIncompleteResult(r1)) {
+          errs.push(
+            `round 1 MUST be IncompleteResult; got ${JSON.stringify(r1)}`
+          );
+        }
+        if (r1.resultType !== MRTR_INCOMPLETE_RESULT_TYPE) {
+          errs.push(
+            `resultType MUST be "${MRTR_INCOMPLETE_RESULT_TYPE}"; got ${JSON.stringify(r1.resultType)}`
+          );
+        }
+        if (!r1.inputRequests || !r1.inputRequests.user_name) {
+          errs.push(
+            'IncompleteResult MUST carry inputRequests with the "user_name" key'
+          );
+        } else if (r1.inputRequests.user_name.method !== 'elicitation/create') {
+          errs.push(
+            `inputRequest method MUST be "elicitation/create"; got ${JSON.stringify(r1.inputRequests.user_name.method)}`
+          );
+        }
+
+        const r2 = await rawRequest(
+          serverUrl,
+          'tools/call',
+          {
+            name: 'test_tool_with_elicitation',
+            arguments: {},
+            inputResponses: {
+              user_name: mockElicitResponse({ name: 'Alice' })
+            },
+            ...(r1.requestState !== undefined
+              ? { requestState: r1.requestState }
+              : {})
+          },
+          { sessionId }
+        );
+        if (!isCompleteResult(r2)) {
+          errs.push(`round 2 MUST be complete; got ${JSON.stringify(r2)}`);
+        }
+        const text = r2.content?.[0]?.text ?? '';
+        if (!/Alice/.test(text)) {
+          errs.push(
+            'response text SHOULD reference the answered name ("Alice")'
+          );
+        }
+        checks.push({
+          id,
+          name,
+          description,
+          status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+          timestamp: new Date().toISOString(),
+          errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+          specReferences: [SEP_2322_REF]
+        });
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error));
+      }
+    }
+
+    // Check 2: sampling round-trip.
+    {
+      const id = 'mrtr-sampling-round-trip';
+      const name = 'MrtrSamplingRoundTrip';
+      const description =
+        'IncompleteResult with sampling/createMessage round-trips through the inputResponses retry';
+      try {
+        const r1 = await rawRequest(
+          serverUrl,
+          'tools/call',
+          { name: 'test_incomplete_result_sampling', arguments: {} },
+          { sessionId }
+        );
+        const errs: string[] = [];
+        if (!isIncompleteResult(r1)) {
+          errs.push('round 1 MUST be IncompleteResult');
+        } else {
+          const key = Object.keys(r1.inputRequests)[0];
+          if (r1.inputRequests[key].method !== 'sampling/createMessage') {
+            errs.push(
+              `inputRequest method MUST be "sampling/createMessage"; got ${JSON.stringify(r1.inputRequests[key].method)}`
+            );
+          }
+          const r2 = await rawRequest(
+            serverUrl,
+            'tools/call',
+            {
+              name: 'test_incomplete_result_sampling',
+              arguments: {},
+              inputResponses: { [key]: mockSamplingResponse('Paris') },
+              ...(r1.requestState !== undefined
+                ? { requestState: r1.requestState }
+                : {})
+            },
+            { sessionId }
+          );
+          if (!isCompleteResult(r2)) {
+            errs.push('round 2 MUST be complete');
+          }
+        }
+        checks.push({
+          id,
+          name,
+          description,
+          status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+          timestamp: new Date().toISOString(),
+          errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+          specReferences: [SEP_2322_REF]
+        });
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error));
+      }
+    }
+
+    // Check 3: roots/list round-trip.
+    {
+      const id = 'mrtr-roots-list-round-trip';
+      const name = 'MrtrRootsListRoundTrip';
+      const description =
+        'IncompleteResult with roots/list round-trips through the inputResponses retry';
+      try {
+        const r1 = await rawRequest(
+          serverUrl,
+          'tools/call',
+          { name: 'test_incomplete_result_list_roots', arguments: {} },
+          { sessionId }
+        );
+        const errs: string[] = [];
+        if (!isIncompleteResult(r1)) {
+          errs.push('round 1 MUST be IncompleteResult');
+        } else {
+          const key = Object.keys(r1.inputRequests)[0];
+          if (r1.inputRequests[key].method !== 'roots/list') {
+            errs.push(
+              `inputRequest method MUST be "roots/list"; got ${JSON.stringify(r1.inputRequests[key].method)}`
+            );
+          }
+          const r2 = await rawRequest(
+            serverUrl,
+            'tools/call',
+            {
+              name: 'test_incomplete_result_list_roots',
+              arguments: {},
+              inputResponses: { [key]: mockListRootsResponse() },
+              ...(r1.requestState !== undefined
+                ? { requestState: r1.requestState }
+                : {})
+            },
+            { sessionId }
+          );
+          if (!isCompleteResult(r2)) {
+            errs.push('round 2 MUST be complete');
+          }
+        }
+        checks.push({
+          id,
+          name,
+          description,
+          status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+          timestamp: new Date().toISOString(),
+          errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+          specReferences: [SEP_2322_REF]
+        });
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error));
+      }
+    }
+
+    // Check 4: requestState round-trip validation.
+    {
+      const id = 'mrtr-request-state-round-trip';
+      const name = 'MrtrRequestStateRoundTrip';
+      const description =
+        'When server emits requestState on round 1, it MUST be a non-empty string and the server MUST validate the echo on round 2';
+      try {
+        const r1 = await rawRequest(
+          serverUrl,
+          'tools/call',
+          { name: 'test_incomplete_result_request_state', arguments: {} },
+          { sessionId }
+        );
+        const errs: string[] = [];
+        if (!isIncompleteResult(r1)) {
+          errs.push('round 1 MUST be IncompleteResult');
+        }
+        if (typeof r1.requestState !== 'string') {
+          errs.push(
+            `requestState MUST be a string when emitted; got ${typeof r1.requestState}`
+          );
+        } else if (r1.requestState.length === 0) {
+          errs.push(
+            'requestState MUST be non-empty when emitted (omit instead of "")'
+          );
+        }
+        const key = Object.keys(r1.inputRequests ?? {})[0];
+        if (key) {
+          const r2 = await rawRequest(
+            serverUrl,
+            'tools/call',
+            {
+              name: 'test_incomplete_result_request_state',
+              arguments: {},
+              inputResponses: { [key]: mockElicitResponse({ ok: true }) },
+              requestState: r1.requestState
+            },
+            { sessionId }
+          );
+          if (!isCompleteResult(r2)) {
+            errs.push('round 2 MUST be complete after valid requestState echo');
+          }
+          const text =
+            r2.content?.find((c: any) => c.type === 'text')?.text ?? '';
+          if (!/state-ok/.test(text)) {
+            errs.push(
+              'final response SHOULD include "state-ok" to confirm the server validated requestState'
+            );
+          }
+        }
+        checks.push({
+          id,
+          name,
+          description,
+          status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+          timestamp: new Date().toISOString(),
+          errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+          specReferences: [SEP_2322_REF]
+        });
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error));
+      }
+    }
+
+    // Check 5: multiple inputRequests of different methods in one round.
+    {
+      const id = 'mrtr-multiple-input-requests-one-round';
+      const name = 'MrtrMultipleInputRequestsOneRound';
+      const description =
+        'A single IncompleteResult MAY carry inputRequests for elicitation/create + sampling/createMessage + roots/list together';
+      try {
+        const r1 = await rawRequest(
+          serverUrl,
+          'tools/call',
+          { name: 'test_incomplete_result_multiple_inputs', arguments: {} },
+          { sessionId }
+        );
+        const errs: string[] = [];
+        if (!isIncompleteResult(r1)) {
+          errs.push('round 1 MUST be IncompleteResult');
+        } else {
+          const keys = Object.keys(r1.inputRequests);
+          if (keys.length < 3) {
+            errs.push(
+              `expected at least 3 inputRequests in one round; got ${keys.length}`
+            );
+          }
+          const methods = new Set(keys.map((k) => r1.inputRequests[k].method));
+          for (const expected of [
+            'elicitation/create',
+            'sampling/createMessage',
+            'roots/list'
+          ]) {
+            if (!methods.has(expected)) {
+              errs.push(`inputRequests MUST include method "${expected}"`);
+            }
+          }
+          const inputResponses: Record<string, unknown> = {};
+          for (const [key, req] of Object.entries(r1.inputRequests) as Array<
+            [string, any]
+          >) {
+            if (req.method === 'elicitation/create')
+              inputResponses[key] = mockElicitResponse({ name: 'Alice' });
+            else if (req.method === 'sampling/createMessage')
+              inputResponses[key] = mockSamplingResponse('hi');
+            else if (req.method === 'roots/list')
+              inputResponses[key] = mockListRootsResponse();
+          }
+          const r2 = await rawRequest(
+            serverUrl,
+            'tools/call',
+            {
+              name: 'test_incomplete_result_multiple_inputs',
+              arguments: {},
+              inputResponses,
+              ...(r1.requestState !== undefined
+                ? { requestState: r1.requestState }
+                : {})
+            },
+            { sessionId }
+          );
+          if (!isCompleteResult(r2)) {
+            errs.push('round 2 MUST be complete with all three answers');
+          }
+        }
+        checks.push({
+          id,
+          name,
+          description,
+          status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+          timestamp: new Date().toISOString(),
+          errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+          specReferences: [SEP_2322_REF]
+        });
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error));
+      }
+    }
+
+    // Check 6: multi-round flow accumulates answers via requestState.
+    {
+      const id = 'mrtr-multi-round-flow';
+      const name = 'MrtrMultiRoundFlow';
+      const description =
+        'A handler may take 2+ MRTR rounds; each round mints a fresh requestState; final result MUST reflect answers from every round';
+      try {
+        const r1 = await rawRequest(
+          serverUrl,
+          'tools/call',
+          { name: 'test_incomplete_result_multi_round', arguments: {} },
+          { sessionId }
+        );
+        const errs: string[] = [];
+        if (!isIncompleteResult(r1)) {
+          errs.push('round 1 MUST be IncompleteResult');
+        }
+        if (!r1.requestState) {
+          errs.push('round 1 MUST mint requestState for multi-round flow');
+        }
+        const k1 = Object.keys(r1.inputRequests ?? {})[0];
+
+        const r2 = await rawRequest(
+          serverUrl,
+          'tools/call',
+          {
+            name: 'test_incomplete_result_multi_round',
+            arguments: {},
+            inputResponses: { [k1]: mockElicitResponse({ name: 'Alice' }) },
+            requestState: r1.requestState
+          },
+          { sessionId }
+        );
+        if (!isIncompleteResult(r2)) {
+          errs.push('round 2 MUST still be IncompleteResult (asks for step2)');
+        }
+        if (!r2.requestState) {
+          errs.push('round 2 MUST mint a fresh requestState');
+        }
+        if (r2.requestState === r1.requestState) {
+          errs.push(
+            'round 2 requestState MUST differ from round 1 (each round mints a fresh token)'
+          );
+        }
+        const k2 = Object.keys(r2.inputRequests ?? {})[0];
+
+        const r3 = await rawRequest(
+          serverUrl,
+          'tools/call',
+          {
+            name: 'test_incomplete_result_multi_round',
+            arguments: {},
+            inputResponses: { [k2]: mockElicitResponse({ color: 'blue' }) },
+            requestState: r2.requestState
+          },
+          { sessionId }
+        );
+        if (!isCompleteResult(r3)) {
+          errs.push('round 3 MUST be complete');
+        }
+        const text = r3.content?.[0]?.text ?? '';
+        if (!/Alice/.test(text)) {
+          errs.push(
+            'final result MUST reflect round 1 answer (server forwards via requestState)'
+          );
+        }
+        if (!/blue/.test(text)) {
+          errs.push('final result MUST reflect round 2 answer');
+        }
+        checks.push({
+          id,
+          name,
+          description,
+          status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+          timestamp: new Date().toISOString(),
+          errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+          specReferences: [SEP_2322_REF]
+        });
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error));
+      }
+    }
+
+    // Check 7: wrong-key inputResponses → server re-requests.
+    {
+      const id = 'mrtr-wrong-input-key-rerequests';
+      const name = 'MrtrWrongInputKeyRerequests';
+      const description =
+        'When the client sends inputResponses with a key the server did not emit, the server SHOULD re-request via IncompleteResult';
+      try {
+        const r1 = await rawRequest(
+          serverUrl,
+          'tools/call',
+          {
+            name: 'test_incomplete_result_elicitation',
+            arguments: {},
+            inputResponses: { wrong_key: mockElicitResponse({ data: 'wrong' }) }
+          },
+          { sessionId }
+        );
+        const errs: string[] = [];
+        if (!isIncompleteResult(r1)) {
+          errs.push(
+            `expected IncompleteResult re-request when inputResponses key is wrong; got ${JSON.stringify(r1)}`
+          );
+        }
+        checks.push({
+          id,
+          name,
+          description,
+          status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+          timestamp: new Date().toISOString(),
+          errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+          specReferences: [SEP_2322_REF]
+        });
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error));
+      }
+    }
+
+    // Check 8: SKIPPED — MRTR → Tasks composition.
+    // Tracking placeholder; spec made this normative in commit 451f5e1
+    // (Apr 30) but two blockers remain before this can be enabled:
+    //   (a) Spec watch on the MRTR resultType discriminator value
+    //       (input_required vs incomplete; see helpers.ts SPEC WATCH).
+    //   (b) Reference servers need middleware that observes the
+    //       handler's IncompleteResult signal BEFORE creating a task —
+    //       the natural implementation pattern (create task up-front,
+    //       run handler in goroutine) doesn't expose the signal in time.
+    //       Tracked in https://github.com/panyam/mcpkit/issues/347 as
+    //       one example impl that hits this; SDKs in any language will
+    //       need an equivalent fix.
+    {
+      checks.push({
+        id: 'mrtr-tasks-composition',
+        name: 'MrtrTasksComposition',
+        description:
+          'MRTR loop gathers input then final round returns CreateTaskResult (SEP-2663 451f5e1; deferred — spec authors disagree on the resultType discriminator value, and reference implementations still in flight)',
+        status: 'SKIPPED',
+        timestamp: new Date().toISOString(),
+        errorMessage:
+          "Skipped: deferred until (a) spec authors converge on the MRTR resultType value (input_required vs incomplete) and (b) reference servers can observe the handler's IsIncomplete signal before creating a task.",
+        specReferences: [
+          SEP_2322_REF,
+          {
+            id: 'SEP-2663',
+            url: 'https://github.com/modelcontextprotocol/specification/pull/2663'
+          }
+        ]
+      });
+    }
+
+    return checks;
+  }
+}
diff --git a/src/scenarios/server/mrtr/helpers.ts b/src/scenarios/server/mrtr/helpers.ts
new file mode 100644
index 0000000..7e0ee19
--- /dev/null
+++ b/src/scenarios/server/mrtr/helpers.ts
@@ -0,0 +1,82 @@
+/**
+ * MRTR (SEP-2322 ephemeral) scenario helpers.
+ *
+ * Reuses the raw-rpc + session bootstrap from the tasks scenarios since
+ * MRTR's wire shape (resultType discriminator, requestState, inputRequests)
+ * is the SEP-2322 base that SEP-2663 builds on. The MRTR resultType value
+ * is centralized here so it's a one-liner to flip when the spec converges
+ * (SEP-2322 draft uses "input_required", SEP-2663 draft uses "incomplete";
+ * see prezaei comment on PR 2663 for the open question).
+ */
+
+import type { ConformanceCheck, SpecReference } from '../../../types';
+
+export const SEP_2322_REF: SpecReference = {
+  id: 'SEP-2322',
+  url: 'https://github.com/modelcontextprotocol/specification/pull/2322'
+};
+
+// SPEC WATCH — MRTR resultType discriminator value
+// SEP-2322 (MRTR) and SEP-2663 (Tasks Extension) currently disagree on
+// the wire value: SEP-2322's draft uses "input_required", SEP-2663's
+// draft uses "incomplete". Awaiting alignment between SEP authors
+// (PR 2663 comment 4381885336 + PR 2322 comment 4381884825). When the
+// spec converges, this single constant flips.
+export const MRTR_INCOMPLETE_RESULT_TYPE = 'incomplete';
+/** True when result is tagged incomplete or carries MRTR fields. */
+export function isIncompleteResult(result: any): boolean {
+  if (!result || typeof result !== 'object') return false;
+  if (result.resultType === MRTR_INCOMPLETE_RESULT_TYPE) return true;
+  return 'inputRequests' in result || 'requestState' in result;
+}
+/** True for a final result: tagged complete, or not incomplete. */
+export function isCompleteResult(result: any): boolean {
+  if (!result || typeof result !== 'object') return false;
+  if (result.resultType === 'complete') return true;
+  // Untagged results count as complete only without MRTR fields.
+  return !isIncompleteResult(result);
+}
+
+/** Build an ElicitResult-shaped mock response payload. */
+export function mockElicitResponse(
+  content: Record<string, unknown>
+): Record<string, unknown> {
+  return { action: 'accept', content };
+}
+
+/** Build a CreateMessageResult-shaped mock response payload. */
+export function mockSamplingResponse(text: string): Record<string, unknown> {
+  return {
+    role: 'assistant',
+    content: { type: 'text', text },
+    model: 'test-model',
+    stopReason: 'endTurn'
+  };
+}
+
+/** Build a ListRootsResult-shaped mock response payload. */
+export function mockListRootsResponse(): Record<string, unknown> {
+  return { roots: [{ uri: 'file:///test/root', name: 'Test Root' }] };
+}
+
+export function errMsg(error: unknown): string {
+  return error instanceof Error ? error.message : String(error);
+}
+
+export function failureCheck(
+  id: string,
+  name: string,
+  description: string,
+  error: unknown,
+  specReferences: SpecReference[] = [SEP_2322_REF]
+): ConformanceCheck {
+  return {
+    id,
+    name,
+    description,
+    status: 'FAILURE',
+    timestamp: new Date().toISOString(),
+    errorMessage: errMsg(error),
+    specReferences
+  };
+}
diff --git a/src/scenarios/server/tasks/all-scenarios.test.ts b/src/scenarios/server/tasks/all-scenarios.test.ts
new file mode 100644
index 0000000..76136f8
--- /dev/null
+++ b/src/scenarios/server/tasks/all-scenarios.test.ts
@@ -0,0 +1,176 @@
+/**
+ * SEP-2663 Tasks extension test runner.
+ *
+ * Iterates the tasks server scenarios against a SEP-2663-conformant
+ * server. Configuration is brand-neutral and language-agnostic:
+ *
+ *   1. Point at an already-running server:
+ *        TASKS_SERVER_URL=http://localhost:8080/mcp npm test -- tasks/all-scenarios.test.ts
+ *
+ *   2. Auto-spawn a fixture before tests (any language; the runner just
+ *      shells out to TASKS_SERVER_CMD and waits until TASKS_SERVER_URL
+ *      becomes reachable):
+ *        TASKS_SERVER_URL=http://localhost:18092/mcp \
+ *        TASKS_SERVER_CMD="/path/to/server --port 18092" \
+ *          npm test -- tasks/all-scenarios.test.ts
+ *
+ * If TASKS_SERVER_URL is unset, the suite is skipped — letting CI runs
+ * against the everything-server stay green until the upstream fixture
+ * grows SEP-2663 support.
+ *
+ * Readiness is detected by polling the URL's host/port for a TCP
+ * connection (deliberately language-agnostic — no log-line scanning).
+ *
+ * The fixture server can be implemented in any language as long as it
+ * exposes a SEP-2663 conformant Streamable HTTP MCP endpoint. Anyone is
+ * free to bring their own; one example reference implementation lives
+ * at https://github.com/panyam/mcpkit/tree/main/examples/tasks-v2.
+ */
+
+import { spawn, ChildProcess } from 'child_process';
+import { connect } from 'net';
+import { describe, it, expect, beforeAll, afterAll } from 'vitest';
+import { TasksLifecycleScenario } from './lifecycle';
+import { TasksCapabilityNegotiationScenario } from './capability';
+import { TasksWireFieldsScenario } from './wire-fields';
+import { TasksRequestStateScenario } from './request-state';
+import { TasksMRTRInputScenario } from './mrtr-input';
+import { TasksRequestHeadersScenario } from './headers';
+import { TasksDispatchScenario } from './dispatch';
+import { TasksStatusNotificationsScenario } from './notifications';
+
+const SERVER_URL = process.env.TASKS_SERVER_URL;
+const SERVER_CMD = process.env.TASKS_SERVER_CMD;
+const SERVER_STARTUP_TIMEOUT_MS = 15_000;
+const SHOULD_SPAWN = Boolean(SERVER_URL && SERVER_CMD);
+const HAVE_TARGET = Boolean(SERVER_URL);
+
+const TASKS_SCENARIOS = [
+  new TasksLifecycleScenario(),
+  new TasksCapabilityNegotiationScenario(),
+  new TasksWireFieldsScenario(),
+  new TasksRequestStateScenario(),
+  new TasksMRTRInputScenario(),
+  new TasksRequestHeadersScenario(),
+  new TasksDispatchScenario(),
+  new TasksStatusNotificationsScenario()
+];
+
+const describeIfTarget = HAVE_TARGET ? describe : describe.skip;
+
+describeIfTarget('SEP-2663 Tasks — server conformance', () => {
+  let serverProcess: ChildProcess | null = null;
+
+  beforeAll(async () => {
+    if (!SHOULD_SPAWN) return;
+
+    serverProcess = spawn('sh', ['-c', SERVER_CMD!], {
+      stdio: ['ignore', 'pipe', 'pipe'],
+      detached: false
+    });
+
+    let stdoutBuf = '';
+    let stderrBuf = '';
+    serverProcess.stdout?.on('data', (b) => {
+      stdoutBuf += b.toString();
+    });
+    serverProcess.stderr?.on('data', (b) => {
+      stderrBuf += b.toString();
+    });
+
+    serverProcess.on('exit', (code) => {
+      if (code !== null && code !== 0) {
+        console.error(
+          `tasks fixture exited unexpectedly with code ${code}.\nSTDOUT: ${stdoutBuf}\nSTDERR: ${stderrBuf}`
+        );
+      }
+    });
+
+    await waitForTcpReady(SERVER_URL!, SERVER_STARTUP_TIMEOUT_MS).catch(
+      (err) => {
+        if (serverProcess && !serverProcess.killed) {
+          serverProcess.kill('SIGKILL');
+        }
+        throw new Error(
+          `tasks fixture did not become reachable within ${SERVER_STARTUP_TIMEOUT_MS}ms: ${err.message}\nSTDOUT: ${stdoutBuf}\nSTDERR: ${stderrBuf}`
+        );
+      }
+    );
+  }, SERVER_STARTUP_TIMEOUT_MS + 5_000);
+
+  afterAll(async () => {
+    if (!SHOULD_SPAWN) return;
+    if (!serverProcess || serverProcess.killed) return;
+    serverProcess.kill('SIGTERM');
+    await new Promise<void>((resolve) => {
+      const timer = setTimeout(() => {
+        // `killed` is already true once SIGTERM was sent, so it cannot gate
+        // the escalation; this timer only fires if no 'exit' arrived.
+        serverProcess?.kill('SIGKILL');
+        resolve();
+      }, 3_000);
+      serverProcess!.once('exit', () => {
+        clearTimeout(timer);
+        resolve();
+      });
+    });
+    serverProcess = null;
+  });
+
+  for (const scenario of TASKS_SCENARIOS) {
+    it(`${scenario.name} — all checks succeed against fixture`, async () => {
+      const checks = await scenario.run(SERVER_URL!);
+      expect(checks.length).toBeGreaterThan(0);
+      const failures = checks.filter(
+        (c) => c.status === 'FAILURE' || c.status === 'WARNING'
+      );
+      if (failures.length > 0) {
+        const detail = failures
+          .map((c) => `  - ${c.id}: ${c.errorMessage ?? '(no message)'}`)
+          .join('\n');
+        throw new Error(
+          `${failures.length}/${checks.length} checks failed:\n${detail}`
+        );
+      }
+    });
+  }
+});
+
+/**
+ * Poll the URL's host/port until a TCP connection succeeds or the timeout
+ * elapses, retrying every 200ms. Rejects with the last connect error.
+ * Brackets around IPv6 literal hosts are stripped before connecting.
+ */
+async function waitForTcpReady(url: string, timeoutMs: number): Promise<void> {
+  const u = new URL(url);
+  const port = parseInt(u.port || (u.protocol === 'https:' ? '443' : '80'), 10);
+  const host = u.hostname.replace(/^\[|\]$/g, ''); // "[::1]" -> "::1"
+  const deadline = Date.now() + timeoutMs;
+  let lastErr: Error | null = null;
+
+  while (Date.now() < deadline) {
+    try {
+      await new Promise<void>((resolve, reject) => {
+        const socket = connect({ host, port }, () => {
+          socket.end();
+          resolve();
+        });
+        socket.once('error', (err) => {
+          socket.destroy();
+          reject(err);
+        });
+        socket.setTimeout(1_000, () => {
+          socket.destroy();
+          reject(new Error('connect timeout'));
+        });
+      });
+      return;
+    } catch (err) {
+      lastErr = err as Error;
+      await new Promise((r) => setTimeout(r, 200));
+    }
+  }
+  throw new Error(
+    `${host}:${port} did not accept TCP connections (last: ${lastErr?.message ?? 'unknown'})`
+  );
+}
diff --git a/src/scenarios/server/tasks/capability.ts b/src/scenarios/server/tasks/capability.ts
new file mode 100644
index 0000000..91615d7
--- /dev/null
+++ b/src/scenarios/server/tasks/capability.ts
@@ -0,0 +1,291 @@
+/**
+ * SEP-2663 Tasks Extension — capability negotiation conformance.
+ *
+ * Tests that the server advertises the io.modelcontextprotocol/tasks
+ * extension correctly, gates the v2 task surface on negotiation, and
+ * supports SEP-2575 per-request capability overrides.
+ *
+ * Required server fixtures:
+ *   - greet         — sync-only, returns "Hello, {name}!"
+ *   - slow_compute  — task-supporting, sleeps N seconds
+ */
+
+import {
+  ClientScenario,
+  ConformanceCheck,
+  ScenarioSpecTag,
+  DRAFT_PROTOCOL_VERSION
+} from '../../../types';
+import {
+  TASKS_EXTENSION_ID,
+  SEP_2663_REF,
+  SEP_2575_REF,
+  errMsg,
+  failureCheck,
+  initRawSession,
+  rawRequest
+} from './helpers';
+
+export class TasksCapabilityNegotiationScenario implements ClientScenario {
+  name = 'tasks-capability-negotiation';
+  specVersions: ScenarioSpecTag[] = ['extension', DRAFT_PROTOCOL_VERSION];
+  description = `Test SEP-2663 capability negotiation for the tasks extension.
+
+**Server Implementation Requirements:**
+
+**Capability advertisement (SEP-2663):**
+- The server MUST advertise \`io.modelcontextprotocol/tasks\` under
+  \`capabilities.extensions\` in its \`initialize\` response.
+- It MUST NOT use a v1-style \`capabilities.tasks\` slot (the v1 surface
+  is replaced by the extension).
+
+**Gating without negotiation (SEP-2663):**
+- For sessions that did NOT declare the \`io.modelcontextprotocol/tasks\`
+  extension during \`initialize\`, the server MUST reject \`tasks/get\`,
+  \`tasks/update\`, and \`tasks/cancel\` with JSON-RPC \`-32601\`
+  (MethodNotFound) — clients that didn't negotiate the surface should
+  not see it.
+- A \`tools/call\` from such a session MUST NOT return
+  \`CreateTaskResult\`. Task-supporting tools fall through to synchronous
+  execution and return a plain \`ToolResult\` with
+  \`resultType:"complete"\`.
+
+**Per-request opt-in (SEP-2575):**
+- A session that did not declare the extension at session level can
+  opt into task creation for a single \`tools/call\` by including the
+  extension under \`_meta.io.modelcontextprotocol/clientCapabilities.extensions\`.
+  The server MUST honor the per-request opt-in and produce a
+  \`CreateTaskResult\` for that call.`;
+
+  async run(serverUrl: string): Promise<ConformanceCheck[]> {
+    const checks: ConformanceCheck[] = [];
+
+    // Two sessions: one declares the extension, one does NOT.
+    let withExt: { sessionId: string; serverCapabilities: any };
+    let withoutExt: { sessionId: string };
+    try {
+      withExt = await initRawSession(serverUrl, {
+        capabilities: {
+          elicitation: {},
+          sampling: {},
+          extensions: { [TASKS_EXTENSION_ID]: {} }
+        }
+      });
+      withoutExt = await initRawSession(serverUrl, { capabilities: {} });
+    } catch (error) {
+      checks.push({
+        id: 'tasks-session-bootstrap',
+        name: 'TasksSessionBootstrap',
+        description: 'Initialize handshakes (with + without extension) succeed',
+        status: 'FAILURE',
+        timestamp: new Date().toISOString(),
+        errorMessage: `Failed to initialize: ${errMsg(error)}`,
+        specReferences: [SEP_2663_REF]
+      });
+      return checks;
+    }
+
+    // Check 1: server advertises extension under capabilities.extensions.
+    {
+      const id = 'tasks-extension-advertised';
+      const name = 'TasksExtensionAdvertised';
+      const description = `Server advertises ${TASKS_EXTENSION_ID} under capabilities.extensions (and not capabilities.tasks)`;
+      const caps = withExt.serverCapabilities ?? {};
+      const errs: string[] = [];
+      if (caps.tasks) {
+        errs.push(
+          'v1-style capabilities.tasks slot MUST NOT be used; tasks lives under capabilities.extensions'
+        );
+      }
+      if (!caps.extensions) {
+        errs.push('capabilities.extensions MUST be advertised');
+      } else if (!caps.extensions[TASKS_EXTENSION_ID]) {
+        errs.push(
+          `capabilities.extensions["${TASKS_EXTENSION_ID}"] MUST be present`
+        );
+      }
+      checks.push({
+        id,
+        name,
+        description,
+        status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+        timestamp: new Date().toISOString(),
+        errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+        specReferences: [SEP_2663_REF],
+        details: {
+          hasExtensions: Boolean(caps.extensions),
+          hasTasksExtension: Boolean(caps.extensions?.[TASKS_EXTENSION_ID]),
+          hasV1TasksSlot: Boolean(caps.tasks)
+        }
+      });
+    }
+
+    // Check 2: tasks/* methods rejected without extension negotiation.
+    {
+      const id = 'tasks-methods-gated-without-extension';
+      const name = 'TasksMethodsGatedWithoutExtension';
+      const description =
+        'tasks/get, tasks/update, tasks/cancel return -32601 when extension was not negotiated';
+      const cases: Array<{ method: string; params: any }> = [
+        { method: 'tasks/get', params: { taskId: 'gate-test' } },
+        {
+          method: 'tasks/update',
+          params: { taskId: 'gate-test', inputResponses: {} }
+        },
+        { method: 'tasks/cancel', params: { taskId: 'gate-test' } }
+      ];
+      const errs: string[] = [];
+      for (const tc of cases) {
+        try {
+          await rawRequest(serverUrl, tc.method, tc.params, {
+            sessionId: withoutExt.sessionId
+          });
+          errs.push(`${tc.method} MUST reject (it returned a result)`);
+        } catch (e: any) {
+          if (e.code !== -32601) {
+            errs.push(
+              `${tc.method} MUST return -32601; got ${e.code ?? '<missing>'}`
+            );
+          }
+        }
+      }
+      checks.push({
+        id,
+        name,
+        description,
+        status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+        timestamp: new Date().toISOString(),
+        errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+        specReferences: [SEP_2663_REF]
+      });
+    }
+
+    // Check 3: tools/call without extension returns sync ToolResult, not task.
+    {
+      const id = 'tasks-tools-call-without-extension-sync';
+      const name = 'TasksToolsCallWithoutExtensionSync';
+      const description =
+        'tools/call from a session without the extension MUST fall through to sync (no CreateTaskResult, even for task-supporting tools)';
+      try {
+        const result = await rawRequest(
+          serverUrl,
+          'tools/call',
+          {
+            name: 'slow_compute',
+            arguments: { seconds: 0, label: 'capability-no-ext' }
+          },
+          { sessionId: withoutExt.sessionId }
+        );
+        const errs: string[] = [];
+        if (result.resultType === 'task') {
+          errs.push(
+            'tools/call without extension MUST NOT return resultType:"task"'
+          );
+        }
+        if (result.taskId) {
+          errs.push(
+            `tools/call without extension MUST NOT carry top-level taskId; got ${result.taskId}`
+          );
+        }
+        if (result.task) {
+          errs.push(
+            'tools/call without extension MUST NOT carry the v1-style nested `task` envelope'
+          );
+        }
+        if (!result.content) {
+          errs.push(
+            'tools/call without extension MUST return sync ToolResult with content[]'
+          );
+        }
+        checks.push({
+          id,
+          name,
+          description,
+          status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+          timestamp: new Date().toISOString(),
+          errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+          specReferences: [SEP_2663_REF],
+          details: {
+            resultType: result.resultType,
+            hasTaskId: Boolean(result.taskId)
+          }
+        });
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, [SEP_2663_REF]));
+      }
+    }
+
+    // Check 4: SEP-2575 per-request _meta opt-in produces CreateTaskResult.
+    {
+      const id = 'tasks-per-request-meta-opt-in';
+      const name = 'TasksPerRequestMetaOptIn';
+      const description =
+        'tools/call with extension declared in _meta.io.modelcontextprotocol/clientCapabilities produces a CreateTaskResult even when the session did not negotiate the extension';
+      try {
+        const result = await rawRequest(
+          serverUrl,
+          'tools/call',
+          {
+            name: 'slow_compute',
+            arguments: { seconds: 1, label: 'capability-meta-opt' }
+          },
+          {
+            sessionId: withoutExt.sessionId,
+            meta: {
+              'io.modelcontextprotocol/clientCapabilities': {
+                extensions: { [TASKS_EXTENSION_ID]: {} }
+              }
+            }
+          }
+        );
+        const errs: string[] = [];
+        if (result.resultType !== 'task') {
+          errs.push(
+            `expected resultType:"task" via per-request opt-in; got ${JSON.stringify(result.resultType)}`
+          );
+        }
+        if (!result.taskId) {
+          errs.push(
+            'per-request opt-in MUST produce a CreateTaskResult with top-level taskId'
+          );
+        }
+        if (result.task) {
+          errs.push(
+            'CreateTaskResult MUST be flat (no nested `task` wrapper) even on per-request opt-in path'
+          );
+        }
+        // Best-effort cleanup: cancel the task so the 1s background job
+        // does not keep running on the server after this check.
+        if (result.taskId) {
+          try {
+            await rawRequest(
+              serverUrl,
+              'tasks/cancel',
+              { taskId: result.taskId },
+              { sessionId: withExt.sessionId }
+            );
+          } catch {
+            /* swallow — cleanup best-effort */
+          }
+        }
+        checks.push({
+          id,
+          name,
+          description,
+          status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+          timestamp: new Date().toISOString(),
+          errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+          specReferences: [SEP_2575_REF, SEP_2663_REF],
+          details: {
+            resultType: result.resultType,
+            taskId: result.taskId
+          }
+        });
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, [SEP_2575_REF]));
+      }
+    }
+
+    return checks;
+  }
+}
diff --git a/src/scenarios/server/tasks/dispatch.ts b/src/scenarios/server/tasks/dispatch.ts
new file mode 100644
index 0000000..3f35e43
--- /dev/null
+++ b/src/scenarios/server/tasks/dispatch.ts
@@ -0,0 +1,596 @@
+/**
+ * SEP-2663 Tasks Extension — dispatch + envelope conformance.
+ *
+ * Bundles a number of small, related checks under one scenario:
+ *   - Removed v1 methods (tasks/result, tasks/list) reject as -32601.
+ *   - Server-directed task creation works without a client `task` hint
+ *     param; legacy v1 `task` param on tools/call is tolerated and
+ *     ignored on sync tools.
+ *   - Immediate-result shortcut: a fast operation MAY skip task creation
+ *     and return a sync ToolResult.
+ *   - SEP-2322 resultType:"complete" discriminator on every non-task
+ *     response (sync tools/call, tasks/get, tasks/update, tasks/cancel).
+ *   - Strong consistency: tasks/get immediately after CreateTaskResult
+ *     MUST resolve.
+ *   - tasks/get with an unknown taskId MUST return -32602.
+ *
+ * Required server fixtures:
+ *   - greet           — sync-only
+ *   - slow_compute    — task-supporting (seconds:0 = instant)
+ *   - confirm_delete  — task-supporting, parks for elicitation
+ *   - failing_job     — task-supporting, returns tool error
+ */
+
+import {
+  ClientScenario,
+  ConformanceCheck,
+  ScenarioSpecTag,
+  DRAFT_PROTOCOL_VERSION
+} from '../../../types';
+import {
+  TASKS_EXTENSION_ID,
+  SEP_2322_REF,
+  SEP_2663_REF,
+  errMsg,
+  failureCheck,
+  initRawSession,
+  rawRequest,
+  waitForStatus,
+  waitForTerminal
+} from './helpers';
+
+/** One "the server MUST reject this request" expectation. */
+interface RejectionExpectation {
+  id: string;
+  name: string;
+  description: string;
+  /** JSON-RPC method to call. */
+  method: string;
+  params: Record<string, unknown>;
+  /** JSON-RPC error code the rejection must carry. */
+  expectedCode: number;
+  /** errorMessage reported when the server answers with a result instead. */
+  unexpectedResultMessage: string;
+}
+
+/** Read `code` off a thrown value without assuming the value is an object. */
+function thrownCode(error: unknown): unknown {
+  return typeof error === 'object' && error !== null
+    ? (error as { code?: unknown }).code
+    : undefined;
+}
+
+/**
+ * Send a request the server is required to reject and turn the outcome into
+ * a check: SUCCESS only when the call throws with `code === expectedCode`.
+ * NOTE(review): assumes rawRequest exposes the JSON-RPC error code as `code`
+ * on the value it throws — confirm against helpers.ts.
+ */
+async function checkRejection(
+  serverUrl: string,
+  sessionId: string,
+  expectation: RejectionExpectation
+): Promise<ConformanceCheck> {
+  const { id, name, description, method, params, expectedCode } = expectation;
+  let errorMessage: string | undefined;
+  try {
+    await rawRequest(serverUrl, method, params, { sessionId });
+    errorMessage = expectation.unexpectedResultMessage;
+  } catch (error: unknown) {
+    const code = thrownCode(error);
+    if (code === undefined || code === null) {
+      // No code at all (e.g. a transport failure): keep the thrown message
+      // so the report says more than "<missing>".
+      errorMessage = `expected ${expectedCode}; got <missing> (${errMsg(error)})`;
+    } else if (code !== expectedCode) {
+      errorMessage = `expected ${expectedCode}; got ${String(code)}`;
+    }
+  }
+  return {
+    id,
+    name,
+    description,
+    status: errorMessage === undefined ? 'SUCCESS' : 'FAILURE',
+    timestamp: new Date().toISOString(),
+    errorMessage,
+    specReferences: [SEP_2663_REF]
+  };
+}
+
+/**
+ * Conformance scenario for the SEP-2663 dispatch / envelope rules. `run`
+ * opens one raw session that declares the tasks extension and emits eight
+ * checks; if the initialize handshake fails it emits one bootstrap FAILURE.
+ */
+export class TasksDispatchScenario implements ClientScenario {
+  name = 'tasks-dispatch-and-envelope';
+  specVersions: ScenarioSpecTag[] = ['extension', DRAFT_PROTOCOL_VERSION];
+  description = `Test SEP-2663 dispatch / envelope rules across the tasks surface.
+
+**Server Implementation Requirements:**
+
+**Removed v1 methods (SEP-2663):**
+- \`tasks/result\` is removed in v2 — the result is inlined on
+  \`tasks/get\`. Servers MUST reject the method with JSON-RPC \`-32601\`.
+- \`tasks/list\` is removed in v2. Servers MUST reject it with
+  \`-32601\`.
+
+**Server-directed task creation (SEP-2663):**
+- The client does NOT send a \`task\` hint param. The server alone
+  decides whether to create a task. A \`tools/call\` against a
+  task-supporting tool MUST produce \`CreateTaskResult\` even with no
+  client hint.
+
+**Legacy \`task\` param tolerated (SEP-2663):**
+- A v1 client may still send \`task: { ttl, pollInterval }\` on
+  \`tools/call\`. The server MUST tolerate it (no error) AND MUST NOT
+  promote a sync-only tool to a task on its presence. The body
+  arguments + tool registration are authoritative.
+
+**Immediate-result shortcut (SEP-2663):**
+- A server MAY return a sync \`ToolResult\` for task-supporting tools
+  when the operation completes fast enough. Either return a
+  \`CreateTaskResult\` (with \`resultType:"task"\`) or a sync
+  \`ToolResult\` (with \`resultType:"complete"\`); both are valid.
+
+**resultType:"complete" on non-task responses (SEP-2322):**
+- Every JSON-RPC response on the tools+tasks surface other than a
+  CreateTaskResult MUST carry \`resultType:"complete"\`. This applies
+  to: sync \`tools/call\`, \`tasks/get\`, \`tasks/update\` ack,
+  \`tasks/cancel\` ack.
+
+**Strong consistency / durable create (SEP-2663):**
+- A server MUST NOT return \`CreateTaskResult\` until the task is
+  durably created — that is, until a \`tasks/get\` for the returned
+  \`taskId\` would resolve. Issuing \`tasks/get\` immediately after the
+  CreateTaskResult arrives MUST succeed, not -32602.
+
+**Unknown taskId on tasks/get (SEP-2663):**
+- \`tasks/get\` for a taskId the server doesn't recognize MUST return
+  JSON-RPC \`-32602\` (InvalidParams). Mirrors the same rule for
+  \`tasks/cancel\` (clarified upstream in spec commit d963ad0).`;
+
+  /**
+   * Run every check against `serverUrl`.
+   *
+   * @param serverUrl MCP endpoint handed to initRawSession / rawRequest.
+   * @returns The checks in execution order. An error thrown inside a check
+   *   is reported as that check's FAILURE; it does not abort the others.
+   */
+  async run(serverUrl: string): Promise<ConformanceCheck[]> {
+    const checks: ConformanceCheck[] = [];
+
+    let sessionId: string;
+    try {
+      ({ sessionId } = await initRawSession(serverUrl, {
+        capabilities: {
+          elicitation: {},
+          sampling: {},
+          extensions: { [TASKS_EXTENSION_ID]: {} }
+        }
+      }));
+    } catch (error) {
+      checks.push({
+        id: 'tasks-session-bootstrap',
+        name: 'TasksSessionBootstrap',
+        description:
+          'Initialize handshake declaring io.modelcontextprotocol/tasks extension succeeds',
+        status: 'FAILURE',
+        timestamp: new Date().toISOString(),
+        errorMessage: `Failed to initialize: ${errMsg(error)}`,
+        specReferences: [SEP_2663_REF]
+      });
+      return checks;
+    }
+
+    // Check 1: tasks/result removed.
+    checks.push(
+      await checkRejection(serverUrl, sessionId, {
+        id: 'tasks-removed-tasks-result',
+        name: 'TasksRemovedTasksResult',
+        description:
+          'tasks/result is removed in v2 and MUST reject with -32601',
+        method: 'tasks/result',
+        params: { taskId: 'any' },
+        expectedCode: -32601,
+        unexpectedResultMessage:
+          'tasks/result returned a result instead of -32601'
+      })
+    );
+
+    // Check 2: tasks/list removed.
+    checks.push(
+      await checkRejection(serverUrl, sessionId, {
+        id: 'tasks-removed-tasks-list',
+        name: 'TasksRemovedTasksList',
+        description:
+          'tasks/list is removed in v2 and MUST reject with -32601',
+        method: 'tasks/list',
+        params: {},
+        expectedCode: -32601,
+        unexpectedResultMessage:
+          'tasks/list returned a result instead of -32601'
+      })
+    );
+
+    // Check 3: server-directed task creation without client hint.
+    {
+      const id = 'tasks-server-directed-creation-no-hint';
+      const name = 'TasksServerDirectedCreationNoHint';
+      const description =
+        'tools/call with no client `task` hint param MUST still produce CreateTaskResult for task-supporting tools';
+      try {
+        const result = await rawRequest(
+          serverUrl,
+          'tools/call',
+          { name: 'failing_job', arguments: {} },
+          { sessionId }
+        );
+        const errs: string[] = [];
+        if (result.resultType !== 'task' || !result.taskId) {
+          errs.push(
+            `expected CreateTaskResult; got resultType=${JSON.stringify(result.resultType)}, taskId=${JSON.stringify(result.taskId)}`
+          );
+        }
+        // Best-effort wait so we don't leak.
+        if (result.taskId) {
+          try {
+            await waitForTerminal(serverUrl, sessionId, result.taskId);
+          } catch {
+            /* swallow */
+          }
+        }
+        checks.push({
+          id,
+          name,
+          description,
+          status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+          timestamp: new Date().toISOString(),
+          errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+          specReferences: [SEP_2663_REF]
+        });
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, [SEP_2663_REF]));
+      }
+    }
+
+    // Check 4: legacy `task` param tolerated + ignored on sync tool.
+    {
+      const id = 'tasks-legacy-task-param-ignored';
+      const name = 'TasksLegacyTaskParamIgnored';
+      const description =
+        'tools/call with legacy `task` param against a sync tool MUST NOT error and MUST NOT be promoted to a task';
+      try {
+        const result = await rawRequest(
+          serverUrl,
+          'tools/call',
+          {
+            name: 'greet',
+            arguments: { name: 'legacy-hint' },
+            // Legacy v1 hint that the server MUST ignore.
+            task: { ttl: 60_000, pollInterval: 100 }
+          },
+          { sessionId }
+        );
+        const errs: string[] = [];
+        if (result.resultType === 'task') {
+          errs.push(
+            'legacy `task` param MUST NOT promote a sync tool to a task'
+          );
+        }
+        if (result.taskId) {
+          errs.push(
+            `sync tool with legacy hint MUST NOT carry top-level taskId; got ${result.taskId}`
+          );
+        }
+        if (!Array.isArray(result.content) || result.content.length === 0) {
+          errs.push('sync tool MUST still return content[]');
+        }
+        checks.push({
+          id,
+          name,
+          description,
+          status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+          timestamp: new Date().toISOString(),
+          errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+          specReferences: [SEP_2663_REF]
+        });
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, [SEP_2663_REF]));
+      }
+    }
+
+    // Check 5: immediate-result shortcut. Either CreateTaskResult OR
+    // sync ToolResult is acceptable for an instant operation.
+    {
+      const id = 'tasks-immediate-result-shortcut';
+      const name = 'TasksImmediateResultShortcut';
+      const description =
+        'For a fast operation, a task-supporting tool MAY skip task creation and return a sync ToolResult; either path is valid';
+      try {
+        const result = await rawRequest(
+          serverUrl,
+          'tools/call',
+          {
+            name: 'slow_compute',
+            arguments: { seconds: 0, label: 'instant' }
+          },
+          { sessionId }
+        );
+        const errs: string[] = [];
+        if (result.resultType === 'task') {
+          if (!result.taskId) {
+            errs.push(
+              'task-path response MUST carry top-level taskId on CreateTaskResult'
+            );
+          }
+        } else {
+          // Sync path
+          if (!Array.isArray(result.content)) {
+            errs.push(
+              'sync-path response MUST carry content[] for the immediate ToolResult'
+            );
+          }
+        }
+        checks.push({
+          id,
+          name,
+          description,
+          status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+          timestamp: new Date().toISOString(),
+          errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+          specReferences: [SEP_2663_REF],
+          details: { resultType: result.resultType }
+        });
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, [SEP_2663_REF]));
+      }
+    }
+
+    // Check 6: resultType:"complete" on every non-task response.
+    {
+      const id = 'tasks-result-type-complete-on-non-task-responses';
+      const name = 'TasksResultTypeCompleteOnNonTaskResponses';
+      const description =
+        'Sync tools/call, tasks/get, tasks/update ack, and tasks/cancel ack MUST all carry resultType:"complete"';
+      const errs: string[] = [];
+      // Tasks created below that still need a best-effort cancel on exit.
+      const pendingCleanup: string[] = [];
+      const expectComplete = (
+        label: string,
+        response: { resultType?: unknown }
+      ): void => {
+        if (response.resultType !== 'complete') {
+          errs.push(
+            `${label} resultType = ${JSON.stringify(response.resultType)}, want "complete"`
+          );
+        }
+      };
+      try {
+        // Sync tools/call.
+        const sync = await rawRequest(
+          serverUrl,
+          'tools/call',
+          { name: 'greet', arguments: { name: 'rt' } },
+          { sessionId }
+        );
+        expectComplete('sync tools/call', sync);
+
+        // tasks/get against a fresh task. seconds:0 may take the
+        // immediate-result shortcut and come back without a taskId.
+        const created = await rawRequest(
+          serverUrl,
+          'tools/call',
+          {
+            name: 'slow_compute',
+            arguments: { seconds: 0, label: 'rt-get' }
+          },
+          { sessionId }
+        );
+        const taskIdForGet = created.taskId;
+        let getVerified = false;
+        if (taskIdForGet) {
+          await waitForTerminal(serverUrl, sessionId, taskIdForGet);
+          const got = await rawRequest(
+            serverUrl,
+            'tasks/get',
+            { taskId: taskIdForGet },
+            { sessionId }
+          );
+          expectComplete('tasks/get', got);
+          getVerified = true;
+        }
+
+        // tasks/cancel ack on a fresh long-running task.
+        const longLived = await rawRequest(
+          serverUrl,
+          'tools/call',
+          {
+            name: 'slow_compute',
+            arguments: { seconds: 60, label: 'rt-cancel' }
+          },
+          { sessionId }
+        );
+        const longLivedTaskId: string | undefined = longLived.taskId;
+        if (!longLivedTaskId) {
+          // Never report SUCCESS for an ack that was not requested.
+          errs.push(
+            'slow_compute (seconds:60) did not create a task; tasks/cancel ack was not verified'
+          );
+        } else {
+          pendingCleanup.push(longLivedTaskId);
+          if (!getVerified) {
+            // The instant call took the shortcut; use this task instead.
+            const got = await rawRequest(
+              serverUrl,
+              'tasks/get',
+              { taskId: longLivedTaskId },
+              { sessionId }
+            );
+            expectComplete('tasks/get', got);
+            getVerified = true;
+          }
+          // The cancel below doubles as this task's cleanup.
+          pendingCleanup.pop();
+          const cancelAck = await rawRequest(
+            serverUrl,
+            'tasks/cancel',
+            { taskId: longLivedTaskId },
+            { sessionId }
+          );
+          expectComplete('tasks/cancel ack', cancelAck);
+        }
+        if (!getVerified) {
+          errs.push('no task was created; tasks/get was not verified');
+        }
+
+        // tasks/update ack on a parked elicitation task.
+        const elicit = await rawRequest(
+          serverUrl,
+          'tools/call',
+          { name: 'confirm_delete', arguments: { filename: 'rt.txt' } },
+          { sessionId }
+        );
+        const elicitTaskId: string | undefined = elicit.taskId;
+        if (!elicitTaskId) {
+          errs.push(
+            'confirm_delete did not create a task; tasks/update ack was not verified'
+          );
+        } else {
+          pendingCleanup.push(elicitTaskId);
+          await waitForStatus(
+            serverUrl,
+            sessionId,
+            elicitTaskId,
+            'input_required',
+            5_000
+          );
+          const updateAck = await rawRequest(
+            serverUrl,
+            'tasks/update',
+            {
+              taskId: elicitTaskId,
+              inputResponses: { 'unknown-key': { ignored: true } }
+            },
+            { sessionId }
+          );
+          expectComplete('tasks/update ack', updateAck);
+        }
+      } catch (error) {
+        // Keep the findings collected so far alongside the thrown error.
+        errs.push(errMsg(error));
+      } finally {
+        for (const taskId of pendingCleanup) {
+          try {
+            await rawRequest(
+              serverUrl,
+              'tasks/cancel',
+              { taskId },
+              { sessionId }
+            );
+          } catch {
+            /* swallow — cleanup is best-effort */
+          }
+        }
+      }
+      checks.push({
+        id,
+        name,
+        description,
+        status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+        timestamp: new Date().toISOString(),
+        errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+        specReferences: [SEP_2322_REF, SEP_2663_REF]
+      });
+    }
+
+    // Check 7: strong consistency — immediate tasks/get after CreateTaskResult.
+    {
+      const id = 'tasks-strong-consistency-immediate-get';
+      const name = 'TasksStrongConsistencyImmediateGet';
+      const description =
+        'tasks/get issued immediately after CreateTaskResult arrives MUST resolve (server MUST NOT return CreateTaskResult before the task is durably created)';
+      try {
+        const created = await rawRequest(
+          serverUrl,
+          'tools/call',
+          {
+            name: 'slow_compute',
+            arguments: { seconds: 60, label: 'consistency' }
+          },
+          { sessionId }
+        );
+        const taskId = created.taskId;
+        if (!taskId) {
+          checks.push({
+            id,
+            name,
+            description,
+            status: 'FAILURE',
+            timestamp: new Date().toISOString(),
+            errorMessage: 'slow_compute did not create a task',
+            specReferences: [SEP_2663_REF]
+          });
+        } else {
+          const errs: string[] = [];
+          try {
+            // No await/sleep between create and get — codifies the
+            // strong-consistency ordering.
+            const got = await rawRequest(
+              serverUrl,
+              'tasks/get',
+              { taskId },
+              { sessionId }
+            );
+            if (got.taskId !== taskId) {
+              errs.push(
+                `immediate tasks/get MUST resolve the same taskId; got ${got.taskId}`
+              );
+            }
+          } finally {
+            // Cleanup. In `finally` so a rejected tasks/get — the failure
+            // this check exists to catch — does not leave the 60s task running.
+            try {
+              await rawRequest(
+                serverUrl,
+                'tasks/cancel',
+                { taskId },
+                { sessionId }
+              );
+            } catch {
+              /* swallow */
+            }
+          }
+          checks.push({
+            id,
+            name,
+            description,
+            status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+            timestamp: new Date().toISOString(),
+            errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+            specReferences: [SEP_2663_REF]
+          });
+        }
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, [SEP_2663_REF]));
+      }
+    }
+
+    // Check 8: tasks/get with unknown taskId returns -32602.
+    checks.push(
+      await checkRejection(serverUrl, sessionId, {
+        id: 'tasks-get-unknown-task-id-rejected',
+        name: 'TasksGetUnknownTaskIdRejected',
+        description:
+          'tasks/get for a taskId the server does not recognize MUST return -32602',
+        method: 'tasks/get',
+        params: { taskId: 'tasks-conformance-nonexistent-12345' },
+        expectedCode: -32602,
+        unexpectedResultMessage:
+          'tasks/get with unknown taskId returned a result'
+      })
+    );
+
+    return checks;
+  }
+}
diff --git a/src/scenarios/server/tasks/headers.ts b/src/scenarios/server/tasks/headers.ts
new file mode 100644
index 0000000..0d5ebdd
--- /dev/null
+++ b/src/scenarios/server/tasks/headers.ts
@@ -0,0 +1,244 @@
+/**
+ * SEP-2243 Mcp-Method / Mcp-Name request-header tolerance.
+ *
+ * SEP-2243 defines Mcp-Method and Mcp-Name as REQUEST headers (client →
+ * server) used by HTTP infrastructure (proxies, gateways, observability)
+ * to route or shape JSON-RPC traffic without parsing the body. They are
+ * informational; the JSON-RPC body is authoritative. A conformant
+ * server MUST tolerate the headers without changing dispatch.
+ *
+ * Whether the server *also* echoes these headers on responses for
+ * downstream observability is implementation-defined and out of scope
+ * for SEP-2243 conformance.
+ *
+ * Required server fixtures:
+ *   - greet         — sync-only, returns "Hello, {name}!"
+ *   - slow_compute  — task-supporting, sleeps N seconds
+ */
+
+import {
+  ClientScenario,
+  ConformanceCheck,
+  ScenarioSpecTag,
+  DRAFT_PROTOCOL_VERSION
+} from '../../../types';
+import {
+  TASKS_EXTENSION_ID,
+  SEP_2243_REF,
+  errMsg,
+  failureCheck,
+  initRawSession,
+  rawRequest
+} from './helpers';
+
+/**
+ * Conformance scenario for SEP-2243 request-header tolerance. `run` performs
+ * three checks on one raw session, or emits a single bootstrap FAILURE when
+ * the initialize handshake fails.
+ */
+export class TasksRequestHeadersScenario implements ClientScenario {
+  name = 'tasks-request-headers';
+  specVersions: ScenarioSpecTag[] = ['extension', DRAFT_PROTOCOL_VERSION];
+  description = `Test SEP-2243 Mcp-Method / Mcp-Name request-header tolerance.
+
+**Server Implementation Requirements:**
+
+SEP-2243 defines two informational request headers used by HTTP
+infrastructure (proxies, gateways, observability) to route or shape
+JSON-RPC traffic without parsing the body:
+
+- \`Mcp-Method: <jsonrpc-method>\` — set on every JSON-RPC request.
+- \`Mcp-Name: <task-id>\` — set on resume operations (\`tasks/get\`,
+  \`tasks/update\`, \`tasks/cancel\`).
+
+The JSON-RPC body is authoritative. The server MUST tolerate the
+headers, MUST NOT require them, and MUST NOT change dispatch behavior
+based on them — including when the headers disagree with the body.`;
+
+  /**
+   * Run the header-tolerance checks against `serverUrl`.
+   *
+   * @param serverUrl MCP endpoint handed to initRawSession / rawRequest.
+   * @returns The checks in execution order.
+   */
+  async run(serverUrl: string): Promise<ConformanceCheck[]> {
+    const checks: ConformanceCheck[] = [];
+
+    // Push the verdict for one check: SUCCESS iff no problems were collected.
+    const record = (
+      id: string,
+      name: string,
+      description: string,
+      problems: string[]
+    ): void => {
+      const passed = problems.length === 0;
+      checks.push({
+        id,
+        name,
+        description,
+        status: passed ? 'SUCCESS' : 'FAILURE',
+        timestamp: new Date().toISOString(),
+        errorMessage: passed ? undefined : problems.join('; '),
+        specReferences: [SEP_2243_REF]
+      });
+    };
+
+    let sessionId: string;
+    try {
+      const session = await initRawSession(serverUrl, {
+        capabilities: { extensions: { [TASKS_EXTENSION_ID]: {} } }
+      });
+      sessionId = session.sessionId;
+    } catch (error) {
+      checks.push({
+        id: 'tasks-session-bootstrap',
+        name: 'TasksSessionBootstrap',
+        description:
+          'Initialize handshake declaring io.modelcontextprotocol/tasks extension succeeds',
+        status: 'FAILURE',
+        timestamp: new Date().toISOString(),
+        errorMessage: `Failed to initialize: ${errMsg(error)}`,
+        specReferences: [SEP_2243_REF]
+      });
+      return checks;
+    }
+
+    // Check 1: Mcp-Method on tools/call against a sync tool.
+    {
+      const id = 'tasks-headers-tolerate-mcp-method-on-tools-call';
+      const name = 'TasksHeadersTolerateMcpMethodOnToolsCall';
+      const description =
+        'Server tolerates Mcp-Method request header on tools/call (sync tool dispatch unaffected)';
+      try {
+        const greeting = await rawRequest(
+          serverUrl,
+          'tools/call',
+          { name: 'greet', arguments: { name: 'sep-2243' } },
+          { sessionId, headers: { 'Mcp-Method': 'tools/call' } }
+        );
+        const problems: string[] = [];
+        if (greeting.resultType !== 'complete') {
+          problems.push(
+            `sync ToolResult.resultType MUST be "complete" regardless of routing header; got ${JSON.stringify(greeting.resultType)}`
+          );
+        }
+        const content = greeting.content;
+        const greetingIntact =
+          Array.isArray(content) &&
+          content.length > 0 &&
+          content[0]?.text === 'Hello, sep-2243!';
+        if (!greetingIntact) {
+          problems.push(
+            'tool result content MUST be unaffected by the Mcp-Method header'
+          );
+        }
+        record(id, name, description, problems);
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, [SEP_2243_REF]));
+      }
+    }
+
+    // Check 2: Mcp-Method + Mcp-Name on tasks/get (drive a task first
+    // so we have a real taskId to route on).
+    let routingTaskId: string | undefined;
+    {
+      const id = 'tasks-headers-tolerate-routing-headers-on-tasks-get';
+      const name = 'TasksHeadersTolerateRoutingHeadersOnTasksGet';
+      const description =
+        'Server tolerates Mcp-Method + Mcp-Name request headers on tasks/get (body taskId resolves regardless of routing headers)';
+      try {
+        const created = await rawRequest(
+          serverUrl,
+          'tools/call',
+          {
+            name: 'slow_compute',
+            arguments: { seconds: 60, label: 'headers-tasks-get' }
+          },
+          { sessionId }
+        );
+        routingTaskId = created.taskId;
+        if (routingTaskId) {
+          const routingHeaders = {
+            'Mcp-Method': 'tasks/get',
+            'Mcp-Name': routingTaskId
+          };
+          const fetched = await rawRequest(
+            serverUrl,
+            'tasks/get',
+            { taskId: routingTaskId },
+            { sessionId, headers: routingHeaders }
+          );
+          const problems: string[] = [];
+          if (fetched.taskId !== routingTaskId) {
+            problems.push(
+              `tasks/get MUST resolve body taskId regardless of routing headers; got ${fetched.taskId}`
+            );
+          }
+          if (!fetched.status) {
+            problems.push(
+              'tasks/get MUST still return status when routing headers are set'
+            );
+          }
+          record(id, name, description, problems);
+        } else {
+          record(id, name, description, [
+            'slow_compute did not create a task'
+          ]);
+        }
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, [SEP_2243_REF]));
+      }
+    }
+
+    // Check 3: Body method is authoritative when Mcp-Method header
+    // disagrees with body.
+    {
+      const id = 'tasks-headers-body-method-authoritative';
+      const name = 'TasksHeadersBodyMethodAuthoritative';
+      const description =
+        'When Mcp-Method header disagrees with body, server MUST dispatch on body method (header is informational)';
+      try {
+        const greeting = await rawRequest(
+          serverUrl,
+          'tools/call',
+          { name: 'greet', arguments: { name: 'header-mismatch' } },
+          { sessionId, headers: { 'Mcp-Method': 'tasks/get' } }
+        );
+        const problems: string[] = [];
+        if (greeting.resultType !== 'complete') {
+          problems.push(
+            `server MUST dispatch on body method (tools/call → resultType:"complete"); got ${JSON.stringify(greeting.resultType)}`
+          );
+        }
+        const content = greeting.content;
+        const dispatchedOnBody =
+          Array.isArray(content) &&
+          content[0]?.text === 'Hello, header-mismatch!';
+        if (!dispatchedOnBody) {
+          problems.push(
+            'tool result MUST reflect the body method, not the header claim'
+          );
+        }
+        record(id, name, description, problems);
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, [SEP_2243_REF]));
+      }
+    }
+
+    // Cleanup the long-lived task.
+    if (routingTaskId) {
+      try {
+        await rawRequest(
+          serverUrl,
+          'tasks/cancel',
+          { taskId: routingTaskId },
+          { sessionId }
+        );
+      } catch {
+        /* swallow */
+      }
+    }
+
+    return checks;
+  }
+}
diff --git a/src/scenarios/server/tasks/helpers.ts b/src/scenarios/server/tasks/helpers.ts
index 32ebec4..2eea4e7 100644
--- a/src/scenarios/server/tasks/helpers.ts
+++ b/src/scenarios/server/tasks/helpers.ts
@@ -12,8 +12,88 @@
  * and this file shrinks (or disappears).
  */
 
+import type { ConformanceCheck, SpecReference } from '../../../types';
+
 export const TASKS_EXTENSION_ID = 'io.modelcontextprotocol/tasks';
 
+export const SEP_2663_REF: SpecReference = {
+  id: 'SEP-2663',
+  url: 'https://github.com/modelcontextprotocol/specification/pull/2663'
+};
+export const SEP_2322_REF: SpecReference = {
+  id: 'SEP-2322',
+  url: 'https://github.com/modelcontextprotocol/specification/pull/2322'
+};
+export const SEP_2243_REF: SpecReference = {
+  id: 'SEP-2243',
+  url: 'https://github.com/modelcontextprotocol/specification/pull/2243'
+};
+export const SEP_2575_REF: SpecReference = {
+  id: 'SEP-2575',
+  url: 'https://github.com/modelcontextprotocol/specification/pull/2575'
+};
+
+/**
+ * Render a thrown value as a human-readable message.
+ *
+ * Error instances yield their `message`. Other objects yield a string
+ * `message` property when they have one, else their JSON form, so a thrown
+ * JSON-RPC error object is not reported as "[object Object]". Everything
+ * else goes through String().
+ */
+export function errMsg(error: unknown): string {
+  if (error instanceof Error) return error.message;
+  if (typeof error === 'object' && error !== null) {
+    const { message } = error as { message?: unknown };
+    if (typeof message === 'string' && message !== '') return message;
+    try {
+      // JSON.stringify throws on cycles/BigInt and can return undefined.
+      return JSON.stringify(error) ?? String(error);
+    } catch {
+      return String(error);
+    }
+  }
+  return String(error);
+}
+
+/** Build a FAILURE check whose errorMessage is errMsg(error); id, name, description and specReferences are passed through as given. */
+export function failureCheck(
+  id: string,
+  name: string,
+  description: string,
+  error: unknown,
+  specReferences: SpecReference[]
+): ConformanceCheck {
+  return {
+    id,
+    name,
+    description,
+    status: 'FAILURE',
+    timestamp: new Date().toISOString(),
+    errorMessage: errMsg(error),
+    specReferences
+  };
+}
+
+/** Build a SKIPPED check (keeps the id in the report so Ctrl+F still finds it); errorMessage is "Skipped: <reason>" and specReferences defaults to [SEP_2663_REF]. */
+export function skipCheck(
+  id: string,
+  name: string,
+  description: string,
+  reason: string,
+  specReferences: SpecReference[] = [SEP_2663_REF]
+): ConformanceCheck {
+  return {
+    id,
+    name,
+    description,
+    status: 'SKIPPED',
+    timestamp: new Date().toISOString(),
+    errorMessage: `Skipped: ${reason}`,
+    specReferences
+  };
+}
+
 export interface InitOpts {
   /** Negotiated wire protocolVersion. Defaults to LATEST_SPEC_VERSION. */
   protocolVersion?: string;
@@ -23,15 +84,28 @@ export interface InitOpts {
   clientInfo?: { name: string; version: string };
 }
 
+export interface InitResult {
+  /** Value of the Mcp-Session-Id header on the initialize response (initRawSession throws when it is absent). */
+  sessionId: string;
+  /** `capabilities` object from the initialize result; `{}` when the server omits it. */
+  serverCapabilities: Record<string, any>;
+  /** `protocolVersion` from the initialize result; undefined when the server omits it. */
+  serverProtocolVersion?: string;
+  /** `serverInfo` from the initialize result (name, version, …); undefined when the server omits it. */
+  serverInfo?: Record<string, any>;
+}
+
 /**
- * Run a fresh initialize handshake and return the resulting session id.
- * Bypasses the SDK so callers can declare extension capabilities the
- * SDK's typed wrappers don't yet know about.
+ * Run a fresh initialize handshake and return session id + the server's
+ * advertised capabilities. Bypasses the SDK so callers can declare
+ * extension capabilities the SDK's typed wrappers don't yet know about,
+ * and so the SDK's Zod schemas don't strip extension fields off the
+ * server response.
  */
 export async function initRawSession(
   serverUrl: string,
   opts: InitOpts = {}
-): Promise<string> {
+): Promise<InitResult> {
   const protocolVersion = opts.protocolVersion ?? '2025-11-25';
   const capabilities = opts.capabilities ?? {};
   const clientInfo = opts.clientInfo ?? {
@@ -55,6 +129,14 @@ export async function initRawSession(
   const sid = initResp.headers.get('mcp-session-id') || '';
   if (!sid) throw new Error('initialize response missing Mcp-Session-Id');
 
+  const initBody = await initResp.json();
+  if (initBody.error) {
+    throw new Error(
+      `initialize returned JSON-RPC error: ${JSON.stringify(initBody.error)}`
+    );
+  }
+  const result = initBody.result ?? {};
+
   await fetch(serverUrl, {
     method: 'POST',
     headers: {
@@ -67,7 +149,12 @@ export async function initRawSession(
       method: 'notifications/initialized'
     })
   });
-  return sid;
+  return {
+    sessionId: sid,
+    serverCapabilities: result.capabilities ?? {},
+    serverProtocolVersion: result.protocolVersion,
+    serverInfo: result.serverInfo
+  };
 }
 
 export interface RawRequestOpts {
diff --git a/src/scenarios/server/tasks/lifecycle.test.ts b/src/scenarios/server/tasks/lifecycle.test.ts
deleted file mode 100644
index d2ea918..0000000
--- a/src/scenarios/server/tasks/lifecycle.test.ts
+++ /dev/null
@@ -1,149 +0,0 @@
-/**
- * SEP-2663 Tasks extension test runner.
- *
- * Iterates the tasks server scenarios against a SEP-2663-conformant
- * server. Two ways to point at one — pick whichever fits:
- *
- *   1. Existing server already running:
- *        MCPKIT_TASKS_SERVER_URL=http://localhost:8080/mcp npm test -- lifecycle.test.ts
- *
- *   2. Auto-spawn a fixture binary in beforeAll (the binary must accept
- *      `--serve --addr :PORT` and bind Streamable HTTP at /mcp):
- *        MCPKIT_TASKS_BINARY=/path/to/tasks-server npm test -- lifecycle.test.ts
- *
- *   Optional: MCPKIT_TASKS_PORT overrides the auto-spawn port (default 18092).
- *
- * If neither is set, the suite is skipped — letting CI runs against the
- * everything-server stay green until the upstream fixture grows SEP-2663
- * support.
- *
- * The mcpkit reference fixture lives at
- * https://github.com/panyam/mcpkit/tree/main/examples/tasks-v2 (mcpkit
- * keeps its v1 surface alongside v2 internally; the fork only cares
- * about the SEP-2663 surface, hence the unsuffixed naming here).
- */
-
-import { spawn, ChildProcess } from 'child_process';
-import { describe, it, expect, beforeAll, afterAll } from 'vitest';
-import { TasksLifecycleScenario } from './lifecycle';
-
-const FIXTURE_BINARY = process.env.MCPKIT_TASKS_BINARY;
-const EXTERNAL_URL = process.env.MCPKIT_TASKS_SERVER_URL;
-const TEST_PORT = parseInt(process.env.MCPKIT_TASKS_PORT ?? '18092', 10);
-const SERVER_URL = EXTERNAL_URL ?? `http://localhost:${TEST_PORT}/mcp`;
-const SERVER_STARTUP_TIMEOUT_MS = 10_000;
-// Spawn only when no external URL is provided AND a fixture binary is.
-const SHOULD_SPAWN = !EXTERNAL_URL && Boolean(FIXTURE_BINARY);
-const HAVE_TARGET = Boolean(EXTERNAL_URL) || SHOULD_SPAWN;
-
-const TASKS_SCENARIOS = [new TasksLifecycleScenario()];
-
-const describeIfTarget = HAVE_TARGET ? describe : describe.skip;
-
-describeIfTarget('SEP-2663 Tasks — server conformance', () => {
-  let serverProcess: ChildProcess | null = null;
-
-  beforeAll(async () => {
-    if (!SHOULD_SPAWN) return;
-
-    serverProcess = spawn(
-      FIXTURE_BINARY!,
-      ['--serve', '--addr', `:${TEST_PORT}`],
-      {
-        stdio: ['ignore', 'pipe', 'pipe'],
-        detached: false
-      }
-    );
-
-    let stdoutBuf = '';
-    let stderrBuf = '';
-    serverProcess.stdout?.on('data', (b) => {
-      stdoutBuf += b.toString();
-    });
-    serverProcess.stderr?.on('data', (b) => {
-      stderrBuf += b.toString();
-    });
-
-    await new Promise<void>((resolve, reject) => {
-      const timer = setTimeout(() => {
-        if (serverProcess && !serverProcess.killed) {
-          serverProcess.kill('SIGKILL');
-        }
-        reject(
-          new Error(
-            `tasks fixture failed to start within ${SERVER_STARTUP_TIMEOUT_MS}ms.\nSTDOUT: ${stdoutBuf}\nSTDERR: ${stderrBuf}`
-          )
-        );
-      }, SERVER_STARTUP_TIMEOUT_MS);
-
-      // mcpkit's tasks demo logs the listen address to stderr via the
-      // log package; treat any "Connect:" or "listening" line as ready.
-      const checkReady = (chunk: string) => {
-        if (
-          chunk.includes('Connect:') ||
-          chunk.includes('listening') ||
-          chunk.includes('Listening on')
-        ) {
-          clearTimeout(timer);
-          resolve();
-        }
-      };
-      serverProcess!.stdout?.on('data', (b) => checkReady(b.toString()));
-      serverProcess!.stderr?.on('data', (b) => checkReady(b.toString()));
-
-      serverProcess!.on('error', (err) => {
-        clearTimeout(timer);
-        reject(new Error(`Failed to spawn tasks fixture: ${err.message}`));
-      });
-      serverProcess!.on('exit', (code) => {
-        if (code !== null && code !== 0) {
-          clearTimeout(timer);
-          reject(
-            new Error(
-              `tasks fixture exited prematurely with code ${code}.\nSTDOUT: ${stdoutBuf}\nSTDERR: ${stderrBuf}`
-            )
-          );
-        }
-      });
-    });
-  }, SERVER_STARTUP_TIMEOUT_MS + 5_000);
-
-  afterAll(async () => {
-    if (!SHOULD_SPAWN) return;
-    if (!serverProcess || serverProcess.killed) return;
-    serverProcess.kill('SIGTERM');
-    await new Promise<void>((resolve) => {
-      const timer = setTimeout(() => {
-        if (serverProcess && !serverProcess.killed) {
-          serverProcess.kill('SIGKILL');
-        }
-        resolve();
-      }, 3_000);
-      serverProcess!.once('exit', () => {
-        clearTimeout(timer);
-        resolve();
-      });
-    });
-    serverProcess = null;
-  });
-
-  for (const scenario of TASKS_SCENARIOS) {
-    it(`${scenario.name} — all checks succeed against fixture`, async () => {
-      const checks = await scenario.run(SERVER_URL);
-      expect(checks.length).toBeGreaterThan(0);
-      const failures = checks.filter(
-        (c) => c.status === 'FAILURE' || c.status === 'WARNING'
-      );
-      if (failures.length > 0) {
-        // Surface the failing slugs and messages so vitest output points
-        // at the exact spec-coverage gaps.
-        const detail = failures
-          .map((c) => `  - ${c.id}: ${c.errorMessage ?? '(no message)'}`)
-          .join('\n');
-        throw new Error(
-          `${failures.length}/${checks.length} checks failed:\n${detail}`
-        );
-      }
-    });
-  }
-});
diff --git a/src/scenarios/server/tasks/lifecycle.ts b/src/scenarios/server/tasks/lifecycle.ts
index e337c83..a59adce 100644
--- a/src/scenarios/server/tasks/lifecycle.ts
+++ b/src/scenarios/server/tasks/lifecycle.ts
@@ -17,25 +17,20 @@ import {
   ClientScenario,
   ConformanceCheck,
   ScenarioSpecTag,
-  SpecReference,
   DRAFT_PROTOCOL_VERSION
 } from '../../../types';
 import {
   TASKS_EXTENSION_ID,
+  SEP_2663_REF,
+  SEP_2322_REF,
+  errMsg,
+  failureCheck,
+  skipCheck,
   initRawSession,
   rawRequest,
   waitForTerminal
 } from './helpers';
 
-const SEP_2663_REF: SpecReference = {
-  id: 'SEP-2663',
-  url: 'https://github.com/modelcontextprotocol/specification/pull/2663'
-};
-const SEP_2322_REF: SpecReference = {
-  id: 'SEP-2322',
-  url: 'https://github.com/modelcontextprotocol/specification/pull/2322'
-};
-
 export class TasksLifecycleScenario implements ClientScenario {
   name = 'tasks-lifecycle';
   // 'extension' tags this as off the dated-version timeline (selectable
@@ -90,13 +85,13 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under
 
     let sessionId: string;
     try {
-      sessionId = await initRawSession(serverUrl, {
+      ({ sessionId } = await initRawSession(serverUrl, {
         capabilities: {
           elicitation: {},
           sampling: {},
           extensions: { [TASKS_EXTENSION_ID]: {} }
         }
-      });
+      }));
     } catch (error) {
       checks.push({
         id: 'tasks-session-bootstrap',
@@ -563,42 +558,3 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under
     return checks;
   }
 }
-
-function errMsg(error: unknown): string {
-  return error instanceof Error ? error.message : String(error);
-}
-
-function failureCheck(
-  id: string,
-  name: string,
-  description: string,
-  error: unknown,
-  specReferences: SpecReference[]
-): ConformanceCheck {
-  return {
-    id,
-    name,
-    description,
-    status: 'FAILURE',
-    timestamp: new Date().toISOString(),
-    errorMessage: errMsg(error),
-    specReferences
-  };
-}
-
-function skipCheck(
-  id: string,
-  name: string,
-  description: string,
-  reason: string
-): ConformanceCheck {
-  return {
-    id,
-    name,
-    description,
-    status: 'SKIPPED',
-    timestamp: new Date().toISOString(),
-    errorMessage: `Skipped: ${reason}`,
-    specReferences: [SEP_2663_REF]
-  };
-}
diff --git a/src/scenarios/server/tasks/mrtr-input.ts b/src/scenarios/server/tasks/mrtr-input.ts
new file mode 100644
index 0000000..49cfacc
--- /dev/null
+++ b/src/scenarios/server/tasks/mrtr-input.ts
@@ -0,0 +1,416 @@
+/**
+ * SEP-2322 / SEP-2663 — MRTR input flow on the tasks surface.
+ *
+ * Tests the input_required → tasks/update → resume loop, including
+ * partial inputResponses fulfillment when a tool fans out multiple
+ * simultaneous input requests.
+ *
+ * Required server fixtures:
+ *   - confirm_delete  — task-supporting, calls TaskElicit once
+ *   - multi_input     — task-supporting, fans out two TaskElicits in
+ *                       parallel so two keys are pending at once
+ */
+
+import {
+  ClientScenario,
+  ConformanceCheck,
+  ScenarioSpecTag,
+  DRAFT_PROTOCOL_VERSION
+} from '../../../types';
+import {
+  TASKS_EXTENSION_ID,
+  SEP_2322_REF,
+  SEP_2663_REF,
+  errMsg,
+  failureCheck,
+  initRawSession,
+  rawRequest,
+  waitForStatus,
+  waitForTerminal
+} from './helpers';
+
+/**
+ * Conformance scenario for the MRTR input loop on the tasks surface: a
+ * parked task exposes `inputRequests` on tasks/get, the client answers via
+ * tasks/update, and the task resumes. All three checks share one session;
+ * each creates its own task and records exactly one ConformanceCheck.
+ */
+export class TasksMRTRInputScenario implements ClientScenario {
+  name = 'tasks-mrtr-input';
+  specVersions: ScenarioSpecTag[] = ['extension', DRAFT_PROTOCOL_VERSION];
+  description = `Test SEP-2322 MRTR input flow on the tasks surface.
+
+**Server Implementation Requirements:**
+
+**Surfacing inputRequests (SEP-2322):**
+- A task waiting on client input MUST report \`status:"input_required"\`
+  on tasks/get and surface a non-empty \`inputRequests\` map keyed by
+  server-minted opaque ids. Each entry carries the underlying request
+  (\`elicitation/create\`, \`sampling/createMessage\`, etc.).
+
+**Resuming via tasks/update (SEP-2663):**
+- The client delivers responses through \`tasks/update\` with
+  \`inputResponses\` keyed to match the server-emitted ids. The server
+  MUST return an empty \`{resultType:"complete"}\` ack on the
+  tasks/update response — the resulting task state is observed via the
+  next tasks/get.
+- After the response is delivered, the task MUST resume execution and
+  proceed to a terminal state (or back to input_required for another
+  round).
+
+**Partial fulfillment (SEP-2663):**
+- A tool that emits multiple simultaneous input requests parks the task
+  with multiple keys in \`inputRequests\`. A client MAY answer them one
+  at a time:
+  - tasks/update with a subset of keys MUST be acked.
+  - The task MUST stay in \`input_required\` until every pending request
+    has been answered.
+  - tasks/get after a partial update MUST surface only the still-pending
+    keys; the answered key MUST be removed.`;
+
+  /**
+   * Runs the three MRTR checks in order against one session.
+   *
+   * @param serverUrl URL of the MCP server under test.
+   * @returns One check per step, or only the bootstrap FAILURE when the
+   *   initialize handshake throws.
+   */
+  async run(serverUrl: string): Promise<ConformanceCheck[]> {
+    const checks: ConformanceCheck[] = [];
+
+    let sessionId: string;
+    try {
+      ({ sessionId } = await initRawSession(serverUrl, {
+        capabilities: {
+          elicitation: {},
+          sampling: {},
+          extensions: { [TASKS_EXTENSION_ID]: {} }
+        }
+      }));
+    } catch (error) {
+      checks.push({
+        id: 'tasks-session-bootstrap',
+        name: 'TasksSessionBootstrap',
+        description:
+          'Initialize handshake declaring io.modelcontextprotocol/tasks extension succeeds',
+        status: 'FAILURE',
+        timestamp: new Date().toISOString(),
+        errorMessage: `Failed to initialize: ${errMsg(error)}`,
+        specReferences: [SEP_2322_REF]
+      });
+      return checks;
+    }
+
+    // Check 1: tasks/get surfaces inputRequests when status=input_required.
+    {
+      const id = 'tasks-mrtr-input-requests-on-tasks-get';
+      const name = 'TasksMRTRInputRequestsOnTasksGet';
+      const description =
+        'tasks/get on an input_required task MUST surface a non-empty inputRequests map';
+      try {
+        const created = await rawRequest(
+          serverUrl,
+          'tools/call',
+          {
+            name: 'confirm_delete',
+            arguments: { filename: 'mrtr-input.txt' }
+          },
+          { sessionId }
+        );
+        const taskId = created.taskId;
+        if (!taskId) {
+          // Reported as this check's FAILURE by the catch below.
+          throw new Error('confirm_delete did not create a task');
+        } else {
+          const task = await waitForStatus(
+            serverUrl,
+            sessionId,
+            taskId,
+            'input_required',
+            5_000
+          );
+          const errs: string[] = [];
+          if (task.status !== 'input_required') {
+            errs.push(
+              `expected status:"input_required"; got ${JSON.stringify(task.status)}`
+            );
+          }
+          if (
+            !task.inputRequests ||
+            typeof task.inputRequests !== 'object' ||
+            Array.isArray(task.inputRequests)
+          ) {
+            errs.push('inputRequests MUST be a non-null object (map)');
+          } else {
+            const keys = Object.keys(task.inputRequests);
+            if (keys.length === 0) {
+              errs.push('inputRequests MUST have at least one entry');
+            } else {
+              const firstReq = task.inputRequests[keys[0]];
+              if (!firstReq?.method) {
+                errs.push(
+                  'each inputRequest MUST carry a `method` (e.g., elicitation/create)'
+                );
+              }
+            }
+          }
+          // Cancel so we don't leave the task parked.
+          try {
+            await rawRequest(
+              serverUrl,
+              'tasks/cancel',
+              { taskId },
+              { sessionId }
+            );
+          } catch {
+            /* swallow */
+          }
+          checks.push({
+            id,
+            name,
+            description,
+            status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+            timestamp: new Date().toISOString(),
+            errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+            specReferences: [SEP_2322_REF, SEP_2663_REF]
+          });
+        }
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, [SEP_2322_REF]));
+      }
+    }
+
+    // Check 2: tasks/update delivers inputResponses + resumes the task.
+    {
+      const id = 'tasks-mrtr-tasks-update-resumes';
+      const name = 'TasksMRTRTasksUpdateResumes';
+      const description =
+        'tasks/update with matching inputResponses MUST be acked with {resultType:"complete"} and resume the task to a terminal state';
+      try {
+        const created = await rawRequest(
+          serverUrl,
+          'tools/call',
+          {
+            name: 'confirm_delete',
+            arguments: { filename: 'mrtr-resume.txt' }
+          },
+          { sessionId }
+        );
+        const taskId = created.taskId;
+        if (!taskId) {
+          // Reported as this check's FAILURE by the catch below.
+          throw new Error('confirm_delete did not create a task');
+        } else {
+          const inputTask = await waitForStatus(
+            serverUrl,
+            sessionId,
+            taskId,
+            'input_required',
+            5_000
+          );
+          const errs: string[] = [];
+          const responses: Record<string, any> = {};
+          for (const key of Object.keys(inputTask.inputRequests ?? {})) {
+            responses[key] = {
+              action: 'accept',
+              content: { confirm: true }
+            };
+          }
+          const ack = await rawRequest(
+            serverUrl,
+            'tasks/update',
+            {
+              taskId,
+              inputResponses: responses,
+              requestState: inputTask.requestState
+            },
+            { sessionId }
+          );
+          if (
+            JSON.stringify(ack) !== JSON.stringify({ resultType: 'complete' })
+          ) {
+            errs.push(
+              `tasks/update ack MUST be {resultType:"complete"}; got ${JSON.stringify(ack)}`
+            );
+          }
+          const terminal = await waitForTerminal(serverUrl, sessionId, taskId);
+          if (terminal.status !== 'completed') {
+            errs.push(
+              `task MUST resume to completed after tasks/update; got status ${JSON.stringify(terminal.status)}`
+            );
+          }
+          checks.push({
+            id,
+            name,
+            description,
+            status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+            timestamp: new Date().toISOString(),
+            errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+            specReferences: [SEP_2322_REF, SEP_2663_REF]
+          });
+        }
+      } catch (error) {
+        checks.push(
+          failureCheck(id, name, description, error, [
+            SEP_2322_REF,
+            SEP_2663_REF
+          ])
+        );
+      }
+    }
+
+    // Check 3: partial inputResponses fulfillment leaves the rest pending.
+    {
+      const id = 'tasks-mrtr-partial-fulfillment';
+      const name = 'TasksMRTRPartialFulfillment';
+      const description =
+        'tasks/update with a subset of keys MUST keep the task in input_required with only the unanswered key remaining';
+      try {
+        const created = await rawRequest(
+          serverUrl,
+          'tools/call',
+          { name: 'multi_input', arguments: {} },
+          { sessionId }
+        );
+        const taskId = created.taskId;
+        if (!taskId) {
+          // Reported as this check's FAILURE by the catch below.
+          throw new Error('multi_input did not create a task');
+        } else {
+          // Wait until two keys are pending (the fan-out tool races two
+          // TaskElicits, so we may briefly see one before the second).
+          let inputTask: any;
+          const start = Date.now();
+          while (Date.now() - start < 5_000) {
+            inputTask = await rawRequest(
+              serverUrl,
+              'tasks/get',
+              { taskId },
+              { sessionId }
+            );
+            if (
+              inputTask.status === 'input_required' &&
+              inputTask.inputRequests &&
+              Object.keys(inputTask.inputRequests).length >= 2
+            ) {
+              break;
+            }
+            await new Promise((r) => setTimeout(r, 100));
+          }
+          const errs: string[] = [];
+          if (inputTask.status !== 'input_required') {
+            errs.push(
+              `task with two parallel elicits MUST be input_required; got ${JSON.stringify(inputTask.status)}`
+            );
+          }
+          const keys = Object.keys(inputTask.inputRequests ?? {});
+          if (keys.length < 2) {
+            errs.push(
+              `multi_input MUST surface 2 inputRequests; got ${keys.length}`
+            );
+          } else {
+            const [firstKey, secondKey] = keys;
+
+            // Answer first key only. The requestState from the last
+            // tasks/get is echoed, as the single-elicit resume check does.
+            const firstAck = await rawRequest(
+              serverUrl,
+              'tasks/update',
+              {
+                taskId,
+                inputResponses: {
+                  [firstKey]: {
+                    action: 'accept',
+                    content: { name: 'partial-1', confirm: true }
+                  }
+                },
+                requestState: inputTask.requestState
+              },
+              { sessionId }
+            );
+            if (firstAck.resultType !== 'complete') {
+              errs.push(
+                `partial tasks/update ack MUST carry resultType:"complete"; got ${JSON.stringify(firstAck)}`
+              );
+            }
+
+            // Status MUST still be input_required with only the second
+            // key remaining.
+            const afterFirst = await rawRequest(
+              serverUrl,
+              'tasks/get',
+              { taskId },
+              { sessionId }
+            );
+            if (afterFirst.status !== 'input_required') {
+              errs.push(
+                `task MUST stay input_required while another input is still pending; got ${JSON.stringify(afterFirst.status)}`
+              );
+            }
+            const remaining = Object.keys(afterFirst.inputRequests ?? {});
+            if (!remaining.includes(secondKey)) {
+              errs.push(
+                `unanswered key MUST remain in inputRequests; got ${JSON.stringify(remaining)}`
+              );
+            }
+            if (remaining.includes(firstKey)) {
+              errs.push(
+                `answered key MUST be removed from inputRequests; still saw ${firstKey}`
+              );
+            }
+
+            // Answer second key — task resumes and finishes.
+            const secondAck = await rawRequest(
+              serverUrl,
+              'tasks/update',
+              {
+                taskId,
+                inputResponses: {
+                  [secondKey]: {
+                    action: 'accept',
+                    content: { name: 'partial-2', confirm: true }
+                  }
+                },
+                requestState: afterFirst.requestState ?? inputTask.requestState
+              },
+              { sessionId }
+            );
+            if (secondAck.resultType !== 'complete') {
+              errs.push(
+                `final tasks/update ack MUST carry resultType:"complete"; got ${JSON.stringify(secondAck)}`
+              );
+            }
+            const terminal = await waitForTerminal(
+              serverUrl,
+              sessionId,
+              taskId
+            );
+            if (terminal.status !== 'completed') {
+              errs.push(
+                `task MUST complete after both inputs are satisfied; got ${JSON.stringify(terminal.status)}`
+              );
+            }
+          }
+          checks.push({
+            id,
+            name,
+            description,
+            status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+            timestamp: new Date().toISOString(),
+            errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+            specReferences: [SEP_2322_REF, SEP_2663_REF]
+          });
+        }
+      } catch (error) {
+        checks.push(
+          failureCheck(id, name, description, error, [
+            SEP_2322_REF,
+            SEP_2663_REF
+          ])
+        );
+      }
+    }
+
+    return checks;
+  }
+}
diff --git a/src/scenarios/server/tasks/notifications.ts b/src/scenarios/server/tasks/notifications.ts
new file mode 100644
index 0000000..a3881a2
--- /dev/null
+++ b/src/scenarios/server/tasks/notifications.ts
@@ -0,0 +1,188 @@
+/**
+ * SEP-2663 Tasks Extension — status notifications conformance.
+ *
+ * Status notifications are OPTIONAL. The check pattern is:
+ *   - INFO when no notifications are received (well-formed silence).
+ *   - SUCCESS when notifications arrive and carry the SEP-2663 shape
+ *     (DetailedTask: taskId + status, with inlined result on terminal).
+ *   - FAILURE only if a notification was emitted but is malformed.
+ *
+ * The raw HTTP harness can't open a long-lived GET SSE stream from the
+ * scenario layer easily, so this check observes notifications via the
+ * POST tools/call SSE response stream. That captures the status
+ * transitions emitted while the task is running. This is a best-effort
+ * smoke test — passing servers may still emit additional notifications
+ * on the persistent GET stream that this harness doesn't see.
+ *
+ * Required server fixtures:
+ *   - slow_compute  — task-supporting, sleeps N seconds
+ */
+
+import {
+  ClientScenario,
+  ConformanceCheck,
+  ScenarioSpecTag,
+  DRAFT_PROTOCOL_VERSION
+} from '../../../types';
+import {
+  TASKS_EXTENSION_ID,
+  SEP_2663_REF,
+  errMsg,
+  failureCheck,
+  initRawSession,
+  waitForTerminal
+} from './helpers';
+
+/**
+ * Best-effort shape check for the optional `notifications/tasks/status`
+ * messages, observed only on the SSE response of a single tools/call POST.
+ */
+export class TasksStatusNotificationsScenario implements ClientScenario {
+  name = 'tasks-status-notifications';
+  specVersions: ScenarioSpecTag[] = ['extension', DRAFT_PROTOCOL_VERSION];
+  description = `Test SEP-2663 status notifications (optional).
+
+**Server Implementation Requirements:**
+
+Servers MAY emit \`notifications/tasks/status\` to inform clients of
+task state changes without polling. Notifications are optional — a
+server is conformant whether it sends them or not. When sent, the
+notification params MUST carry:
+
+- \`taskId\`: the task the notification refers to.
+- \`status\`: the new task status.
+- For terminal statuses (\`completed\`/\`failed\`/\`cancelled\`),
+  notifications MAY inline the corresponding \`result\` or \`error\`
+  per the SEP-2663 DetailedTask shape.`;
+
+  /**
+   * @param serverUrl URL of the MCP server under test.
+   * @returns Bootstrap FAILURE, else one INFO/SUCCESS/WARNING/FAILURE check.
+   */
+  async run(serverUrl: string): Promise<ConformanceCheck[]> {
+    const checks: ConformanceCheck[] = [];
+    let sessionId: string;
+    try {
+      ({ sessionId } = await initRawSession(serverUrl, {
+        capabilities: { extensions: { [TASKS_EXTENSION_ID]: {} } }
+      }));
+    } catch (error) {
+      checks.push(
+        failureCheck(
+          'tasks-session-bootstrap',
+          'TasksSessionBootstrap',
+          'Initialize handshake declaring io.modelcontextprotocol/tasks extension succeeds',
+          new Error(`Failed to initialize: ${errMsg(error)}`),
+          [SEP_2663_REF]
+        )
+      );
+      return checks;
+    }
+
+    const id = 'tasks-status-notifications-shape';
+    const name = 'TasksStatusNotificationsShape';
+    const description =
+      'When status notifications are emitted, each MUST carry taskId + status (SEP-2663 DetailedTask)';
+
+    // Issue tools/call with SSE-accepting headers and read every `data:`
+    // payload: the response carries our request id, notifications a method.
+    let taskId: string | undefined;
+    const notifications: any[] = [];
+    try {
+      const resp = await fetch(serverUrl, {
+        method: 'POST',
+        headers: {
+          'Content-Type': 'application/json',
+          Accept: 'text/event-stream, application/json',
+          'Mcp-Session-Id': sessionId
+        },
+        body: JSON.stringify({
+          jsonrpc: '2.0',
+          id: 'notif-test',
+          method: 'tools/call',
+          params: {
+            name: 'slow_compute',
+            arguments: { seconds: 1, label: 'notif' }
+          }
+        })
+      });
+      const ct = resp.headers.get('content-type') || '';
+      if (ct.includes('text/event-stream')) {
+        const text = await resp.text();
+        for (const line of text.split('\n')) {
+          const trimmed = line.trim();
+          if (!trimmed.startsWith('data:')) continue;
+          const payload = trimmed.slice(5).trimStart();
+          if (!payload.startsWith('{')) continue;
+          const parsed = JSON.parse(payload);
+          if (parsed.id === 'notif-test' && parsed.result) {
+            taskId = parsed.result.taskId;
+          } else if (parsed.method === 'notifications/tasks/status') {
+            notifications.push(parsed.params);
+          }
+        }
+      } else {
+        taskId = (await resp.json()).result?.taskId;
+      }
+      // A failed call is not "optional silence"; surface it as FAILURE.
+      if (!taskId) throw new Error('slow_compute did not create a task');
+    } catch (error) {
+      checks.push(failureCheck(id, name, description, error, [SEP_2663_REF]));
+      return checks;
+    }
+
+    // Drain to a terminal state so the server has emitted everything it is
+    // going to; best-effort, so polling errors are deliberately ignored.
+    if (taskId) {
+      try {
+        await waitForTerminal(serverUrl, sessionId, taskId);
+      } catch {
+        /* swallow */
+      }
+    }
+
+    if (notifications.length === 0) {
+      checks.push({
+        id,
+        name,
+        description,
+        status: 'INFO',
+        timestamp: new Date().toISOString(),
+        errorMessage:
+          'No status notifications received on the tools/call POST SSE stream (notifications are optional)',
+        specReferences: [SEP_2663_REF]
+      });
+      return checks;
+    }
+
+    const errs: string[] = [];
+    for (const evt of notifications) {
+      if (!evt?.taskId) errs.push('status notification MUST carry taskId');
+      if (!evt?.status) errs.push('status notification MUST carry status');
+    }
+    const mustFail = errs.length > 0;
+    // Inlining the result on `completed` is only a SHOULD: when it is the
+    // sole finding the check is downgraded to WARNING instead of FAILURE.
+    const terminalForOurs = notifications.find(
+      (n: any) => n?.taskId === taskId && n?.status === 'completed'
+    );
+    if (terminalForOurs && !terminalForOurs.result) {
+      errs.push(
+        'completed status notification SHOULD inline result (DetailedTask shape)'
+      );
+    }
+
+    checks.push({
+      id,
+      name,
+      description,
+      status: mustFail ? 'FAILURE' : errs.length > 0 ? 'WARNING' : 'SUCCESS',
+      timestamp: new Date().toISOString(),
+      errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+      specReferences: [SEP_2663_REF],
+      details: { notificationCount: notifications.length }
+    });
+
+    return checks;
+  }
+}
diff --git a/src/scenarios/server/tasks/request-state.ts b/src/scenarios/server/tasks/request-state.ts
new file mode 100644
index 0000000..8c2b165
--- /dev/null
+++ b/src/scenarios/server/tasks/request-state.ts
@@ -0,0 +1,290 @@
+/**
+ * SEP-2322 / SEP-2663 — requestState conformance.
+ *
+ * Tests the optional opaque session-continuation token:
+ *   - Server MAY include requestState on tasks/get responses.
+ *   - Clients MUST echo it back on subsequent tasks/get / tasks/update /
+ *     tasks/cancel for the same task — server MUST accept the echo.
+ *   - Servers MUST tolerate a stale but still-valid token (one minted
+ *     before a newer one but still within its TTL window).
+ *
+ * If the server does not issue requestState at all (it's optional per
+ * SEP-2322), the dependent checks emit INFO rather than failing — the
+ * spec allows omission.
+ *
+ * Required server fixtures:
+ *   - slow_compute  — task-supporting, sleeps N seconds
+ */
+
+import {
+  ClientScenario,
+  ConformanceCheck,
+  ScenarioSpecTag,
+  DRAFT_PROTOCOL_VERSION
+} from '../../../types';
+import {
+  TASKS_EXTENSION_ID,
+  SEP_2322_REF,
+  SEP_2663_REF,
+  errMsg,
+  failureCheck,
+  initRawSession,
+  rawRequest
+} from './helpers';
+
+export class TasksRequestStateScenario implements ClientScenario {
+  name = 'tasks-request-state';
+  specVersions: ScenarioSpecTag[] = ['extension', DRAFT_PROTOCOL_VERSION];
+  description = `Test SEP-2322 requestState semantics on the tasks surface.
+
+**Server Implementation Requirements:**
+
+**Optional emission (SEP-2322):**
+- A server MAY include a non-empty string \`requestState\` on tasks/get
+  responses to allow stateless deployments to resume the conversation.
+  When present, it MUST be a non-empty string.
+
+**Echo acceptance:**
+- A client that receives a \`requestState\` from tasks/get MUST be able
+  to echo it back on a subsequent \`tasks/get\`/\`tasks/update\`/
+  \`tasks/cancel\` for the same task. The server MUST accept the echo.
+
+**Stale-but-valid tolerance (SEP-2663):**
+- Each tasks/get may mint a new requestState (e.g., for a refreshed
+  TTL). After a fresh tasks/get returns a newer token, echoing the
+  earlier one MUST still succeed as long as the earlier token has not
+  itself expired. (Servers MUST tolerate stale-but-valid tokens
+  gracefully.)`;
+
+  async run(serverUrl: string): Promise<ConformanceCheck[]> {
+    const checks: ConformanceCheck[] = [];
+
+    let sessionId: string;
+    try {
+      ({ sessionId } = await initRawSession(serverUrl, {
+        capabilities: { extensions: { [TASKS_EXTENSION_ID]: {} } }
+      }));
+    } catch (error) {
+      checks.push({
+        id: 'tasks-session-bootstrap',
+        name: 'TasksSessionBootstrap',
+        description:
+          'Initialize handshake declaring io.modelcontextprotocol/tasks extension succeeds',
+        status: 'FAILURE',
+        timestamp: new Date().toISOString(),
+        errorMessage: `Failed to initialize: ${errMsg(error)}`,
+        specReferences: [SEP_2322_REF]
+      });
+      return checks;
+    }
+
+    // Drive a long-running task once and reuse it for every check.
+    let taskId: string | undefined;
+    try {
+      const created = await rawRequest(
+        serverUrl,
+        'tools/call',
+        {
+          name: 'slow_compute',
+          arguments: { seconds: 60, label: 'request-state' }
+        },
+        { sessionId }
+      );
+      taskId = created.taskId;
+    } catch (error) {
+      checks.push(
+        failureCheck(
+          'tasks-request-state-setup',
+          'TasksRequestStateSetup',
+          'Failed to create a long-running task to exercise requestState',
+          error,
+          [SEP_2322_REF]
+        )
+      );
+      return checks;
+    }
+    if (!taskId) {
+      checks.push({
+        id: 'tasks-request-state-setup',
+        name: 'TasksRequestStateSetup',
+        description:
+          'slow_compute did not produce a task; cannot exercise requestState',
+        status: 'FAILURE',
+        timestamp: new Date().toISOString(),
+        errorMessage: 'no taskId in CreateTaskResult',
+        specReferences: [SEP_2322_REF]
+      });
+      return checks;
+    }
+
+    let firstToken: string | undefined;
+
+    // Check 1: requestState is optional but, if present, a non-empty string.
+    {
+      const id = 'tasks-request-state-shape';
+      const name = 'TasksRequestStateShape';
+      const description =
+        'tasks/get may include requestState; when present it MUST be a non-empty string';
+      try {
+        const task = await rawRequest(
+          serverUrl,
+          'tasks/get',
+          { taskId },
+          { sessionId }
+        );
+        const errs: string[] = [];
+        if (task.requestState !== undefined) {
+          if (typeof task.requestState !== 'string') {
+            errs.push(
+              `requestState MUST be a string when present; got ${typeof task.requestState}`
+            );
+          } else if (task.requestState.length === 0) {
+            errs.push(
+              'requestState MUST be non-empty when present (omit the field instead of emitting "")'
+            );
+          } else {
+            firstToken = task.requestState;
+          }
+        }
+        // Well-formed token → SUCCESS; omitted (allowed) → INFO; else FAILURE.
+        const status: 'SUCCESS' | 'INFO' | 'FAILURE' =
+          errs.length === 0 ? (firstToken ? 'SUCCESS' : 'INFO') : 'FAILURE';
+        checks.push({
+          id,
+          name,
+          description,
+          status,
+          timestamp: new Date().toISOString(),
+          errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+          specReferences: [SEP_2322_REF],
+          details: {
+            emitted: Boolean(firstToken),
+            tokenLength: firstToken?.length
+          }
+        });
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, [SEP_2322_REF]));
+      }
+    }
+
+    // Check 2: client echoes requestState; server accepts the echo.
+    {
+      const id = 'tasks-request-state-echo';
+      const name = 'TasksRequestStateEcho';
+      const description =
+        'Server accepts a tasks/get with the previously-emitted requestState echoed back';
+      if (!firstToken) {
+        checks.push({
+          id,
+          name,
+          description,
+          status: 'INFO',
+          timestamp: new Date().toISOString(),
+          errorMessage: 'Server did not emit requestState; nothing to echo',
+          specReferences: [SEP_2322_REF]
+        });
+      } else {
+        try {
+          const echoed = await rawRequest(
+            serverUrl,
+            'tasks/get',
+            { taskId, requestState: firstToken },
+            { sessionId }
+          );
+          const errs: string[] = [];
+          if (echoed.taskId !== taskId) {
+            errs.push(
+              `tasks/get with echoed requestState MUST resolve the same taskId; got ${echoed.taskId}`
+            );
+          }
+          checks.push({
+            id,
+            name,
+            description,
+            status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+            timestamp: new Date().toISOString(),
+            errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+            specReferences: [SEP_2322_REF]
+          });
+        } catch (error) {
+          checks.push(
+            failureCheck(id, name, description, error, [SEP_2322_REF])
+          );
+        }
+      }
+    }
+
+    // Check 3: stale-but-valid tolerance.
+    {
+      const id = 'tasks-request-state-stale-tolerance';
+      const name = 'TasksRequestStateStaleTolerance';
+      const description =
+        'After a newer requestState is minted, the earlier (stale-but-still-valid) token MUST still be accepted';
+      if (!firstToken) {
+        checks.push({
+          id,
+          name,
+          description,
+          status: 'INFO',
+          timestamp: new Date().toISOString(),
+          errorMessage:
+            'Server did not emit requestState; stale tolerance is moot',
+          specReferences: [SEP_2663_REF, SEP_2322_REF]
+        });
+      } else {
+        try {
+          // Give the server a chance to mint a newer token via another
+          // tasks/get (signed/expiring tokens likely rotate; plaintext
+          // ones round-trip unchanged), leaving firstToken stale-but-valid.
+          await rawRequest(
+            serverUrl,
+            'tasks/get',
+            { taskId, requestState: firstToken },
+            { sessionId }
+          );
+          // Now re-echo the OLDER token; server MUST accept.
+          const stale = await rawRequest(
+            serverUrl,
+            'tasks/get',
+            { taskId, requestState: firstToken },
+            { sessionId }
+          );
+          const errs: string[] = [];
+          if (stale.taskId !== taskId) {
+            errs.push(
+              `stale-but-valid requestState MUST resolve the same taskId; got ${stale.taskId}`
+            );
+          }
+          checks.push({
+            id,
+            name,
+            description,
+            status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+            timestamp: new Date().toISOString(),
+            errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+            specReferences: [SEP_2663_REF, SEP_2322_REF]
+          });
+        } catch (error) {
+          checks.push(
+            failureCheck(id, name, description, error, [
+              SEP_2663_REF,
+              SEP_2322_REF
+            ])
+          );
+        }
+      }
+    }
+
+    // Best-effort cleanup of the 60s task; echo requestState when one was issued.
+    try {
+      const cancelParams = firstToken
+        ? { taskId, requestState: firstToken }
+        : { taskId };
+      await rawRequest(serverUrl, 'tasks/cancel', cancelParams, { sessionId });
+    } catch {
+      /* swallow */
+    }
+
+    return checks;
+  }
+}
diff --git a/src/scenarios/server/tasks/wire-fields.ts b/src/scenarios/server/tasks/wire-fields.ts
new file mode 100644
index 0000000..3fb377d
--- /dev/null
+++ b/src/scenarios/server/tasks/wire-fields.ts
@@ -0,0 +1,250 @@
+/**
+ * SEP-2663 Tasks Extension — wire-format / TTL conformance.
+ *
+ * Tests the renamed wire fields (ttlSeconds, pollIntervalMilliseconds),
+ * the no-early-TTL-expiry rule, and confirms the v1 `related-task` _meta
+ * key is absent on tasks/get's inlined result (taskId is at root level
+ * already, so the metadata is redundant).
+ *
+ * Required server fixtures:
+ *   - slow_compute — task-supporting, sleeps N seconds
+ */
+
+import {
+  ClientScenario,
+  ConformanceCheck,
+  ScenarioSpecTag,
+  DRAFT_PROTOCOL_VERSION
+} from '../../../types';
+import {
+  TASKS_EXTENSION_ID,
+  SEP_2663_REF,
+  errMsg,
+  failureCheck,
+  skipCheck,
+  initRawSession,
+  rawRequest,
+  waitForTerminal
+} from './helpers';
+
+export class TasksWireFieldsScenario implements ClientScenario {
+  name = 'tasks-wire-fields';
+  specVersions: ScenarioSpecTag[] = ['extension', DRAFT_PROTOCOL_VERSION];
+  description = `Test SEP-2663 wire-field renames + TTL semantics.
+
+**Server Implementation Requirements:**
+
+**Wire-field renames (SEP-2663):**
+- The TTL field is named \`ttlSeconds\` on the wire (the v1 \`ttl\`
+  key is in milliseconds-by-convention; SEP-2663 puts the unit in the
+  field name).
+- The poll-interval field is named \`pollIntervalMilliseconds\` (v1
+  used \`pollInterval\`).
+- A \`CreateTaskResult\` MUST NOT carry the legacy \`ttl\` or
+  \`pollInterval\` keys — clients keying off v1 names on a v2 server
+  would silently miss the TTL guidance.
+
+**TTL non-expiry (SEP-2663):**
+- A task MUST remain accessible via \`tasks/get\` for the duration of
+  its \`ttlSeconds\`; a server MUST NOT expire it earlier.
+
+**Inlined-result \`_meta\` (SEP-2663):**
+- The v1 \`io.modelcontextprotocol/related-task\` \`_meta\` key MUST NOT
+  appear on tasks/get's inlined \`result\` — the \`taskId\` is already
+  at the root level of the \`tasks/get\` response, so the metadata is
+  redundant.`;
+
+  async run(serverUrl: string): Promise<ConformanceCheck[]> {
+    const checks: ConformanceCheck[] = [];
+
+    let sessionId: string;
+    try {
+      ({ sessionId } = await initRawSession(serverUrl, {
+        capabilities: {
+          extensions: { [TASKS_EXTENSION_ID]: {} }
+        }
+      }));
+    } catch (error) {
+      checks.push({
+        id: 'tasks-session-bootstrap',
+        name: 'TasksSessionBootstrap',
+        description:
+          'Initialize handshake declaring io.modelcontextprotocol/tasks extension succeeds',
+        status: 'FAILURE',
+        timestamp: new Date().toISOString(),
+        errorMessage: `Failed to initialize: ${errMsg(error)}`,
+        specReferences: [SEP_2663_REF]
+      });
+      return checks;
+    }
+
+    // Check 1: ttlSeconds + pollIntervalMilliseconds wire shape.
+    let createdTaskId: string | undefined;
+    {
+      const id = 'tasks-wire-field-renames';
+      const name = 'TasksWireFieldRenames';
+      const description =
+        'CreateTaskResult uses ttlSeconds + pollIntervalMilliseconds; legacy ttl / pollInterval keys absent';
+      try {
+        const result = await rawRequest(
+          serverUrl,
+          'tools/call',
+          {
+            name: 'slow_compute',
+            arguments: { seconds: 1, label: 'wire-fields' }
+          },
+          { sessionId }
+        );
+        createdTaskId = result.taskId;
+        const errs: string[] = [];
+        // ttlSeconds — required, positive (or null = unlimited; treat
+        // either as well-formed). Legacy `ttl` MUST be absent.
+        if (!('ttlSeconds' in result)) {
+          errs.push(
+            'CreateTaskResult MUST carry ttlSeconds (renamed from v1 `ttl`)'
+          );
+        } else if (
+          result.ttlSeconds !== null &&
+          (typeof result.ttlSeconds !== 'number' || result.ttlSeconds <= 0)
+        ) {
+          errs.push(
+            `ttlSeconds MUST be null or a positive number; got ${JSON.stringify(result.ttlSeconds)}`
+          );
+        }
+        if ('ttl' in result) {
+          errs.push(
+            'CreateTaskResult MUST NOT carry the v1 `ttl` key (use ttlSeconds)'
+          );
+        }
+        // pollIntervalMilliseconds — optional, positive when present.
+        // Legacy `pollInterval` MUST be absent.
+        if (
+          result.pollIntervalMilliseconds !== undefined &&
+          (typeof result.pollIntervalMilliseconds !== 'number' ||
+            result.pollIntervalMilliseconds <= 0)
+        ) {
+          errs.push(
+            `pollIntervalMilliseconds MUST be a positive number when present; got ${JSON.stringify(result.pollIntervalMilliseconds)}`
+          );
+        }
+        if ('pollInterval' in result) {
+          errs.push(
+            'CreateTaskResult MUST NOT carry the v1 `pollInterval` key (use pollIntervalMilliseconds)'
+          );
+        }
+        checks.push({
+          id,
+          name,
+          description,
+          status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+          timestamp: new Date().toISOString(),
+          errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+          specReferences: [SEP_2663_REF],
+          details: {
+            ttlSeconds: result.ttlSeconds,
+            pollIntervalMilliseconds: result.pollIntervalMilliseconds,
+            hasLegacyTtl: 'ttl' in result,
+            hasLegacyPollInterval: 'pollInterval' in result
+          }
+        });
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, [SEP_2663_REF]));
+      }
+    }
+
+    // Check 2: task accessible before TTL elapses.
+    {
+      const id = 'tasks-no-early-ttl-expiry';
+      const name = 'TasksNoEarlyTtlExpiry';
+      const description =
+        'Task remains accessible via tasks/get for the duration of its ttlSeconds';
+      if (!createdTaskId) {
+        checks.push(skipCheck(id, name, description, 'no task created'));
+      } else {
+        try {
+          await waitForTerminal(serverUrl, sessionId, createdTaskId);
+          // Sanity probe well before TTL (the unit is seconds; servers
+          // typically pick order-of-minutes defaults).
+          await new Promise((r) => setTimeout(r, 500));
+          const after = await rawRequest(
+            serverUrl,
+            'tasks/get',
+            { taskId: createdTaskId },
+            { sessionId }
+          );
+          const errs: string[] = [];
+          if (after.taskId !== createdTaskId) {
+            errs.push(
+              `task MUST still be accessible before TTL; got taskId=${after.taskId}`
+            );
+          }
+          checks.push({
+            id,
+            name,
+            description,
+            status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+            timestamp: new Date().toISOString(),
+            errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+            specReferences: [SEP_2663_REF]
+          });
+        } catch (error) {
+          checks.push(
+            failureCheck(id, name, description, error, [SEP_2663_REF])
+          );
+        }
+      }
+    }
+
+    // Check 3: no related-task _meta on inlined result.
+    {
+      const id = 'tasks-no-related-task-meta-on-inlined-result';
+      const name = 'TasksNoRelatedTaskMetaOnInlinedResult';
+      const description =
+        'tasks/get inlined result MUST NOT include the v1 io.modelcontextprotocol/related-task _meta key (taskId is at the root)';
+      try {
+        const created = await rawRequest(
+          serverUrl,
+          'tools/call',
+          {
+            name: 'slow_compute',
+            arguments: { seconds: 1, label: 'wire-fields-meta' }
+          },
+          { sessionId }
+        );
+        const taskId = created.taskId;
+        if (!taskId) {
+          checks.push(skipCheck(id, name, description, 'no task created'));
+        } else {
+          const terminal = await waitForTerminal(serverUrl, sessionId, taskId);
+          const errs: string[] = [];
+          const meta = terminal.result?._meta;
+          // Key presence, not truthiness: a null/false value still violates MUST NOT.
+          const relatedKey = 'io.modelcontextprotocol/related-task';
+          const hasRelatedTask = Object.keys(meta ?? {}).includes(relatedKey);
+          if (hasRelatedTask) {
+            errs.push(
+              'related-task _meta MUST NOT appear on tasks/get inlined result'
+            );
+          }
+          checks.push({
+            id,
+            name,
+            description,
+            status: errs.length === 0 ? 'SUCCESS' : 'FAILURE',
+            timestamp: new Date().toISOString(),
+            errorMessage: errs.length > 0 ? errs.join('; ') : undefined,
+            specReferences: [SEP_2663_REF],
+            details: {
+              hasMeta: Boolean(meta),
+              hasRelatedTask
+            }
+          });
+        }
+      } catch (error) {
+        checks.push(failureCheck(id, name, description, error, [SEP_2663_REF]));
+      }
+    }
+
+    return checks;
+  }
+}

From 10bf8370247e373004510aec1a0a7f278be7ee3c Mon Sep 17 00:00:00 2001
From: Sri Panyam <sri.panyam@gmail.com>
Date: Tue, 5 May 2026 15:09:20 -0700
Subject: [PATCH 4/7] docs(tasks,mrtr): scenario READMEs for upstream porting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Restructured around ClientScenario classes (one row per class with
check-list under it) rather than per-numbered-test slugs. Documents
fixture requirements, env vars, open spec questions, and the
wire-format diff for each suite.

Per AGENTS.md, severity follows spec keyword (MUST/MUST NOT → FAILURE,
SHOULD/SHOULD NOT → WARNING). The READMEs explain why some checks emit
INFO rather than FAILURE (optional emission paths per SEP-2322).
---
 src/scenarios/server/mrtr/README.md  | 111 +++++++++++++++
 src/scenarios/server/tasks/README.md | 197 +++++++++++++++++++++++++++
 2 files changed, 308 insertions(+)
 create mode 100644 src/scenarios/server/mrtr/README.md
 create mode 100644 src/scenarios/server/tasks/README.md

diff --git a/src/scenarios/server/mrtr/README.md b/src/scenarios/server/mrtr/README.md
new file mode 100644
index 0000000..8e1bf53
--- /dev/null
+++ b/src/scenarios/server/mrtr/README.md
@@ -0,0 +1,111 @@
+# SEP-2322 MRTR — Server Conformance
+
+Tests any MCP server that implements the SEP-2322 ephemeral
+Multi Round-Trip Request flow on `tools/call` — the
+`IncompleteResult` → retry-with-`inputResponses` → `ToolResult`
+contract that lets a tool gather elicitation / sampling / roots input
+without creating a task envelope.
+
+## Specs covered
+
+| SEP      | What it adds                                                                                                     | Where it shows up                              |
+| -------- | ---------------------------------------------------------------------------------------------------------------- | ---------------------------------------------- |
+| SEP-2322 | Ephemeral MRTR — `resultType` discriminator, `inputRequests` / `inputResponses` keyed maps, `requestState` token | every check                                    |
+| SEP-2663 | MRTR → Tasks composition (final round returns `CreateTaskResult`)                                                | `mrtr-tasks-composition` (SKIPPED — see below) |
+
+## ClientScenario classes
+
+### `mrtr-ephemeral-flow` (`ephemeral-flow.ts`)
+
+A single scenario covers the full ephemeral MRTR contract, per the
+AGENTS.md "fewer scenarios, more checks" rule. A server that
+implemented elicitation round-trips but not sampling round-trips would
+be incoherent, so all of the checks are bundled into one scenario.
+
+| Check                                    | What it tests                                                                                                                   |
+| ---------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- |
+| `mrtr-basic-elicitation-round-trip`      | Round 1 returns `IncompleteResult` with `elicitation/create`; round 2 completes with the answer reflected                       |
+| `mrtr-sampling-round-trip`               | Same flow with `sampling/createMessage`                                                                                         |
+| `mrtr-roots-list-round-trip`             | Same flow with `roots/list`                                                                                                     |
+| `mrtr-request-state-round-trip`          | When server emits `requestState`, it's a non-empty string and the server validates the echo                                     |
+| `mrtr-multiple-input-requests-one-round` | A single `IncompleteResult` MAY carry inputRequests for `elicitation/create` + `sampling/createMessage` + `roots/list` together |
+| `mrtr-multi-round-flow`                  | A handler MAY take 2+ rounds; each round mints a fresh `requestState`; final result reflects answers from every round           |
+| `mrtr-wrong-input-key-rerequests`        | When client sends a wrong `inputResponses` key, server SHOULD re-request via `IncompleteResult` rather than erroring            |
+| `mrtr-tasks-composition`                 | **SKIPPED** — see "Open issues" below                                                                                           |
+
+## Required server fixtures
+
+The fixture server MUST register these tools:
+
+| Tool                                     | Behavior                                                                                    |
+| ---------------------------------------- | ------------------------------------------------------------------------------------------- |
+| `test_tool_with_elicitation`             | One `elicitation/create` round, completes with answer reflected                             |
+| `test_incomplete_result_sampling`        | One `sampling/createMessage` round                                                          |
+| `test_incomplete_result_list_roots`      | One `roots/list` round                                                                      |
+| `test_incomplete_result_request_state`   | Exercises `requestState` validation; final result includes `state-ok` to confirm validation |
+| `test_incomplete_result_multiple_inputs` | Emits 3+ inputRequests of different methods in one round                                    |
+| `test_incomplete_result_multi_round`     | Drives 2+ MRTR rounds, final result references every answer                                 |
+| `test_incomplete_result_elicitation`     | Emits inputRequest for `user_name`; server re-requests on wrong-key responses               |
+
+The fixture can be implemented in any language; one example reference
+implementation lives at
+[`panyam/mcpkit/examples/mrtr`](https://github.com/panyam/mcpkit/tree/main/examples/mrtr).
+
+## Running
+
+```bash
+# Against an already-running server
+MRTR_SERVER_URL=http://localhost:8080/mcp \
+  npx vitest run src/scenarios/server/mrtr/all-scenarios.test.ts
+
+# Auto-spawn a fixture in beforeAll
+MRTR_SERVER_URL=http://localhost:18093/mcp \
+MRTR_SERVER_CMD="/path/to/mrtr-server --port 18093" \
+  npx vitest run src/scenarios/server/mrtr/all-scenarios.test.ts
+```
+
+## Open issues
+
+### `mrtr-tasks-composition` deferred
+
+SEP-2663 commit `451f5e1` (Apr 30) made the MRTR → Tasks composition
+flow normative: a `tools/call` MAY exchange `IncompleteResult` rounds
+to gather input, then return `CreateTaskResult` to go async on a
+subsequent round. Two blockers prevent enabling the check today:
+
+1. **Spec watch — discriminator value.** SEP-2322 (MRTR base) and
+   SEP-2663 (Tasks Extension) currently disagree on the wire value for
+   the "needs more input" discriminator: SEP-2322's draft uses
+   `"input_required"`, SEP-2663's draft uses `"incomplete"`. Awaiting
+   alignment between the SEP authors. The current literal lives in
+   `MRTR_INCOMPLETE_RESULT_TYPE` (helpers.ts) so it's a one-line flip
+   when the spec converges.
+
+2. **Reference-impl gap.** The natural server-side implementation
+   pattern for tasks (mint task up-front, run handler in a goroutine /
+   async task) means the handler's `IncompleteResult` signal isn't
+   visible to the middleware in time — by the time the handler returns
+   `IsIncomplete`, the `CreateTaskResult` is already on the wire. SDKs
+   in any language need an inverted middleware pattern that runs the
+   first round synchronously and only spins up the task once the
+   handler signals async-promotion.
+   ([panyam/mcpkit issue 347](https://github.com/panyam/mcpkit/issues/347)
+   tracks this for one example impl; SDKs in any language hit the
+   same architectural choice.)
+
+The check is registered with `status: 'SKIPPED'` so it's discoverable
+but doesn't fail conformance runs. When both blockers resolve, remove
+the SKIPPED short-circuit in `ephemeral-flow.ts` Check 8.
+
+## Design notes
+
+### Why the MRTR scenarios share helpers with `tasks/`
+
+`MRTR_INCOMPLETE_RESULT_TYPE`, the result-type predicates
+(`isIncompleteResult`, `isCompleteResult`), and the elicitation/sampling/
+roots mocks live in `mrtr/helpers.ts`. The raw-fetch primitives
+(`initRawSession`, `rawRequest`) are imported from the sibling
+`../tasks/helpers` because both scenario sets share the same wire-shape
+problem (SDK Zod schemas strip extension fields). When the upstream
+SDK gains schemas for SEP-2322 / SEP-2663 shapes, those import paths
+collapse back into the SDK.
diff --git a/src/scenarios/server/tasks/README.md b/src/scenarios/server/tasks/README.md
new file mode 100644
index 0000000..f145279
--- /dev/null
+++ b/src/scenarios/server/tasks/README.md
@@ -0,0 +1,197 @@
+# SEP-2663 Tasks Extension — Server Conformance
+
+Tests any MCP server that implements the `io.modelcontextprotocol/tasks`
+extension (SEP-2663) plus the SEP-2322 base types it builds on, the
+SEP-2575 per-request capability override, and the SEP-2243 routing
+headers.
+
+The scenarios assert what the spec text says — not what any particular
+implementation does. When the SDK schemas in
+`@modelcontextprotocol/sdk/types.js` lag the spec, scenarios bypass
+the SDK and use raw `fetch` so the SEP-2663 wire fields (`resultType`,
+`taskId`, `inputRequests`, `requestState`, inlined `result`/`error`)
+aren't stripped.
+
+## Specs covered
+
+| SEP      | What it adds                                                                                                                                                                                                                                                                                                          | Where it shows up                   |
+| -------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------- |
+| SEP-2663 | Tasks Extension — `io.modelcontextprotocol/tasks` capability, flat `CreateTaskResult` (`Result & Task`), `DetailedTask` on `tasks/get` (with inlined result/error/inputRequests/requestState), `tasks/update` for MRTR resume, ack-only `tasks/cancel`, wire-field renames (`ttlSeconds`, `pollIntervalMilliseconds`) | every scenario                      |
+| SEP-2322 | MRTR base types — `inputRequests`/`inputResponses` keyed maps, `requestState`, `resultType` discriminator (`"task"`/`"complete"`/`"incomplete"`)                                                                                                                                                                      | request-state, mrtr-input, dispatch |
+| SEP-2575 | Per-request capability override via `_meta.io.modelcontextprotocol/clientCapabilities`                                                                                                                                                                                                                                | capability                          |
+| SEP-2243 | Server tolerates `Mcp-Method` / `Mcp-Name` request headers as informational routing metadata; body is authoritative                                                                                                                                                                                                   | headers                             |
+
+## ClientScenario classes
+
+Per the AGENTS.md "fewer scenarios, more checks" rule, related checks
+are bundled into one scenario class with multiple `ConformanceCheck`
+records. Each row below is one class.
+
+### `tasks-lifecycle` (`lifecycle.ts`)
+
+Sync vs async dispatch, DetailedTask shape on tasks/get, tool errors
+vs protocol errors, cancellation semantics.
+
+| Check                                | What it tests                                                                                                                                    |
+| ------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `tasks-sync-tool-call`               | Sync tool returns `resultType:"complete"`; no top-level `taskId`                                                                                 |
+| `tasks-server-task-creation`         | Task-supporting tool returns flat `CreateTaskResult` (no nested `task` wrapper); MUST NOT carry `result`/`error`/`inputRequests` on the envelope |
+| `tasks-get-during-working`           | `tasks/get` on an active task returns status + metadata                                                                                          |
+| `tasks-get-terminal-inlined-result`  | Completed task `tasks/get` inlines `result.content[]` (no separate `tasks/result`)                                                               |
+| `tasks-tool-error-completed-iserror` | Tool execution errors → `status:"completed"` + `result.isError:true` (NOT `failed`)                                                              |
+| `tasks-protocol-error-failed-shape`  | Protocol errors → `status:"failed"` with inlined `error{code,message}`; no `result`                                                              |
+| `tasks-cancel-empty-ack`             | `tasks/cancel` returns `{resultType:"complete"}`; status settles to cancelled                                                                    |
+| `tasks-cancel-terminal-rejected`     | `tasks/cancel` on a terminal task returns `-32602` (clarified in spec commit `d963ad0`)                                                          |
+
+### `tasks-capability-negotiation` (`capability.ts`)
+
+| Check                                     | What it tests                                                                                                                              |
+| ----------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------ |
+| `tasks-extension-advertised`              | Server advertises `io.modelcontextprotocol/tasks` under `capabilities.extensions`; v1 `capabilities.tasks` slot stays absent               |
+| `tasks-methods-gated-without-extension`   | `tasks/get`, `tasks/update`, `tasks/cancel` return `-32601` for sessions that didn't negotiate the extension                               |
+| `tasks-tools-call-without-extension-sync` | `tools/call` from a non-negotiated session falls through to sync (no `CreateTaskResult`)                                                   |
+| `tasks-per-request-meta-opt-in`           | SEP-2575 — per-request `_meta.io.modelcontextprotocol/clientCapabilities` produces `CreateTaskResult` even without session-level extension |
+
+### `tasks-wire-fields` (`wire-fields.ts`)
+
+| Check                                          | What it tests                                                                                |
+| ---------------------------------------------- | -------------------------------------------------------------------------------------------- |
+| `tasks-wire-field-renames`                     | `ttlSeconds` + `pollIntervalMilliseconds` present; legacy `ttl` / `pollInterval` keys absent |
+| `tasks-no-early-ttl-expiry`                    | Task remains accessible via `tasks/get` for the duration of its `ttlSeconds`                 |
+| `tasks-no-related-task-meta-on-inlined-result` | v1 `io.modelcontextprotocol/related-task` `_meta` key absent on tasks/get's inlined `result` |
+
+### `tasks-request-state` (`request-state.ts`)
+
+| Check                                 | What it tests                                                                                                     |
+| ------------------------------------- | ----------------------------------------------------------------------------------------------------------------- |
+| `tasks-request-state-shape`           | When emitted, `requestState` is a non-empty string (`INFO` if server omits it; emission is optional per SEP-2322) |
+| `tasks-request-state-echo`            | Server accepts `tasks/get` with the previously-emitted `requestState` echoed back                                 |
+| `tasks-request-state-stale-tolerance` | Earlier (stale-but-still-valid) `requestState` MUST still be accepted after a newer one is minted                 |
+
+### `tasks-mrtr-input` (`mrtr-input.ts`)
+
+| Check                                    | What it tests                                                                                                   |
+| ---------------------------------------- | --------------------------------------------------------------------------------------------------------------- |
+| `tasks-mrtr-input-requests-on-tasks-get` | `tasks/get` on `input_required` task surfaces non-empty `inputRequests` map                                     |
+| `tasks-mrtr-tasks-update-resumes`        | `tasks/update` with matching `inputResponses` is acked with `{resultType:"complete"}`; task resumes to terminal |
+| `tasks-mrtr-partial-fulfillment`         | A subset-of-keys `tasks/update` keeps the task in `input_required` with only the unanswered key remaining       |
+
+### `tasks-request-headers` (`headers.ts`)
+
+| Check                                                 | What it tests                                                                                    |
+| ----------------------------------------------------- | ------------------------------------------------------------------------------------------------ |
+| `tasks-headers-tolerate-mcp-method-on-tools-call`     | Server tolerates `Mcp-Method` request header on `tools/call` (sync dispatch unaffected)          |
+| `tasks-headers-tolerate-routing-headers-on-tasks-get` | Server tolerates `Mcp-Method` + `Mcp-Name` request headers on `tasks/get` (body taskId resolves) |
+| `tasks-headers-body-method-authoritative`             | When `Mcp-Method` header disagrees with body, server MUST dispatch on body method                |
+
+> SEP-2243 defines these as **request** headers (client → server) used by HTTP infrastructure for routing. Whether the server _also_ echoes them on responses for downstream observability is implementation-defined and out of scope here.
+
+### `tasks-dispatch-and-envelope` (`dispatch.ts`)
+
+| Check                                              | What it tests                                                                                            |
+| -------------------------------------------------- | -------------------------------------------------------------------------------------------------------- |
+| `tasks-removed-tasks-result`                       | `tasks/result` removed in v2 → `-32601`                                                                  |
+| `tasks-removed-tasks-list`                         | `tasks/list` removed in v2 → `-32601`                                                                    |
+| `tasks-server-directed-creation-no-hint`           | `tools/call` without client `task` hint still produces `CreateTaskResult`                                |
+| `tasks-legacy-task-param-ignored`                  | Legacy v1 `task` param tolerated AND ignored on a sync tool (no error, no promotion)                     |
+| `tasks-immediate-result-shortcut`                  | Fast operation MAY skip task creation and return a sync `ToolResult`                                     |
+| `tasks-result-type-complete-on-non-task-responses` | Sync `tools/call`, `tasks/get`, `tasks/update` ack, `tasks/cancel` ack all carry `resultType:"complete"` |
+| `tasks-strong-consistency-immediate-get`           | `tasks/get` immediately after `CreateTaskResult` MUST resolve (no -32602)                                |
+| `tasks-get-unknown-task-id-rejected`               | `tasks/get` with unknown taskId returns `-32602`                                                         |
+
+### `tasks-status-notifications` (`notifications.ts`)
+
+| Check                              | What it tests                                                                                                                                           |
+| ---------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `tasks-status-notifications-shape` | Optional check — when sent, each `notifications/tasks/status` carries `taskId` + `status`; terminal notifications SHOULD inline `result` (DetailedTask) |
+
+> Notifications are optional per SEP-2663. The check emits `INFO` (not `FAILURE`) when no notifications are received, so a server that doesn't implement the optional path stays conformant.
+
+## Required server fixtures
+
+The fixture server MUST register these tools:
+
+| Tool                 | Behavior                                                                                |
+| -------------------- | --------------------------------------------------------------------------------------- |
+| `greet`              | Sync — returns `Hello, {name}!`                                                         |
+| `slow_compute`       | Async — `seconds`-second sleep, returns result; `seconds:0` for immediate path          |
+| `failing_job`        | Async — always returns tool error after ~1s                                             |
+| `protocol_error_job` | Async — panics, surfaces as protocol error                                              |
+| `confirm_delete`     | Async — calls `TaskElicit` (single inputRequest)                                        |
+| `multi_input`        | Async — fans out two `TaskElicit` calls in parallel (used by partial-fulfillment check) |
+
+The fixture can be implemented in any language; one example reference
+implementation lives at
+[`panyam/mcpkit/examples/tasks-v2`](https://github.com/panyam/mcpkit/tree/main/examples/tasks-v2).
+
+## Running
+
+The runner is brand-neutral and language-agnostic — it optionally spawns
+`TASKS_SERVER_CMD` and waits for the URL's host/port to accept TCP connections.
+
+### Against an already-running server
+
+```bash
+TASKS_SERVER_URL=http://localhost:8080/mcp \
+  npx vitest run src/scenarios/server/tasks/all-scenarios.test.ts
+```
+
+### Auto-spawn a fixture in `beforeAll`
+
+```bash
+TASKS_SERVER_URL=http://localhost:18092/mcp \
+TASKS_SERVER_CMD="/path/to/tasks-server --port 18092" \
+  npx vitest run src/scenarios/server/tasks/all-scenarios.test.ts
+```
+
+If `TASKS_SERVER_URL` is unset, the suite is `describe.skip`'d so CI
+runs against the upstream `everything-server` stay green until that
+fixture grows SEP-2663 support.
+
+## Open spec questions
+
+Where the spec is silent or ambiguous, this suite picks the louder /
+safer option (typically `-32602` over silent ack) so a misbehaving
+server fails loudly rather than appearing well-formed. Today:
+
+1. **Invalid `requestState`** — silent ack vs `-32602`. Suite asserts `-32602` (a server that silently accepts a forged token is a security hazard).
+2. **SEP-2575 per-request capabilities envelope shape** — covered by `tasks-per-request-meta-opt-in`; the suite asserts only the observable behavior (`CreateTaskResult` produced) so the inner shape can evolve without churn.
+3. **`tasks/update` / `tasks/cancel` for unknown taskId** — silent ack vs `-32602`. The suite asserts `-32602` for `tasks/get` with an unknown taskId and for `tasks/cancel` on a terminal task; for `tasks/update` / `tasks/cancel` with an unknown taskId the upstream wording is too soft to assert against here.
+
+## Wire-format diff vs MCP Tasks v1 (spec 2025-11-25)
+
+| Aspect                     | v1                             | SEP-2663                                                                                       |
+| -------------------------- | ------------------------------ | ---------------------------------------------------------------------------------------------- |
+| Capability slot            | `capabilities.tasks`           | `capabilities.extensions["io.modelcontextprotocol/tasks"]`                                     |
+| Client opt-in              | (none)                         | MUST declare extension at session OR per-request (SEP-2575)                                    |
+| Task creation              | Client sends `task` hint param | Server decides unilaterally                                                                    |
+| `resultType` discriminator | absent                         | `"task"` (CreateTaskResult) / `"complete"` (everything else) / `"incomplete"` (MRTR ephemeral) |
+| `CreateTaskResult` shape   | `{task: {...}}` (nested)       | flat: `{resultType, taskId, status, ttlSeconds, ...}` (no nested wrapper)                      |
+| `tasks/get` response       | flat `TaskInfo` only           | `DetailedTask` with inlined `result`/`error`/`inputRequests`/`requestState`                    |
+| `tasks/update`             | n/a                            | new — MRTR resume path, returns `{resultType:"complete"}` ack                                  |
+| `tasks/cancel` response    | rich task envelope             | `{resultType:"complete"}` ack (no task state)                                                  |
+| `tasks/result`             | separate blocking method       | **removed** (result inlined on `tasks/get`)                                                    |
+| `tasks/list`               | session-scoped list            | **removed**                                                                                    |
+| TTL field                  | `ttl` (ms by convention)       | `ttlSeconds` (units in name)                                                                   |
+| Poll-interval field        | `pollInterval`                 | `pollIntervalMilliseconds`                                                                     |
+| `parentTaskId`             | present                        | removed                                                                                        |
+| Tool errors                | `status:failed`                | `status:completed, result.isError:true`                                                        |
+| Mcp-Name HTTP header       | not set                        | request-side routing header (SEP-2243)                                                         |
+
+## Design notes
+
+### Raw fetch escape hatch
+
+The MCP TS SDK ships with strict Zod schemas that strip SEP-2663 /
+SEP-2322 wire fields from responses (`resultType`, `taskId`,
+`inputRequests`, `requestState`, inlined result/error). Scenarios that
+exercise those fields use the raw-fetch helpers in `helpers.ts` rather
+than the SDK client. When the SDK gains schemas for the SEP-2663
+shapes, those call sites switch back to
+`client.request(..., AnyResult)` and the helpers shrink (or disappear).
+
+### Severity follows the spec keyword
+
+Per AGENTS.md: MUST / MUST NOT → `FAILURE`; SHOULD / SHOULD NOT →
+`WARNING`; optional emission with no presence → `INFO`. CI treats
+`WARNING` as a failure, so SHOULD-level requirements still gate.

From b99b877242b9716a950a31b5a8e737e69bcc4dfc Mon Sep 17 00:00:00 2001
From: Sri Panyam <sri.panyam@gmail.com>
Date: Wed, 6 May 2026 12:50:10 -0700
Subject: [PATCH 5/7] tasks: assert createdAt + lastUpdatedAt; factor _shared/
 helpers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two reviewer-driven additions:

1. SEP-2663 createdAt / lastUpdatedAt ISO-8601 assertion in
   `tasks-server-task-creation` (per Luca's PR #262 review feedback).
   The check now flags servers that emit non-ISO timestamps (epoch
   seconds, RFC-2822, etc.) on TaskInfoV2 envelopes.

2. Factor cross-cutting test-harness helpers into _shared/:

   - `_shared/test-runner.ts` — `waitForServerReady` (renamed from
     `waitForTcpReady`; the call site cares about server readiness,
     not the TCP-poll mechanism). Imported by tasks/ and mrtr/
     all-scenarios.test.ts; replaces ~30 LOC of inline duplication
     in each.

   - `_shared/wire-format.ts` — `ISO_8601_PATTERN` constant +
     `isIso8601(s)` predicate. Documented rationale for choosing a
     regex over `Date.parse` (too permissive),
     `new Date(s).toISOString()` (too strict), or
     `Temporal.Instant.from` (Node 24+ experimental). Future
     wire-shape predicates (data URI, percent-encoded filename,
     etc.) can land here.

Cherry-pick footprint when graduating to upstream PR is the SEP
folder + the imported `_shared/` files. First PR through carries
them upstream; subsequent feat branches inherit via standard
upstream-sync flow.

All 9 scenario tests still pass against the Go reference fixtures.
---
 src/scenarios/server/_shared/test-runner.ts   | 56 +++++++++++++++++++
 src/scenarios/server/_shared/wire-format.ts   | 33 +++++++++++
 .../server/mrtr/all-scenarios.test.ts         | 38 +------------
 .../server/tasks/all-scenarios.test.ts        | 43 +-------------
 src/scenarios/server/tasks/lifecycle.ts       | 14 +++++
 5 files changed, 107 insertions(+), 77 deletions(-)
 create mode 100644 src/scenarios/server/_shared/test-runner.ts
 create mode 100644 src/scenarios/server/_shared/wire-format.ts

diff --git a/src/scenarios/server/_shared/test-runner.ts b/src/scenarios/server/_shared/test-runner.ts
new file mode 100644
index 0000000..5eb044b
--- /dev/null
+++ b/src/scenarios/server/_shared/test-runner.ts
@@ -0,0 +1,56 @@
+/**
+ * Test-runner utilities for server-conformance scenarios.
+ *
+ * Used by `*.test.ts` runner files that auto-spawn a fixture binary
+ * before running scenarios. These helpers are language-agnostic and
+ * harness-only — they don't touch the MCP protocol, so they don't belong
+ * in the SDK.
+ *
+ * Single responsibility today: TCP readiness polling. Spawn / cleanup
+ * scaffolding stays inline in each runner so the file reads top-to-bottom
+ * without indirection (per AGENTS.md "repetitive check blocks are fine").
+ */
+
+import { connect } from 'net';
+
+/**
+ * Poll the host/port of the given URL until a TCP connection succeeds;
+ * throws (message includes the last connect failure) once `timeoutMs` elapses.
+ * Language-agnostic — works for any server that binds before serving requests.
+ */
+export async function waitForServerReady(
+  url: string,
+  timeoutMs: number
+): Promise<void> {
+  const u = new URL(url);
+  const port = parseInt(u.port || (u.protocol === 'https:' ? '443' : '80'), 10);
+  // WHATWG URL keeps the [] around IPv6 literals; net.connect needs them gone.
+  const host = u.hostname.replace(/^\[|\]$/g, '');
+  const deadline = Date.now() + timeoutMs;
+  let lastErr: Error | null = null;
+  while (Date.now() < deadline) {
+    try {
+      await new Promise<void>((resolve, reject) => {
+        const socket = connect({ host, port }, () => {
+          socket.end();
+          resolve();
+        });
+        socket.once('error', (err) => {
+          socket.destroy();
+          reject(err);
+        });
+        socket.setTimeout(1_000, () => {
+          socket.destroy();
+          reject(new Error('connect timeout'));
+        });
+      });
+      return;
+    } catch (err) {
+      lastErr = err as Error;
+      await new Promise((r) => setTimeout(r, 200));
+    }
+  }
+  throw new Error(
+    `${u.hostname}:${port} did not accept TCP connections (last: ${lastErr?.message ?? 'unknown'})`
+  );
+}
diff --git a/src/scenarios/server/_shared/wire-format.ts b/src/scenarios/server/_shared/wire-format.ts
new file mode 100644
index 0000000..ea37bc7
--- /dev/null
+++ b/src/scenarios/server/_shared/wire-format.ts
@@ -0,0 +1,33 @@
+/**
+ * Wire-format validation helpers shared across server-conformance
+ * scenarios. Pure predicates / regex — no I/O, no async.
+ *
+ * Pragmatic choices are documented per helper. When validation
+ * requirements tighten (e.g., the spec mandates a stricter timestamp
+ * format), edit here once and every scenario picks it up.
+ */
+
+/**
+ * ISO-8601 timestamp prefix (YYYY-MM-DDThh:mm:ss). Tolerant about
+ * the timezone tail (`Z`, `+00:00`, `+0000`) and sub-second precision —
+ * matches what real servers emit (Go `time.RFC3339Nano`,
+ * Python `datetime.isoformat()`, JavaScript `toISOString()`).
+ * Anchored at the start only: text after the seconds is never inspected,
+ * and digit groups are not range-checked (e.g. month `13` still matches).
+ *
+ * Why a regex rather than the alternatives:
+ *   - `Date.parse` accepts RFC-2822, "May 4 2026", and other
+ *     non-ISO strings — too permissive.
+ *   - `new Date(s).toISOString() === s` is too strict — rejects
+ *     valid `+00:00`-style offsets that don't survive the canonical
+ *     `Z` round-trip.
+ *   - `Temporal.Instant.from` is Node 24+ experimental.
+ *
+ * Swap this constant for a stdlib validator if/when one is broadly available.
+ */
+export const ISO_8601_PATTERN = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}/;
+
+/** True only for string inputs that begin with an ISO-8601 date-time. */
+export function isIso8601(s: unknown): boolean {
+  return typeof s !== 'string' ? false : ISO_8601_PATTERN.test(s);
+}
diff --git a/src/scenarios/server/mrtr/all-scenarios.test.ts b/src/scenarios/server/mrtr/all-scenarios.test.ts
index dd1e6bb..1e8154a 100644
--- a/src/scenarios/server/mrtr/all-scenarios.test.ts
+++ b/src/scenarios/server/mrtr/all-scenarios.test.ts
@@ -22,9 +22,9 @@
  */
 
 import { spawn, ChildProcess } from 'child_process';
-import { connect } from 'net';
 import { describe, it, expect, beforeAll, afterAll } from 'vitest';
 import { MrtrEphemeralFlowScenario } from './ephemeral-flow';
+import { waitForServerReady } from '../_shared/test-runner';
 
 const SERVER_URL = process.env.MRTR_SERVER_URL;
 const SERVER_CMD = process.env.MRTR_SERVER_CMD;
@@ -64,7 +64,7 @@ describeIfTarget('SEP-2322 MRTR — server conformance', () => {
       }
     });
 
-    await waitForTcpReady(SERVER_URL!, SERVER_STARTUP_TIMEOUT_MS).catch(
+    await waitForServerReady(SERVER_URL!, SERVER_STARTUP_TIMEOUT_MS).catch(
       (err) => {
         if (serverProcess && !serverProcess.killed) {
           serverProcess.kill('SIGKILL');
@@ -113,37 +113,3 @@ describeIfTarget('SEP-2322 MRTR — server conformance', () => {
     });
   }
 });
-
-async function waitForTcpReady(url: string, timeoutMs: number): Promise<void> {
-  const u = new URL(url);
-  const port = parseInt(u.port || (u.protocol === 'https:' ? '443' : '80'), 10);
-  const host = u.hostname;
-  const deadline = Date.now() + timeoutMs;
-  let lastErr: Error | null = null;
-
-  while (Date.now() < deadline) {
-    try {
-      await new Promise<void>((resolve, reject) => {
-        const socket = connect({ host, port }, () => {
-          socket.end();
-          resolve();
-        });
-        socket.once('error', (err) => {
-          socket.destroy();
-          reject(err);
-        });
-        socket.setTimeout(1_000, () => {
-          socket.destroy();
-          reject(new Error('connect timeout'));
-        });
-      });
-      return;
-    } catch (err) {
-      lastErr = err as Error;
-      await new Promise((r) => setTimeout(r, 200));
-    }
-  }
-  throw new Error(
-    `${host}:${port} did not accept TCP connections (last: ${lastErr?.message ?? 'unknown'})`
-  );
-}
diff --git a/src/scenarios/server/tasks/all-scenarios.test.ts b/src/scenarios/server/tasks/all-scenarios.test.ts
index 76136f8..d6ad16d 100644
--- a/src/scenarios/server/tasks/all-scenarios.test.ts
+++ b/src/scenarios/server/tasks/all-scenarios.test.ts
@@ -28,7 +28,6 @@
  */
 
 import { spawn, ChildProcess } from 'child_process';
-import { connect } from 'net';
 import { describe, it, expect, beforeAll, afterAll } from 'vitest';
 import { TasksLifecycleScenario } from './lifecycle';
 import { TasksCapabilityNegotiationScenario } from './capability';
@@ -38,6 +37,7 @@ import { TasksMRTRInputScenario } from './mrtr-input';
 import { TasksRequestHeadersScenario } from './headers';
 import { TasksDispatchScenario } from './dispatch';
 import { TasksStatusNotificationsScenario } from './notifications';
+import { waitForServerReady } from '../_shared/test-runner';
 
 const SERVER_URL = process.env.TASKS_SERVER_URL;
 const SERVER_CMD = process.env.TASKS_SERVER_CMD;
@@ -86,7 +86,7 @@ describeIfTarget('SEP-2663 Tasks — server conformance', () => {
       }
     });
 
-    await waitForTcpReady(SERVER_URL!, SERVER_STARTUP_TIMEOUT_MS).catch(
+    await waitForServerReady(SERVER_URL!, SERVER_STARTUP_TIMEOUT_MS).catch(
       (err) => {
         if (serverProcess && !serverProcess.killed) {
           serverProcess.kill('SIGKILL');
@@ -135,42 +135,3 @@ describeIfTarget('SEP-2663 Tasks — server conformance', () => {
     });
   }
 });
-
-/**
- * Poll the host/port of the given URL until a TCP connection succeeds
- * or the timeout elapses. Language-agnostic readiness check — works
- * for any server that binds before serving requests.
- */
-async function waitForTcpReady(url: string, timeoutMs: number): Promise<void> {
-  const u = new URL(url);
-  const port = parseInt(u.port || (u.protocol === 'https:' ? '443' : '80'), 10);
-  const host = u.hostname;
-  const deadline = Date.now() + timeoutMs;
-  let lastErr: Error | null = null;
-
-  while (Date.now() < deadline) {
-    try {
-      await new Promise<void>((resolve, reject) => {
-        const socket = connect({ host, port }, () => {
-          socket.end();
-          resolve();
-        });
-        socket.once('error', (err) => {
-          socket.destroy();
-          reject(err);
-        });
-        socket.setTimeout(1_000, () => {
-          socket.destroy();
-          reject(new Error('connect timeout'));
-        });
-      });
-      return;
-    } catch (err) {
-      lastErr = err as Error;
-      await new Promise((r) => setTimeout(r, 200));
-    }
-  }
-  throw new Error(
-    `${host}:${port} did not accept TCP connections (last: ${lastErr?.message ?? 'unknown'})`
-  );
-}
diff --git a/src/scenarios/server/tasks/lifecycle.ts b/src/scenarios/server/tasks/lifecycle.ts
index a59adce..21c5a13 100644
--- a/src/scenarios/server/tasks/lifecycle.ts
+++ b/src/scenarios/server/tasks/lifecycle.ts
@@ -30,6 +30,7 @@ import {
   rawRequest,
   waitForTerminal
 } from './helpers';
+import { isIso8601 } from '../_shared/wire-format';
 
 export class TasksLifecycleScenario implements ClientScenario {
   name = 'tasks-lifecycle';
@@ -199,6 +200,19 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under
             'CreateTaskResult MUST NOT carry `inputRequests` (lives on tasks/get DetailedTask)'
           );
         }
+        // Timestamps — both keys present, both ISO-8601 formatted. Per
+        // SEP-2663 these are required on every TaskInfoV2. See
+        // `_shared/wire-format.ts` for the regex rationale.
+        if (!isIso8601(result.createdAt)) {
+          errs.push(
+            `createdAt MUST be an ISO-8601 string; got ${JSON.stringify(result.createdAt)}`
+          );
+        }
+        if (!isIso8601(result.lastUpdatedAt)) {
+          errs.push(
+            `lastUpdatedAt MUST be an ISO-8601 string; got ${JSON.stringify(result.lastUpdatedAt)}`
+          );
+        }
         if (result.taskId) workingTaskId = result.taskId;
         checks.push({
           id,

From ac31214e49cf26d473500d23c0bd77913838df96 Mon Sep 17 00:00:00 2001
From: Sri Panyam <sri.panyam@gmail.com>
Date: Wed, 6 May 2026 14:22:42 -0700
Subject: [PATCH 6/7] refactor(tasks,mrtr): use SDK Client + AnyResult instead
 of raw-fetch helpers

Drops initRawSession/rawRequest/rawRequestFull from tasks/helpers.ts in
favor of the SDK's Client + StreamableHTTPClientTransport, paired with
a Zod passthrough schema (AnyResult) that preserves SEP-2663 / SEP-2322
draft fields the SDK's typed schemas would strip.

headers.ts and notifications.ts keep a small inline fetch where the SDK
can't reach: per-request HTTP headers (SEP-2243) and SSE notification
observation. Both reuse the SDK session via transport.sessionId.

All SEP-2663 + MRTR ephemeral-flow scenarios pass against the Go fixture.
---
 src/scenarios/server/mrtr/README.md         |  14 +-
 src/scenarios/server/mrtr/ephemeral-flow.ts | 272 +++++++++++---------
 src/scenarios/server/tasks/capability.ts    | 101 +++++---
 src/scenarios/server/tasks/dispatch.ts      | 237 ++++++++---------
 src/scenarios/server/tasks/headers.ts       | 131 +++++++---
 src/scenarios/server/tasks/helpers.ts       | 234 +++--------------
 src/scenarios/server/tasks/lifecycle.ts     | 167 ++++++------
 src/scenarios/server/tasks/mrtr-input.ts    | 167 ++++++------
 src/scenarios/server/tasks/notifications.ts |  27 +-
 src/scenarios/server/tasks/request-state.ts |  79 +++---
 src/scenarios/server/tasks/wire-fields.ts   |  68 ++---
 11 files changed, 740 insertions(+), 757 deletions(-)

diff --git a/src/scenarios/server/mrtr/README.md b/src/scenarios/server/mrtr/README.md
index 8e1bf53..ad3df99 100644
--- a/src/scenarios/server/mrtr/README.md
+++ b/src/scenarios/server/mrtr/README.md
@@ -103,9 +103,11 @@ the SKIPPED short-circuit in `ephemeral-flow.ts` Check 8.
 
 `MRTR_INCOMPLETE_RESULT_TYPE`, the result-type predicates
 (`isIncompleteResult`, `isCompleteResult`), and the elicitation/sampling/
-roots mocks live in `mrtr/helpers.ts`. The raw-fetch primitives
-(`initRawSession`, `rawRequest`) are imported from the sibling
-`../tasks/helpers` because both scenario sets share the same wire-shape
-problem (SDK Zod schemas strip extension fields). When the upstream
-SDK gains schemas for SEP-2322 / SEP-2663 shapes, those import paths
-collapse back into the SDK.
+roots mocks live in `mrtr/helpers.ts`. The shared `AnyResult` Zod
+passthrough schema and the `waitForTerminal`/`waitForStatus` polling
+helpers are imported from the sibling `../tasks/helpers` because both
+scenario sets share the same wire-shape problem: the SDK's typed Zod
+schemas strip extension fields. Calling `client.request(req, AnyResult)`
+over the SDK's `StreamableHTTPClientTransport` preserves every
+SEP-2322 / SEP-2663 field. Once the upstream SDK gains schemas for those
+shapes, the typed schemas can replace the passthrough directly.
diff --git a/src/scenarios/server/mrtr/ephemeral-flow.ts b/src/scenarios/server/mrtr/ephemeral-flow.ts
index 8deaf68..51025a7 100644
--- a/src/scenarios/server/mrtr/ephemeral-flow.ts
+++ b/src/scenarios/server/mrtr/ephemeral-flow.ts
@@ -19,13 +19,16 @@
  *                                               server re-requests on wrong key
  */
 
+import { Client } from '@modelcontextprotocol/sdk/client/index.js';
+import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
+
 import {
   ClientScenario,
   ConformanceCheck,
   ScenarioSpecTag,
   DRAFT_PROTOCOL_VERSION
 } from '../../../types';
-import { initRawSession, rawRequest } from '../tasks/helpers';
+import { AnyResult } from '../tasks/helpers';
 import {
   MRTR_INCOMPLETE_RESULT_TYPE,
   SEP_2322_REF,
@@ -86,15 +89,19 @@ Every \`tools/call\` response in the MRTR contract is one of:
   async run(serverUrl: string): Promise<ConformanceCheck[]> {
     const checks: ConformanceCheck[] = [];
 
-    let sessionId: string;
+    let client: Client;
     try {
-      ({ sessionId } = await initRawSession(serverUrl, {
-        capabilities: {
-          elicitation: {},
-          sampling: {},
-          roots: {}
+      client = new Client(
+        { name: 'mcp-conformance', version: '1.0' },
+        {
+          capabilities: {
+            elicitation: {},
+            sampling: {},
+            roots: {}
+          }
         }
-      }));
+      );
+      await client.connect(new StreamableHTTPClientTransport(new URL(serverUrl)));
     } catch (error) {
       checks.push({
         id: 'mrtr-session-bootstrap',
@@ -116,12 +123,13 @@ Every \`tools/call\` response in the MRTR contract is one of:
       const description =
         'tools/call returns IncompleteResult on round 1 (elicitation/create); completes on round 2 with the answer reflected in the result';
       try {
-        const r1 = await rawRequest(
-          serverUrl,
-          'tools/call',
-          { name: 'test_tool_with_elicitation', arguments: {} },
-          { sessionId }
-        );
+        const r1 = (await client.request(
+          {
+            method: 'tools/call',
+            params: { name: 'test_tool_with_elicitation', arguments: {} }
+          },
+          AnyResult
+        )) as any;
         const errs: string[] = [];
         if (!isIncompleteResult(r1)) {
           errs.push(
@@ -143,21 +151,22 @@ Every \`tools/call\` response in the MRTR contract is one of:
           );
         }
 
-        const r2 = await rawRequest(
-          serverUrl,
-          'tools/call',
+        const r2 = (await client.request(
           {
-            name: 'test_tool_with_elicitation',
-            arguments: {},
-            inputResponses: {
-              user_name: mockElicitResponse({ name: 'Alice' })
-            },
-            ...(r1.requestState !== undefined
-              ? { requestState: r1.requestState }
-              : {})
+            method: 'tools/call',
+            params: {
+              name: 'test_tool_with_elicitation',
+              arguments: {},
+              inputResponses: {
+                user_name: mockElicitResponse({ name: 'Alice' })
+              },
+              ...(r1.requestState !== undefined
+                ? { requestState: r1.requestState }
+                : {})
+            }
           },
-          { sessionId }
-        );
+          AnyResult
+        )) as any;
         if (!isCompleteResult(r2)) {
           errs.push(`round 2 MUST be complete; got ${JSON.stringify(r2)}`);
         }
@@ -188,12 +197,13 @@ Every \`tools/call\` response in the MRTR contract is one of:
       const description =
         'IncompleteResult with sampling/createMessage round-trips through the inputResponses retry';
       try {
-        const r1 = await rawRequest(
-          serverUrl,
-          'tools/call',
-          { name: 'test_incomplete_result_sampling', arguments: {} },
-          { sessionId }
-        );
+        const r1 = (await client.request(
+          {
+            method: 'tools/call',
+            params: { name: 'test_incomplete_result_sampling', arguments: {} }
+          },
+          AnyResult
+        )) as any;
         const errs: string[] = [];
         if (!isIncompleteResult(r1)) {
           errs.push('round 1 MUST be IncompleteResult');
@@ -204,19 +214,20 @@ Every \`tools/call\` response in the MRTR contract is one of:
               `inputRequest method MUST be "sampling/createMessage"; got ${JSON.stringify(r1.inputRequests[key].method)}`
             );
           }
-          const r2 = await rawRequest(
-            serverUrl,
-            'tools/call',
+          const r2 = (await client.request(
             {
-              name: 'test_incomplete_result_sampling',
-              arguments: {},
-              inputResponses: { [key]: mockSamplingResponse('Paris') },
-              ...(r1.requestState !== undefined
-                ? { requestState: r1.requestState }
-                : {})
+              method: 'tools/call',
+              params: {
+                name: 'test_incomplete_result_sampling',
+                arguments: {},
+                inputResponses: { [key]: mockSamplingResponse('Paris') },
+                ...(r1.requestState !== undefined
+                  ? { requestState: r1.requestState }
+                  : {})
+              }
             },
-            { sessionId }
-          );
+            AnyResult
+          )) as any;
           if (!isCompleteResult(r2)) {
             errs.push('round 2 MUST be complete');
           }
@@ -242,12 +253,13 @@ Every \`tools/call\` response in the MRTR contract is one of:
       const description =
         'IncompleteResult with roots/list round-trips through the inputResponses retry';
       try {
-        const r1 = await rawRequest(
-          serverUrl,
-          'tools/call',
-          { name: 'test_incomplete_result_list_roots', arguments: {} },
-          { sessionId }
-        );
+        const r1 = (await client.request(
+          {
+            method: 'tools/call',
+            params: { name: 'test_incomplete_result_list_roots', arguments: {} }
+          },
+          AnyResult
+        )) as any;
         const errs: string[] = [];
         if (!isIncompleteResult(r1)) {
           errs.push('round 1 MUST be IncompleteResult');
@@ -258,19 +270,20 @@ Every \`tools/call\` response in the MRTR contract is one of:
               `inputRequest method MUST be "roots/list"; got ${JSON.stringify(r1.inputRequests[key].method)}`
             );
           }
-          const r2 = await rawRequest(
-            serverUrl,
-            'tools/call',
+          const r2 = (await client.request(
             {
-              name: 'test_incomplete_result_list_roots',
-              arguments: {},
-              inputResponses: { [key]: mockListRootsResponse() },
-              ...(r1.requestState !== undefined
-                ? { requestState: r1.requestState }
-                : {})
+              method: 'tools/call',
+              params: {
+                name: 'test_incomplete_result_list_roots',
+                arguments: {},
+                inputResponses: { [key]: mockListRootsResponse() },
+                ...(r1.requestState !== undefined
+                  ? { requestState: r1.requestState }
+                  : {})
+              }
             },
-            { sessionId }
-          );
+            AnyResult
+          )) as any;
           if (!isCompleteResult(r2)) {
             errs.push('round 2 MUST be complete');
           }
@@ -296,12 +309,13 @@ Every \`tools/call\` response in the MRTR contract is one of:
       const description =
         'When server emits requestState on round 1, it MUST be a non-empty string and the server MUST validate the echo on round 2';
       try {
-        const r1 = await rawRequest(
-          serverUrl,
-          'tools/call',
-          { name: 'test_incomplete_result_request_state', arguments: {} },
-          { sessionId }
-        );
+        const r1 = (await client.request(
+          {
+            method: 'tools/call',
+            params: { name: 'test_incomplete_result_request_state', arguments: {} }
+          },
+          AnyResult
+        )) as any;
         const errs: string[] = [];
         if (!isIncompleteResult(r1)) {
           errs.push('round 1 MUST be IncompleteResult');
@@ -317,17 +331,18 @@ Every \`tools/call\` response in the MRTR contract is one of:
         }
         const key = Object.keys(r1.inputRequests ?? {})[0];
         if (key) {
-          const r2 = await rawRequest(
-            serverUrl,
-            'tools/call',
+          const r2 = (await client.request(
             {
-              name: 'test_incomplete_result_request_state',
-              arguments: {},
-              inputResponses: { [key]: mockElicitResponse({ ok: true }) },
-              requestState: r1.requestState
+              method: 'tools/call',
+              params: {
+                name: 'test_incomplete_result_request_state',
+                arguments: {},
+                inputResponses: { [key]: mockElicitResponse({ ok: true }) },
+                requestState: r1.requestState
+              }
             },
-            { sessionId }
-          );
+            AnyResult
+          )) as any;
           if (!isCompleteResult(r2)) {
             errs.push('round 2 MUST be complete after valid requestState echo');
           }
@@ -360,12 +375,13 @@ Every \`tools/call\` response in the MRTR contract is one of:
       const description =
         'A single IncompleteResult MAY carry inputRequests for elicitation/create + sampling/createMessage + roots/list together';
       try {
-        const r1 = await rawRequest(
-          serverUrl,
-          'tools/call',
-          { name: 'test_incomplete_result_multiple_inputs', arguments: {} },
-          { sessionId }
-        );
+        const r1 = (await client.request(
+          {
+            method: 'tools/call',
+            params: { name: 'test_incomplete_result_multiple_inputs', arguments: {} }
+          },
+          AnyResult
+        )) as any;
         const errs: string[] = [];
         if (!isIncompleteResult(r1)) {
           errs.push('round 1 MUST be IncompleteResult');
@@ -397,19 +413,20 @@ Every \`tools/call\` response in the MRTR contract is one of:
             else if (req.method === 'roots/list')
               inputResponses[key] = mockListRootsResponse();
           }
-          const r2 = await rawRequest(
-            serverUrl,
-            'tools/call',
+          const r2 = (await client.request(
             {
-              name: 'test_incomplete_result_multiple_inputs',
-              arguments: {},
-              inputResponses,
-              ...(r1.requestState !== undefined
-                ? { requestState: r1.requestState }
-                : {})
+              method: 'tools/call',
+              params: {
+                name: 'test_incomplete_result_multiple_inputs',
+                arguments: {},
+                inputResponses,
+                ...(r1.requestState !== undefined
+                  ? { requestState: r1.requestState }
+                  : {})
+              }
             },
-            { sessionId }
-          );
+            AnyResult
+          )) as any;
           if (!isCompleteResult(r2)) {
             errs.push('round 2 MUST be complete with all three answers');
           }
@@ -435,12 +452,13 @@ Every \`tools/call\` response in the MRTR contract is one of:
       const description =
         'A handler may take 2+ MRTR rounds; each round mints a fresh requestState; final result MUST reflect answers from every round';
       try {
-        const r1 = await rawRequest(
-          serverUrl,
-          'tools/call',
-          { name: 'test_incomplete_result_multi_round', arguments: {} },
-          { sessionId }
-        );
+        const r1 = (await client.request(
+          {
+            method: 'tools/call',
+            params: { name: 'test_incomplete_result_multi_round', arguments: {} }
+          },
+          AnyResult
+        )) as any;
         const errs: string[] = [];
         if (!isIncompleteResult(r1)) {
           errs.push('round 1 MUST be IncompleteResult');
@@ -450,17 +468,18 @@ Every \`tools/call\` response in the MRTR contract is one of:
         }
         const k1 = Object.keys(r1.inputRequests ?? {})[0];
 
-        const r2 = await rawRequest(
-          serverUrl,
-          'tools/call',
+        const r2 = (await client.request(
           {
-            name: 'test_incomplete_result_multi_round',
-            arguments: {},
-            inputResponses: { [k1]: mockElicitResponse({ name: 'Alice' }) },
-            requestState: r1.requestState
+            method: 'tools/call',
+            params: {
+              name: 'test_incomplete_result_multi_round',
+              arguments: {},
+              inputResponses: { [k1]: mockElicitResponse({ name: 'Alice' }) },
+              requestState: r1.requestState
+            }
           },
-          { sessionId }
-        );
+          AnyResult
+        )) as any;
         if (!isIncompleteResult(r2)) {
           errs.push('round 2 MUST still be IncompleteResult (asks for step2)');
         }
@@ -474,17 +493,18 @@ Every \`tools/call\` response in the MRTR contract is one of:
         }
         const k2 = Object.keys(r2.inputRequests ?? {})[0];
 
-        const r3 = await rawRequest(
-          serverUrl,
-          'tools/call',
+        const r3 = (await client.request(
           {
-            name: 'test_incomplete_result_multi_round',
-            arguments: {},
-            inputResponses: { [k2]: mockElicitResponse({ color: 'blue' }) },
-            requestState: r2.requestState
+            method: 'tools/call',
+            params: {
+              name: 'test_incomplete_result_multi_round',
+              arguments: {},
+              inputResponses: { [k2]: mockElicitResponse({ color: 'blue' }) },
+              requestState: r2.requestState
+            }
           },
-          { sessionId }
-        );
+          AnyResult
+        )) as any;
         if (!isCompleteResult(r3)) {
           errs.push('round 3 MUST be complete');
         }
@@ -518,16 +538,17 @@ Every \`tools/call\` response in the MRTR contract is one of:
       const description =
         'When the client sends inputResponses with a key the server did not emit, the server SHOULD re-request via IncompleteResult';
       try {
-        const r1 = await rawRequest(
-          serverUrl,
-          'tools/call',
+        const r1 = (await client.request(
           {
-            name: 'test_incomplete_result_elicitation',
-            arguments: {},
-            inputResponses: { wrong_key: mockElicitResponse({ data: 'wrong' }) }
+            method: 'tools/call',
+            params: {
+              name: 'test_incomplete_result_elicitation',
+              arguments: {},
+              inputResponses: { wrong_key: mockElicitResponse({ data: 'wrong' }) }
+            }
           },
-          { sessionId }
-        );
+          AnyResult
+        )) as any;
         const errs: string[] = [];
         if (!isIncompleteResult(r1)) {
           errs.push(
@@ -580,6 +601,7 @@ Every \`tools/call\` response in the MRTR contract is one of:
       });
     }
 
+    await client.close().catch(() => {});
     return checks;
   }
 }
diff --git a/src/scenarios/server/tasks/capability.ts b/src/scenarios/server/tasks/capability.ts
index 91615d7..c92c287 100644
--- a/src/scenarios/server/tasks/capability.ts
+++ b/src/scenarios/server/tasks/capability.ts
@@ -10,6 +10,9 @@
  *   - slow_compute  — task-supporting, sleeps N seconds
  */
 
+import { Client } from '@modelcontextprotocol/sdk/client/index.js';
+import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
+
 import {
   ClientScenario,
   ConformanceCheck,
@@ -20,10 +23,9 @@ import {
   TASKS_EXTENSION_ID,
   SEP_2663_REF,
   SEP_2575_REF,
+  AnyResult,
   errMsg,
-  failureCheck,
-  initRawSession,
-  rawRequest
+  failureCheck
 } from './helpers';
 
 export class TasksCapabilityNegotiationScenario implements ClientScenario {
@@ -60,18 +62,29 @@ export class TasksCapabilityNegotiationScenario implements ClientScenario {
   async run(serverUrl: string): Promise<ConformanceCheck[]> {
     const checks: ConformanceCheck[] = [];
 
-    // Two sessions: one declares the extension, one does NOT.
-    let withExt: { sessionId: string; serverCapabilities: any };
-    let withoutExt: { sessionId: string };
+    // Two separate client sessions: one declares the extension, one does NOT.
+    let withExt: Client;
+    let withoutExt: Client;
     try {
-      withExt = await initRawSession(serverUrl, {
-        capabilities: {
-          elicitation: {},
-          sampling: {},
-          extensions: { [TASKS_EXTENSION_ID]: {} }
+      withExt = new Client(
+        { name: 'mcp-conformance', version: '1.0' },
+        {
+          capabilities: {
+            elicitation: {},
+            sampling: {},
+            extensions: { [TASKS_EXTENSION_ID]: {} }
+          }
         }
-      });
-      withoutExt = await initRawSession(serverUrl, { capabilities: {} });
+      );
+      await withExt.connect(new StreamableHTTPClientTransport(new URL(serverUrl)));
+
+      withoutExt = new Client(
+        { name: 'mcp-conformance', version: '1.0' },
+        { capabilities: {} }
+      );
+      await withoutExt.connect(
+        new StreamableHTTPClientTransport(new URL(serverUrl))
+      );
     } catch (error) {
       checks.push({
         id: 'tasks-session-bootstrap',
@@ -90,7 +103,7 @@ export class TasksCapabilityNegotiationScenario implements ClientScenario {
       const id = 'tasks-extension-advertised';
       const name = 'TasksExtensionAdvertised';
       const description = `Server advertises ${TASKS_EXTENSION_ID} under capabilities.extensions (and not capabilities.tasks)`;
-      const caps = withExt.serverCapabilities ?? {};
+      const caps: any = withExt.getServerCapabilities() ?? {};
       const errs: string[] = [];
       if (caps.tasks) {
         errs.push(
@@ -137,9 +150,10 @@ export class TasksCapabilityNegotiationScenario implements ClientScenario {
       const errs: string[] = [];
       for (const tc of cases) {
         try {
-          await rawRequest(serverUrl, tc.method, tc.params, {
-            sessionId: withoutExt.sessionId
-          });
+          await withoutExt.request(
+            { method: tc.method, params: tc.params },
+            AnyResult
+          );
           errs.push(`${tc.method} MUST reject (it returned a result)`);
         } catch (e: any) {
           if (e.code !== -32601) {
@@ -167,15 +181,16 @@ export class TasksCapabilityNegotiationScenario implements ClientScenario {
       const description =
         'tools/call from a session without the extension MUST fall through to sync (no CreateTaskResult, even for task-supporting tools)';
       try {
-        const result = await rawRequest(
-          serverUrl,
-          'tools/call',
+        const result = (await withoutExt.request(
           {
-            name: 'slow_compute',
-            arguments: { seconds: 0, label: 'capability-no-ext' }
+            method: 'tools/call',
+            params: {
+              name: 'slow_compute',
+              arguments: { seconds: 0, label: 'capability-no-ext' }
+            }
           },
-          { sessionId: withoutExt.sessionId }
-        );
+          AnyResult
+        )) as any;
         const errs: string[] = [];
         if (result.resultType === 'task') {
           errs.push(
@@ -222,22 +237,21 @@ export class TasksCapabilityNegotiationScenario implements ClientScenario {
       const description =
         'tools/call with extension declared in _meta.io.modelcontextprotocol/clientCapabilities produces a CreateTaskResult even when the session did not negotiate the extension';
       try {
-        const result = await rawRequest(
-          serverUrl,
-          'tools/call',
-          {
-            name: 'slow_compute',
-            arguments: { seconds: 1, label: 'capability-meta-opt' }
-          },
+        const result = (await withoutExt.request(
           {
-            sessionId: withoutExt.sessionId,
-            meta: {
-              'io.modelcontextprotocol/clientCapabilities': {
-                extensions: { [TASKS_EXTENSION_ID]: {} }
+            method: 'tools/call',
+            params: {
+              name: 'slow_compute',
+              arguments: { seconds: 1, label: 'capability-meta-opt' },
+              _meta: {
+                'io.modelcontextprotocol/clientCapabilities': {
+                  extensions: { [TASKS_EXTENSION_ID]: {} }
+                }
               }
             }
-          }
-        );
+          },
+          AnyResult
+        )) as any;
         const errs: string[] = [];
         if (result.resultType !== 'task') {
           errs.push(
@@ -258,11 +272,12 @@ export class TasksCapabilityNegotiationScenario implements ClientScenario {
         // background goroutine on the server.
         if (result.taskId) {
           try {
-            await rawRequest(
-              serverUrl,
-              'tasks/cancel',
-              { taskId: result.taskId },
-              { sessionId: withExt.sessionId }
+            await withExt.request(
+              {
+                method: 'tasks/cancel',
+                params: { taskId: result.taskId }
+              },
+              AnyResult
             );
           } catch {
             /* swallow — cleanup best-effort */
@@ -286,6 +301,8 @@ export class TasksCapabilityNegotiationScenario implements ClientScenario {
       }
     }
 
+    await withExt.close().catch(() => {});
+    await withoutExt.close().catch(() => {});
     return checks;
   }
 }
diff --git a/src/scenarios/server/tasks/dispatch.ts b/src/scenarios/server/tasks/dispatch.ts
index 3f35e43..272ea57 100644
--- a/src/scenarios/server/tasks/dispatch.ts
+++ b/src/scenarios/server/tasks/dispatch.ts
@@ -21,6 +21,9 @@
  *   - failing_job     — task-supporting, returns tool error
  */
 
+import { Client } from '@modelcontextprotocol/sdk/client/index.js';
+import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
+
 import {
   ClientScenario,
   ConformanceCheck,
@@ -31,10 +34,9 @@ import {
   TASKS_EXTENSION_ID,
   SEP_2322_REF,
   SEP_2663_REF,
+  AnyResult,
   errMsg,
   failureCheck,
-  initRawSession,
-  rawRequest,
   waitForStatus,
   waitForTerminal
 } from './helpers';
@@ -90,15 +92,19 @@ export class TasksDispatchScenario implements ClientScenario {
   async run(serverUrl: string): Promise<ConformanceCheck[]> {
     const checks: ConformanceCheck[] = [];
 
-    let sessionId: string;
+    let client: Client;
     try {
-      ({ sessionId } = await initRawSession(serverUrl, {
-        capabilities: {
-          elicitation: {},
-          sampling: {},
-          extensions: { [TASKS_EXTENSION_ID]: {} }
+      client = new Client(
+        { name: 'mcp-conformance', version: '1.0' },
+        {
+          capabilities: {
+            elicitation: {},
+            sampling: {},
+            extensions: { [TASKS_EXTENSION_ID]: {} }
+          }
         }
-      }));
+      );
+      await client.connect(new StreamableHTTPClientTransport(new URL(serverUrl)));
     } catch (error) {
       checks.push({
         id: 'tasks-session-bootstrap',
@@ -120,13 +126,9 @@ export class TasksDispatchScenario implements ClientScenario {
       const description =
         'tasks/result is removed in v2 and MUST reject with -32601';
       try {
-        await rawRequest(
-          serverUrl,
-          'tasks/result',
-          { taskId: 'any' },
-          {
-            sessionId
-          }
+        await client.request(
+          { method: 'tasks/result', params: { taskId: 'any' } },
+          AnyResult
         );
         checks.push({
           id,
@@ -161,7 +163,10 @@ export class TasksDispatchScenario implements ClientScenario {
       const description =
         'tasks/list is removed in v2 and MUST reject with -32601';
       try {
-        await rawRequest(serverUrl, 'tasks/list', {}, { sessionId });
+        await client.request(
+          { method: 'tasks/list', params: {} },
+          AnyResult
+        );
         checks.push({
           id,
           name,
@@ -195,12 +200,13 @@ export class TasksDispatchScenario implements ClientScenario {
       const description =
         'tools/call with no client `task` hint param MUST still produce CreateTaskResult for task-supporting tools';
       try {
-        const result = await rawRequest(
-          serverUrl,
-          'tools/call',
-          { name: 'failing_job', arguments: {} },
-          { sessionId }
-        );
+        const result = (await client.request(
+          {
+            method: 'tools/call',
+            params: { name: 'failing_job', arguments: {} }
+          },
+          AnyResult
+        )) as any;
         const errs: string[] = [];
         if (result.resultType !== 'task' || !result.taskId) {
           errs.push(
@@ -210,7 +216,7 @@ export class TasksDispatchScenario implements ClientScenario {
         // Best-effort wait so we don't leak.
         if (result.taskId) {
           try {
-            await waitForTerminal(serverUrl, sessionId, result.taskId);
+            await waitForTerminal(client, result.taskId);
           } catch {
             /* swallow */
           }
@@ -236,17 +242,18 @@ export class TasksDispatchScenario implements ClientScenario {
       const description =
         'tools/call with legacy `task` param against a sync tool MUST NOT error and MUST NOT be promoted to a task';
       try {
-        const result = await rawRequest(
-          serverUrl,
-          'tools/call',
+        const result = (await client.request(
           {
-            name: 'greet',
-            arguments: { name: 'legacy-hint' },
-            // Legacy v1 hint that the server MUST ignore.
-            task: { ttl: 60_000, pollInterval: 100 }
+            method: 'tools/call',
+            params: {
+              name: 'greet',
+              arguments: { name: 'legacy-hint' },
+              // Legacy v1 hint that the server MUST ignore.
+              task: { ttl: 60_000, pollInterval: 100 }
+            }
           },
-          { sessionId }
-        );
+          AnyResult
+        )) as any;
         const errs: string[] = [];
         if (result.resultType === 'task') {
           errs.push(
@@ -283,15 +290,16 @@ export class TasksDispatchScenario implements ClientScenario {
       const description =
         'For a fast operation, a task-supporting tool MAY skip task creation and return a sync ToolResult; either path is valid';
       try {
-        const result = await rawRequest(
-          serverUrl,
-          'tools/call',
+        const result = (await client.request(
           {
-            name: 'slow_compute',
-            arguments: { seconds: 0, label: 'instant' }
+            method: 'tools/call',
+            params: {
+              name: 'slow_compute',
+              arguments: { seconds: 0, label: 'instant' }
+            }
           },
-          { sessionId }
-        );
+          AnyResult
+        )) as any;
         const errs: string[] = [];
         if (result.resultType === 'task') {
           if (!result.taskId) {
@@ -331,12 +339,13 @@ export class TasksDispatchScenario implements ClientScenario {
       const errs: string[] = [];
       try {
         // Sync tools/call.
-        const sync = await rawRequest(
-          serverUrl,
-          'tools/call',
-          { name: 'greet', arguments: { name: 'rt' } },
-          { sessionId }
-        );
+        const sync = (await client.request(
+          {
+            method: 'tools/call',
+            params: { name: 'greet', arguments: { name: 'rt' } }
+          },
+          AnyResult
+        )) as any;
         if (sync.resultType !== 'complete') {
           errs.push(
             `sync tools/call resultType = ${JSON.stringify(sync.resultType)}, want "complete"`
@@ -344,24 +353,23 @@ export class TasksDispatchScenario implements ClientScenario {
         }
 
         // tasks/get against a fresh task.
-        const created = await rawRequest(
-          serverUrl,
-          'tools/call',
+        const created = (await client.request(
           {
-            name: 'slow_compute',
-            arguments: { seconds: 0, label: 'rt-get' }
+            method: 'tools/call',
+            params: {
+              name: 'slow_compute',
+              arguments: { seconds: 0, label: 'rt-get' }
+            }
           },
-          { sessionId }
-        );
+          AnyResult
+        )) as any;
         const taskIdForGet = created.taskId;
         if (taskIdForGet) {
-          await waitForTerminal(serverUrl, sessionId, taskIdForGet);
-          const got = await rawRequest(
-            serverUrl,
-            'tasks/get',
-            { taskId: taskIdForGet },
-            { sessionId }
-          );
+          await waitForTerminal(client, taskIdForGet);
+          const got = (await client.request(
+            { method: 'tasks/get', params: { taskId: taskIdForGet } },
+            AnyResult
+          )) as any;
           if (got.resultType !== 'complete') {
             errs.push(
               `tasks/get resultType = ${JSON.stringify(got.resultType)}, want "complete"`
@@ -370,22 +378,21 @@ export class TasksDispatchScenario implements ClientScenario {
         }
 
         // tasks/cancel ack on a fresh long-running task.
-        const longLived = await rawRequest(
-          serverUrl,
-          'tools/call',
+        const longLived = (await client.request(
           {
-            name: 'slow_compute',
-            arguments: { seconds: 60, label: 'rt-cancel' }
+            method: 'tools/call',
+            params: {
+              name: 'slow_compute',
+              arguments: { seconds: 60, label: 'rt-cancel' }
+            }
           },
-          { sessionId }
-        );
+          AnyResult
+        )) as any;
         if (longLived.taskId) {
-          const cancelAck = await rawRequest(
-            serverUrl,
-            'tasks/cancel',
-            { taskId: longLived.taskId },
-            { sessionId }
-          );
+          const cancelAck = (await client.request(
+            { method: 'tasks/cancel', params: { taskId: longLived.taskId } },
+            AnyResult
+          )) as any;
           if (cancelAck.resultType !== 'complete') {
             errs.push(
               `tasks/cancel ack resultType = ${JSON.stringify(cancelAck.resultType)}, want "complete"`
@@ -394,41 +401,40 @@ export class TasksDispatchScenario implements ClientScenario {
         }
 
         // tasks/update ack on a parked elicitation task.
-        const elicit = await rawRequest(
-          serverUrl,
-          'tools/call',
-          { name: 'confirm_delete', arguments: { filename: 'rt.txt' } },
-          { sessionId }
-        );
+        const elicit = (await client.request(
+          {
+            method: 'tools/call',
+            params: { name: 'confirm_delete', arguments: { filename: 'rt.txt' } }
+          },
+          AnyResult
+        )) as any;
         const elicitTaskId = elicit.taskId;
         if (elicitTaskId) {
           await waitForStatus(
-            serverUrl,
-            sessionId,
+            client,
             elicitTaskId,
             'input_required',
             5_000
           );
-          const updateAck = await rawRequest(
-            serverUrl,
-            'tasks/update',
+          const updateAck = (await client.request(
             {
-              taskId: elicitTaskId,
-              inputResponses: { 'unknown-key': { ignored: true } }
+              method: 'tasks/update',
+              params: {
+                taskId: elicitTaskId,
+                inputResponses: { 'unknown-key': { ignored: true } }
+              }
             },
-            { sessionId }
-          );
+            AnyResult
+          )) as any;
           if (updateAck.resultType !== 'complete') {
             errs.push(
               `tasks/update ack resultType = ${JSON.stringify(updateAck.resultType)}, want "complete"`
             );
           }
           try {
-            await rawRequest(
-              serverUrl,
-              'tasks/cancel',
-              { taskId: elicitTaskId },
-              { sessionId }
+            await client.request(
+              { method: 'tasks/cancel', params: { taskId: elicitTaskId } },
+              AnyResult
             );
           } catch {
             /* swallow */
@@ -455,15 +461,16 @@ export class TasksDispatchScenario implements ClientScenario {
       const description =
         'tasks/get issued immediately after CreateTaskResult arrives MUST resolve (server MUST NOT return CreateTaskResult before the task is durably created)';
       try {
-        const created = await rawRequest(
-          serverUrl,
-          'tools/call',
+        const created = (await client.request(
           {
-            name: 'slow_compute',
-            arguments: { seconds: 60, label: 'consistency' }
+            method: 'tools/call',
+            params: {
+              name: 'slow_compute',
+              arguments: { seconds: 60, label: 'consistency' }
+            }
           },
-          { sessionId }
-        );
+          AnyResult
+        )) as any;
         const taskId = created.taskId;
         if (!taskId) {
           checks.push({
@@ -478,12 +485,10 @@ export class TasksDispatchScenario implements ClientScenario {
         } else {
           // No await/sleep between create and get — codifies the
           // strong-consistency ordering.
-          const got = await rawRequest(
-            serverUrl,
-            'tasks/get',
-            { taskId },
-            { sessionId }
-          );
+          const got = (await client.request(
+            { method: 'tasks/get', params: { taskId } },
+            AnyResult
+          )) as any;
           const errs: string[] = [];
           if (got.taskId !== taskId) {
             errs.push(
@@ -492,11 +497,9 @@ export class TasksDispatchScenario implements ClientScenario {
           }
           // Cleanup.
           try {
-            await rawRequest(
-              serverUrl,
-              'tasks/cancel',
-              { taskId },
-              { sessionId }
+            await client.request(
+              { method: 'tasks/cancel', params: { taskId } },
+              AnyResult
             );
           } catch {
             /* swallow */
@@ -523,11 +526,12 @@ export class TasksDispatchScenario implements ClientScenario {
       const description =
         'tasks/get for a taskId the server does not recognize MUST return -32602';
       try {
-        await rawRequest(
-          serverUrl,
-          'tasks/get',
-          { taskId: 'tasks-conformance-nonexistent-12345' },
-          { sessionId }
+        await client.request(
+          {
+            method: 'tasks/get',
+            params: { taskId: 'tasks-conformance-nonexistent-12345' }
+          },
+          AnyResult
         );
         checks.push({
           id,
@@ -555,6 +559,7 @@ export class TasksDispatchScenario implements ClientScenario {
       }
     }
 
+    await client.close().catch(() => {});
     return checks;
   }
 }
diff --git a/src/scenarios/server/tasks/headers.ts b/src/scenarios/server/tasks/headers.ts
index 0d5ebdd..9936779 100644
--- a/src/scenarios/server/tasks/headers.ts
+++ b/src/scenarios/server/tasks/headers.ts
@@ -16,6 +16,9 @@
  *   - slow_compute  — task-supporting, sleeps N seconds
  */
 
+import { Client } from '@modelcontextprotocol/sdk/client/index.js';
+import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
+
 import {
   ClientScenario,
   ConformanceCheck,
@@ -25,12 +28,75 @@ import {
 import {
   TASKS_EXTENSION_ID,
   SEP_2243_REF,
+  AnyResult,
   errMsg,
-  failureCheck,
-  initRawSession,
-  rawRequest
+  failureCheck
 } from './helpers';
 
+/**
+ * Minimal raw POST that lets us inject SEP-2243 routing headers
+ * (Mcp-Method, Mcp-Name) on a JSON-RPC call. The SDK's
+ * StreamableHTTPClientTransport doesn't expose per-request HTTP
+ * headers, and this whole scenario exists to verify the server tolerates
+ * those headers — so we pin a single raw fetch helper to this file.
+ *
+ * Callers pass the SDK transport's `sessionId` so the request lands on
+ * the same already-initialized session; Mcp-Session-Id is omitted when
+ * that value is empty.
+ * Returns the JSON-RPC `result`; a JSON-RPC error is thrown as an Error
+ * decorated with `code` / `data`.
+ */
+async function rawJsonRpcWithHeaders(
+  serverUrl: string,
+  sessionId: string,
+  method: string,
+  params: any,
+  extraHeaders: Record<string, string>
+): Promise<any> {
+  const id = `hdr-${Math.random().toString(36).slice(2, 10)}`;
+  const resp = await fetch(serverUrl, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      Accept: 'application/json, text/event-stream',
+      // Call sites pass `transport.sessionId!`; never send "undefined".
+      ...(sessionId ? { 'Mcp-Session-Id': sessionId } : {}),
+      ...extraHeaders
+    },
+    body: JSON.stringify({ jsonrpc: '2.0', id, method, params })
+  });
+  const ct = resp.headers.get('content-type') || '';
+  let body: any;
+  if (ct.includes('text/event-stream')) {
+    const text = await resp.text();
+    for (const line of text.split('\n')) {
+      const trimmed = line.trim();
+      if (!trimmed.startsWith('data:')) continue;
+      const payload = trimmed.slice(5).trimStart();
+      if (!payload.startsWith('{')) continue;
+      const parsed = JSON.parse(payload);
+      // Only our own response counts: match the id and test key presence
+      // so a legitimately falsy `result` is not skipped.
+      if (parsed.id === id && ('result' in parsed || 'error' in parsed)) {
+        body = parsed;
+        break;
+      }
+    }
+  } else {
+    body = await resp.json();
+  }
+  if (!body) {
+    throw new Error(`No JSON-RPC frame in response (status ${resp.status})`);
+  }
+  if (body.error) {
+    const err: any = new Error(body.error.message || 'JSON-RPC error');
+    err.code = body.error.code;
+    err.data = body.error.data;
+    throw err;
+  }
+  return body.result;
+}
+
 export class TasksRequestHeadersScenario implements ClientScenario {
   name = 'tasks-request-headers';
   specVersions: ScenarioSpecTag[] = ['extension', DRAFT_PROTOCOL_VERSION];
@@ -53,11 +119,15 @@ based on them — including when the headers disagree with the body.`;
   async run(serverUrl: string): Promise<ConformanceCheck[]> {
     const checks: ConformanceCheck[] = [];
 
-    let sessionId: string;
+    let client: Client;
+    let transport: StreamableHTTPClientTransport;
     try {
-      ({ sessionId } = await initRawSession(serverUrl, {
-        capabilities: { extensions: { [TASKS_EXTENSION_ID]: {} } }
-      }));
+      client = new Client(
+        { name: 'mcp-conformance', version: '1.0' },
+        { capabilities: { extensions: { [TASKS_EXTENSION_ID]: {} } } }
+      );
+      transport = new StreamableHTTPClientTransport(new URL(serverUrl));
+      await client.connect(transport);
     } catch (error) {
       checks.push({
         id: 'tasks-session-bootstrap',
@@ -79,11 +149,12 @@ based on them — including when the headers disagree with the body.`;
       const description =
         'Server tolerates Mcp-Method request header on tools/call (sync tool dispatch unaffected)';
       try {
-        const result = await rawRequest(
+        const result = await rawJsonRpcWithHeaders(
           serverUrl,
+          transport.sessionId!,
           'tools/call',
           { name: 'greet', arguments: { name: 'sep-2243' } },
-          { sessionId, headers: { 'Mcp-Method': 'tools/call' } }
+          { 'Mcp-Method': 'tools/call' }
         );
         const errs: string[] = [];
         if (result.resultType !== 'complete') {
@@ -123,15 +194,16 @@ based on them — including when the headers disagree with the body.`;
       const description =
         'Server tolerates Mcp-Method + Mcp-Name request headers on tasks/get (body taskId resolves regardless of routing headers)';
       try {
-        const created = await rawRequest(
-          serverUrl,
-          'tools/call',
+        const created = (await client.request(
           {
-            name: 'slow_compute',
-            arguments: { seconds: 60, label: 'headers-tasks-get' }
+            method: 'tools/call',
+            params: {
+              name: 'slow_compute',
+              arguments: { seconds: 60, label: 'headers-tasks-get' }
+            }
           },
-          { sessionId }
-        );
+          AnyResult
+        )) as any;
         routingTaskId = created.taskId;
         if (!routingTaskId) {
           checks.push({
@@ -144,16 +216,14 @@ based on them — including when the headers disagree with the body.`;
             specReferences: [SEP_2243_REF]
           });
         } else {
-          const got = await rawRequest(
+          const got = await rawJsonRpcWithHeaders(
             serverUrl,
+            transport.sessionId!,
             'tasks/get',
             { taskId: routingTaskId },
             {
-              sessionId,
-              headers: {
-                'Mcp-Method': 'tasks/get',
-                'Mcp-Name': routingTaskId
-              }
+              'Mcp-Method': 'tasks/get',
+              'Mcp-Name': routingTaskId
             }
           );
           const errs: string[] = [];
@@ -190,11 +260,12 @@ based on them — including when the headers disagree with the body.`;
       const description =
         'When Mcp-Method header disagrees with body, server MUST dispatch on body method (header is informational)';
       try {
-        const result = await rawRequest(
+        const result = await rawJsonRpcWithHeaders(
           serverUrl,
+          transport.sessionId!,
           'tools/call',
           { name: 'greet', arguments: { name: 'header-mismatch' } },
-          { sessionId, headers: { 'Mcp-Method': 'tasks/get' } }
+          { 'Mcp-Method': 'tasks/get' }
         );
         const errs: string[] = [];
         if (result.resultType !== 'complete') {
@@ -227,17 +298,19 @@ based on them — including when the headers disagree with the body.`;
     // Cleanup the long-lived task.
     if (routingTaskId) {
       try {
-        await rawRequest(
-          serverUrl,
-          'tasks/cancel',
-          { taskId: routingTaskId },
-          { sessionId }
+        await client.request(
+          {
+            method: 'tasks/cancel',
+            params: { taskId: routingTaskId }
+          },
+          AnyResult
         );
       } catch {
         /* swallow */
       }
     }
 
+    await client.close().catch(() => {});
     return checks;
   }
 }
diff --git a/src/scenarios/server/tasks/helpers.ts b/src/scenarios/server/tasks/helpers.ts
index 2eea4e7..388a985 100644
--- a/src/scenarios/server/tasks/helpers.ts
+++ b/src/scenarios/server/tasks/helpers.ts
@@ -1,17 +1,31 @@
 /**
  * Shared helpers for SEP-2663 Tasks server-conformance scenarios.
  *
- * The MCP TS SDK's typed schemas (CallToolResultSchema, etc.) strip the
- * SEP-2663 / SEP-2322 wire fields — `resultType`, `taskId`, `inputRequests`,
- * `requestState`, inlined `result`/`error` on tasks/get's DetailedTask. So
- * scenarios that exercise those fields use raw fetch instead. This file
- * centralizes the bootstrap + RPC + polling primitives.
+ * Most of what scenarios need is already in the official MCP TS SDK:
+ *   - new Client(...) + StreamableHTTPClientTransport for connection
+ *   - client.request(req, schema) for typed JSON-RPC calls
+ *   - McpError with .code / .data for JSON-RPC errors
  *
- * If/when the SDK gains schemas for the SEP-2663 wire shapes, the call
- * sites in scenarios switch back to `client.request(..., AnyResult)`
- * and this file shrinks (or disappears).
+ * This file holds:
+ *   - SEP reference constants used by every scenario's specReferences
+ *   - Tiny check builders (errMsg / failureCheck / skipCheck) used by
+ *     all scenarios for consistent FAILURE / SKIPPED reporting
+ *   - Polling helpers (waitForTerminal / waitForStatus) wrapping
+ *     `client.request({ method: 'tasks/get', ... }, AnyResult)`
+ *   - The `AnyResult` Zod passthrough schema — pair with
+ *     `client.request(req, AnyResult)` to preserve fields the SDK's
+ *     typed result schemas would strip (`resultType`, `taskId`,
+ *     `requestState`, inlined `result`/`error`, etc.)
+ *
+ * Scenarios that need transport-level access (HTTP request-header
+ * injection for SEP-2243; raw SSE event reading for status
+ * notifications) keep their own inline raw fetch — the SDK doesn't expose
+ * those layers. See headers.ts / notifications.ts.
  */
 
+import type { Client } from '@modelcontextprotocol/sdk/client/index.js';
+import { z } from 'zod';
+
 import type { ConformanceCheck, SpecReference } from '../../../types';
 
 export const TASKS_EXTENSION_ID = 'io.modelcontextprotocol/tasks';
@@ -33,6 +47,13 @@ export const SEP_2575_REF: SpecReference = {
   url: 'https://github.com/modelcontextprotocol/specification/pull/2575'
 };
 
+/**
+ * Zod object schema with no declared keys; `.passthrough()` keeps unknown
+ * ones. Pair with `client.request(req, AnyResult)` to preserve fields the
+ * SDK's typed result schemas would strip (SEP-2663 / SEP-2322 wire fields).
+ */
+export const AnyResult = z.object({}).passthrough();
+
 export function errMsg(error: unknown): string {
   return error instanceof Error ? error.message : String(error);
 }
@@ -75,192 +96,18 @@ export function skipCheck(
   };
 }
 
-export interface InitOpts {
-  /** Negotiated wire protocolVersion. Defaults to LATEST_SPEC_VERSION. */
-  protocolVersion?: string;
-  /** Client capabilities (extensions, elicitation, sampling, …). */
-  capabilities?: Record<string, unknown>;
-  /** Optional clientInfo override. */
-  clientInfo?: { name: string; version: string };
-}
-
-export interface InitResult {
-  /** Mcp-Session-Id minted by the server during initialize. */
-  sessionId: string;
-  /** capabilities object the server advertised in its initialize response. */
-  serverCapabilities: Record<string, any>;
-  /** Negotiated protocolVersion echoed back by the server. */
-  serverProtocolVersion?: string;
-  /** Server info (name, version, …). */
-  serverInfo?: Record<string, any>;
-}
-
-/**
- * Run a fresh initialize handshake and return session id + the server's
- * advertised capabilities. Bypasses the SDK so callers can declare
- * extension capabilities the SDK's typed wrappers don't yet know about,
- * and so the SDK's Zod schemas don't strip extension fields off the
- * server response.
- */
-export async function initRawSession(
-  serverUrl: string,
-  opts: InitOpts = {}
-): Promise<InitResult> {
-  const protocolVersion = opts.protocolVersion ?? '2025-11-25';
-  const capabilities = opts.capabilities ?? {};
-  const clientInfo = opts.clientInfo ?? {
-    name: 'mcp-conformance',
-    version: '1.0'
-  };
-
-  const initResp = await fetch(serverUrl, {
-    method: 'POST',
-    headers: {
-      'Content-Type': 'application/json',
-      Accept: 'application/json'
-    },
-    body: JSON.stringify({
-      jsonrpc: '2.0',
-      id: 'init-raw',
-      method: 'initialize',
-      params: { protocolVersion, clientInfo, capabilities }
-    })
-  });
-  const sid = initResp.headers.get('mcp-session-id') || '';
-  if (!sid) throw new Error('initialize response missing Mcp-Session-Id');
-
-  const initBody = await initResp.json();
-  if (initBody.error) {
-    throw new Error(
-      `initialize returned JSON-RPC error: ${JSON.stringify(initBody.error)}`
-    );
-  }
-  const result = initBody.result ?? {};
-
-  await fetch(serverUrl, {
-    method: 'POST',
-    headers: {
-      'Content-Type': 'application/json',
-      Accept: 'application/json',
-      'Mcp-Session-Id': sid
-    },
-    body: JSON.stringify({
-      jsonrpc: '2.0',
-      method: 'notifications/initialized'
-    })
-  });
-  return {
-    sessionId: sid,
-    serverCapabilities: result.capabilities ?? {},
-    serverProtocolVersion: result.protocolVersion,
-    serverInfo: result.serverInfo
-  };
-}
-
-export interface RawRequestOpts {
-  sessionId: string;
-  /** Optional _meta object merged into the JSON-RPC params. */
-  meta?: Record<string, unknown>;
-  /** Optional HTTP request headers merged after the harness defaults. */
-  headers?: Record<string, string>;
-}
-
-export interface RawRequestResult {
-  /** The JSON-RPC `result` body, when the response carried one. */
-  result: any;
-  /** The raw fetch Response so callers can inspect transport-level headers. */
-  response: Response;
-}
-
-let nextId = 1;
-
-/**
- * Send a raw JSON-RPC request via fetch, parsing SSE `data:` lines or
- * plain JSON depending on Content-Type. Throws an Error decorated with
- * `code` / `data` when the response carries a JSON-RPC error.
- */
-export async function rawRequest(
-  serverUrl: string,
-  method: string,
-  params: any,
-  opts: RawRequestOpts
-): Promise<any> {
-  const { result } = await rawRequestFull(serverUrl, method, params, opts);
-  return result;
-}
-
-/**
- * Like rawRequest, but also returns the raw fetch Response so callers
- * can inspect transport-level headers (e.g., SEP-2243 routing headers).
- */
-export async function rawRequestFull(
-  serverUrl: string,
-  method: string,
-  params: any,
-  opts: RawRequestOpts
-): Promise<RawRequestResult> {
-  const id = nextId++;
-  const requestParams = opts.meta ? { ...params, _meta: opts.meta } : params;
-  const resp = await fetch(serverUrl, {
-    method: 'POST',
-    headers: {
-      'Content-Type': 'application/json',
-      Accept: 'text/event-stream, application/json',
-      'Mcp-Session-Id': opts.sessionId,
-      ...(opts.headers ?? {})
-    },
-    body: JSON.stringify({
-      jsonrpc: '2.0',
-      id,
-      method,
-      params: requestParams
-    })
-  });
-  const ct = resp.headers.get('content-type') || '';
-  let body: any;
-  if (ct.includes('text/event-stream')) {
-    const text = await resp.text();
-    for (const line of text.split('\n')) {
-      const trimmed = line.trim();
-      if (trimmed.startsWith('data:')) {
-        const payload = trimmed.slice(5).trimStart();
-        if (payload.startsWith('{')) {
-          const parsed = JSON.parse(payload);
-          if (parsed.id === id) {
-            body = parsed;
-            break;
-          }
-        }
-      }
-    }
-  } else {
-    body = await resp.json();
-  }
-  if (!body) throw new Error(`No JSON-RPC response for ${method}`);
-  if (body.error) {
-    const err: any = new Error(body.error.message);
-    err.code = body.error.code;
-    err.data = body.error.data;
-    throw err;
-  }
-  return { result: body.result, response: resp };
-}
-
 /** Poll tasks/get until the task reaches a terminal state. */
 export async function waitForTerminal(
-  serverUrl: string,
-  sessionId: string,
+  client: Client,
   taskId: string,
   timeoutMs = 10_000
 ): Promise<any> {
   const start = Date.now();
   while (Date.now() - start < timeoutMs) {
-    const task = await rawRequest(
-      serverUrl,
-      'tasks/get',
-      { taskId },
-      { sessionId }
-    );
+    const task = (await client.request(
+      { method: 'tasks/get', params: { taskId } },
+      AnyResult
+    )) as any;
     if (['completed', 'failed', 'cancelled'].includes(task.status)) {
       return task;
     }
@@ -273,20 +120,17 @@ export async function waitForTerminal(
 
 /** Poll tasks/get until a specific status (or any terminal state). */
 export async function waitForStatus(
-  serverUrl: string,
-  sessionId: string,
+  client: Client,
   taskId: string,
   status: string,
   timeoutMs = 10_000
 ): Promise<any> {
   const start = Date.now();
   while (Date.now() - start < timeoutMs) {
-    const task = await rawRequest(
-      serverUrl,
-      'tasks/get',
-      { taskId },
-      { sessionId }
-    );
+    const task = (await client.request(
+      { method: 'tasks/get', params: { taskId } },
+      AnyResult
+    )) as any;
     if (
       task.status === status ||
       ['completed', 'failed', 'cancelled'].includes(task.status)
diff --git a/src/scenarios/server/tasks/lifecycle.ts b/src/scenarios/server/tasks/lifecycle.ts
index 21c5a13..53381fa 100644
--- a/src/scenarios/server/tasks/lifecycle.ts
+++ b/src/scenarios/server/tasks/lifecycle.ts
@@ -13,6 +13,9 @@
  *   - protocol_error_job — task-supporting, panics into a protocol error
  */
 
+import { Client } from '@modelcontextprotocol/sdk/client/index.js';
+import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
+
 import {
   ClientScenario,
   ConformanceCheck,
@@ -23,11 +26,10 @@ import {
   TASKS_EXTENSION_ID,
   SEP_2663_REF,
   SEP_2322_REF,
+  AnyResult,
   errMsg,
   failureCheck,
   skipCheck,
-  initRawSession,
-  rawRequest,
   waitForTerminal
 } from './helpers';
 import { isIso8601 } from '../_shared/wire-format';
@@ -84,15 +86,19 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under
   async run(serverUrl: string): Promise<ConformanceCheck[]> {
     const checks: ConformanceCheck[] = [];
 
-    let sessionId: string;
+    let client: Client;
     try {
-      ({ sessionId } = await initRawSession(serverUrl, {
-        capabilities: {
-          elicitation: {},
-          sampling: {},
-          extensions: { [TASKS_EXTENSION_ID]: {} }
+      client = new Client(
+        { name: 'mcp-conformance', version: '1.0' },
+        {
+          capabilities: {
+            elicitation: {},
+            sampling: {},
+            extensions: { [TASKS_EXTENSION_ID]: {} }
+          }
         }
-      }));
+      );
+      await client.connect(new StreamableHTTPClientTransport(new URL(serverUrl)));
     } catch (error) {
       checks.push({
         id: 'tasks-session-bootstrap',
@@ -114,12 +120,13 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under
       const description =
         'Sync tool returns ToolResult (resultType:"complete"), no taskId at top level';
       try {
-        const result = await rawRequest(
-          serverUrl,
-          'tools/call',
-          { name: 'greet', arguments: { name: 'World' } },
-          { sessionId }
-        );
+        const result = (await client.request(
+          {
+            method: 'tools/call',
+            params: { name: 'greet', arguments: { name: 'World' } }
+          },
+          AnyResult
+        )) as any;
         const errs: string[] = [];
         if (result.resultType === 'task') {
           errs.push('sync tool result MUST NOT carry resultType:"task"');
@@ -159,15 +166,16 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under
       const description =
         'Task-supporting tool returns flat CreateTaskResult (no nested `task` wrapper)';
       try {
-        const result = await rawRequest(
-          serverUrl,
-          'tools/call',
+        const result = (await client.request(
           {
-            name: 'slow_compute',
-            arguments: { seconds: 2, label: 'lifecycle-create' }
+            method: 'tools/call',
+            params: {
+              name: 'slow_compute',
+              arguments: { seconds: 2, label: 'lifecycle-create' }
+            }
           },
-          { sessionId }
-        );
+          AnyResult
+        )) as any;
         const errs: string[] = [];
         if (result.resultType !== 'task') {
           errs.push(
@@ -243,12 +251,10 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under
         checks.push(skipCheck(id, name, description, 'no task created'));
       } else {
         try {
-          const task = await rawRequest(
-            serverUrl,
-            'tasks/get',
-            { taskId: workingTaskId },
-            { sessionId }
-          );
+          const task = (await client.request(
+            { method: 'tasks/get', params: { taskId: workingTaskId } },
+            AnyResult
+          )) as any;
           const errs: string[] = [];
           if (task.taskId !== workingTaskId) {
             errs.push(
@@ -284,11 +290,7 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under
         checks.push(skipCheck(id, name, description, 'no task created'));
       } else {
         try {
-          const terminal = await waitForTerminal(
-            serverUrl,
-            sessionId,
-            workingTaskId
-          );
+          const terminal = await waitForTerminal(client, workingTaskId);
           const errs: string[] = [];
           if (terminal.status !== 'completed') {
             errs.push(
@@ -334,21 +336,18 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under
       const description =
         'Tool execution error reports as completed + result.isError (NOT failed)';
       try {
-        const created = await rawRequest(
-          serverUrl,
-          'tools/call',
-          { name: 'failing_job', arguments: {} },
-          { sessionId }
-        );
+        const created = (await client.request(
+          {
+            method: 'tools/call',
+            params: { name: 'failing_job', arguments: {} }
+          },
+          AnyResult
+        )) as any;
         const errs: string[] = [];
         if (!created.taskId) {
           errs.push('failing_job MUST create a task');
         } else {
-          const terminal = await waitForTerminal(
-            serverUrl,
-            sessionId,
-            created.taskId
-          );
+          const terminal = await waitForTerminal(client, created.taskId);
           if (terminal.status !== 'completed') {
             errs.push(
               `tool error MUST surface as completed (not "${terminal.status}")`
@@ -381,21 +380,18 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under
       const description =
         'Protocol-level error reports as failed + inlined error{code,message}, no result';
       try {
-        const created = await rawRequest(
-          serverUrl,
-          'tools/call',
-          { name: 'protocol_error_job', arguments: {} },
-          { sessionId }
-        );
+        const created = (await client.request(
+          {
+            method: 'tools/call',
+            params: { name: 'protocol_error_job', arguments: {} }
+          },
+          AnyResult
+        )) as any;
         const errs: string[] = [];
         if (!created.taskId) {
           errs.push('protocol_error_job MUST create a task');
         } else {
-          const terminal = await waitForTerminal(
-            serverUrl,
-            sessionId,
-            created.taskId
-          );
+          const terminal = await waitForTerminal(client, created.taskId);
           if (terminal.status !== 'failed') {
             errs.push(
               `protocol error MUST surface as failed (not "${terminal.status}")`
@@ -438,15 +434,16 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under
         'tasks/cancel returns {resultType:"complete"} ack; status settles to cancelled';
       let cancelTaskId: string | undefined;
       try {
-        const created = await rawRequest(
-          serverUrl,
-          'tools/call',
+        const created = (await client.request(
           {
-            name: 'slow_compute',
-            arguments: { seconds: 60, label: 'lifecycle-cancel' }
+            method: 'tools/call',
+            params: {
+              name: 'slow_compute',
+              arguments: { seconds: 60, label: 'lifecycle-cancel' }
+            }
           },
-          { sessionId }
-        );
+          AnyResult
+        )) as any;
         cancelTaskId = created.taskId;
         if (!cancelTaskId) {
           checks.push({
@@ -459,12 +456,10 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under
             specReferences: [SEP_2663_REF, SEP_2322_REF]
           });
         } else {
-          const ack = await rawRequest(
-            serverUrl,
-            'tasks/cancel',
-            { taskId: cancelTaskId },
-            { sessionId }
-          );
+          const ack = (await client.request(
+            { method: 'tasks/cancel', params: { taskId: cancelTaskId } },
+            AnyResult
+          )) as any;
           const errs: string[] = [];
           // Ack carries only the SEP-2322 discriminator — no task envelope.
           if (
@@ -475,12 +470,10 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under
             );
           }
           // Status settles to cancelled — observe via tasks/get.
-          const after = await rawRequest(
-            serverUrl,
-            'tasks/get',
-            { taskId: cancelTaskId },
-            { sessionId }
-          );
+          const after = (await client.request(
+            { method: 'tasks/get', params: { taskId: cancelTaskId } },
+            AnyResult
+          )) as any;
           if (after.status !== 'cancelled') {
             errs.push(
               `tasks/get after cancel MUST report cancelled; got ${after.status}`
@@ -509,15 +502,16 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under
       const description =
         'tasks/cancel on a terminal task returns -32602 (per spec commit d963ad0)';
       try {
-        const created = await rawRequest(
-          serverUrl,
-          'tools/call',
+        const created = (await client.request(
           {
-            name: 'slow_compute',
-            arguments: { seconds: 1, label: 'lifecycle-cancel-terminal' }
+            method: 'tools/call',
+            params: {
+              name: 'slow_compute',
+              arguments: { seconds: 1, label: 'lifecycle-cancel-terminal' }
+            }
           },
-          { sessionId }
-        );
+          AnyResult
+        )) as any;
         const completedTaskId = created.taskId;
         if (!completedTaskId) {
           checks.push({
@@ -530,15 +524,13 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under
             specReferences: [SEP_2663_REF]
           });
         } else {
-          await waitForTerminal(serverUrl, sessionId, completedTaskId);
+          await waitForTerminal(client, completedTaskId);
           // Now cancel — must throw -32602.
           let thrown: any;
           try {
-            await rawRequest(
-              serverUrl,
-              'tasks/cancel',
-              { taskId: completedTaskId },
-              { sessionId }
+            await client.request(
+              { method: 'tasks/cancel', params: { taskId: completedTaskId } },
+              AnyResult
             );
           } catch (e) {
             thrown = e;
@@ -569,6 +561,7 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under
       }
     }
 
+    await client.close().catch(() => {}); // best-effort cleanup
     return checks;
   }
 }
diff --git a/src/scenarios/server/tasks/mrtr-input.ts b/src/scenarios/server/tasks/mrtr-input.ts
index 49cfacc..416aa09 100644
--- a/src/scenarios/server/tasks/mrtr-input.ts
+++ b/src/scenarios/server/tasks/mrtr-input.ts
@@ -11,6 +11,9 @@
  *                       parallel so two keys are pending at once
  */
 
+import { Client } from '@modelcontextprotocol/sdk/client/index.js';
+import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
+
 import {
   ClientScenario,
   ConformanceCheck,
@@ -21,10 +24,9 @@ import {
   TASKS_EXTENSION_ID,
   SEP_2322_REF,
   SEP_2663_REF,
+  AnyResult,
   errMsg,
   failureCheck,
-  initRawSession,
-  rawRequest,
   waitForStatus,
   waitForTerminal
 } from './helpers';
@@ -65,15 +67,19 @@ export class TasksMRTRInputScenario implements ClientScenario {
   async run(serverUrl: string): Promise<ConformanceCheck[]> {
     const checks: ConformanceCheck[] = [];
 
-    let sessionId: string;
+    let client: Client;
     try {
-      ({ sessionId } = await initRawSession(serverUrl, {
-        capabilities: {
-          elicitation: {},
-          sampling: {},
-          extensions: { [TASKS_EXTENSION_ID]: {} }
+      client = new Client(
+        { name: 'mcp-conformance', version: '1.0' },
+        {
+          capabilities: {
+            elicitation: {},
+            sampling: {},
+            extensions: { [TASKS_EXTENSION_ID]: {} }
+          }
         }
-      }));
+      );
+      await client.connect(new StreamableHTTPClientTransport(new URL(serverUrl)));
     } catch (error) {
       checks.push({
         id: 'tasks-session-bootstrap',
@@ -95,15 +101,16 @@ export class TasksMRTRInputScenario implements ClientScenario {
       const description =
         'tasks/get on an input_required task MUST surface a non-empty inputRequests map';
       try {
-        const created = await rawRequest(
-          serverUrl,
-          'tools/call',
+        const created = (await client.request(
           {
-            name: 'confirm_delete',
-            arguments: { filename: 'mrtr-input.txt' }
+            method: 'tools/call',
+            params: {
+              name: 'confirm_delete',
+              arguments: { filename: 'mrtr-input.txt' }
+            }
           },
-          { sessionId }
-        );
+          AnyResult
+        )) as any;
         const taskId = created.taskId;
         if (!taskId) {
           checks.push({
@@ -117,8 +124,7 @@ export class TasksMRTRInputScenario implements ClientScenario {
           });
         } else {
           const task = await waitForStatus(
-            serverUrl,
-            sessionId,
+            client,
             taskId,
             'input_required',
             5_000
@@ -150,11 +156,9 @@ export class TasksMRTRInputScenario implements ClientScenario {
           }
           // Cancel so we don't leave the task parked.
           try {
-            await rawRequest(
-              serverUrl,
-              'tasks/cancel',
-              { taskId },
-              { sessionId }
+            await client.request(
+              { method: 'tasks/cancel', params: { taskId } },
+              AnyResult
             );
           } catch {
             /* swallow */
@@ -181,15 +185,16 @@ export class TasksMRTRInputScenario implements ClientScenario {
       const description =
         'tasks/update with matching inputResponses MUST be acked with {resultType:"complete"} and resume the task to a terminal state';
       try {
-        const created = await rawRequest(
-          serverUrl,
-          'tools/call',
+        const created = (await client.request(
           {
-            name: 'confirm_delete',
-            arguments: { filename: 'mrtr-resume.txt' }
+            method: 'tools/call',
+            params: {
+              name: 'confirm_delete',
+              arguments: { filename: 'mrtr-resume.txt' }
+            }
           },
-          { sessionId }
-        );
+          AnyResult
+        )) as any;
         const taskId = created.taskId;
         if (!taskId) {
           checks.push({
@@ -203,8 +208,7 @@ export class TasksMRTRInputScenario implements ClientScenario {
           });
         } else {
           const inputTask = await waitForStatus(
-            serverUrl,
-            sessionId,
+            client,
             taskId,
             'input_required',
             5_000
@@ -217,16 +221,17 @@ export class TasksMRTRInputScenario implements ClientScenario {
               content: { confirm: true }
             };
           }
-          const ack = await rawRequest(
-            serverUrl,
-            'tasks/update',
+          const ack = (await client.request(
             {
-              taskId,
-              inputResponses: responses,
-              requestState: inputTask.requestState
+              method: 'tasks/update',
+              params: {
+                taskId,
+                inputResponses: responses,
+                requestState: inputTask.requestState
+              }
             },
-            { sessionId }
-          );
+            AnyResult
+          )) as any;
           if (
             JSON.stringify(ack) !== JSON.stringify({ resultType: 'complete' })
           ) {
@@ -234,7 +239,7 @@ export class TasksMRTRInputScenario implements ClientScenario {
               `tasks/update ack MUST be {resultType:"complete"}; got ${JSON.stringify(ack)}`
             );
           }
-          const terminal = await waitForTerminal(serverUrl, sessionId, taskId);
+          const terminal = await waitForTerminal(client, taskId);
           if (terminal.status !== 'completed') {
             errs.push(
               `task MUST resume to completed after tasks/update; got status ${JSON.stringify(terminal.status)}`
@@ -267,12 +272,13 @@ export class TasksMRTRInputScenario implements ClientScenario {
       const description =
         'tasks/update with a subset of keys MUST keep the task in input_required with only the unanswered key remaining';
       try {
-        const created = await rawRequest(
-          serverUrl,
-          'tools/call',
-          { name: 'multi_input', arguments: {} },
-          { sessionId }
-        );
+        const created = (await client.request(
+          {
+            method: 'tools/call',
+            params: { name: 'multi_input', arguments: {} }
+          },
+          AnyResult
+        )) as any;
         const taskId = created.taskId;
         if (!taskId) {
           checks.push({
@@ -290,12 +296,10 @@ export class TasksMRTRInputScenario implements ClientScenario {
           let inputTask: any;
           const start = Date.now();
           while (Date.now() - start < 5_000) {
-            inputTask = await rawRequest(
-              serverUrl,
-              'tasks/get',
-              { taskId },
-              { sessionId }
-            );
+            inputTask = (await client.request(
+              { method: 'tasks/get', params: { taskId } },
+              AnyResult
+            )) as any;
             if (
               inputTask.status === 'input_required' &&
               inputTask.inputRequests &&
@@ -320,20 +324,21 @@ export class TasksMRTRInputScenario implements ClientScenario {
             const [firstKey, secondKey] = keys;
 
             // Answer first key only.
-            const firstAck = await rawRequest(
-              serverUrl,
-              'tasks/update',
+            const firstAck = (await client.request(
               {
-                taskId,
-                inputResponses: {
-                  [firstKey]: {
-                    action: 'accept',
-                    content: { name: 'partial-1', confirm: true }
+                method: 'tasks/update',
+                params: {
+                  taskId,
+                  inputResponses: {
+                    [firstKey]: {
+                      action: 'accept',
+                      content: { name: 'partial-1', confirm: true }
+                    }
                   }
                 }
               },
-              { sessionId }
-            );
+              AnyResult
+            )) as any;
             if (firstAck.resultType !== 'complete') {
               errs.push(
                 `partial tasks/update ack MUST carry resultType:"complete"; got ${JSON.stringify(firstAck)}`
@@ -342,12 +347,10 @@ export class TasksMRTRInputScenario implements ClientScenario {
 
             // Status MUST still be input_required with only the second
             // key remaining.
-            const afterFirst = await rawRequest(
-              serverUrl,
-              'tasks/get',
-              { taskId },
-              { sessionId }
-            );
+            const afterFirst = (await client.request(
+              { method: 'tasks/get', params: { taskId } },
+              AnyResult
+            )) as any;
             if (afterFirst.status !== 'input_required') {
               errs.push(
                 `task MUST stay input_required while another input is still pending; got ${JSON.stringify(afterFirst.status)}`
@@ -366,25 +369,22 @@ export class TasksMRTRInputScenario implements ClientScenario {
             }
 
             // Answer second key — task resumes and finishes.
-            await rawRequest(
-              serverUrl,
-              'tasks/update',
+            await client.request(
               {
-                taskId,
-                inputResponses: {
-                  [secondKey]: {
-                    action: 'accept',
-                    content: { name: 'partial-2', confirm: true }
+                method: 'tasks/update',
+                params: {
+                  taskId,
+                  inputResponses: {
+                    [secondKey]: {
+                      action: 'accept',
+                      content: { name: 'partial-2', confirm: true }
+                    }
                   }
                 }
               },
-              { sessionId }
-            );
-            const terminal = await waitForTerminal(
-              serverUrl,
-              sessionId,
-              taskId
+              AnyResult
             );
+            const terminal = await waitForTerminal(client, taskId);
             if (terminal.status !== 'completed') {
               errs.push(
                 `task MUST complete after both inputs are satisfied; got ${JSON.stringify(terminal.status)}`
@@ -411,6 +411,7 @@ export class TasksMRTRInputScenario implements ClientScenario {
       }
     }
 
+    await client.close().catch(() => {});
     return checks;
   }
 }
diff --git a/src/scenarios/server/tasks/notifications.ts b/src/scenarios/server/tasks/notifications.ts
index a3881a2..3a4f3c5 100644
--- a/src/scenarios/server/tasks/notifications.ts
+++ b/src/scenarios/server/tasks/notifications.ts
@@ -18,6 +18,9 @@
  *   - slow_compute  — task-supporting, sleeps N seconds
  */
 
+import { Client } from '@modelcontextprotocol/sdk/client/index.js';
+import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
+
 import {
   ClientScenario,
   ConformanceCheck,
@@ -29,7 +32,6 @@ import {
   SEP_2663_REF,
   errMsg,
   failureCheck,
-  initRawSession,
   waitForTerminal
 } from './helpers';
 
@@ -54,11 +56,15 @@ notification params MUST carry:
   async run(serverUrl: string): Promise<ConformanceCheck[]> {
     const checks: ConformanceCheck[] = [];
 
-    let sessionId: string;
+    let client: Client;
+    let transport: StreamableHTTPClientTransport;
     try {
-      ({ sessionId } = await initRawSession(serverUrl, {
-        capabilities: { extensions: { [TASKS_EXTENSION_ID]: {} } }
-      }));
+      client = new Client(
+        { name: 'mcp-conformance', version: '1.0' },
+        { capabilities: { extensions: { [TASKS_EXTENSION_ID]: {} } } }
+      );
+      transport = new StreamableHTTPClientTransport(new URL(serverUrl));
+      await client.connect(transport);
     } catch (error) {
       checks.push({
         id: 'tasks-session-bootstrap',
@@ -81,6 +87,11 @@ notification params MUST carry:
     // Issue tools/call with SSE-accepting headers and capture every
     // `data:` payload. Some are JSON-RPC responses (with id), some are
     // notifications (no id). We ingest all and classify by the body.
+    //
+    // The SDK's Client.request() consumes the response stream internally,
+    // so to *observe* notification frames on the POST SSE we drop to raw
+    // fetch here while reusing the SDK-initialized session via
+    // `transport.sessionId`.
     let taskId: string | undefined;
     const notifications: any[] = [];
     try {
@@ -89,7 +100,7 @@ notification params MUST carry:
         headers: {
           'Content-Type': 'application/json',
           Accept: 'text/event-stream, application/json',
-          'Mcp-Session-Id': sessionId
+          'Mcp-Session-Id': transport.sessionId!
         },
         body: JSON.stringify({
           jsonrpc: '2.0',
@@ -132,7 +143,7 @@ notification params MUST carry:
     // collecting more, but we're done with this scenario regardless).
     if (taskId) {
       try {
-        await waitForTerminal(serverUrl, sessionId, taskId);
+        await waitForTerminal(client, taskId);
       } catch {
         /* swallow */
       }
@@ -149,6 +160,7 @@ notification params MUST carry:
           'No status notifications received on the tools/call POST SSE stream (notifications are optional)',
         specReferences: [SEP_2663_REF]
       });
+      await client.close().catch(() => {});
       return checks;
     }
 
@@ -183,6 +195,7 @@ notification params MUST carry:
       details: { notificationCount: notifications.length }
     });
 
+    await client.close().catch(() => {});
     return checks;
   }
 }
diff --git a/src/scenarios/server/tasks/request-state.ts b/src/scenarios/server/tasks/request-state.ts
index 8c2b165..30e9cc5 100644
--- a/src/scenarios/server/tasks/request-state.ts
+++ b/src/scenarios/server/tasks/request-state.ts
@@ -16,6 +16,9 @@
  *   - slow_compute  — task-supporting, sleeps N seconds
  */
 
+import { Client } from '@modelcontextprotocol/sdk/client/index.js';
+import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
+
 import {
   ClientScenario,
   ConformanceCheck,
@@ -26,10 +29,9 @@ import {
   TASKS_EXTENSION_ID,
   SEP_2322_REF,
   SEP_2663_REF,
+  AnyResult,
   errMsg,
-  failureCheck,
-  initRawSession,
-  rawRequest
+  failureCheck
 } from './helpers';
 
 export class TasksRequestStateScenario implements ClientScenario {
@@ -59,11 +61,15 @@ export class TasksRequestStateScenario implements ClientScenario {
   async run(serverUrl: string): Promise<ConformanceCheck[]> {
     const checks: ConformanceCheck[] = [];
 
-    let sessionId: string;
+    let client: Client;
     try {
-      ({ sessionId } = await initRawSession(serverUrl, {
-        capabilities: { extensions: { [TASKS_EXTENSION_ID]: {} } }
-      }));
+      client = new Client(
+        { name: 'mcp-conformance', version: '1.0' },
+        {
+          capabilities: { extensions: { [TASKS_EXTENSION_ID]: {} } }
+        }
+      );
+      await client.connect(new StreamableHTTPClientTransport(new URL(serverUrl)));
     } catch (error) {
       checks.push({
         id: 'tasks-session-bootstrap',
@@ -81,15 +87,16 @@ export class TasksRequestStateScenario implements ClientScenario {
     // Drive a long-running task once and reuse it for every check.
     let taskId: string | undefined;
     try {
-      const created = await rawRequest(
-        serverUrl,
-        'tools/call',
+      const created = (await client.request(
         {
-          name: 'slow_compute',
-          arguments: { seconds: 60, label: 'request-state' }
+          method: 'tools/call',
+          params: {
+            name: 'slow_compute',
+            arguments: { seconds: 60, label: 'request-state' }
+          }
         },
-        { sessionId }
-      );
+        AnyResult
+      )) as any;
       taskId = created.taskId;
     } catch (error) {
       checks.push(
@@ -127,12 +134,10 @@ export class TasksRequestStateScenario implements ClientScenario {
       const description =
         'tasks/get may include requestState; when present it MUST be a non-empty string';
       try {
-        const task = await rawRequest(
-          serverUrl,
-          'tasks/get',
-          { taskId },
-          { sessionId }
-        );
+        const task = (await client.request(
+          { method: 'tasks/get', params: { taskId } },
+          AnyResult
+        )) as any;
         const errs: string[] = [];
         if (task.requestState !== undefined) {
           if (typeof task.requestState !== 'string') {
@@ -187,12 +192,10 @@ export class TasksRequestStateScenario implements ClientScenario {
         });
       } else {
         try {
-          const echoed = await rawRequest(
-            serverUrl,
-            'tasks/get',
-            { taskId, requestState: firstToken },
-            { sessionId }
-          );
+          const echoed = (await client.request(
+            { method: 'tasks/get', params: { taskId, requestState: firstToken } },
+            AnyResult
+          )) as any;
           const errs: string[] = [];
           if (echoed.taskId !== taskId) {
             errs.push(
@@ -239,19 +242,15 @@ export class TasksRequestStateScenario implements ClientScenario {
           // that sign tokens with embedded expiry, this likely yields a
           // newer token; on plaintext-token servers it round-trips the
           // same value (still valid).
-          await rawRequest(
-            serverUrl,
-            'tasks/get',
-            { taskId, requestState: firstToken },
-            { sessionId }
+          await client.request(
+            { method: 'tasks/get', params: { taskId, requestState: firstToken } },
+            AnyResult
           );
           // Now re-echo the OLDER token; server MUST accept.
-          const stale = await rawRequest(
-            serverUrl,
-            'tasks/get',
-            { taskId, requestState: firstToken },
-            { sessionId }
-          );
+          const stale = (await client.request(
+            { method: 'tasks/get', params: { taskId, requestState: firstToken } },
+            AnyResult
+          )) as any;
           const errs: string[] = [];
           if (stale.taskId !== taskId) {
             errs.push(
@@ -280,11 +279,15 @@ export class TasksRequestStateScenario implements ClientScenario {
 
     // Cleanup the long-lived task so we don't leak goroutines.
     try {
-      await rawRequest(serverUrl, 'tasks/cancel', { taskId }, { sessionId });
+      await client.request(
+        { method: 'tasks/cancel', params: { taskId } },
+        AnyResult
+      );
     } catch {
       /* swallow */
     }
 
+    await client.close().catch(() => {});
     return checks;
   }
 }
diff --git a/src/scenarios/server/tasks/wire-fields.ts b/src/scenarios/server/tasks/wire-fields.ts
index 3fb377d..3dc69fd 100644
--- a/src/scenarios/server/tasks/wire-fields.ts
+++ b/src/scenarios/server/tasks/wire-fields.ts
@@ -10,6 +10,9 @@
  *   - slow_compute — task-supporting, sleeps N seconds
  */
 
+import { Client } from '@modelcontextprotocol/sdk/client/index.js';
+import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
+
 import {
   ClientScenario,
   ConformanceCheck,
@@ -19,11 +22,10 @@ import {
 import {
   TASKS_EXTENSION_ID,
   SEP_2663_REF,
+  AnyResult,
   errMsg,
   failureCheck,
   skipCheck,
-  initRawSession,
-  rawRequest,
   waitForTerminal
 } from './helpers';
 
@@ -57,13 +59,17 @@ export class TasksWireFieldsScenario implements ClientScenario {
   async run(serverUrl: string): Promise<ConformanceCheck[]> {
     const checks: ConformanceCheck[] = [];
 
-    let sessionId: string;
+    let client: Client;
     try {
-      ({ sessionId } = await initRawSession(serverUrl, {
-        capabilities: {
-          extensions: { [TASKS_EXTENSION_ID]: {} }
+      client = new Client(
+        { name: 'mcp-conformance', version: '1.0' },
+        {
+          capabilities: {
+            extensions: { [TASKS_EXTENSION_ID]: {} }
+          }
         }
-      }));
+      );
+      await client.connect(new StreamableHTTPClientTransport(new URL(serverUrl)));
     } catch (error) {
       checks.push({
         id: 'tasks-session-bootstrap',
@@ -86,15 +92,16 @@ export class TasksWireFieldsScenario implements ClientScenario {
       const description =
         'CreateTaskResult uses ttlSeconds + pollIntervalMilliseconds; legacy ttl / pollInterval keys absent';
       try {
-        const result = await rawRequest(
-          serverUrl,
-          'tools/call',
+        const result = (await client.request(
           {
-            name: 'slow_compute',
-            arguments: { seconds: 1, label: 'wire-fields' }
+            method: 'tools/call',
+            params: {
+              name: 'slow_compute',
+              arguments: { seconds: 1, label: 'wire-fields' }
+            }
           },
-          { sessionId }
-        );
+          AnyResult
+        )) as any;
         createdTaskId = result.taskId;
         const errs: string[] = [];
         // ttlSeconds — required, positive (or null = unlimited; treat
@@ -163,16 +170,17 @@ export class TasksWireFieldsScenario implements ClientScenario {
         checks.push(skipCheck(id, name, description, 'no task created'));
       } else {
         try {
-          await waitForTerminal(serverUrl, sessionId, createdTaskId);
+          await waitForTerminal(client, createdTaskId);
           // Sanity probe well before TTL (the unit is seconds; servers
           // typically pick order-of-minutes defaults).
           await new Promise((r) => setTimeout(r, 500));
-          const after = await rawRequest(
-            serverUrl,
-            'tasks/get',
-            { taskId: createdTaskId },
-            { sessionId }
-          );
+          const after = (await client.request(
+            {
+              method: 'tasks/get',
+              params: { taskId: createdTaskId }
+            },
+            AnyResult
+          )) as any;
           const errs: string[] = [];
           if (after.taskId !== createdTaskId) {
             errs.push(
@@ -203,20 +211,21 @@ export class TasksWireFieldsScenario implements ClientScenario {
       const description =
         'tasks/get inlined result MUST NOT include the v1 io.modelcontextprotocol/related-task _meta key (taskId is at the root)';
       try {
-        const created = await rawRequest(
-          serverUrl,
-          'tools/call',
+        const created = (await client.request(
           {
-            name: 'slow_compute',
-            arguments: { seconds: 1, label: 'wire-fields-meta' }
+            method: 'tools/call',
+            params: {
+              name: 'slow_compute',
+              arguments: { seconds: 1, label: 'wire-fields-meta' }
+            }
           },
-          { sessionId }
-        );
+          AnyResult
+        )) as any;
         const taskId = created.taskId;
         if (!taskId) {
           checks.push(skipCheck(id, name, description, 'no task created'));
         } else {
-          const terminal = await waitForTerminal(serverUrl, sessionId, taskId);
+          const terminal = await waitForTerminal(client, taskId);
           const errs: string[] = [];
           const meta = terminal.result?._meta;
           if (meta && meta['io.modelcontextprotocol/related-task']) {
@@ -245,6 +254,7 @@ export class TasksWireFieldsScenario implements ClientScenario {
       }
     }
 
+    await client.close().catch(() => {});
     return checks;
   }
 }

From 683c633514105c28e9fa56343e1b5763533de7d8 Mon Sep 17 00:00:00 2001
From: Sri Panyam <sri.panyam@gmail.com>
Date: Wed, 6 May 2026 14:24:23 -0700
Subject: [PATCH 7/7] style: prettier formatting on tasks/mrtr scenarios

---
 src/scenarios/server/mrtr/ephemeral-flow.ts | 23 ++++++++++++++++-----
 src/scenarios/server/tasks/capability.ts    |  4 +++-
 src/scenarios/server/tasks/dispatch.ts      | 21 ++++++++-----------
 src/scenarios/server/tasks/lifecycle.ts     |  4 +++-
 src/scenarios/server/tasks/mrtr-input.ts    |  4 +++-
 src/scenarios/server/tasks/request-state.ts | 19 +++++++++++++----
 src/scenarios/server/tasks/wire-fields.ts   |  4 +++-
 7 files changed, 54 insertions(+), 25 deletions(-)

diff --git a/src/scenarios/server/mrtr/ephemeral-flow.ts b/src/scenarios/server/mrtr/ephemeral-flow.ts
index 51025a7..543c431 100644
--- a/src/scenarios/server/mrtr/ephemeral-flow.ts
+++ b/src/scenarios/server/mrtr/ephemeral-flow.ts
@@ -101,7 +101,9 @@ Every \`tools/call\` response in the MRTR contract is one of:
           }
         }
       );
-      await client.connect(new StreamableHTTPClientTransport(new URL(serverUrl)));
+      await client.connect(
+        new StreamableHTTPClientTransport(new URL(serverUrl))
+      );
     } catch (error) {
       checks.push({
         id: 'mrtr-session-bootstrap',
@@ -312,7 +314,10 @@ Every \`tools/call\` response in the MRTR contract is one of:
         const r1 = (await client.request(
           {
             method: 'tools/call',
-            params: { name: 'test_incomplete_result_request_state', arguments: {} }
+            params: {
+              name: 'test_incomplete_result_request_state',
+              arguments: {}
+            }
           },
           AnyResult
         )) as any;
@@ -378,7 +383,10 @@ Every \`tools/call\` response in the MRTR contract is one of:
         const r1 = (await client.request(
           {
             method: 'tools/call',
-            params: { name: 'test_incomplete_result_multiple_inputs', arguments: {} }
+            params: {
+              name: 'test_incomplete_result_multiple_inputs',
+              arguments: {}
+            }
           },
           AnyResult
         )) as any;
@@ -455,7 +463,10 @@ Every \`tools/call\` response in the MRTR contract is one of:
         const r1 = (await client.request(
           {
             method: 'tools/call',
-            params: { name: 'test_incomplete_result_multi_round', arguments: {} }
+            params: {
+              name: 'test_incomplete_result_multi_round',
+              arguments: {}
+            }
           },
           AnyResult
         )) as any;
@@ -544,7 +555,9 @@ Every \`tools/call\` response in the MRTR contract is one of:
             params: {
               name: 'test_incomplete_result_elicitation',
               arguments: {},
-              inputResponses: { wrong_key: mockElicitResponse({ data: 'wrong' }) }
+              inputResponses: {
+                wrong_key: mockElicitResponse({ data: 'wrong' })
+              }
             }
           },
           AnyResult
diff --git a/src/scenarios/server/tasks/capability.ts b/src/scenarios/server/tasks/capability.ts
index c92c287..5211145 100644
--- a/src/scenarios/server/tasks/capability.ts
+++ b/src/scenarios/server/tasks/capability.ts
@@ -76,7 +76,9 @@ export class TasksCapabilityNegotiationScenario implements ClientScenario {
           }
         }
       );
-      await withExt.connect(new StreamableHTTPClientTransport(new URL(serverUrl)));
+      await withExt.connect(
+        new StreamableHTTPClientTransport(new URL(serverUrl))
+      );
 
       withoutExt = new Client(
         { name: 'mcp-conformance', version: '1.0' },
diff --git a/src/scenarios/server/tasks/dispatch.ts b/src/scenarios/server/tasks/dispatch.ts
index 272ea57..ec78df4 100644
--- a/src/scenarios/server/tasks/dispatch.ts
+++ b/src/scenarios/server/tasks/dispatch.ts
@@ -104,7 +104,9 @@ export class TasksDispatchScenario implements ClientScenario {
           }
         }
       );
-      await client.connect(new StreamableHTTPClientTransport(new URL(serverUrl)));
+      await client.connect(
+        new StreamableHTTPClientTransport(new URL(serverUrl))
+      );
     } catch (error) {
       checks.push({
         id: 'tasks-session-bootstrap',
@@ -163,10 +165,7 @@ export class TasksDispatchScenario implements ClientScenario {
       const description =
         'tasks/list is removed in v2 and MUST reject with -32601';
       try {
-        await client.request(
-          { method: 'tasks/list', params: {} },
-          AnyResult
-        );
+        await client.request({ method: 'tasks/list', params: {} }, AnyResult);
         checks.push({
           id,
           name,
@@ -404,18 +403,16 @@ export class TasksDispatchScenario implements ClientScenario {
         const elicit = (await client.request(
           {
             method: 'tools/call',
-            params: { name: 'confirm_delete', arguments: { filename: 'rt.txt' } }
+            params: {
+              name: 'confirm_delete',
+              arguments: { filename: 'rt.txt' }
+            }
           },
           AnyResult
         )) as any;
         const elicitTaskId = elicit.taskId;
         if (elicitTaskId) {
-          await waitForStatus(
-            client,
-            elicitTaskId,
-            'input_required',
-            5_000
-          );
+          await waitForStatus(client, elicitTaskId, 'input_required', 5_000);
           const updateAck = (await client.request(
             {
               method: 'tasks/update',
diff --git a/src/scenarios/server/tasks/lifecycle.ts b/src/scenarios/server/tasks/lifecycle.ts
index 53381fa..cdca072 100644
--- a/src/scenarios/server/tasks/lifecycle.ts
+++ b/src/scenarios/server/tasks/lifecycle.ts
@@ -98,7 +98,9 @@ The server MUST advertise \`io.modelcontextprotocol/tasks\` under
           }
         }
       );
-      await client.connect(new StreamableHTTPClientTransport(new URL(serverUrl)));
+      await client.connect(
+        new StreamableHTTPClientTransport(new URL(serverUrl))
+      );
     } catch (error) {
       checks.push({
         id: 'tasks-session-bootstrap',
diff --git a/src/scenarios/server/tasks/mrtr-input.ts b/src/scenarios/server/tasks/mrtr-input.ts
index 416aa09..84f0cbb 100644
--- a/src/scenarios/server/tasks/mrtr-input.ts
+++ b/src/scenarios/server/tasks/mrtr-input.ts
@@ -79,7 +79,9 @@ export class TasksMRTRInputScenario implements ClientScenario {
           }
         }
       );
-      await client.connect(new StreamableHTTPClientTransport(new URL(serverUrl)));
+      await client.connect(
+        new StreamableHTTPClientTransport(new URL(serverUrl))
+      );
     } catch (error) {
       checks.push({
         id: 'tasks-session-bootstrap',
diff --git a/src/scenarios/server/tasks/request-state.ts b/src/scenarios/server/tasks/request-state.ts
index 30e9cc5..c6f9de8 100644
--- a/src/scenarios/server/tasks/request-state.ts
+++ b/src/scenarios/server/tasks/request-state.ts
@@ -69,7 +69,9 @@ export class TasksRequestStateScenario implements ClientScenario {
           capabilities: { extensions: { [TASKS_EXTENSION_ID]: {} } }
         }
       );
-      await client.connect(new StreamableHTTPClientTransport(new URL(serverUrl)));
+      await client.connect(
+        new StreamableHTTPClientTransport(new URL(serverUrl))
+      );
     } catch (error) {
       checks.push({
         id: 'tasks-session-bootstrap',
@@ -193,7 +195,10 @@ export class TasksRequestStateScenario implements ClientScenario {
       } else {
         try {
           const echoed = (await client.request(
-            { method: 'tasks/get', params: { taskId, requestState: firstToken } },
+            {
+              method: 'tasks/get',
+              params: { taskId, requestState: firstToken }
+            },
             AnyResult
           )) as any;
           const errs: string[] = [];
@@ -243,12 +248,18 @@ export class TasksRequestStateScenario implements ClientScenario {
           // newer token; on plaintext-token servers it round-trips the
           // same value (still valid).
           await client.request(
-            { method: 'tasks/get', params: { taskId, requestState: firstToken } },
+            {
+              method: 'tasks/get',
+              params: { taskId, requestState: firstToken }
+            },
             AnyResult
           );
           // Now re-echo the OLDER token; server MUST accept.
           const stale = (await client.request(
-            { method: 'tasks/get', params: { taskId, requestState: firstToken } },
+            {
+              method: 'tasks/get',
+              params: { taskId, requestState: firstToken }
+            },
             AnyResult
           )) as any;
           const errs: string[] = [];
diff --git a/src/scenarios/server/tasks/wire-fields.ts b/src/scenarios/server/tasks/wire-fields.ts
index 3dc69fd..1c98d16 100644
--- a/src/scenarios/server/tasks/wire-fields.ts
+++ b/src/scenarios/server/tasks/wire-fields.ts
@@ -69,7 +69,9 @@ export class TasksWireFieldsScenario implements ClientScenario {
           }
         }
       );
-      await client.connect(new StreamableHTTPClientTransport(new URL(serverUrl)));
+      await client.connect(
+        new StreamableHTTPClientTransport(new URL(serverUrl))
+      );
     } catch (error) {
       checks.push({
         id: 'tasks-session-bootstrap',