Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 52 additions & 2 deletions src/scenarios/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,16 @@ import {

import { DNSRebindingProtectionScenario } from './server/dns-rebinding';

import { TasksLifecycleScenario } from './server/tasks/lifecycle';
import { TasksCapabilityNegotiationScenario } from './server/tasks/capability';
import { TasksWireFieldsScenario } from './server/tasks/wire-fields';
import { TasksRequestStateScenario } from './server/tasks/request-state';
import { TasksMRTRInputScenario } from './server/tasks/mrtr-input';
import { TasksRequestHeadersScenario } from './server/tasks/headers';
import { TasksDispatchScenario } from './server/tasks/dispatch';
import { TasksStatusNotificationsScenario } from './server/tasks/notifications';
import { MrtrEphemeralFlowScenario } from './server/mrtr/ephemeral-flow';

import {
authScenariosList,
backcompatScenariosList,
Expand All @@ -81,7 +91,28 @@ const pendingClientScenariosList: ClientScenario[] = [

// On hold until server-side SSE improvements are made
// https://github.com/modelcontextprotocol/typescript-sdk/pull/1129
new ServerSSEPollingScenario()
new ServerSSEPollingScenario(),

// SEP-2663 Tasks extension lifecycle.
// The SEP is still in draft (see PR 2663) and the everything-server
// does not yet implement the io.modelcontextprotocol/tasks extension,
// so all-scenarios.test.ts cannot exercise this against the default
// fixture. Active runs target a SEP-2663-conformant server via the
// dedicated tasks/lifecycle.test.ts harness.
new TasksLifecycleScenario(),
new TasksCapabilityNegotiationScenario(),
new TasksWireFieldsScenario(),
new TasksRequestStateScenario(),
new TasksMRTRInputScenario(),
new TasksRequestHeadersScenario(),
new TasksDispatchScenario(),
new TasksStatusNotificationsScenario(),

// SEP-2322 MRTR (ephemeral IncompleteResult flow).
// Targets a different fixture than tasks scenarios; the dedicated
// mrtr/all-scenarios.test.ts runner points at an MRTR-conformant
// server via MRTR_SERVER_URL / MRTR_SERVER_CMD.
new MrtrEphemeralFlowScenario()
];

// All client scenarios
Expand Down Expand Up @@ -139,7 +170,26 @@ const allClientScenariosList: ClientScenario[] = [
new PromptsGetWithImageScenario(),

// Security scenarios
new DNSRebindingProtectionScenario()
new DNSRebindingProtectionScenario(),

// SEP-2663 Tasks extension (draft).
// Listed here so the CLI can find them by name and so the active/pending
// filter sees them; pendingClientScenariosList (declared above) excludes
// these scenarios from automatic runs against the everything-server
// (which doesn't implement io.modelcontextprotocol/tasks yet).
new TasksLifecycleScenario(),
new TasksCapabilityNegotiationScenario(),
new TasksWireFieldsScenario(),
new TasksRequestStateScenario(),
new TasksMRTRInputScenario(),
new TasksRequestHeadersScenario(),
new TasksDispatchScenario(),
new TasksStatusNotificationsScenario(),

// SEP-2322 MRTR (ephemeral IncompleteResult flow). Targets a
// dedicated MRTR fixture — out of scope for the default
// everything-server until SEP-2322 lands there.
new MrtrEphemeralFlowScenario()
];

// Active client scenarios (excludes pending)
Expand Down
56 changes: 56 additions & 0 deletions src/scenarios/server/_shared/test-runner.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/**
* Test-runner utilities for server-conformance scenarios.
*
* Used by `*.test.ts` runner files that auto-spawn a fixture binary
* before running scenarios. These helpers are language-agnostic and
* harness-only — they don't touch MCP protocol, so they don't belong
* in the SDK.
*
* Single responsibility today: TCP readiness polling. Spawn / cleanup
* scaffolding stays inline in each runner so the file reads top-to-bottom
* without indirection (per AGENTS.md "repetitive check blocks are fine").
*/

import { connect } from 'net';

/**
 * Make one TCP connection attempt to `host:port`.
 *
 * Resolves as soon as the connection is established (the socket is
 * closed again right away); rejects on a socket error or when no
 * connection was made within `attemptTimeoutMs` of inactivity.
 */
function attemptTcpConnect(
  host: string,
  port: number,
  attemptTimeoutMs: number
): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    const socket = connect({ host, port }, () => {
      socket.end();
      resolve();
    });
    socket.once('error', (err) => {
      socket.destroy();
      reject(err);
    });
    socket.setTimeout(attemptTimeoutMs, () => {
      socket.destroy();
      reject(new Error('connect timeout'));
    });
  });
}

/**
 * Poll the host/port of the given URL until a TCP connection succeeds
 * or the timeout elapses. Language-agnostic readiness check — works
 * for any server that binds before serving requests.
 *
 * @param url - Endpoint whose host and port are probed. When the URL
 *   carries no explicit port, 443 is used for `https:` and 80 otherwise.
 * @param timeoutMs - Overall polling budget in milliseconds. The budget
 *   is only checked between attempts, so the call may overrun it by up
 *   to one attempt (1s) plus one retry pause (200ms).
 * @returns Resolves once a connection has been accepted.
 * @throws Error when no attempt succeeded before the deadline; the
 *   message includes the last connection error seen.
 */
export async function waitForServerReady(
  url: string,
  timeoutMs: number
): Promise<void> {
  const u = new URL(url);
  const port = parseInt(u.port || (u.protocol === 'https:' ? '443' : '80'), 10);
  // URL.hostname keeps the square brackets around IPv6 literals
  // ("[::1]"), but net.connect expects the bare address and would
  // otherwise try to resolve the bracketed form as a DNS name.
  const host = u.hostname.replace(/^\[(.*)\]$/, '$1');
  const deadline = Date.now() + timeoutMs;
  let lastErr: Error | null = null;

  while (Date.now() < deadline) {
    try {
      await attemptTcpConnect(host, port, 1_000);
      return;
    } catch (err) {
      lastErr = err instanceof Error ? err : new Error(String(err));
      // Short pause so a refused connection does not turn into a busy loop.
      await new Promise((r) => setTimeout(r, 200));
    }
  }
  // Report the host as written in the URL (brackets included) so that
  // "host:port" stays unambiguous for IPv6 targets.
  throw new Error(
    `${u.hostname}:${port} did not accept TCP connections (last: ${lastErr?.message ?? 'unknown'})`
  );
}
33 changes: 33 additions & 0 deletions src/scenarios/server/_shared/wire-format.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/**
* Wire-format validation helpers shared across server-conformance
* scenarios. Pure predicates / regex — no I/O, no async.
*
* Pragmatic choices are documented per helper. When validation
* requirements tighten (e.g., the spec mandates a stricter timestamp
* format), edit here once and every scenario picks it up.
*/

/**
* ISO-8601 timestamp prefix (YYYY-MM-DDThh:mm:ss). Tolerant about
* the timezone tail (`Z`, `+00:00`, `+0000`) and sub-second precision —
* matches what real servers emit (Go `time.RFC3339Nano`,
* Python `datetime.isoformat()`, JavaScript `toISOString()`).
*
* Why a regex over `Date.parse` / `new Date(s).toISOString() === s` /
* `Temporal.Instant.from`:
* - `Date.parse` accepts RFC-2822, "May 4 2026", and other
* non-ISO strings — too permissive.
* - `new Date(s).toISOString() === s` is too strict — rejects
* valid `+00:00`-style offsets that don't survive the canonical
* `Z` round-trip.
* - `Temporal.Instant.from` is Node 24+ experimental.
*
* Swap this constant for a stdlib validator if/when one becomes
* broadly available.
*/
export const ISO_8601_PATTERN = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}/;

/**
 * Reports whether the input is a string that begins with an ISO-8601
 * date-time (YYYY-MM-DDThh:mm:ss). Anything after the seconds field is
 * not inspected, and every non-string input yields false.
 */
export function isIso8601(s: unknown): boolean {
  if (typeof s !== 'string') {
    return false;
  }
  return ISO_8601_PATTERN.test(s);
}
113 changes: 113 additions & 0 deletions src/scenarios/server/mrtr/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# SEP-2322 MRTR — Server Conformance

Tests any MCP server that implements the SEP-2322 ephemeral
Multi Round-Trip Request flow on `tools/call` — the
`IncompleteResult` → retry-with-`inputResponses` → `ToolResult`
contract that lets a tool gather elicitation / sampling / roots input
without creating a task envelope.

## Specs covered

| SEP | What it adds | Where it shows up |
| -------- | ---------------------------------------------------------------------------------------------------------------- | ----------------------------- |
| SEP-2322 | Ephemeral MRTR — `resultType` discriminator, `inputRequests` / `inputResponses` keyed maps, `requestState` token | every check |
| SEP-2663 | MRTR → Tasks composition (final round returns `CreateTaskResult`) | mrtr-08 (SKIPPED — see below) |

## ClientScenario classes

### `mrtr-ephemeral-flow` (`ephemeral-flow.ts`)

A single scenario covering the full ephemeral MRTR contract, following
the AGENTS.md "fewer scenarios, more checks" rule. A server that
implemented elicitation round-trips but not sampling round-trips would
be incoherent, so all the checks are bundled into one scenario.

| Check | What it tests |
| ---------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- |
| `mrtr-basic-elicitation-round-trip` | Round 1 returns `IncompleteResult` with `elicitation/create`; round 2 completes with the answer reflected |
| `mrtr-sampling-round-trip` | Same flow with `sampling/createMessage` |
| `mrtr-roots-list-round-trip` | Same flow with `roots/list` |
| `mrtr-request-state-round-trip` | When server emits `requestState`, it's a non-empty string and the server validates the echo |
| `mrtr-multiple-input-requests-one-round` | A single `IncompleteResult` MAY carry inputRequests for `elicitation/create` + `sampling/createMessage` + `roots/list` together |
| `mrtr-multi-round-flow` | A handler MAY take 2+ rounds; each round mints a fresh `requestState`; final result reflects answers from every round |
| `mrtr-wrong-input-key-rerequests` | When client sends a wrong `inputResponses` key, server SHOULD re-request via `IncompleteResult` rather than erroring |
| `mrtr-tasks-composition` | **SKIPPED** — see "Open issues" below |

## Required server fixtures

The fixture server MUST register these tools:

| Tool | Behavior |
| ---------------------------------------- | ------------------------------------------------------------------------------------------- |
| `test_tool_with_elicitation` | One `elicitation/create` round, completes with answer reflected |
| `test_incomplete_result_sampling` | One `sampling/createMessage` round |
| `test_incomplete_result_list_roots` | One `roots/list` round |
| `test_incomplete_result_request_state` | Exercises `requestState` validation; final result includes `state-ok` to confirm validation |
| `test_incomplete_result_multiple_inputs` | Emits 3+ inputRequests of different methods in one round |
| `test_incomplete_result_multi_round` | Drives 2+ MRTR rounds, final result references every answer |
| `test_incomplete_result_elicitation` | Emits inputRequest for `user_name`; server re-requests on wrong-key responses |

The fixture can be implemented in any language; one example reference
implementation lives at
[`panyam/mcpkit/examples/mrtr`](https://github.com/panyam/mcpkit/tree/main/examples/mrtr).

## Running

```bash
# Against an already-running server
MRTR_SERVER_URL=http://localhost:8080/mcp \
npx vitest run src/scenarios/server/mrtr/all-scenarios.test.ts

# Auto-spawn a fixture in beforeAll
MRTR_SERVER_URL=http://localhost:18093/mcp \
MRTR_SERVER_CMD="/path/to/mrtr-server --port 18093" \
npx vitest run src/scenarios/server/mrtr/all-scenarios.test.ts
```

## Open issues

### `mrtr-tasks-composition` deferred

SEP-2663 commit `451f5e1` (Apr 30) made the MRTR → Tasks composition
flow normative: a `tools/call` MAY exchange `IncompleteResult` rounds
to gather input, then return `CreateTaskResult` to go async on a
subsequent round. Two blockers prevent enabling the check today:

1. **Spec watch — discriminator value.** SEP-2322 (MRTR base) and
SEP-2663 (Tasks Extension) currently disagree on the wire value for
the "needs more input" discriminator: SEP-2322's draft uses
`"input_required"`, SEP-2663's draft uses `"incomplete"`. Awaiting
alignment between the SEP authors. The current literal lives in
`MRTR_INCOMPLETE_RESULT_TYPE` (helpers.ts) so it's a one-line flip
when the spec converges.

2. **Reference-impl gap.** The natural server-side implementation
pattern for tasks (mint task up-front, run handler in a goroutine /
async task) means the handler's `IncompleteResult` signal isn't
visible to the middleware in time — by the time the handler returns
`IsIncomplete`, the `CreateTaskResult` is already on the wire. SDKs
in any language need an inverted middleware pattern that runs the
first round synchronously and only spins up the task once the
handler signals async-promotion.
([panyam/mcpkit issue 347](https://github.com/panyam/mcpkit/issues/347)
tracks this for one example impl; SDKs in any language hit the
same architectural choice.)

The check is registered with `status: 'SKIPPED'` so it's discoverable
but doesn't fail conformance runs. When both blockers resolve, remove
the SKIPPED short-circuit in `ephemeral-flow.ts` Check 8.

## Design notes

### Why the MRTR scenarios share helpers with `tasks/`

`MRTR_INCOMPLETE_RESULT_TYPE`, the result-type predicates
(`isIncompleteResult`, `isCompleteResult`), and the elicitation/sampling/
roots mocks live in `mrtr/helpers.ts`. The shared `AnyResult` Zod
passthrough schema and `waitForTerminal`/`waitForStatus` polling helpers
are imported from the sibling `../tasks/helpers` because both scenario
sets share the same wire-shape problem (SDK Zod schemas strip extension
fields). Pair `client.request(req, AnyResult)` with the SDK's
`StreamableHTTPClientTransport` and you preserve every SEP-2322 / SEP-2663
field. When the upstream SDK gains schemas for those shapes, the
passthrough disappears in favor of the typed schemas directly.
115 changes: 115 additions & 0 deletions src/scenarios/server/mrtr/all-scenarios.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/**
* SEP-2322 MRTR test runner.
*
* Iterates the MRTR scenario classes against a SEP-2322-conformant
* server. Configuration is brand-neutral and language-agnostic:
*
* 1. Point at an already-running server:
* MRTR_SERVER_URL=http://localhost:8080/mcp npm test -- mrtr/all-scenarios.test.ts
*
* 2. Auto-spawn a fixture before tests (any language):
* MRTR_SERVER_URL=http://localhost:18093/mcp \
* MRTR_SERVER_CMD="/path/to/server --port 18093" \
* npm test -- mrtr/all-scenarios.test.ts
*
* If MRTR_SERVER_URL is unset the suite is skipped — keeping CI runs
* against the everything-server green.
*
* The fixture server can be implemented in any language as long as it
* exposes a SEP-2322 conformant Streamable HTTP MCP endpoint. Anyone is
* free to bring their own; one example reference implementation lives
* at https://github.com/panyam/mcpkit/tree/main/examples/mrtr.
*/

import { spawn, ChildProcess } from 'child_process';
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import { MrtrEphemeralFlowScenario } from './ephemeral-flow';
import { waitForServerReady } from '../_shared/test-runner';

// Target endpoint and optional fixture command, both read from the environment.
const SERVER_URL = process.env.MRTR_SERVER_URL;
const SERVER_CMD = process.env.MRTR_SERVER_CMD;
const SERVER_STARTUP_TIMEOUT_MS = 15_000;

// A target exists whenever a URL is configured; the fixture is spawned
// only when a command is configured alongside that URL.
const HAVE_TARGET = Boolean(SERVER_URL);
const SHOULD_SPAWN = HAVE_TARGET && Boolean(SERVER_CMD);

const MRTR_SCENARIOS = [new MrtrEphemeralFlowScenario()];

// Without a target the whole suite is registered as skipped.
const describeIfTarget = !HAVE_TARGET ? describe.skip : describe;

describeIfTarget('SEP-2322 MRTR — server conformance', () => {
  let serverProcess: ChildProcess | null = null;

  /**
   * True while the child has neither exited nor been terminated by a
   * signal. `ChildProcess.killed` cannot be used for this: it only
   * records that a signal was delivered by `kill()`, not that the
   * process is gone.
   */
  const isRunning = (proc: ChildProcess): boolean =>
    proc.exitCode === null && proc.signalCode === null;

  // Optionally spawn the fixture and block until its port accepts TCP
  // connections. A no-op when only MRTR_SERVER_URL is configured.
  beforeAll(async () => {
    if (!SHOULD_SPAWN) return;

    serverProcess = spawn('sh', ['-c', SERVER_CMD!], {
      stdio: ['ignore', 'pipe', 'pipe'],
      detached: false
    });

    // Buffer the fixture's output so it can be attached to failure messages.
    let stdoutBuf = '';
    let stderrBuf = '';
    serverProcess.stdout?.on('data', (b) => {
      stdoutBuf += b.toString();
    });
    serverProcess.stderr?.on('data', (b) => {
      stderrBuf += b.toString();
    });

    serverProcess.on('exit', (code) => {
      // `code` is null when the child was ended by a signal (e.g. our own
      // teardown), so only non-zero exit codes are reported.
      if (code !== null && code !== 0) {
        console.error(
          `mrtr fixture exited unexpectedly with code ${code}.\nSTDOUT: ${stdoutBuf}\nSTDERR: ${stderrBuf}`
        );
      }
    });

    await waitForServerReady(SERVER_URL!, SERVER_STARTUP_TIMEOUT_MS).catch(
      (err) => {
        // Do not leave a half-started fixture behind when startup fails.
        if (serverProcess && isRunning(serverProcess)) {
          serverProcess.kill('SIGKILL');
        }
        throw new Error(
          `mrtr fixture did not become reachable within ${SERVER_STARTUP_TIMEOUT_MS}ms: ${err.message}\nSTDOUT: ${stdoutBuf}\nSTDERR: ${stderrBuf}`
        );
      }
    );
  }, SERVER_STARTUP_TIMEOUT_MS + 5_000);

  // Stop the spawned fixture: SIGTERM first, SIGKILL if it has not
  // exited within 3 seconds.
  afterAll(async () => {
    if (!SHOULD_SPAWN) return;
    const proc = serverProcess;
    serverProcess = null;
    // Nothing to stop if the fixture was never spawned or is already gone;
    // returning here also avoids waiting for an 'exit' event that has
    // already fired.
    if (!proc || !isRunning(proc)) return;

    await new Promise<void>((resolve) => {
      const timer = setTimeout(() => {
        // Escalate only if the child is still alive after SIGTERM.
        if (isRunning(proc)) {
          proc.kill('SIGKILL');
        }
        resolve();
      }, 3_000);
      proc.once('exit', () => {
        clearTimeout(timer);
        resolve();
      });
      proc.kill('SIGTERM');
    });
  });

  // One test per scenario; both FAILURE and WARNING checks fail the test.
  for (const scenario of MRTR_SCENARIOS) {
    it(`${scenario.name} — all checks succeed against fixture`, async () => {
      const checks = await scenario.run(SERVER_URL!);
      expect(checks.length).toBeGreaterThan(0);
      const failures = checks.filter(
        (c) => c.status === 'FAILURE' || c.status === 'WARNING'
      );
      if (failures.length > 0) {
        const detail = failures
          .map((c) => `  - ${c.id}: ${c.errorMessage ?? '(no message)'}`)
          .join('\n');
        throw new Error(
          `${failures.length}/${checks.length} checks failed:\n${detail}`
        );
      }
    });
  }
});
Loading