From fc390bfad5841a38c41e6473a3b4b3e776dba340 Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Fri, 12 Jun 2026 23:42:27 +0200 Subject: [PATCH] docs(repo): track showcase-completion e1-e5 prps (#406) --- ...pletion-E1-metadata-provenance-backbone.md | 1031 ++++++++++++++ ...ase-completion-E2-safe-replay-lifecycle.md | 1247 +++++++++++++++++ ...howcase-completion-E3-seed-config-scope.md | 1080 ++++++++++++++ ...completion-E4-run-config-phase-controls.md | 820 +++++++++++ ...e-completion-E5-agent-rag-story-capture.md | 1185 ++++++++++++++++ 5 files changed, 5363 insertions(+) create mode 100644 PRPs/PRP-showcase-completion-E1-metadata-provenance-backbone.md create mode 100644 PRPs/PRP-showcase-completion-E2-safe-replay-lifecycle.md create mode 100644 PRPs/PRP-showcase-completion-E3-seed-config-scope.md create mode 100644 PRPs/PRP-showcase-completion-E4-run-config-phase-controls.md create mode 100644 PRPs/PRP-showcase-completion-E5-agent-rag-story-capture.md diff --git a/PRPs/PRP-showcase-completion-E1-metadata-provenance-backbone.md b/PRPs/PRP-showcase-completion-E1-metadata-provenance-backbone.md new file mode 100644 index 00000000..101fdf00 --- /dev/null +++ b/PRPs/PRP-showcase-completion-E1-metadata-provenance-backbone.md @@ -0,0 +1,1031 @@ +name: "PRP — Showcase Completion E1: Workspace Metadata & Provenance Backbone (issue #407)" +description: | + +## Purpose + +Implement the Foundation epic of the showcase-completion initiative (umbrella #406): +one Alembic migration extends `showcase_workspace` with lifecycle + provenance columns +(`replayed_from_workspace_id`, `archived`, `pinned`, `notes`, `tags`, +`config_schema_version`) and six documented JSONB story-slot columns +(`seed_overrides`, `user_scope`, `approval_events`, `rag_events`, `job_ids`, +`phase_summaries`); a `PATCH /demo/workspaces/{id}` lifecycle endpoint +(rename/notes/tags/archive/pin) lands with its Pydantic schema surface; and Replay +writes `replayed_from_workspace_id`. 
Every Parallel epic (#408–#412) writes into or +reads from this surface, so it ships first. Blocks E2 #408, E3 #409, E4 #410, +E5 #411, E6 #412. + +## Core Principles + +1. **Context is King**: every reference below was verified against the live code on 2026-06-12 (branch `dev` @ `bdf85f6`). +2. **Validation Loops**: each level is executable as written. +3. **Information Dense**: patterns cite exact file:line. +4. **Progressive Success**: model+migration → schemas → service helpers → PATCH route → replay wiring → tests → docs. +5. **Global rules**: follow CLAUDE.md / AGENTS.md; all five CI gates must pass; all changes ADDITIVE. + +--- + +## Goal + +The `showcase_workspace` table gains the metadata + provenance backbone every other +epic of umbrella #406 consumes: + +- **Lifecycle columns**: `archived` (bool), `pinned` (bool), `notes` (free text), + `tags` (queryable JSONB string array, GIN-indexed — exact `scenario_plan.tags` + pattern), `config_schema_version` (int, schema-evolution marker). +- **Provenance column**: `replayed_from_workspace_id` — a SOFT reference (String(32), + indexed, deliberately **no ForeignKey**, not even self-referential) recorded when a + run is a Replay of a saved workspace. +- **Six documented JSONB story slots** as dedicated nullable JSONB columns: + `seed_overrides`, `user_scope`, `approval_events`, `rag_events`, `job_ids`, + `phase_summaries`. E1 ships the columns + the documented per-slot schema; E1 writes + NONE of them (all stay NULL) — E3 (#409) writes `seed_overrides` + `user_scope`, + E5 (#411) writes `approval_events` + `rag_events`, later parallel epics write + `job_ids` + `phase_summaries`. +- **`PATCH /demo/workspaces/{workspace_id}`** — partial-update lifecycle endpoint: + rename / notes / tags / archive / pin. Missing id → RFC 7807 404. Returns the + updated `WorkspaceDetailResponse`. 
+- **Replay provenance**: `DemoRunRequest` gains an additive Optional + `replayed_from_workspace_id` field; the frontend Replay handler sends the source + row's `workspace_id`; `create_workspace` records it on the NEW row. + +A run/request without any new field behaves **byte-identically to today** (legacy WS +start frames and HTTP bodies unchanged). One migration applies AND downgrades cleanly +on a fresh DB. + +**Deliverable** (all additive): + +- `app/features/demo/models.py` — 12 new columns on `ShowcaseWorkspace` + tags GIN index + replayed-from index. +- `alembic/versions/<revision>_add_showcase_workspace_metadata_provenance.py` — `down_revision = "324a2fa37fcc"`; add-columns + indexes; clean downgrade. +- `app/features/demo/schemas.py` — `DemoRunRequest.replayed_from_workspace_id`; new `WorkspaceUpdateRequest`; `WorkspaceListItem` / `WorkspaceDetailResponse` additive response fields. +- `app/features/demo/workspace.py` — `create_workspace` records `replayed_from_workspace_id`; new `update_workspace` helper. +- `app/features/demo/routes.py` — `PATCH /demo/workspaces/{workspace_id}`. +- `frontend/src/types/api.ts` + `frontend/src/pages/showcase.tsx` — two-line additive Replay wiring (see "Why the (ui) sliver" below). +- Tests: schema unit tests, model constraint/roundtrip integration tests, workspace-helper integration tests, PATCH route tests (2xx + 404 + 422), migration up/down. +- Docs: `docs/_base/API_CONTRACTS.md` + `docs/_base/DOMAIN_MODEL.md` additive notes (the documented story-slot schema lives in DOMAIN_MODEL — umbrella #406 risk mitigation). + +**Success definition**: all Success Criteria below check off; the five CI gates are +green; integration suite green; a manual Replay from the `/showcase` Saved-workspaces +panel produces a new row whose `replayed_from_workspace_id` equals the source row's +`workspace_id`; `PATCH /demo/workspaces/{id}` round-trips rename/notes/tags/archive/pin. 
+ +## Why + +- Umbrella #406: today workspaces cannot be renamed/archived/annotated/searched, the + row lacks replay lineage, seed overrides, user scope, approval history, and RAG + events. E1 is the Foundation — **every** Parallel epic writes into or reads from + the columns added here, so the frozen column/slot contract ships first. +- Replays are currently indistinguishable from fresh keep-runs except by + name/timestamp (documented gap, `docs/_base/RUNBOOKS.md` § Showcase workspace, + "Explicitly out of scope" — the `replayed_from` provenance column is this epic). +- The umbrella's junk-drawer risk ("JSONB story slots become a junk drawer") is + mitigated here by `config_schema_version` + a documented per-slot schema in + `docs/_base/DOMAIN_MODEL.md`. + +### Why the (ui) sliver in an (api,db) epic + +"Replay writes `replayed_from_workspace_id`" is a frozen epic-level success +criterion, and Replay is frontend-initiated: `handleReplayWorkspace` +(`frontend/src/pages/showcase.tsx:174-186`) re-submits the recorded config through +the WS start frame. Without the sender including the field, the backend has nothing +to record. The wiring is two additive lines (one TS interface field + one start-frame +key) — deliberately included here so the criterion is verifiable in E1; the lineage +*rendering* (badge + chain) stays in E2 (#408). + +## What + +### User-visible behavior + +- `PATCH /demo/workspaces/{workspace_id}` accepts a partial body of + `{name?, notes?, tags?, archived?, pinned?}`; only provided fields change; explicit + `null` clears `name` / `notes`. Missing id → `404 application/problem+json`. A + malformed body (bad name pattern, unknown key, >20 tags) → `422 + application/problem+json`. Empty body `{}` → `200` no-op returning the current row + (mirrors the `RunUpdate` precedent — see Decisions). 
+- `POST /demo/run` and the `WS /demo/stream` start frame accept an additive Optional + `replayed_from_workspace_id: str | null` (`^[0-9a-f]{32}$`); supplying it without + `preservation="keep"` is a 422 (a lineage pointer is meaningless when no row is + written — same validator pattern as `workspace_name`). +- Clicking **Replay** on the Saved-workspaces panel now records the source + `workspace_id` on the new row. The original row is never mutated (E4 #393 + invariant preserved). +- `GET /demo/workspaces` list items additively carry `archived`, `pinned`, `tags`, + `replayed_from_workspace_id`; the detail response additively carries those plus + `notes`, `config_schema_version`, and the six story slots. **List behavior is + otherwise unchanged in E1** — archived rows are still listed; default-filtering / + search / sort is E2 (#408). + +### Technical requirements + +- One Alembic migration off head `324a2fa37fcc` (verified `uv run alembic heads`, + 2026-06-12). Forward-only: a NEW revision — never edit + `324a2fa37fcc_create_showcase_workspace_table.py`. +- Every new column is nullable OR carries a `server_default` so the migration applies + on a table with existing rows; downgrade drops indexes then columns, cleanly. +- **No ForeignKeys anywhere** — `replayed_from_workspace_id` is an opaque soft + reference, consistent with the table-wide invariant + (`docs/_base/DOMAIN_MODEL.md` § `showcase_workspace`: "`created_objects` carries + SOFT references only — no ForeignKeys by design"). Even a *self-referential* FK is + ruled out: ancestor workspace rows must remain independently deletable + (metadata-only delete, #404) without cascading to or blocking descendants. State + this in the model docstring. +- `status` is NOT patchable — the pipeline finalize hook owns the + running/completed/failed lifecycle; `archived` is an orthogonal boolean so the + existing `ck_showcase_workspace_status` CHECK is untouched. 
+- Vertical slice: all backend changes inside `app/features/demo/` + + `alembic/versions/`; no cross-slice imports (demo imports only `app.core.*`, + `app.shared.*`, stdlib/3rd-party). +- RFC 7807 errors only — `NotFoundError` from `app/core/exceptions.py` (the demo + routes' existing pattern, `routes.py:134`), never bare `HTTPException`. +- Pydantic v2 `ConfigDict(strict=True)` on the new request body. All new fields are + JSON-native (`str`/`bool`/`list[str]`) → NO `Field(strict=False)` override needed; + the AST policy walker (`app/core/tests/test_strict_mode_policy.py`) only fires on + date/datetime/time/UUID/Decimal. +- Warn-and-continue invariant untouched: `create_workspace` / `finalize_workspace` + keep swallowing all DB errors. The new `update_workspace` helper is + request-scoped (caller-owned session, raises normally) — it backs an HTTP + endpoint, not the pipeline. + +### Success Criteria + +- [ ] Migration applies AND downgrades cleanly on a fresh DB (`upgrade head` → + `downgrade -1` → `upgrade head`); applies on a DB with pre-existing + `showcase_workspace` rows (server defaults backfill `archived=false`, + `pinned=false`, `tags=[]`, `config_schema_version=1`). +- [ ] `DemoRunRequest()` (no args) serializes identically to today plus + `replayed_from_workspace_id=None`; a legacy start frame (no new keys) validates; + `replayed_from_workspace_id` without `preservation="keep"` → 422; a non-32-hex + value → 422. +- [ ] A keep-run with `replayed_from_workspace_id="<32hex>"` produces a row whose + `replayed_from_workspace_id` column equals that value; the source row is unread + and unmodified (the value is recorded verbatim — no existence check, it is a soft + reference). +- [ ] Frontend Replay sends `replayed_from_workspace_id: ws.workspace_id`; + `pnpm tsc -b` introduces no NEW errors (see gotcha on the pre-existing-failure + baseline) and `pnpm test --run` green. 
+- [ ] `PATCH /demo/workspaces/{id}`: happy path updates exactly the provided fields + and returns the updated detail; `{}` is a 200 no-op; missing id → 404 + problem+json; bad name pattern / unknown key / 21 tags → 422 problem+json. +- [ ] `tags` round-trips as a JSONB string array and is GIN-indexed + (`ix_showcase_workspace_tags_gin`); a `.contains(["x"])` containment query works + (E2 will route it — E1 proves it in an integration test). +- [ ] All six story-slot columns exist, default NULL, and round-trip a JSONB payload + in an integration test; E1 production code writes none of them. +- [ ] `uv run ruff check . && uv run ruff format --check . && uv run mypy app/ && + uv run pyright app/ && uv run pytest -v -m "not integration"` all green; + integration suite green against docker-compose Postgres; + `test_strict_mode_policy.py` green. + +## Decisions (the open questions this PRP resolves) + +> These are FROZEN for the parallel epics. #408–#412 PRP authors: consume, don't re-decide. + +1. **`tags` representation — CONFIRMED: mirror `scenario_plan.tags` exactly.** + A dedicated JSONB string-array column, `nullable=False`, + `server_default=text("'[]'::jsonb")`, with a GIN index + (`ix_showcase_workspace_tags_gin`). Verified in code: + `app/features/scenarios/models.py:74-76,97` (column + index), migration + `alembic/versions/bb8c4587ef1d_add_scenario_library_columns.py:26-45` + (add_column + GIN), and the containment query + `app/features/scenarios/service.py:464` (`ScenarioPlan.tags.contains(tags)`). + No deviation: the pattern is proven, queryable, and E2's tag filter reuses the + same `.contains()` shape. Tags are free-text strings (scenario precedent has no + per-item pattern); the PATCH boundary caps the list at 20 items + (`Field(max_length=20)` — same cap as `ScenarioCreateRequest.tags`, + `app/features/scenarios/schemas.py:203-206`). + +2. 
**Story slots — six dedicated nullable JSONB columns** (NOT keys inside one + `story` blob, NOT keys inside `created_objects`). Rationale: the existing + precedent is purpose-named JSONB columns with documented internal schemas + (`created_objects`, `result_summary` — `app/features/demo/models.py:77-81`); + each slot has a different writer epic and a different write moment + (create-time vs mid-run append vs finalize), and separate columns keep each + write isolated, independently nullable (NULL = "never written", distinct from + empty), individually typed in the ORM (`dict[str, Any] | None` vs + `list[dict] | None`), and trivially additive in responses. A single `story` + column would force read-modify-write of one blob across four epics and would + itself need a documented sub-schema anyway — more coupling, zero benefit on a + low-cardinality audit table. Per-slot documented schema: see the Data-models + blueprint below + the DOMAIN_MODEL doc task. + +3. **`replayed_from_workspace_id` — SOFT reference, no FK, confirmed.** String(32) + nullable, btree index (`ix_showcase_workspace_replayed_from`), NO ForeignKey — + including no self-referential FK: `docs/_base/DOMAIN_MODEL.md` pins + "deletion in either direction never cascades", and an FK (even `ON DELETE SET + NULL`) would couple delete behavior to lineage. Dangling lineage pointers after + an ancestor delete are expected and harmless (same semantics as every + `created_objects` id). Recorded verbatim from the request — no existence + validation (a replay of a just-deleted workspace still records the id it came + from; E2's liveness check surfaces dangles). + +4. 
**PATCH semantics — `exclude_unset` partial update, `extra="forbid"`, empty body + = no-op 200.** `model_dump(exclude_unset=True)` distinguishes absent from + explicit-null (runtime-verified, see Gotchas); explicit `null` clears `name` / + `notes`; `extra="forbid"` catches typo'd field names (the `RunUpdate` precedent, + `app/features/registry/schemas.py:113-123`); an empty body is a valid no-op + (mirrors `RunUpdate`, which has no min-fields validator). `archived`/`pinned` + accept only `true`/`false` and `tags` accepts only a list (not null — all + three back NOT NULL columns; send `[]` to clear tags). Explicit `null` on any + of the three is rejected at the schema boundary (422), never reaching + `setattr` → IntegrityError 500. + +5. **E1 writes no story slot.** `seed_overrides`/`user_scope` writers land in E3 + (#409), `approval_events`/`rag_events` in E5 (#411), `job_ids`/ + `phase_summaries` in the remaining parallel epics (E2 #408 health summary / + E4 #410 run-config echo — whichever lands first follows the documented schema). + E1 ships columns + schema docs + roundtrip tests only. + +6. **`config_schema_version` starts at 1.** Integer NOT NULL, `server_default + text("1")`, ORM `default=1`. It versions the *workspace config + story-slot + schema* as a whole; any epic that changes a documented slot shape bumps the + ORM default and documents the delta in DOMAIN_MODEL. E1 does not branch on it. + +### Assumptions (explicit, decided without user input) + +- `notes` is `sa.Text()` in the DB with a 2000-char cap enforced at the Pydantic + boundary only (no DB CHECK) — matches the repo's boundary-validation style + (`RunUpdate.error_message` caps at the schema layer, `registry/schemas.py:123`). +- Renaming via PATCH uses the same `^[a-z0-9][a-z0-9\-_]*$` / ≤100 pattern as + `DemoRunRequest.workspace_name` (`demo/schemas.py:72-77`) — names stay + non-unique by design (E4 #393 invariant). 
+- The PATCH route reuses `WorkspaceDetailResponse` as its response model (the + updated row, full detail) rather than introducing a new response shape. +- Pin/archive carry NO behavioral semantics in E1 (no list reordering, no + default-filtering) — E2 (#408) wires the UX. E1 just persists the booleans. +- The umbrella's "destructive-replay confirmation" is E2 (#408) — NOT here. + E1's replay change is provenance-recording only. +- `replayed_from_workspace_id` requires `preservation="keep"`: a lineage pointer + on an ephemeral run has no row to land on. (The frontend Replay always sends + `preservation: 'keep'` — `showcase.tsx:179-185` — so this constraint is + invisible to the shipped UI.) + +## All Needed Context + +### Documentation & References + +```yaml +# MUST READ — codebase patterns (all verified 2026-06-12, branch dev @ bdf85f6) + +- file: app/features/demo/models.py + why: | + THE file you extend. ShowcaseWorkspace at line 37; status constants 32-34; + JSONB precedent created_objects/result_summary at 77-81; __table_args__ with + named CheckConstraint + composite index at 83-89. Module docstring documents + the no-FK soft-reference decision — extend that docstring for + replayed_from_workspace_id. GOTCHA in docstring: SQLAlchemy reserves the + attr name `metadata`. + +- file: alembic/versions/324a2fa37fcc_create_showcase_workspace_table.py + why: | + CURRENT HEAD (verified `uv run alembic heads` → 324a2fa37fcc). Your + down_revision. Header/docstring format, typing (`revision: str`, + `down_revision: str | None`), op.f() index-naming convention to mirror. + NEVER edit this file — forward-only. + +- file: alembic/versions/bb8c4587ef1d_add_scenario_library_columns.py + why: | + THE add-columns migration to mirror: op.add_column with JSONB + server_default text("'[]'::jsonb") (lines 26-34), GIN index creation + (39-45), downgrade drops index-then-columns (48-52) incl. the + postgresql_using='gin' kwarg on drop_index. 
+ +- file: app/features/scenarios/models.py + why: | + tags JSONB-array pattern (lines 74-76: Mapped[list[str]], nullable=False, + default=list, server_default=text("'[]'::jsonb")) + GIN index in + __table_args__ (line 97). This is the tags representation E1 mirrors + verbatim (Decision 1). + +- file: app/features/scenarios/service.py + why: | + Line 464: `ScenarioPlan.tags.contains(tags)` — the JSONB containment query + shape the tags column must support (prove it in an integration test; E2 + routes it). + +- file: app/features/demo/schemas.py + why: | + DemoRunRequest at 29-85: ConfigDict(strict=True) line 40; the + workspace_name pattern + model_validator _workspace_name_requires_keep + (72-85) — copy this exact validator shape for replayed_from_workspace_id. + WorkspaceListItem (169-189) / WorkspaceDetailResponse (192-203) / + WorkspaceListResponse (205-213) — the response models you extend + additively. Response models are plain BaseModel + from_attributes (NOT + strict) — keep that split. + +- file: app/features/demo/workspace.py + why: | + create_workspace (46-79): the insert you extend with one kwarg + (replayed_from_workspace_id=req.replayed_from_workspace_id). get_workspace + (158-171) — reuse inside update_workspace. delete_workspace (199-221) — + the caller-owned-session + commit + logger.info shape update_workspace + mirrors. NOTE the split: create/finalize open their OWN sessions + (pipeline-scoped, warn-and-continue); get/list/delete take a caller-owned + AsyncSession (request-scoped, raise normally) — update_workspace is the + second kind. + +- file: app/features/demo/routes.py + why: | + The router you extend. delete_showcase_workspace (138-163) — the exact + route shape for PATCH: Depends(get_db), NotFoundError on missing (RFC 7807 + via registered handler), docstring style. get_showcase_workspace (110-135) + — WorkspaceDetailResponse return shape. 
+ +- file: app/features/registry/schemas.py + why: | + RunUpdate (113-123) — THE partial-update request precedent: + ConfigDict(extra="forbid"), all-Optional fields, no min-fields validator + (empty body = no-op). E1's WorkspaceUpdateRequest adds strict=True on top + (post-PRP-14 request-body policy; RunUpdate predates it). + +- file: app/features/demo/pipeline.py + why: | + DemoContext workspace fields at 258-263; the keep-branch create hook at + 2652-2657; finalize hook at 2741-2746. E1 does NOT touch the pipeline — + create_workspace reads the new field straight off `req`. Read only to + confirm no hook change is needed. + +- file: app/core/exceptions.py + why: | + NotFoundError (line 72) → RFC 7807 404 via registered handler. The 422s + come FREE from Pydantic validation at the boundary (FastAPI → 422 + problem+json). + +- file: app/features/demo/tests/test_schemas.py + why: | + Existing DemoRunRequest tests INCLUDING the mandatory JSON-dict path + (Model.model_validate({...}) per .claude/rules/security-patterns.md + § strict mode). Extend for the new field + add a WorkspaceUpdateRequest + block. + +- file: app/features/demo/tests/test_workspace.py + why: | + Integration-test patterns for create/finalize/get/list/delete — session + fixture, @pytest.mark.integration, row-cleanup conventions. Extend with + update_workspace + replayed_from cases. + +- file: app/features/demo/tests/test_models.py + why: | + Constraint/roundtrip integration tests for ShowcaseWorkspace — extend with + new-column defaults, tags containment, story-slot roundtrip. + +- file: app/features/demo/tests/test_routes.py + why: | + Route-test conventions: ASGITransport client from conftest, workspace + module monkeypatched for unit-shaped route tests, integration-marked tests + for DB-backed paths. The DELETE 404 test is the template for PATCH 404. 
+ +- file: frontend/src/pages/showcase.tsx + why: | + handleReplayWorkspace at 174-186 — the start() call that gains ONE key: + `replayed_from_workspace_id: ws.workspace_id`. handleLoadWorkspace + (160-168) stays untouched (Load is read-only). + +- file: frontend/src/types/api.ts + why: | + DemoRunRequest interface at 778-788 — add + `replayed_from_workspace_id?: string` with an `// E1 (#407)` comment in + the existing style. + +- file: docs/_base/DOMAIN_MODEL.md + why: | + § showcase_workspace aggregate — additively document the new columns, the + six story-slot schemas, the config_schema_version semantics, and restate + that replayed_from_workspace_id is a soft reference (no FK). This is the + umbrella's junk-drawer risk mitigation — non-optional. + +- file: docs/_base/API_CONTRACTS.md + why: | + The /demo rows + "WebSocket Events (/demo/stream)" section — additive + notes for the PATCH endpoint, the new request field, and the response + additions, in the established "E1 (#407) — ..." style. + +# Issue / initiative context +- url: https://github.com/w7-mgfcode/ForecastLabAI/issues/407 + why: The epic this PRP implements (Foundation; frozen column/slot/endpoint contract). +- url: https://github.com/w7-mgfcode/ForecastLabAI/issues/406 + why: Umbrella — success criteria, out-of-scope list, risk table (junk-drawer mitigation = config_schema_version + documented slot schema). + +# Exemplar PRPs (style + validation-gate conventions) +- file: PRPs/PRP-showcase-workspace-E1-persistence-backbone.md + why: Closest analog — created the table this PRP extends; task style, gates, anti-patterns. +- file: PRPs/PRP-showcase-workspace-E4-restore-replay.md + why: Replay flow context — verbatim re-submission through the WS path; original row never mutated. 
+``` + +### Current Codebase tree (relevant subset) + +```bash +app/features/demo/ +├── models.py # ShowcaseWorkspace @37 (16 columns today) +├── workspace.py # create @46 / finalize @106 / get @158 / list @174 / delete @199 / count @224 +├── schemas.py # DemoRunRequest @29; WorkspaceListItem @169; WorkspaceDetailResponse @192 +├── routes.py # GET list @80; GET detail @110; DELETE @138; POST /run @51; WS @166 +├── pipeline.py # keep-branch create hook @2652; finalize hook @2741 (NO E1 changes) +├── service.py # (NO E1 changes) +└── tests/ # conftest, test_models, test_workspace, test_schemas, test_routes, test_pipeline +alembic/ +├── env.py # demo models import already present @19 +└── versions/ # head: 324a2fa37fcc +frontend/src/ +├── pages/showcase.tsx # handleReplayWorkspace @174 +└── types/api.ts # DemoRunRequest @778 +``` + +### Desired Codebase tree (files added/modified) + +```bash +app/features/demo/ +├── models.py # MOD — +12 columns, +2 indexes, extended docstring +├── schemas.py # MOD — DemoRunRequest +replayed_from_workspace_id (+validator); +│ # NEW WorkspaceUpdateRequest; ListItem/Detail additive fields +├── workspace.py # MOD — create_workspace records replayed_from; NEW update_workspace +├── routes.py # MOD — PATCH /demo/workspaces/{workspace_id} +└── tests/ + ├── test_schemas.py # MOD — new-field + WorkspaceUpdateRequest unit tests + ├── test_models.py # MOD — column defaults, tags containment, slot roundtrip (integration) + ├── test_workspace.py # MOD — replayed_from recording; update_workspace semantics (integration) + └── test_routes.py # MOD — PATCH 200/404/422 (+ list/detail field passthrough) +alembic/versions/<revision>_add_showcase_workspace_metadata_provenance.py # NEW +frontend/src/types/api.ts # MOD — +replayed_from_workspace_id?: string +frontend/src/pages/showcase.tsx # MOD — one start-frame key in handleReplayWorkspace +docs/_base/API_CONTRACTS.md # MOD — additive contract notes +docs/_base/DOMAIN_MODEL.md # MOD — columns + documented 
story-slot schemas +``` + +### Known Gotchas & Library Quirks + +```python +# CRITICAL — forward-only migrations: down_revision = "324a2fa37fcc" (verified +# `uv run alembic heads` → 324a2fa37fcc, 2026-06-12). NEVER edit the merged +# create-table migration. Revision ids are hand-written 12-hex continuing the +# chain (or keep an `alembic revision -m ...` generated id). + +# CRITICAL — every new NOT NULL column needs a server_default or the migration +# fails on tables with existing rows: archived/pinned text("false"), +# config_schema_version text("1"), tags text("'[]'::jsonb"). All six story +# slots + notes + replayed_from_workspace_id are nullable (no default needed). + +# CRITICAL — strict-mode policy: WorkspaceUpdateRequest and the new +# DemoRunRequest field are all JSON-native (str/bool/list[str]) → NO +# Field(strict=False) override. The AST walker +# (app/core/tests/test_strict_mode_policy.py) only fires on +# date/datetime/time/UUID/Decimal — nothing here triggers it. + +# CRITICAL — do NOT add extra="forbid" to DemoRunRequest (unknown-key tolerance +# is the WS forward/backward-compat contract, routes.py:182). DO add it to +# WorkspaceUpdateRequest (HTTP-only body; typo'd PATCH fields must 422, not +# silently no-op — RunUpdate precedent). + +# CRITICAL — JSONB change detection: always ASSIGN whole values +# (row.tags = [...]), never mutate in place (row.tags.append(...)) — in-place +# mutation is invisible to SQLAlchemy without flag_modified. The existing +# finalize_workspace assigns; keep that style in update_workspace. + +# GOTCHA — SQLAlchemy reserves the declarative attr name `metadata` +# (demo/models.py docstring). None of the new names collide — keep it that way. + +# GOTCHA — `status` stays out of WorkspaceUpdateRequest; the CHECK constraint +# ck_showcase_workspace_status is untouched. `archived` is orthogonal. + +# GOTCHA — update_workspace is caller-owned-session + raises normally (it backs +# an HTTP route). 
Do NOT wrap it in the warn-and-continue pattern — that +# contract is for the PIPELINE-scoped create/finalize only. + +# GOTCHA — repo has mixed CRLF/LF line endings; run `git diff --stat` before +# committing — Edit/Write emit LF, so verify schema/route/model diffs are +# surgical, not whole-file noise. + +# GOTCHA — frontend type gate: `pnpm tsc --noEmit` is vacuous (solution-style +# tsconfig checks zero files) and `pnpm tsc -b` already fails on dev with +# pre-existing errors. Gate on "no NEW errors vs the dev baseline" + +# `pnpm lint` + `pnpm test --run`. + +# GOTCHA — mypy --strict AND pyright --strict gate merge: full annotations incl. +# `-> None` on tests and typed fixtures. + +# CONVENTION — branch: feat/showcase-completion-e1-metadata-provenance (off dev). +# Commits reference #407, e.g. `feat(db): ... (#407)` for the migration, +# `feat(api): ... (#407)` for slice code, `feat(ui): ... (#407)` for the +# replay wiring (or `feat(api,ui)` if combined). NO AI trailer (hook-enforced). + +# RUNTIME-VERIFICATION LOG (per prp-create step 3 — re-run on library upgrade): +# 1. `uv run alembic heads` → 324a2fa37fcc (2026-06-12). +# 2. 
Pydantic exclude_unset distinguishes absent vs explicit-null, pattern +# constraint skips the None arm of `str | None`, extra="forbid" 422s +# unknown keys, strict=True accepts list[str] and rejects a bare str: +# uv run python -c " +# from pydantic import BaseModel, ConfigDict, Field +# class P(BaseModel): +# model_config = ConfigDict(strict=True, extra='forbid') +# name: str | None = Field(default=None, max_length=100, pattern=r'^[a-z0-9][a-z0-9\-_]*$') +# notes: str | None = Field(default=None, max_length=2000) +# tags: list[str] | None = Field(default=None, max_length=20) +# p = P.model_validate({'notes': None}); assert p.model_fields_set == {'notes'} +# assert p.model_dump(exclude_unset=True) == {'notes': None} +# assert P.model_validate({'name': None}).name is None # null clears +# assert P.model_validate({'tags': ['a','b']}).tags == ['a','b'] +# " +# → verified on pydantic in-repo (2026-06-12). +# 3. SQLAlchemy 2.0.46: Boolean/Integer/JSONB server_default DDL compiles as +# expected (`DEFAULT false NOT NULL`, `DEFAULT 1 NOT NULL`, +# `DEFAULT '[]'::jsonb NOT NULL`): +# uv run python -c "import sqlalchemy as sa; from sqlalchemy.dialects import postgresql; from sqlalchemy.schema import CreateTable; md=sa.MetaData(); t=sa.Table('x',md, sa.Column('archived',sa.Boolean(),nullable=False,server_default=sa.text('false')), sa.Column('v',sa.Integer(),nullable=False,server_default=sa.text('1')), sa.Column('tags',postgresql.JSONB(),nullable=False,server_default=sa.text(\"'[]'::jsonb\"))); print(CreateTable(t).compile(dialect=postgresql.dialect()))" +# → verified (2026-06-12). +# 4. JSONB .contains() containment is already production code in this repo +# (scenarios/service.py:464) — no external claim to probe. +``` + +## Implementation Blueprint + +### Data models and structure + +```python +# app/features/demo/models.py — ADD after result_summary (line 81), keep the +# existing __table_args__ entries and append the two new indexes. 
+ + # ── E1 (#407) — lifecycle metadata ──────────────────────────────────── + # Orthogonal to `status` (which the pipeline owns): archive/pin are + # operator curation flags, PATCH-mutable, default false. + archived: Mapped[bool] = mapped_column( + nullable=False, default=False, server_default=text("false") + ) + pinned: Mapped[bool] = mapped_column( + nullable=False, default=False, server_default=text("false") + ) + # Free-text operator annotation; length capped at the Pydantic boundary (2000). + notes: Mapped[str | None] = mapped_column(Text, nullable=True) + # Queryable JSONB string array — EXACT scenario_plan.tags pattern + # (app/features/scenarios/models.py:74-76); GIN-indexed below. + tags: Mapped[list[str]] = mapped_column( + JSONB, nullable=False, default=list, server_default=text("'[]'::jsonb") + ) + # Version of the workspace config + story-slot schema (umbrella #406 + # junk-drawer mitigation). Bump the ORM default when a slot shape changes. + config_schema_version: Mapped[int] = mapped_column( + Integer, nullable=False, default=1, server_default=text("1") + ) + + # ── E1 (#407) — replay provenance ───────────────────────────────────── + # SOFT reference to the workspace this run replayed (uuid4().hex of the + # source row). Deliberately NO ForeignKey — not even self-referential: + # ancestor rows must stay independently deletable (metadata-only delete), + # and dangling lineage pointers are expected, like every created_objects id. + replayed_from_workspace_id: Mapped[str | None] = mapped_column( + String(32), nullable=True + ) + + # ── E1 (#407) — documented JSONB story slots ────────────────────────── + # Six dedicated nullable JSONB columns (precedent: created_objects / + # result_summary). NULL = "slot never written" (distinct from empty). 
+ # E1 writes NONE of them; documented schema per slot (authoritative copy + # in docs/_base/DOMAIN_MODEL.md): + # seed_overrides (E3 #409 writes) — dict: the curated seeder-override + # payload from the start frame, stored verbatim + # (model_dump(mode="json")); replay echoes it. + # user_scope (E3 #409 writes) — dict: operator-selected focus, + # {"store_id": int, "product_id": int} (additive keys + # allowed later). + # approval_events (E5 #411 writes) — list[dict], append-only: + # {"action_id": str, "tool_name": str, + # "decision": "approved"|"rejected", + # "decided_at": iso8601-str, "session_id": str}. + # rag_events (E5 #411 writes) — list[dict], append-only: + # {"event": "index"|"retrieve"|"skip", "detail": str, + # "count": int, "occurred_at": iso8601-str}. + # job_ids (later parallel epic) — list[str]: job / batch + # sub-job ids the run submitted (soft references). + # phase_summaries (later parallel epic) — list[dict], one per phase: + # {"phase_name": str, "status": "pass"|"fail"|"warn"|"skip", + # "steps": int, "duration_ms": float}. + seed_overrides: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True) + user_scope: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True) + approval_events: Mapped[list[dict[str, Any]] | None] = mapped_column(JSONB, nullable=True) + rag_events: Mapped[list[dict[str, Any]] | None] = mapped_column(JSONB, nullable=True) + job_ids: Mapped[list[str] | None] = mapped_column(JSONB, nullable=True) + phase_summaries: Mapped[list[dict[str, Any]] | None] = mapped_column(JSONB, nullable=True) + + # __table_args__ — APPEND (keep existing CheckConstraint + composite index): + # Index("ix_showcase_workspace_tags_gin", "tags", postgresql_using="gin"), + # Index("ix_showcase_workspace_replayed_from", "replayed_from_workspace_id"), + # imports to extend: Text from sqlalchemy (others already imported). 
+``` + +```python +# app/features/demo/schemas.py — DemoRunRequest addition (after workspace_name, +# line 78) + validator extension. + + # E1 (#407): replay provenance. The frontend Replay handler sends the + # SOURCE row's workspace_id; create_workspace records it verbatim on the + # NEW row (soft reference — no existence check). JSON-native str → no + # Field(strict=False) needed. + replayed_from_workspace_id: str | None = Field( + default=None, + pattern=r"^[0-9a-f]{32}$", # uuid4().hex shape of workspace_id + description="workspace_id this run replays; requires preservation='keep'.", + ) + + @model_validator(mode="after") + def _replayed_from_requires_keep(self) -> DemoRunRequest: + """Reject a lineage pointer on a run that writes no workspace row.""" + if self.replayed_from_workspace_id is not None and self.preservation != "keep": + raise ValueError("replayed_from_workspace_id requires preservation='keep'") + return self + + +# NEW request model — place after DemoRunRequest. +# (add `field_validator` to the pydantic import at schemas.py:14 — the file +# currently imports only BaseModel/ConfigDict/Field/model_validator) +class WorkspaceUpdateRequest(BaseModel): + """Partial lifecycle update for PATCH /demo/workspaces/{workspace_id}. + + exclude_unset semantics: only fields present in the body are applied; + explicit ``null`` clears ``name`` / ``notes``. Explicit ``null`` on + ``archived`` / ``pinned`` / ``tags`` is rejected (422) — they back NOT NULL + columns; send ``[]`` to clear tags. ``extra="forbid"`` so a typo'd field + 422s instead of silently no-opping (RunUpdate precedent, + app/features/registry/schemas.py:113). All fields JSON-native -> the + model-level strict=True needs no per-field override. ``status`` is + deliberately absent — the pipeline owns the run lifecycle. 
+ """ + + model_config = ConfigDict(strict=True, extra="forbid") + + name: str | None = Field( + default=None, + max_length=100, + pattern=r"^[a-z0-9][a-z0-9\-_]*$", # same as workspace_name + description="Rename the workspace; explicit null clears the label.", + ) + notes: str | None = Field( + default=None, max_length=2000, + description="Free-text annotation; explicit null clears it.", + ) + tags: list[str] | None = Field( + default=None, max_length=20, + description="Replace the full tag list (not a merge).", + ) + archived: bool | None = Field(default=None, description="Archive flag.") + pinned: bool | None = Field(default=None, description="Pin flag.") + + @field_validator("archived", "pinned", "tags") + @classmethod + def _reject_explicit_null(cls, v: bool | list[str] | None) -> bool | list[str]: + # Fires only on explicitly provided values (pydantic skips validators for + # defaults unless validate_default=True), so absent stays None/unset while + # an explicit {"archived": null} / {"tags": null} 422s instead of reaching + # the NOT NULL column via exclude_unset -> setattr -> IntegrityError 500. + # tags: send [] to clear, never null. + if v is None: + raise ValueError( + "archived/pinned accept only true/false and tags accepts a list " + "(send [] to clear) — explicit null is not allowed" + ) + return v + + +# Response additions (additive — keep from_attributes, NOT strict): +# WorkspaceListItem += archived: bool, pinned: bool, tags: list[str] +# (default_factory=list), replayed_from_workspace_id: str | None +# WorkspaceDetailResponse += notes: str | None, config_schema_version: int, +# seed_overrides / user_scope: dict[str, Any] | None, +# approval_events / rag_events / phase_summaries: +# list[dict[str, Any]] | None, job_ids: list[str] | None +``` + +```python +# app/features/demo/workspace.py — update_workspace (NEW; caller-owned session, +# raises normally — this backs an HTTP route, NOT the pipeline). 
+async def update_workspace( + db: AsyncSession, + workspace_id: str, + update: WorkspaceUpdateRequest, +) -> ShowcaseWorkspace | None: + """Apply a partial lifecycle update; return the row or None when missing.""" + row = await get_workspace(db, workspace_id) + if row is None: + return None + changes = update.model_dump(exclude_unset=True) # absent != explicit null + for field, value in changes.items(): + setattr(row, field, value) # whole-value ASSIGNMENT (JSONB gotcha) + await db.commit() + await db.refresh(row) + logger.info("demo.workspace_updated", workspace_id=workspace_id, fields=sorted(changes)) + return row + +# create_workspace — ONE added kwarg in the ShowcaseWorkspace(...) constructor: +# replayed_from_workspace_id=req.replayed_from_workspace_id, +``` + +```python +# app/features/demo/routes.py — PATCH route (mirror the DELETE shape @138). +@router.patch( + "/workspaces/{workspace_id}", + response_model=WorkspaceDetailResponse, + summary="Update a saved showcase workspace's lifecycle metadata", + description=( + "Partial update: rename / notes / tags / archive / pin. Only fields " + "present in the body change; explicit null clears name/notes. The run " + "lifecycle status is not patchable." + ), +) +async def update_showcase_workspace( + workspace_id: str, + update: WorkspaceUpdateRequest, + db: AsyncSession = Depends(get_db), +) -> WorkspaceDetailResponse: + row = await workspace.update_workspace(db, workspace_id, update) + if row is None: + raise NotFoundError(message=f"Workspace not found: {workspace_id}") + return WorkspaceDetailResponse.model_validate(row) +``` + +### List of tasks (dependency order) + +```yaml +Task 1 — branch & issue hygiene: + RUN: git switch dev && git pull && git switch -c feat/showcase-completion-e1-metadata-provenance + VERIFY: gh issue view 407 --json state # open + NOTE: git status shows untracked docker-compose.lan.yml on this host — leave it alone. 
+ +Task 2 — MODIFY app/features/demo/models.py: + - ADD the 12 columns per the blueprint (lifecycle block, provenance column, six slots) + - ADD `Text` to the sqlalchemy import line (others already imported) + - APPEND the two indexes to __table_args__ (tags GIN + replayed_from btree) + - EXTEND the module docstring: replayed_from_workspace_id is a soft reference + (no FK, not even self-referential); story slots NULL until their writer epic lands + - PRESERVE: existing columns, constants, CheckConstraint, composite index — untouched + +Task 3 — CREATE alembic/versions/<rev>_add_showcase_workspace_metadata_provenance.py: + - down_revision = "324a2fa37fcc" + - MIRROR: bb8c4587ef1d_add_scenario_library_columns.py (add_column + GIN + downgrade order) + - upgrade(): op.add_column x12 (server_defaults: archived/pinned text("false"), + config_schema_version text("1"), tags text("'[]'::jsonb"); the rest nullable), + then op.create_index("ix_showcase_workspace_tags_gin", ..., postgresql_using="gin") + and op.create_index("ix_showcase_workspace_replayed_from", ...) + - downgrade(): drop the two indexes (GIN drop with postgresql_using="gin", + matching bb8c4587ef1d:50), then drop the 12 columns in reverse order + - VERIFY: docker compose up -d && + uv run alembic upgrade head && uv run alembic downgrade -1 && uv run alembic upgrade head + +Task 4 — MODIFY app/features/demo/schemas.py: + - ADD DemoRunRequest.replayed_from_workspace_id + _replayed_from_requires_keep + validator (blueprint); UPDATE the docstring sentence listing JSON-native fields + - ADD WorkspaceUpdateRequest (blueprint) — placed after DemoRunRequest + - EXTEND WorkspaceListItem (+archived/pinned/tags/replayed_from_workspace_id) + and WorkspaceDetailResponse (+notes/config_schema_version/six slots) additively + +Task 5 — MODIFY app/features/demo/workspace.py: + - create_workspace: add replayed_from_workspace_id=req.replayed_from_workspace_id + to the ShowcaseWorkspace(...) 
constructor (one line; warn-and-continue untouched) + - ADD update_workspace (blueprint) + the WorkspaceUpdateRequest import + - UPDATE module docstring routing note (PATCH now routed too) + +Task 6 — MODIFY app/features/demo/routes.py: + - ADD the PATCH route (blueprint) between GET detail and DELETE + - ADD WorkspaceUpdateRequest to the schemas import block + - UPDATE the module docstring endpoint list + +Task 7 — MODIFY frontend (two additive lines): + - frontend/src/types/api.ts DemoRunRequest (@778): add + `// E1 (#407) — replay provenance: the source workspace_id a Replay re-runs.` + `replayed_from_workspace_id?: string` + - frontend/src/pages/showcase.tsx handleReplayWorkspace start() call (@179-185): + add `replayed_from_workspace_id: ws.workspace_id,` + - DO NOT touch handleLoadWorkspace (Load is read-only) or WorkspacePanel + +Task 8 — tests (full matrix in Validation Loop): + - MODIFY tests/test_schemas.py (unit) + - MODIFY tests/test_models.py (@pytest.mark.integration) + - MODIFY tests/test_workspace.py (@pytest.mark.integration) + - MODIFY tests/test_routes.py (PATCH 200/404/422; unit-shaped via monkeypatched + workspace.update_workspace where the existing file does so, integration otherwise — + follow whichever convention the existing GET/DELETE tests use) + +Task 9 — docs (additive): + - docs/_base/API_CONTRACTS.md: + * NEW row: `demo | PATCH | /demo/workspaces/{workspace_id} | E1 (#407) — partial + lifecycle update (name/notes/tags/archived/pinned; exclude_unset, explicit null + clears name/notes; status NOT patchable); 404 problem+json when missing; 422 on + unknown keys / bad name pattern / >20 tags; empty body = 200 no-op` + * POST /demo/run row + WS /demo/stream section: additive Optional + `replayed_from_workspace_id` (`^[0-9a-f]{32}$`, requires preservation='keep'); + Replay now sends it; recorded verbatim as a soft reference + * GET /demo/workspaces rows: note the additive response fields + - docs/_base/DOMAIN_MODEL.md § showcase_workspace: 
+ * Stored metadata: add lifecycle columns + config_schema_version semantics + * JSONB fields: add the six story slots WITH their documented schemas (copy the + model-comment schemas verbatim — this is the authoritative copy) + * Invariants: replayed_from_workspace_id is a SOFT reference (no FK, dangles OK); + status not patchable; archived orthogonal to status + * Trim the "Out of scope" line that lists `replayed_from` as not-modeled (now shipped) + - docs/_base/RUNBOOKS.md § Showcase workspace: remove `replayed_from` from the + "Explicitly out of scope" list (one-line edit; the full runbook sweep is E7) + +Task 10 — gates, commit, PR: + - RUN the full Validation Loop (Levels 1-4) + - git diff --stat # surgical diffs only (CRLF noise check) + - COMMITS (reference #407, no AI trailer), e.g.: + feat(db): extend showcase_workspace with metadata and provenance columns (#407) + feat(api): add workspace patch lifecycle endpoint and replay provenance (#407) + feat(ui): send replayed_from_workspace_id on showcase replay (#407) + docs(repo): document workspace story slots and patch contract (#407) + - PR into dev; title `feat(api,db): showcase-completion E1 — workspace metadata & provenance backbone (#407)` +``` + +### Integration Points + +```yaml +DATABASE: + - migration: 12 add_column on showcase_workspace + ix_showcase_workspace_tags_gin (GIN) + + ix_showcase_workspace_replayed_from (btree); clean downgrade + - registration: alembic/env.py already imports demo models (line 19) — NO change + +CONFIG: none — no new settings, no env vars. + +ROUTES: PATCH /demo/workspaces/{workspace_id} on the existing demo router — no + app/main.py change (router already wired). + +PIPELINE: none — create_workspace reads the new field straight off req; the + keep-branch hook (pipeline.py:2652) and finalize hook (2741) are untouched. + +FRONTEND: two additive lines (Task 7). No new components; lineage badge/chain is E2. + +DOCS: API_CONTRACTS + DOMAIN_MODEL (+ one-line RUNBOOKS trim). 
Full sweep is E7. +``` + +## Validation Loop + +### Level 1: Syntax & Style + +```bash +uv run ruff check . && uv run ruff format --check . +uv run mypy app/ && uv run pyright app/ +# Expected: clean. Both type checkers are --strict and gate merge. +``` + +### Level 2: Unit Tests (no DB) + +```python +# tests/test_schemas.py — add: +def test_demo_run_request_replayed_from_default_none() -> None: ... + # DemoRunRequest() -> replayed_from_workspace_id is None; legacy frame + # model_validate({"seed": 7}) still validates + +def test_demo_run_request_replayed_from_json_path() -> None: ... + # MANDATORY json-dict path (security-patterns.md § strict mode): + # model_validate({"preservation": "keep", "replayed_from_workspace_id": "a"*32}) + +def test_demo_run_request_replayed_from_requires_keep() -> None: ... + # pytest.raises(ValidationError): model_validate({"replayed_from_workspace_id": "a"*32}) + +def test_demo_run_request_replayed_from_pattern_rejected() -> None: ... + # "not-hex!", "ABC..." (uppercase), 31-char and 33-char values all raise + +def test_workspace_update_request_partial_fields_set() -> None: ... + # model_validate({"notes": None}).model_dump(exclude_unset=True) == {"notes": None} + # model_validate({}).model_dump(exclude_unset=True) == {} + +def test_workspace_update_request_rejects_unknown_key() -> None: ... + # model_validate({"status": "archived"}) raises (extra="forbid" — status not patchable) + +def test_workspace_update_request_name_pattern_and_tags_cap() -> None: ... + # "Bad Name!" raises; 21 tags raises; ["workspace:x", "demo"] passes + +def test_workspace_update_request_rejects_explicit_null_flags() -> None: ... 
+ # pytest.raises(ValidationError): model_validate({"archived": None}) + # pytest.raises(ValidationError): model_validate({"pinned": None}) + # pytest.raises(ValidationError): model_validate({"tags": None}) + # model_validate({"tags": []}) passes (the sanctioned clear path) + # (NOT NULL columns — explicit null must 422, never reach setattr) + +# tests/test_routes.py — add (follow the file's existing GET/DELETE conventions): +async def test_patch_workspace_happy_path(...) -> None: ... + # PATCH {"name": "renamed", "pinned": true, "tags": ["t1"]} -> 200; response + # echoes the changes and the untouched fields +async def test_patch_workspace_missing_404_problem_json(...) -> None: ... + # status 404; content-type application/problem+json +async def test_patch_workspace_unknown_field_422(...) -> None: ... + # body {"bogus": 1} -> 422 problem+json +async def test_patch_workspace_explicit_null_archived_422(...) -> None: ... + # body {"archived": null} -> 422 problem+json (NOT NULL column guard) +async def test_patch_workspace_empty_body_noop_200(...) -> None: ... +async def test_run_demo_rejects_replayed_from_without_keep_422(...) -> None: ... 
+``` + +```bash +uv run pytest app/features/demo -v -m "not integration" +uv run pytest app/core/tests/test_strict_mode_policy.py -v # AST walker still green +``` + +### Level 3: Integration (real Postgres) + +```python +# tests/test_models.py — @pytest.mark.integration, extend: +# - insert with NO new kwargs -> archived=False, pinned=False, tags=[], +# config_schema_version=1, all six slots None, replayed_from None +# (server_default + ORM default agreement) +# - tags JSONB roundtrip + containment: insert tags=["workspace:x","demo"]; +# select(...).where(ShowcaseWorkspace.tags.contains(["demo"])) finds it +# (scenarios/service.py:464 query shape) +# - story-slot roundtrip: write a dict into seed_overrides and a list[dict] +# into approval_events; read back identical +# - status CHECK still enforced (regression — constraint untouched) + +# tests/test_workspace.py — @pytest.mark.integration, extend: +# - create_workspace with req.replayed_from_workspace_id set -> column recorded +# verbatim; without it -> None (legacy identical) +# - update_workspace partial: set name+pinned only -> other fields untouched; +# explicit name=None clears; tags replaced whole (not merged); +# missing workspace_id -> returns None (route maps to 404) +# - update_workspace empty request -> no-op, row returned +``` + +```bash +docker compose up -d +uv run alembic upgrade head +uv run alembic downgrade -1 && uv run alembic upgrade head # downgrade is clean +uv run pytest app/features/demo -v -m integration +``` + +### Level 4: Manual smoke (seeded local stack, uvicorn on :8123 + vite) + +```bash +# 1. 
Keep-run, then PATCH lifecycle round-trip: +curl -s -X POST http://localhost:8123/demo/run -H 'Content-Type: application/json' \ + -d '{"skip_seed": true, "preservation": "keep", "workspace_name": "e1-smoke"}' \ + | python3 -c "import sys,json; print(json.load(sys.stdin)['workspace_id'])" +WS=<workspace_id printed above> +curl -s -X PATCH http://localhost:8123/demo/workspaces/$WS \ + -H 'Content-Type: application/json' \ + -d '{"name": "e1-renamed", "notes": "smoke", "tags": ["smoke"], "pinned": true}' | python3 -m json.tool +curl -s -X PATCH http://localhost:8123/demo/workspaces/deadbeef -H 'Content-Type: application/json' -d '{}' \ + | python3 -m json.tool # 404 problem+json + +# 2. Replay provenance (browser): /showcase -> Saved workspaces -> Replay on +# the e1-renamed row; after the run: +docker exec forecastlab-postgres psql -U forecastlab -d forecastlab -c \ + "SELECT workspace_id, name, replayed_from_workspace_id FROM showcase_workspace ORDER BY created_at DESC LIMIT 2;" +# Expect: newest row's replayed_from_workspace_id == $WS; the $WS row unchanged. + +# 3. Frontend gates: +cd frontend && pnpm lint && pnpm test --run +# pnpm tsc -b — confirm no NEW errors vs the dev baseline (gate is vacuous-aware, +# see Known Gotchas). +``` + +## Final validation Checklist + +- [ ] All five gates green: `uv run ruff check . && uv run ruff format --check . 
&& uv run mypy app/ && uv run pyright app/ && uv run pytest -v -m "not integration"` +- [ ] Integration suite green: `uv run pytest -v -m integration` (fresh docker-compose DB; reset first if the shared DB is polluted) +- [ ] Migration upgrade + downgrade clean on a fresh DB AND applies on a DB with existing workspace rows +- [ ] Legacy surfaces byte-identical: start frame without new keys, GET list/detail for old rows (new fields all default/null), `test_strict_mode_policy.py` green +- [ ] PATCH 200 / 404 / 422 paths verified (Level 2 + Level 4) +- [ ] Replay records `replayed_from_workspace_id`; source row untouched (Level 4 step 2) +- [ ] `git diff --stat` shows surgical diffs (no CRLF whole-file noise) +- [ ] docs/_base/API_CONTRACTS.md + DOMAIN_MODEL.md updated additively (slot schemas documented); RUNBOOKS out-of-scope line trimmed +- [ ] Commits `feat(db)/feat(api)/feat(ui)/docs(repo): ... (#407)`, no AI trailer; PR into dev + +--- + +## Anti-Patterns to Avoid + +- ❌ Don't add ANY ForeignKey — not even self-referential on `replayed_from_workspace_id`. Soft references only. +- ❌ Don't edit `324a2fa37fcc_create_showcase_workspace_table.py` — new revision off head `324a2fa37fcc`. +- ❌ Don't make `status` patchable or widen `ck_showcase_workspace_status` — `archived` is the orthogonal flag. +- ❌ Don't add `extra="forbid"` to `DemoRunRequest` (WS compat) — but DO add it to `WorkspaceUpdateRequest`. +- ❌ Don't write any story slot from E1 production code — columns + docs + roundtrip tests only. +- ❌ Don't validate that `replayed_from_workspace_id` points at an existing row — it's a soft reference; dangles are designed. +- ❌ Don't wrap `update_workspace` in warn-and-continue — that contract is pipeline-only; HTTP helpers raise. +- ❌ Don't add list filtering/sorting/search or archive-hiding — that's E2 (#408). +- ❌ Don't add a replay confirmation dialog or lineage UI — E2 (#408). +- ❌ Don't mutate JSONB values in place — always assign whole values. 
+- ❌ Don't import another feature slice from `app/features/demo/` — core/shared only. + +## Notes for parallel-epic PRP authors (#408–#412) + +- The column set, slot names, and per-slot schemas above are the frozen E1 contract. + `job_ids` / `phase_summaries` have a documented schema but NO assigned writer in + E1 — E2 (#408, health summary) and E4 (#410, config echo) should agree on which + populates which and follow the documented shapes. +- Slot writes that happen DURING a pipeline run inherit the warn-and-continue + invariant (extend `finalize_workspace` / add sibling helpers in `workspace.py`); + slot writes via HTTP go through caller-owned-session helpers like + `update_workspace`. +- Tag filtering on `GET /demo/workspaces` (E2) should reuse the + `ShowcaseWorkspace.tags.contains([...])` containment shape proven in E1's + integration test, mirroring `GET /scenarios?tags=` (scenarios/routes.py:180). +- A schema change to any slot bumps `config_schema_version` (ORM default) and + documents the delta in DOMAIN_MODEL. + +## Confidence Score + +**9/10** for one-pass implementation success. Every element has a verified in-repo +precedent: the add-columns+GIN migration (`bb8c4587ef1d`), the tags column +(`scenarios/models.py:74`), the partial-update schema (`registry RunUpdate`), the +404-on-missing route shape (the demo DELETE), and the request-field+validator pattern +(`workspace_name`, same file). The three judgment calls (tags representation, slot +shape, no-FK soft reference) are resolved and frozen above, and all changes are +additive — a wrong slot-schema guess costs a documented `config_schema_version` bump, +not a rework. The −1: the PATCH route tests must match whichever +unit-vs-integration convention `test_routes.py` currently uses for the workspace +GET/DELETE endpoints (read it first), and the frontend type-gate baseline is fuzzy +on this host (`tsc -b` has pre-existing dev failures — gate on "no NEW errors"). 
diff --git a/PRPs/PRP-showcase-completion-E2-safe-replay-lifecycle.md b/PRPs/PRP-showcase-completion-E2-safe-replay-lifecycle.md new file mode 100644 index 00000000..6adfe994 --- /dev/null +++ b/PRPs/PRP-showcase-completion-E2-safe-replay-lifecycle.md @@ -0,0 +1,1247 @@ +name: "PRP — Showcase Completion E2: Safe Replay & Workspace Lifecycle (issue #408)" +description: | + +## Purpose + +Implement the safe-replay + workspace-lifecycle epic of the showcase-completion +initiative (umbrella #406): an explicit confirmation step (with preview/diff) +before every replay — destructive copy when `reset=true` — lineage rendering of +the E1 `replayed_from_workspace_id` chain, full lifecycle management on the +saved-workspaces panel (rename / archive / pin / notes / tags / search / +filter / sort / multi-select delete), a two-workspace compare view, and the +folded-in ops slice: artifact-link liveness checks with dead-link warnings on +soft references plus a per-workspace health summary (partial-run warning +included). Parallel epic after Foundation E1 (#407) — **execution starts only +AFTER E1 merges**; this PRP treats E1's epic body as a frozen contract (every +dependency on it is tagged `CONTRACT(E1)` below). + +## Core Principles + +1. **Context is King**: every reference below was verified against the live code on 2026-06-12 (branch `dev`, post-#404/#405 merge — E1 #407 NOT yet merged; see the E1-reconciliation task). +2. **Validation Loops**: each level is executable as written. +3. **Information Dense**: patterns cite exact file:line. +4. **Progressive Success**: backend list-filters + health endpoint → frontend types/hooks → confirm/diff dialog → lifecycle panel rework → lineage → compare page → docs. +5. **Global rules**: follow CLAUDE.md / AGENTS.md; all five CI gates must pass; UI work follows `.claude/rules/ui-design.md` + `.claude/rules/shadcn-ui.md`. 
+ +--- + +## Goal + +An operator on `/showcase` can: + +- (a) **Replay safely** — clicking Replay opens a confirmation dialog showing a + preview/diff: the recorded config (seed / scenario / reset / skip_seed / + name) side-by-side with the exact `DemoRunRequest` about to be sent, any + divergence highlighted. When the recorded config has `reset=true`, the + dialog carries explicit destructive copy ("Replaying this workspace WIPES + the database") and a destructive-styled confirm button. No replay starts + without confirmation. +- (b) **See lineage** — a workspace created by a replay carries a "replay" + badge in the list; the loaded-workspace view renders the + `replayed_from_workspace_id` chain (newest → original), with dangling + ancestors (deleted rows) marked rather than erroring. +- (c) **Manage the library** — per-row actions: rename, edit notes, edit tags, + pin/unpin, archive/unarchive (all via the E1 `PATCH /demo/workspaces/{id}`), + plus the existing single delete. The list gains a search box (name), a + show-archived toggle (archived hidden by default), a tag filter, and an + allow-listed sort; pinned rows always sort first. +- (d) **Multi-select delete** — checkbox per row, "Delete selected (N)" behind + one confirmation dialog, implemented as N sequential single + `DELETE /demo/workspaces/{id}` calls. **No new bulk endpoint** (metadata-only + singles; vision-compatible — no "wipe everything" operation). +- (e) **Compare two workspaces** — select exactly two rows → Compare navigates + to a new deep-linkable page (`/showcase/compare?a=&b=`) mirroring the + run-compare two-picker pattern: config diff, result-summary diff (winner / + WAPE delta / wall-clock), created-objects presence matrix, lineage relation. 
+- (f) **See link health** — loading a workspace probes its soft references + (model runs, scenario plans, alias, batch, agent session, E1 `job_ids`) + through a new backend aggregation endpoint + `GET /demo/workspaces/{id}/health`; dead references render a warning marker + on the artifact cards and a per-workspace health summary chip shows + alive/dead counts plus a partial-run warning when the run never completed. + +**Deliverable** (all additive — no migration in E2; the schema delta is E1's): + +- `app/features/demo/workspace.py` — `list_workspaces` / `count_workspaces` + gain filter/sort parameters (`q`, `tags`, `include_archived`, `sort_by`, + `sort_order`; pinned-first ordering). +- `app/features/demo/link_health.py` — NEW: in-process soft-reference probe + module (httpx `ASGITransport`, mirroring `pipeline._Client`). +- `app/features/demo/schemas.py` — `WorkspaceRefHealth`, + `WorkspaceHealthResponse` response models (plain BaseModel, NOT strict). +- `app/features/demo/routes.py` — query params on `GET /demo/workspaces`; + NEW `GET /demo/workspaces/{workspace_id}/health`. +- `frontend/src/types/api.ts` — lifecycle fields on the workspace types + (verify-or-add per CONTRACT(E1)), health types, list-params type, + `WorkspaceUpdate` type. +- `frontend/src/hooks/use-workspaces.ts` — params-aware `useWorkspaces`, + `usePatchWorkspace`, `useWorkspaceHealth`, `useWorkspaceLineage`. +- `frontend/src/components/demo/ReplayConfirmDialog.tsx` — NEW confirm + + preview/diff dialog. +- `frontend/src/components/demo/WorkspaceEditDialog.tsx` — NEW + rename/notes/tags editor. +- `frontend/src/components/demo/WorkspaceLineageStrip.tsx` — NEW lineage chain. +- `frontend/src/components/demo/WorkspacePanel.tsx` — reworked: toolbar + (search / show-archived / sort), row badges (pinned, archived, replay), + per-row actions dropdown, multi-select + delete-selected + compare-selected. 
+- `frontend/src/components/demo/WorkspaceArtifactsPanel.tsx` — health-aware + cards (dead-link warnings) + health summary chip. +- `frontend/src/pages/workspace-compare.tsx` — NEW two-workspace compare page; + route + `ROUTES.SHOWCASE_COMPARE` constant. +- `frontend/src/pages/showcase.tsx` — replay-confirm flow, lineage strip + + health wiring, `replayed_from_workspace_id` on the replay start frame. +- Tests: backend route + module unit tests, integration tests for list filters + and health; frontend vitest for every new/changed component + hook. +- `docs/_base/API_CONTRACTS.md` + `docs/_base/RUNBOOKS.md` — additive updates + (incl. superseding the "deliberately no confirm dialog" note). + +**Success definition**: all Success Criteria below check off, the five backend +CI gates and the frontend gates are green, and a manual browser dogfood on a +seeded stack walks: save → search/sort → rename/pin/archive → replay (confirm +dialog with diff, destructive variant on a reset workspace) → lineage chain +visible → two-workspace compare → delete a referenced run → health shows the +dead link. + +## Why + +- Umbrella #406 success criteria commit: "a `reset=true` replay requires an + explicit confirmation step before it runs" and "Workspaces can be renamed, + archived, pinned, annotated (notes/tags), searched, filtered, sorted, and + multi-select-deleted (metadata-only) from the saved-workspaces panel". +- Today a replay of a `reset=true` workspace wipes the database with **no + confirmation** — documented designed behavior + (`docs/_base/RUNBOOKS.md` § "Showcase workspace", item 1: "there is + deliberately no confirm dialog") that #406 explicitly reverses. +- E1 (#407) ships the storage + PATCH surface but no UI consumes it; E2 is the + delivery surface that makes lifecycle, lineage, and provenance visible. 
+- `created_objects` ids are soft references by design — operator deletes leave + dangling deep links ("expected; the workspace row records what WAS created, + not what still exists", RUNBOOKS § Showcase workspace item 4). Link health + turns that silent staleness into a visible, per-workspace signal — the novel + ops slice #406 folded into this epic. + +## What + +### Decisions locked here (so implementation doesn't re-litigate) + +These were the open questions this PRP owns; the decisions below are final for E2. + +1. **Replay-policy picker (exact / safe-keep / modified): OUT OF SCOPE.** + Replay stays verbatim (`E4 #393` semantics). Rationale: the umbrella + commits only confirm + preview/diff; a "modified replay" already exists as + Load → edit controls → Run (the Load path repopulates every control); a + policy enum would add request-surface + backend validation for zero new + capability. The confirm dialog's footer carries a one-line hint — + "Want to change the config first? Use Load instead." Document the + deferral in the PR description. +2. **Confirmation applies to EVERY replay, not just `reset=true`.** The + preview/diff panel needs a pre-flight surface and a sometimes-there dialog + is worse UX than an always-there one. The `reset=true` variant escalates: + destructive copy + destructive-styled action button. This satisfies the + umbrella's "explicit confirmation before any reset=true replay" as a + strict superset. The direct Run button (operator-configured runs) is + unchanged — confirmation guards replays only. +3. **Link-health architecture: BACKEND aggregation endpoint** + (`GET /demo/workspaces/{id}/health`), implemented by probing the public + API **in-process** via `httpx.ASGITransport` — the exact mechanism + `pipeline._Client` already uses from inside a request context + (`app/features/demo/pipeline.py:141-148`; `POST /demo/run` passes + `request.app` into the pipeline at `routes.py:75`). 
Justification: + (a) the demo slice may NOT import registry/scenarios/jobs/agents services + (vertical-slice rule), and in-process HTTP through the public surface is + the slice's established cross-slice seam; (b) one workspace has up to ~10+ + references (3 runs + N plans + alias + batch + session + M jobs) — a + frontend-probed design costs 1+N browser round-trips per workspace and + duplicates existence semantics per artifact type; (c) a backend endpoint + gives the health summary a single testable contract and a place for the + partial-run flag. Probes run concurrently (`asyncio.gather`), classify + 2xx→`alive`, 404→`dead`, anything else→`unknown`, and are fetched + on-demand (loaded workspace only — never for every list row). +4. **Compare view: FRONTEND-ONLY page.** A workspace compare is a plain field + diff over two already-served `WorkspaceDetail` payloads — no new backend + endpoint (contrast: `GET /registry/compare/{a}/{b}` exists because metric + diffing has server-side logic). New page `/showcase/compare?a=&b=` + mirroring `frontend/src/pages/explorer/run-compare.tsx` (two `Select` + pickers + `useSearchParams` deep-linking). +5. **Multi-select delete = N sequential single DELETEs.** The existing + `DELETE /demo/workspaces/{id}` is called once per selected row behind one + confirmation dialog. NO new bulk endpoint — product-vision guardrail ("no + wipe-everything operations"); failures are collected and toasted, the list + refetches once at the end. +6. **Search/filter/sort: SERVER-SIDE additive query params** on + `GET /demo/workspaces`, mirroring established precedents: name search → + `dimensions` `search` ILIKE pattern (`app/features/dimensions/routes.py:65`), + tags → `scenarios` repeated-`tags` JSONB containment + (`app/features/scenarios/routes.py:180`, `service.py:462-465`), sort → + allow-listed `sort_by`/`sort_order` with silent fallback to default + (`dimensions/routes.py:70-75`). 
`include_archived=false` is the default + (archived rows hidden). Pinned rows always order first + (`ORDER BY pinned DESC, <sort_by> <sort_order>`). Server-side keeps the panel honest as + rows accumulate and gives the filter a route-test contract. + +### Frozen contract — CONTRACT(E1) (#407 ships these; E2 consumes, never re-decides) + +Every assumption below MUST be reconciled against the merged E1 diff before +implementation (Task 1). Where E1's PRP chose different names, adapt E2's code +to E1's names — never the reverse. + +- `CONTRACT(E1)-1` — `showcase_workspace` columns exist post-migration: + `replayed_from_workspace_id` (nullable String(32), soft reference — NO FK, + consistent with `models.py` no-FK doctrine), `archived` (bool, default + false), `pinned` (bool, default false), `notes` (nullable text), `tags` + (JSONB string array, default `[]`), `config_schema_version` (int). +- `CONTRACT(E1)-2` — `tags` representation is a JSONB string array with a GIN + index, mirroring `scenario_plan.tags` + (`app/features/scenarios/models.py:74,97`), so SQLAlchemy + `.contains([tag])` containment filtering works. +- `CONTRACT(E1)-3` — `PATCH /demo/workspaces/{workspace_id}` exists with an + all-Optional partial-update body (rename/notes/tags/archive/pin — assumed + schema name `WorkspaceUpdateRequest`, semantics mirroring registry + `RunUpdate`, `app/features/registry/schemas.py:113-121`: absent field = + unchanged), returns the updated workspace (assumed + `WorkspaceDetailResponse`), 404 problem+json on a missing id. +- `CONTRACT(E1)-4` — the GET list/detail response schemas expose the new + columns (`WorkspaceListItem` += `archived`, `pinned`, `tags`, + `replayed_from_workspace_id`; `WorkspaceDetailResponse` += `notes`, + `config_schema_version` and the JSONB story slots it serves). **Defensive + rule**: if E1 did NOT extend the GET responses, E2 adds the fields + additively in Task 3 (they are required reading surface for this epic).
+- `CONTRACT(E1)-5` — replay provenance mechanism: `DemoRunRequest` (and the + WS start frame) carries an additive Optional + `replayed_from_workspace_id: str | None` that `workspace.create_workspace` + persists onto the new row (E1's epic body: "Replay writes + `replayed_from_workspace_id`"). NOTE: E1's PRP itself wires the frontend + send (handleReplayWorkspace sends `ws.workspace_id` — an E1 success + criterion), so E2 PRESERVES the field through the executeReplay refactor + rather than adding it; if E1 instead derived it server-side, E2 adapts. +- `CONTRACT(E1)-6` — the `job_ids` JSONB story slot is a `list[str]` of job + ids; the health endpoint probes each via `GET /jobs/{job_id}` when the slot + is non-empty (and silently skips when absent/empty — pre-E1-backfill rows). +- `CONTRACT(E1)-7` — E1 does NOT add filtering/sorting to + `GET /demo/workspaces` (its scope is migration + PATCH + schemas); the list + query params are E2's to add. If E1's merged code already added any of + them, reuse instead of duplicating. + +### User-visible behavior + +- **Replay confirm/diff**: Replay button → dialog titled "Replay workspace + \"name\"?" with a two-column table (Recorded / Will send) over seed, + scenario, reset, skip_seed, workspace name, preservation (always `keep`), + replayed-from (the source workspace id). Rows where the two values differ + are highlighted (defensive — verbatim replay means they normally match). + `reset=true` → red warning block + destructive confirm button labeled + "Replay & wipe database"; otherwise a default confirm labeled "Replay". + Cancel never starts a run. +- **Lineage**: list rows with `replayed_from_workspace_id != null` show an + outline `Badge` "replay". The loaded-workspace view renders a breadcrumb + strip: `this ← parent ← grandparent …` (depth-capped at 5), each ancestor + clickable (loads it); a deleted ancestor renders as + "(original deleted)" — dangling soft references are expected, never errors. 
+- **Lifecycle panel**: toolbar = search `Input` (filters by name, + debounced/enter-applied), "Show archived" `Checkbox`, sort `Select` + (Newest / Oldest / Name / Status). Rows: pin icon (filled when pinned), + muted styling + "archived" badge on archived rows, tags rendered as small + chips (clicking a chip filters the list by that tag; an active tag filter + shows as a clearable chip in the toolbar). Per-row `DropdownMenu` (lucide + `MoreHorizontal`): Pin/Unpin, Archive/Unarchive, Edit details…, Delete…. + "Edit details…" opens `WorkspaceEditDialog` (name input with the + `^[a-z0-9][a-z0-9\-_]*$` client validation already used by the run controls, + notes `Textarea`, tags comma-separated input). +- **Multi-select**: leading `Checkbox` per row + header select-all; selection + shows "N selected" with **Delete selected** (AlertDialog: "Delete N + workspace records? Their created objects are NOT deleted.") and **Compare** + (enabled only when exactly 2 selected → navigates to the compare page). +- **Compare page** (`/showcase/compare?a=&b=`): back-link to `/showcase`, two + workspace `Select` pickers (deep-linkable URL params), then: config table + (seed/scenario/reset/skip_seed/name/tags, mismatches highlighted), + result-summary table (winner, WAPE with the `DeltaCell` sign-only + indicator, wall-clock), created-objects presence matrix (per soft-reference + key: recorded A / recorded B), lineage note when one side is a replay of + the other, partial-run badge per side when `status != "completed"`. +- **Link health**: loading a workspace fires + `GET /demo/workspaces/{id}/health`; the artifacts panel shows a summary + chip — `✓ N live · ✕ M dead` (plus "partial run" warning chip when the + row's status is not `completed`) — and each card whose reference probed + `dead` gets a lucide `AlertTriangle` + tooltip "This object no longer + exists — it was deleted after the run." `unknown` references render + without a marker (no false alarms on transient 5xx). 
+ +### Technical requirements + +- All five backend gates green; frontend `pnpm lint && pnpm test --run` green. +- New/changed endpoints: route tests covering 2xx + at least one error path + (`.claude/rules/test-requirements.md`). +- RFC 7807 for every error path (`NotFoundError` from `app/core/exceptions.py:72`). +- Response models stay plain `BaseModel` (+`from_attributes` where ORM-built) + — strict mode is request-body-only policy (`demo/schemas.py:88-95` precedent). +- The demo slice imports NO other feature slice — link health goes through + in-process HTTP (`request.app` + `ASGITransport`), never a service import. +- Frontend: TanStack Query for all IO; shadcn/ui new-york primitives only + (everything needed is already installed — see gotchas); lucide icons; + semantic tokens only (`text-destructive`, `bg-muted`, …) — no raw colors. +- Legacy behavior byte-identical: a client that never touches the new query + params / endpoints sees today's responses (new list params all default to + today's semantics EXCEPT archived-hidden — see gotcha on `include_archived`). + +### Success Criteria + +- [ ] Replay (panel button) always opens the confirm dialog with the + recorded-vs-sent preview; confirming a `reset=true` workspace requires + the destructive-styled button; Cancel starts nothing. No code path + starts a replay without the dialog. +- [ ] A confirmed replay sends the recorded config verbatim + + `preservation="keep"` + the recorded name + `replayed_from_workspace_id` + (CONTRACT(E1)-5); the new row carries the provenance id and the list + shows its "replay" badge; the loaded view renders the ancestor chain, + tolerating deleted ancestors. +- [ ] Rename / notes / tags / pin / archive each round-trip through + `PATCH /demo/workspaces/{id}` and re-render without a manual refresh + (query invalidation on list + detail). 
+- [ ] `GET /demo/workspaces` supports `q` (name ILIKE), `tags` (repeated, + containment), `include_archived` (default false), allow-listed + `sort_by`/`sort_order` (unknown → default `created_at desc`); pinned + rows order first; `total` respects the active filters; route tests + cover each param + the bad-param paths. +- [ ] Multi-select delete removes N metadata rows via N single DELETEs behind + one confirmation; created objects untouched; NO new bulk endpoint exists. +- [ ] `/showcase/compare?a=&b=` deep-links two workspaces and renders config + diff, result diff, created-objects matrix, lineage note, partial-run + badges; invalid/missing ids degrade to the picker (no crash). +- [ ] `GET /demo/workspaces/{id}/health` returns per-reference + `alive`/`dead`/`unknown` + counts + `partial_run`; 404 problem+json on a + missing workspace; integration test proves a bogus reference probes + `dead` and a real one probes `alive`. +- [ ] Loaded-workspace artifact cards show dead-link warnings + the health + summary chip; the partial-run warning renders for non-completed rows. +- [ ] Legacy list calls (no new params) return archived-free, pinned-first, + newest-first pages; all pre-existing demo tests still pass. +- [ ] `uv run ruff check . && uv run ruff format --check . && uv run mypy app/ + && uv run pyright app/ && uv run pytest -v -m "not integration"` green; + integration suite green; `cd frontend && pnpm lint && pnpm test --run` + green. + +## Assumptions (no user available — documented, not asked) + +1. E1 (#407) merges before E2 execution begins (implementation-order gate from + the umbrella). This PRP is authored against pre-E1 `dev`; Task 1 + reconciles every CONTRACT(E1) point against E1's actual merged shape. +2. Exact E1 schema/endpoint names (`WorkspaceUpdateRequest`, field names as + listed in CONTRACT(E1)) — adapt to E1's real names on divergence. +3. 
Archived-by-default-hidden is the correct list semantics (that is what + "archive" means for a library); the only consumer of `GET /demo/workspaces` + is the Showcase panel (verified — no other frontend or backend caller), so + the default-flip is safe. +4. Health probing is acceptable on-demand-only (loaded workspace), not for + every list row — probing N rows × M references on list render would be a + self-inflicted thundering herd through the in-process transport. +5. The lineage chain depth cap of 5 is sufficient (a replay-of-a-replay chain + deeper than 5 is a pathological case; the strip renders "…" beyond it). +6. `sonner` `toast` (already used by `WorkspacePanel.tsx:20`) is the + feedback surface for mutation success/failure — no new notification system. +7. Tag editing via a comma-separated text input is acceptable UX for a + single-operator tool (no tag-autocomplete component is installed; building + one is out of scope). + +## All Needed Context + +### Documentation & References + +```yaml +# MUST READ — issues (the contract stack) +- url: https://github.com/w7-mgfcode/ForecastLabAI/issues/408 + why: The epic this PRP implements — scope list is exhaustive (this PRP covers all of it). +- url: https://github.com/w7-mgfcode/ForecastLabAI/issues/406 + why: Umbrella — success criteria rows 2 & 3 are E2's acceptance bar; out-of-scope list (no replay-policy infra beyond confirm+diff). +- url: https://github.com/w7-mgfcode/ForecastLabAI/issues/407 + why: Foundation epic body = the frozen CONTRACT(E1) surface (columns, JSONB slots, PATCH endpoint, replay provenance write). +- file: PRPs/PRP-showcase-workspace-E4-restore-replay.md + why: Closest-analog predecessor PRP — the E4 restore/replay semantics E2 hardens; its "decisions locked" #2/#3 (no confirm dialog, no provenance) are the two designed behaviors #406/#407 now reverse. 
+ +# MUST READ — backend (verified 2026-06-12, dev pre-E1) +- file: app/features/demo/routes.py + why: | + Current surface: POST /run @51 (passes request.app into the pipeline @75 — + the request-context app handle the health route also needs), GET + /workspaces @80-107 (limit/offset only — EXTEND with filters), GET + /workspaces/{workspace_id} @110-135 (NotFoundError 404 pattern @133-134), + DELETE @138-163, WS /stream @166. Router prefix="/demo" @48. Health route + lands between the GET detail and DELETE. +- file: app/features/demo/workspace.py + why: | + list_workspaces @174-196 (order created_at.desc, id.desc @192) and + count_workspaces @224-234 — the two functions E2 extends with q/tags/ + include_archived/sort_by/sort_order. get_workspace @158, delete_workspace + @199. All take caller-owned AsyncSession. create_workspace @46 is E1's to + extend (replayed_from) — DO NOT touch unless E1 missed it. +- file: app/features/demo/models.py + why: | + ShowcaseWorkspace @37; current columns @59-81; CHECK + composite index + @83-89. E1 adds the lifecycle/provenance columns here — E2 reads them, + never migrates. No-FK doctrine in the module docstring @4-11 (the health + feature exists BECAUSE of this doctrine). +- file: app/features/demo/schemas.py + why: | + DemoRunRequest @29 (strict=True @40; preservation @68; workspace_name + pattern @72-78; requires-keep validator @80-85 — the model E1 extends with + replayed_from_workspace_id). Response-model non-strict precedent: StepEvent + docstring @88-95, WorkspaceListItem @169 (from_attributes @177), + WorkspaceDetailResponse @192, WorkspaceListResponse @205. Append the two + health models here. +- file: app/features/demo/pipeline.py + why: | + THE in-process probe mechanism to copy into link_health.py: _Client + @127-204 — httpx.AsyncClient(transport=httpx.ASGITransport(app=app, + raise_app_exceptions=False), base_url cosmetic, timeout @98) and + request() status handling @188-200. 
link_health needs a SIMPLER client: + status-code classification only, no _StepError. DO NOT modify pipeline.py + in E2 (E1 owns the provenance write; replay flows through unchanged). +- file: app/features/demo/tests/test_routes.py + why: | + Route-test conventions to extend: unit tests monkeypatch the workspace + module functions (list @236-251, pagination pass-through @253-276, 404 + @286-298, delete @324-347); integration tests @359+ use the db_session + fixture and seed real rows. New filter/health tests follow these shapes. +- file: app/features/demo/tests/conftest.py + why: client fixture (ASGITransport over app.main.app) + db_session fixture + (real Postgres, wipes showcase_workspace on teardown). +- file: app/features/scenarios/routes.py + why: | + Repeated-tags Query param precedent @168-195 (tags: list[str] | None = + Query(default=None)) — copy for the workspace list. GET detail 404 style + @198-223. +- file: app/features/scenarios/service.py + why: list_plans @436-472 — tags containment filter @462-465 + (stmt.where(ScenarioPlan.tags.contains(tags))) applied to BOTH count and + rows statements; total respects filters. Mirror exactly. +- file: app/features/scenarios/models.py + why: tags JSONB string-array column @70-74 + GIN index @97 — the + representation CONTRACT(E1)-2 assumes for workspace tags. +- file: app/features/dimensions/routes.py + why: | + search + allow-listed sort precedent @65-105 (search Query min-2-chars, + sort_by Query with allow-list note "unknown values use default order", + sort_order asc|desc). Mirror the docstring + silent-fallback style. +- file: app/features/registry/schemas.py + why: RunUpdate @113-121 — the all-Optional partial-update body shape + CONTRACT(E1)-3 assumes for WorkspaceUpdateRequest (extra="forbid"). +- file: app/features/registry/routes.py + why: | + PATCH precedent @235; probe targets for link health: GET /registry/runs/ + {run_id} @200-201, GET /registry/aliases/{alias_name} @503-504. 
+- file: app/features/jobs/routes.py + why: probe target GET /jobs/{job_id} @219-220. +- file: app/features/batch/routes.py + why: probe target GET /batch/{batch_id} @55-62 (NotFoundError on miss). +- file: app/features/agents/routes.py + why: probe target GET /agents/sessions/{session_id} @80-104 — 404 via plain + HTTPException (status code is all the probe reads; body shape irrelevant). +- file: app/core/exceptions.py + why: NotFoundError @72 (RFC 7807 404). No new exception classes needed. + +# MUST READ — frontend (verified 2026-06-12) +- file: frontend/src/pages/showcase.tsx + why: | + 453 lines. State block @118-131 (seed/keepWorkspace/workspaceName/ + selectedWorkspaceId + useWorkspace detail resolution @128-131); handleRun + @139-156; handleLoadWorkspace @160-168; handleReplayWorkspace @174-186 — + THE function the confirm dialog intercepts (today it calls start() + directly); WorkspacePanel mount @245-255; name-pattern client validation + @26 + @135-137 (reuse in WorkspaceEditDialog); WorkspaceArtifactsPanel + mount @448-450 (gets health props). +- file: frontend/src/components/demo/WorkspacePanel.tsx + why: | + 219 lines — the component this epic reworks. Props @37-48; statusClass + @50-59 (semantic-token status colors); DESTRUCTIVE marker @144-148 + (text-destructive span); per-row buttons @153-183; the AlertDialog + delete-confirm pattern @191-216 (open-state via pendingDelete, shared + one dialog for all rows, data-testid on the action) — COPY this pattern + for ReplayConfirmDialog + the multi-delete confirm; list invalidation + effect @106-110. +- file: frontend/src/components/demo/WorkspacePanel.test.tsx + why: vitest conventions for this component family (mock use-workspaces + hooks via vi.mock, fire dialog actions, assert mutation calls). +- file: frontend/src/components/demo/WorkspaceArtifactsPanel.tsx + why: | + 157 lines. 
ArtifactCard shape @15-20, buildCards key mapping @30-107 + (winning_run_id/v2_run_id/scenario_plan_ids/batch_id/alias/ + agent_session_id + grain), disabled-card opacity-50 + title tooltip + @128-149. Health markers extend buildCards: each card gains an optional + `dead: boolean` resolved from the health response keyed by reference id. +- file: frontend/src/hooks/use-workspaces.ts + why: | + 43 lines — extend in place. useWorkspaces @10-16 (queryKey ['workspaces', + {limit}] — params object grows), useWorkspace @19-25, useDeleteWorkspace + @33-42 (invalidate ['workspaces'] on success — same invalidation for + usePatchWorkspace). useWorkspaceHealth + useWorkspaceLineage are new + siblings here. +- file: frontend/src/pages/explorer/run-compare.tsx + why: | + THE compare-page pattern (370 lines): useSearchParams a/b @87-89, + selectRun setParams updater @103-109, RunPicker Select @56-84, DeltaCell + sign-only indicator @33-54, side-by-side Card/Table layout @114+. The + workspace compare page mirrors all of it with useWorkspace×2 instead of + useCompareRuns (frontend-only diff — Decision 4). +- file: frontend/src/lib/constants.ts + why: ROUTES.SHOWCASE='/showcase' @4, ROUTES.EXPLORER.RUN_COMPARE @20 — add + SHOWCASE_COMPARE='/showcase/compare' beside SHOWCASE. +- file: frontend/src/App.tsx + why: lazy-page + Suspense route registration pattern (ShowcasePage @12, + @54-61; RunComparePage @21, @119-126) — register WorkspaceComparePage + identically. +- file: frontend/src/lib/api.ts + why: api(endpoint, {params, method, body}) wrapper; ApiError carries + status (WorkspacePanel.tsx:97 shows instanceof usage); getErrorMessage. +- file: frontend/src/types/api.ts + why: workspace types block @806-831 (WorkspaceListItem @806, WorkspaceDetail + @819, WorkspaceListResponse @828); DemoRunRequest @778-787 — extend here. 
+- file: frontend/src/hooks/use-demo-pipeline.ts + why: start(req) signature + the picker-desync gotcha (start() does not sync + the scenario picker — Replay must setScenario first; already handled in + handleReplayWorkspace, keep that ordering inside the confirmed path). + +# Project docs to update (additive) +- file: docs/_base/API_CONTRACTS.md + why: GET /demo/workspaces row gains the filter params; new health-endpoint + row; WS section note for replayed_from (if E1 didn't already add it). +- file: docs/_base/RUNBOOKS.md + why: § "Showcase workspace — preserve/restore/replay/delete semantics" item 1 + says "there is deliberately no confirm dialog" — E2 supersedes this + (update the item; keep the DESTRUCTIVE-marker sentence). Items 2-4 gain + one-line pointers to lineage badges / metadata-only multi-delete / health. +- file: docs/_base/DOMAIN_MODEL.md + why: showcase_workspace § "Out of scope" lists the replayed_from column — + E1's PRP owns that doc edit; E2 only verifies it happened (do not double-edit). 
+``` + +### Current Codebase tree (relevant subset, pre-E1) + +```bash +app/features/demo/ +├── link_health.py # DOES NOT EXIST — E2 creates +├── models.py # ShowcaseWorkspace @37 (E1 extends; E2 reads) +├── pipeline.py # 2771 lines; _Client @127 — UNTOUCHED in E2 +├── routes.py # POST /run @51; GETs @80,@110; DELETE @138; WS @166 +├── schemas.py # 214 lines; workspace response models @169-213 +├── service.py # lock + PipelineBusyError — untouched +├── workspace.py # 235 lines; list @174 / count @224 — E2 extends +└── tests/ # conftest, test_{models,pipeline,routes,schemas,workspace}.py +frontend/src/ +├── pages/showcase.tsx # 453 lines +├── pages/explorer/run-compare.tsx # 370 lines — compare pattern +├── components/demo/WorkspacePanel.tsx # 219 lines — reworked in E2 +├── components/demo/WorkspaceArtifactsPanel.tsx # 157 lines — health-aware in E2 +├── hooks/use-workspaces.ts # 43 lines — extended in E2 +├── types/api.ts # workspace block @806-831 +└── components/ui/ # 27 primitives incl. 
alert-dialog, + # dialog, dropdown-menu, textarea, + # table, select, tooltip, badge +``` + +### Desired Codebase tree (files added/modified) + +```bash +app/features/demo/ +├── link_health.py # NEW — probe targets + probe_workspace_links() +├── schemas.py # MOD — +WorkspaceRefHealth +WorkspaceHealthResponse +├── workspace.py # MOD — list/count filters + sort +├── routes.py # MOD — list query params; +GET /workspaces/{id}/health +└── tests/ + ├── test_link_health.py # NEW — probe classification vs a stub ASGI app + ├── test_routes.py # MOD — filter/sort/health unit + integration tests + └── test_workspace.py # MOD — list/count filter unit coverage (db-less where possible) +frontend/src/ +├── types/api.ts # MOD — lifecycle fields (verify-or-add), health types, params, update type +├── hooks/use-workspaces.ts # MOD — params-aware list; +usePatchWorkspace +useWorkspaceHealth +useWorkspaceLineage +├── hooks/use-workspaces.test.ts # MOD — new hooks covered +├── components/demo/ReplayConfirmDialog.tsx # NEW (+ .test.tsx) +├── components/demo/WorkspaceEditDialog.tsx # NEW (+ .test.tsx) +├── components/demo/WorkspaceLineageStrip.tsx # NEW (+ .test.tsx) +├── components/demo/WorkspacePanel.tsx # MOD — toolbar/badges/dropdown/multi-select (+ test MOD) +├── components/demo/WorkspaceArtifactsPanel.tsx # MOD — health markers + summary chip (+ test MOD) +├── components/demo/index.ts # MOD — barrel exports +├── pages/workspace-compare.tsx # NEW (+ workspace-compare.test.tsx) +├── pages/showcase.tsx # MOD — confirm flow, lineage, health, provenance field +├── lib/constants.ts # MOD — ROUTES.SHOWCASE_COMPARE +└── App.tsx # MOD — compare route registration +docs/_base/API_CONTRACTS.md # MOD — list params + health endpoint +docs/_base/RUNBOOKS.md # MOD — supersede "no confirm dialog"; lifecycle notes +``` + +### Known Gotchas & Library Quirks + +```python +# CRITICAL — EXECUTION GATE: do not start until E1 (#407) is merged to dev. 
+# Task 1 reconciles every CONTRACT(E1) point against the real merged code +# (git log --oneline --grep "#407"; read the E1 PRP + diff). Adapt E2 to +# E1's names; flag (don't silently fix) any E1 contract gap in the PR body. + +# CRITICAL — NO migration, NO models.py edit, NO pipeline.py edit in E2. +# The schema delta and the provenance/PATCH plumbing are E1's. If a column +# you need is missing post-E1, STOP and surface it — don't ship a stealth +# migration under E2. + +# CRITICAL — no cross-slice imports from app/features/demo/. Link health MUST +# go through in-process HTTP (request.app + httpx.ASGITransport — precedent +# pipeline.py:141-148 driven from a request context via routes.py:75). +# Importing RegistryService/ScenarioService/etc. fails the architecture rule. + +# CRITICAL — health probe classification: 2xx -> "alive", 404 -> "dead", +# EVERYTHING else (5xx, timeout, transport error) -> "unknown". Never let a +# probe exception escape the endpoint (asyncio.gather(..., return_exceptions= +# True) or per-probe try/except) — a flaky slice must not 500 the health +# route. raise_app_exceptions=False is REQUIRED on the ASGITransport (an +# unhandled error in a probed endpoint must surface as a 500 *response*). + +# CRITICAL — multi-select delete is N SINGLE DELETEs (existing endpoint). +# Adding POST /demo/workspaces/bulk-delete or DELETE /demo/workspaces is a +# product-vision violation (no bulk-wipe operations) — do not create it. + +# CRITICAL — the `total` returned by the filtered list MUST respect the active +# filters (scenarios precedent: BOTH count_stmt and rows_stmt get the same +# .where chain, scenarios/service.py:462-465). A filter-blind total breaks +# the "showing X of Y" header. + +# GOTCHA — include_archived default false flips list semantics for archived +# rows. 
Pre-E1 rows have archived=false (E1 migration default), so legacy +# lists are unchanged; route tests must still pin: no-param call returns +# only archived=false rows, include_archived=true returns both. + +# GOTCHA — sort allow-list: {created_at, name, seed, status}; unknown sort_by +# silently falls back to created_at desc (dimensions precedent — NOT a 422). +# Pinned-first is unconditional: ORDER BY pinned DESC, <sort_by> <sort_order>, id DESC +# tiebreak. name sort: NULLS LAST (unnamed rows sink) — use +# sqlalchemy .nulls_last() on the asc/desc expression. + +# GOTCHA — tags Query param: list[str] | None = Query(default=None) gives +# repeated-param parsing (?tags=a&tags=b). JSONB containment via +# ShowcaseWorkspace.tags.contains(tags) requires CONTRACT(E1)-2 (JSONB array +# column). Frontend sends ONE tag at a time (chip filter) — a single +# `tags` param serializes fine through api()'s params. + +# GOTCHA — q search: mirror dimensions ILIKE (case-insensitive, escape % and _ +# if the precedent does; check dimensions/service.py before writing). +# Search NAME only (workspace_id prefixes are copy-paste handles, not search). + +# GOTCHA — strict-mode policy: the new health/response models are response +# models -> plain BaseModel, NO ConfigDict(strict=True). The AST walker +# (app/core/tests/test_strict_mode_policy.py) only inspects strict=True +# request models — keep it that way. + +# GOTCHA — agents GET /agents/sessions/{id} 404s via plain HTTPException (not +# NotFoundError) — irrelevant to the probe (status code only), but do NOT +# "fix" the agents slice as a drive-by. + +# GOTCHA — an EXPIRED-but-existing agent session returns 200 (row exists) -> +# "alive". That is correct link-health semantics (the row is the link +# target); the artifacts card blurb already says "the recorded session has +# likely expired".
+ +# GOTCHA — ReplayConfirmDialog destructive styling: AlertDialogAction renders +# buttonVariants default; pass className="bg-destructive text-destructive- +# foreground hover:bg-destructive/90" (semantic tokens — NEVER raw colors +# like bg-red-500). Copy the shared-dialog open-state pattern from +# WorkspacePanel.tsx:191-216 (pendingX state, one dialog for all rows). + +# GOTCHA — confirm-dialog flow ordering: the confirmed replay must run the +# EXISTING handleReplayWorkspace body (setScenario BEFORE start() — the +# picker-desync gotcha from E4 still applies). Refactor: handleReplayWorkspace +# becomes "setPendingReplay(ws)"; a new executeReplay(ws) holds the old body +# + the CONTRACT(E1)-5 replayed_from_workspace_id field. + +# GOTCHA — lineage walking: a deleted ancestor's GET returns 404 (ApiError +# .status === 404) — render "(original deleted)" and STOP the walk; never +# throw. Implement as one useQuery whose queryFn loops (await api(...) per +# ancestor, depth cap 5), queryKey ['workspaces', id, 'lineage'] — N +# serial fetches inside one query keeps cache + loading states simple. + +# GOTCHA — useWorkspaces signature change (limit -> params object) touches its +# existing call sites + use-workspaces.test.ts — update them in the same +# commit; keep queryKey shape ['workspaces', paramsObject] so the blanket +# invalidateQueries({queryKey: ['workspaces']}) keeps matching everything. + +# GOTCHA — pnpm tsc --noEmit is VACUOUS (solution-style tsconfig, zero files) +# and `tsc -b` fails on dev with PRE-EXISTING errors (known issue — memory +# [[frontend-tsc-noemit-gate-vacuous]]). Do NOT chase those. JS gates that +# must be green: pnpm lint && pnpm test --run. Optionally verify ONLY your +# new files compile via their vitest imports. + +# GOTCHA — every shadcn primitive needed (alert-dialog, dialog, dropdown-menu, +# checkbox, input, textarea, select, table, tooltip, badge, card, button) is +# ALREADY in frontend/src/components/ui/ (verified 2026-06-12). 
Do NOT run +# `shadcn add`. If you believe a new primitive is required, stop and recheck +# (.claude/rules/shadcn-ui.md; memory [[shadcn-cli-version-pin]]). + +# GOTCHA — never call crypto.randomUUID directly (issue #332; ESLint guard) — +# safeRandomUUID from @/lib/uuid-utils if any client id is needed. + +# GOTCHA — repo has mixed CRLF/LF; Write/Edit emit LF. New files fine; for +# showcase.tsx / WorkspacePanel.tsx / routes.py edits run `git diff --stat` +# and confirm surgical line counts before committing. + +# GOTCHA — mypy --strict AND pyright --strict gate merge: full annotations on +# the new probe module (TypedDict/dataclass or Pydantic for probe targets), +# `-> None` on tests, annotated fixtures. + +# COORDINATION — E3 (#409), E4 (#410), E5 (#411), E6 (#412) are open parallel +# epics. Shared-file risk: schemas.py / routes.py / showcase.tsx / +# API_CONTRACTS.md. Keep every edit additive + self-contained; rebase on dev +# before the PR. + +# RUNTIME-VERIFICATION LOG (per prp-create step 3): +# - demo routes/handlers + line refs — read routes.py (2026-06-12) +# - list/count signatures + ordering — read workspace.py:174-234 +# - ShowcaseWorkspace pre-E1 columns — read models.py:59-89 +# - response-model non-strict precedent — read schemas.py:88-95,169-213 +# - ASGITransport in-process pattern — read pipeline.py:127-204 +# - scenario tags containment + GIN — read scenarios/service.py:462-465, models.py:74,97 +# - dimensions search/sort params — grep dimensions/routes.py:65-105 +# - probe targets exist: /registry/runs/{run_id} (registry/routes.py:200), +# /registry/aliases/{alias_name} (:503), /jobs/{job_id} (jobs/routes.py:219), +# /batch/{batch_id} (batch/routes.py:55), /agents/sessions/{session_id} +# (agents/routes.py:80), /scenarios/{scenario_id} (scenarios/routes.py:198) +# - RunUpdate partial-update shape — read registry/schemas.py:113-121 +# - frontend: WorkspacePanel AlertDialog pattern (191-216), run-compare +# useSearchParams pattern (87-109), 
installed ui primitives (ls), api.ts +# ApiError usage (WorkspacePanel.tsx:97) +# - E1 #407 OPEN / unmerged as of 2026-06-12 — CONTRACT(E1) tags mark every +# dependency; no third-party API claims beyond in-repo working patterns +# (httpx ASGITransport, sqlalchemy .contains, TanStack useQuery/useMutation +# — all already exercised in this repo; .nulls_last is standard +# SQLAlchemy 2.0 API but has NO in-repo precedent — verify at impl time). +``` + +## Implementation Blueprint + +### Data models and structure + +```python +# app/features/demo/schemas.py — APPEND (response models; NOT strict) + +RefHealthStatus = Literal["alive", "dead", "unknown"] +RefType = Literal["model_run", "scenario_plan", "alias", "batch", "agent_session", "job"] + + +class WorkspaceRefHealth(BaseModel): + """Liveness of one soft reference recorded on a workspace (E2, #408).""" + + key: str = Field(..., description="created_objects key, e.g. 'winning_run_id' or 'scenario_plan_ids[0]'.") + ref_type: RefType = Field(..., description="Kind of referenced object.") + ref_id: str = Field(..., description="The recorded soft-reference id.") + status: RefHealthStatus = Field(..., description="alive (2xx) / dead (404) / unknown (other).") + probe_path: str = Field(..., description="The public API path probed.") + + +class WorkspaceHealthResponse(BaseModel): + """Per-workspace link-health summary (E2, #408).""" + + workspace_id: str + workspace_status: str = Field(..., description="running / completed / failed.") + partial_run: bool = Field(..., description="True when workspace_status != 'completed'.") + references: list[WorkspaceRefHealth] = Field(default_factory=list) + alive: int = Field(..., ge=0) + dead: int = Field(..., ge=0) + unknown: int = Field(..., ge=0) + checked_at: datetime = Field(default_factory=_utc_now) +``` + +```python +# app/features/demo/link_health.py — NEW (sketch; CRITICAL details only) + +@dataclass(frozen=True) +class _ProbeTarget: + key: str # e.g. 
"scenario_plan_ids[1]" + ref_type: str # RefType value + ref_id: str + probe_path: str # e.g. f"/registry/runs/{ref_id}" + +def build_probe_targets(ws: ShowcaseWorkspace) -> list[_ProbeTarget]: + # created_objects keys (workspace.py:_collect_created_objects:82-103): + # winning_run_id / v2_run_id / stale_alias_run_id -> /registry/runs/{id} + # scenario_plan_ids[i] -> /scenarios/{id} + # alias -> /registry/aliases/{name} + # batch_id -> /batch/{id} + # agent_session_id -> /agents/sessions/{id} + # CONTRACT(E1)-6: job_ids JSONB slot [i] -> /jobs/{id} + # NON-probeable keys (v2_model_path, scenario_artifact_key, + # train_model_types) are SKIPPED — no HTTP identity to check. + ... + +async def probe_workspace_links(app: FastAPI, ws: ShowcaseWorkspace) -> WorkspaceHealthResponse: + targets = build_probe_targets(ws) + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=app, raise_app_exceptions=False), + base_url="http://demo.internal", + timeout=httpx.Timeout(10.0, connect=5.0), + ) as client: + results = await asyncio.gather( + *(_probe_one(client, t) for t in targets), return_exceptions=False + ) # _probe_one NEVER raises: try/except httpx.HTTPError/OSError -> "unknown" + # classify: 200<=s<300 alive; s==404 dead; else unknown + # partial_run = ws.status != WORKSPACE_STATUS_COMPLETED + ... +``` + +```typescript +// frontend/src/types/api.ts — extend the workspace block (806-831) + +// CONTRACT(E1)-4 — verify E1 added these; add additively if not: +export interface WorkspaceListItem { + /* existing fields ... */ + archived: boolean + pinned: boolean + tags: string[] + replayed_from_workspace_id: string | null +} +export interface WorkspaceDetail extends WorkspaceListItem { + /* existing fields ... 
*/ + notes: string | null + config_schema_version: number +} + +// E2 (#408) — lifecycle PATCH body (CONTRACT(E1)-3 shape; adapt to E1 names): +export interface WorkspaceUpdate { + name?: string | null + notes?: string | null + tags?: string[] + archived?: boolean + pinned?: boolean +} + +export interface WorkspaceListParams { + limit?: number + offset?: number + q?: string + tags?: string + include_archived?: boolean + sort_by?: 'created_at' | 'name' | 'seed' | 'status' + sort_order?: 'asc' | 'desc' +} + +export type RefHealthStatus = 'alive' | 'dead' | 'unknown' +export interface WorkspaceRefHealth { + key: string + ref_type: 'model_run' | 'scenario_plan' | 'alias' | 'batch' | 'agent_session' | 'job' + ref_id: string + status: RefHealthStatus + probe_path: string +} +export interface WorkspaceHealth { + workspace_id: string + workspace_status: 'running' | 'completed' | 'failed' + partial_run: boolean + references: WorkspaceRefHealth[] + alive: number + dead: number + unknown: number + checked_at: string +} +``` + +### List of tasks (dependency order) + +```yaml +Task 1 — gate, branch & E1 reconciliation: + VERIFY: gh issue view 407 --json state -> MUST be closed (E1 merged) before continuing + RUN: git switch dev && git pull && git switch -c feat/showcase-completion-e2-safe-replay-lifecycle + VERIFY: gh issue view 408 --json state # open + RECONCILE every CONTRACT(E1) tag against the merged code: + - read app/features/demo/models.py -> column names (CONTRACT(E1)-1/-2) + - read app/features/demo/schemas.py -> PATCH body + GET response fields (CONTRACT(E1)-3/-4) + - read app/features/demo/routes.py -> PATCH route exists + - grep replayed_from app/features/demo/ -> provenance mechanism (CONTRACT(E1)-5) + - read PRPs/PRP-showcase-completion-E1-*.md (whatever E1's PRP file is named) + ADAPT all names below to E1's reality; note any E1 gap in the PR body. 
+ +Task 2 — MODIFY app/features/demo/workspace.py (filters + sort): + - EXTEND list_workspaces(db, *, limit=50, offset=0, q=None, tags=None, + include_archived=False, sort_by=None, sort_order="desc"): + # base stmt; if not include_archived: .where(~ShowcaseWorkspace.archived) + # if q: .where(ShowcaseWorkspace.name.ilike(f"%{q}%")) (name only) + # if tags: .where(ShowcaseWorkspace.tags.contains(tags)) (CONTRACT(E1)-2) + # sort: allow-list {created_at,name,seed,status}; unknown -> created_at + # desc; name uses .nulls_last(); ALWAYS ORDER BY pinned.desc() first, + # then the sort expr, then id.desc() tiebreak + - EXTEND count_workspaces(db, *, q=None, tags=None, include_archived=False) + # SAME where-chain as list (scenarios/service.py:462-465 precedent) — + # extract a shared _apply_filters(stmt, ...) helper to keep them in sync + - Update module docstring (E2 routes the filters). + +Task 3 — MODIFY app/features/demo/schemas.py: + - APPEND WorkspaceRefHealth + WorkspaceHealthResponse (blueprint above); + docstring notes: response models, NOT strict (StepEvent precedent @88-95). + - CONTRACT(E1)-4 defensive check: if E1 did not expose archived/pinned/tags/ + replayed_from_workspace_id on WorkspaceListItem (+notes/ + config_schema_version on WorkspaceDetailResponse), ADD them here + additively (from_attributes picks them up from the ORM row). + +Task 4 — CREATE app/features/demo/link_health.py: + - build_probe_targets(ws) + probe_workspace_links(app, ws) per the blueprint. + - MIRROR pipeline._Client transport flags exactly (raise_app_exceptions=False). + - _probe_one catches (httpx.HTTPError, OSError) -> "unknown"; NEVER raises. + - Full --strict annotations; module docstring states the no-cross-slice- + import rationale (Decision 3) and the 2xx/404/other classification table. 
+ +Task 5 — MODIFY app/features/demo/routes.py: + - EXTEND GET /workspaces signature with q / tags / include_archived / + sort_by / sort_order Query params (mirror dimensions/routes.py:65-75 + + scenarios/routes.py:180 styles; document the allow-list + silent fallback + in the docstring); pass through to workspace.list_workspaces / + count_workspaces (same filter args to BOTH). + - ADD GET /workspaces/{workspace_id}/health -> WorkspaceHealthResponse: + # async def get_workspace_health(workspace_id: str, request: Request, + # db: AsyncSession = Depends(get_db)): + # row = await workspace.get_workspace(db, workspace_id) + # if row is None: raise NotFoundError(message=f"Workspace not found: {workspace_id}") + # return await link_health.probe_workspace_links(request.app, row) + # Place between the GET detail (@110) and DELETE (@138). No path + # collision: /workspaces/{id}/health is more specific than /workspaces/{id}. + - Update the module docstring route inventory. + +Task 6 — backend tests: + - CREATE app/features/demo/tests/test_link_health.py (unit, no DB): + # build a THROWAWAY FastAPI stub app with routes returning 200 / 404 / + # 500 at the probed paths; construct a ShowcaseWorkspace instance + # in-memory (not persisted) with created_objects covering every key + + # job_ids slot; assert classification alive/dead/unknown + counts + + # partial_run on status='failed'; assert non-probeable keys skipped; + # assert empty created_objects -> empty references, partial_run logic. 
+ - MODIFY app/features/demo/tests/test_routes.py: + UNIT (monkeypatch app.features.demo.routes.workspace / .link_health): + - list passes q/tags/include_archived/sort args through (capture kwargs) + - list rejects bad limit (existing) — keep green + - health 404 on missing workspace (problem+json content-type) + - health 200 happy path (monkeypatched probe returns canned response) + INTEGRATION (@pytest.mark.integration, db_session): + - seed rows: named/unnamed, archived, pinned, tagged -> + default list hides archived; include_archived=true shows it; + q matches name substring case-insensitively; tags containment; + sort_by=name asc with NULLS LAST; pinned row first regardless of sort; + total respects filters + - health integration: insert a workspace whose created_objects carry + one REAL reference (insert a scenario_plan row via its ORM, or use a + bogus-vs-real registry pair) + one bogus id -> assert alive + dead + - MODIFY app/features/demo/tests/test_workspace.py: filter unit coverage of + _apply_filters where practical (or fold into the integration tests above). + +Task 7 — MODIFY frontend/src/types/api.ts: + - Lifecycle fields per CONTRACT(E1)-4 (verify-or-add), WorkspaceUpdate, + WorkspaceListParams, WorkspaceRefHealth/WorkspaceHealth (blueprint above). + - DemoRunRequest: verify E1 added replayed_from_workspace_id?: string + (CONTRACT(E1)-5); add if missing. 
+ +Task 8 — MODIFY frontend/src/hooks/use-workspaces.ts (+ test): + - useWorkspaces(params: WorkspaceListParams = {}, enabled = true): + queryKey ['workspaces', params]; api('/demo/workspaces', { params }) + # update existing call site: WorkspacePanel.tsx:77 (the sole useWorkspaces + # caller — showcase.tsx never calls it directly) + - ADD usePatchWorkspace(): + mutationFn: ({workspaceId, update}: {workspaceId: string; update: WorkspaceUpdate}) => + api(`/demo/workspaces/${workspaceId}`, { method: 'PATCH', body: update }) + onSuccess: invalidate ['workspaces'] # blanket key matches list+detail + - ADD useWorkspaceHealth(workspaceId: string, enabled = true): + queryKey ['workspaces', workspaceId, 'health']; staleTime 30_000 + - ADD useWorkspaceLineage(workspaceId: string | null): + one useQuery; queryFn walks replayed_from_workspace_id via sequential + api() calls, depth cap 5; a 404 (ApiError.status===404) + terminates the walk with a {deleted: true} sentinel entry; returns + Array<{workspace_id, name, deleted}> oldest-last. + - MODIFY use-workspaces.test.ts: params serialization, PATCH invalidation, + lineage walk incl. 404 termination (mock api module). + +Task 9 — CREATE frontend/src/components/demo/ReplayConfirmDialog.tsx (+ test): + - Props: { workspace: WorkspaceListItem | null, # null = closed + requestPreview: DemoRunRequest | null, # built by the page + onConfirm: () => void, onCancel: () => void } + - AlertDialog (open={workspace !== null}; onOpenChange close -> onCancel) — + copy the shared-dialog pattern from WorkspacePanel.tsx:191-216. + - Body: 3-column table (Field / Recorded / Will send) over seed, scenario, + reset, skip_seed, name, preservation, replayed_from; per-row mismatch + highlight (font-semibold text-destructive on the "Will send" cell when + values differ — defensive; verbatim replay normally matches). 
+ - reset=true -> warning block (AlertTriangle + "Replaying this workspace + WIPES the database and reseeds it.") + AlertDialogAction + className="bg-destructive text-destructive-foreground hover:bg-destructive/90" + label "Replay & wipe database"; else label "Replay". + - Footer hint: "Want to change the config first? Use Load instead." (muted). + - data-testid="replay-confirm" on the action (WorkspacePanel test precedent). + - Test: renders preview values; destructive copy/label only when reset; + confirm fires onConfirm once; cancel fires onCancel; mismatch highlight. + +Task 10 — CREATE frontend/src/components/demo/WorkspaceEditDialog.tsx (+ test): + - Props: { workspace: WorkspaceListItem | null, onClose: () => void } + - Dialog (ui/dialog.tsx — form dialog, not AlertDialog) with: name Input + (reuse WORKSPACE_NAME_PATTERN from showcase.tsx:26 — export it from a + shared location, e.g. components/demo/workspace-name.ts, instead of + duplicating), notes Textarea, tags Input (comma-separated -> trimmed + string[]; render current tags as chips above the input). + - Save -> usePatchWorkspace().mutate({workspaceId, update}); toast on + success/failure (sonner pattern WorkspacePanel.tsx:88-99); close on success. + - Send ONLY changed fields (partial update — CONTRACT(E1)-3 semantics). + - Test: pattern violation disables Save with inline hint; save sends only + dirty fields; success closes + toasts (mock usePatchWorkspace). + +Task 11 — CREATE frontend/src/components/demo/WorkspaceLineageStrip.tsx (+ test): + - Props: { workspaceId: string, onLoadAncestor: (id: string) => void } + - useWorkspaceLineage(workspaceId); render breadcrumb: current ← parent ← + … oldest; ancestors as Button variant="link" size="sm" (click -> + onLoadAncestor); deleted sentinel renders muted "(original deleted)"; + depth-cap overflow renders trailing "…". Render nothing (null) when the + workspace has no replayed_from_workspace_id. 
+ - Test: chain render order, deleted sentinel, null when no lineage. + +Task 12 — MODIFY frontend/src/components/demo/WorkspacePanel.tsx (+ test): + - Toolbar row above the list: search Input (icon lucide Search; applies as + `q` on Enter/debounce), "Show archived" Checkbox, sort Select + (Newest/Oldest/Name/Status -> sort_by+sort_order pairs), active-tag chip + (clearable) when a tag filter is set. + - Panel owns the list-params state and calls useWorkspaces(params). + - Row additions: leading multi-select Checkbox; Pin icon button (lucide Pin + / PinOff, fires usePatchWorkspace toggle); archived rows: opacity-60 + + outline Badge "archived"; replay Badge (outline, "replay") when + replayed_from_workspace_id != null; tags as clickable chips (sets the tag + filter); DropdownMenu (MoreHorizontal): Pin/Unpin, Archive/Unarchive, + Edit details…, Delete… (Delete keeps the existing pendingDelete dialog). + - Replay button now calls a NEW prop onRequestReplay(ws) (the page owns the + confirm dialog) — RENAME the old onReplay prop to make the break explicit. + - Selection footer: "N selected" + Delete selected (AlertDialog confirm -> + sequential `for (const id of selected) await deleteWorkspace.mutateAsync(id)` + with per-failure collection -> one summary toast; clear selection) + + Compare button (disabled unless exactly 2; useNavigate -> + `${ROUTES.SHOWCASE_COMPARE}?a=${id1}&b=${id2}`). + - Keep the component lean: extract WorkspaceToolbar + WorkspaceRow as + file-local components if the file passes ~300 lines. + - Tests: search/sort/archived params flow into useWorkspaces (mock + assert + last call args); multi-select count + delete-selected confirm calls N + mutateAsync; compare disabled at 1 and 3 selections; pin/archive fire + PATCH mutations; replay fires onRequestReplay (NOT start). 
+ +Task 13 — MODIFY frontend/src/components/demo/WorkspaceArtifactsPanel.tsx (+ test): + - Props += { health?: WorkspaceHealth | null } + - buildCards gains the refId per card; a card whose refId matches a + health.references entry with status==='dead' renders AlertTriangle + (h-3 w-3 text-destructive) beside the label + title tooltip "This object + no longer exists — it was deleted after the run." ('unknown' -> no marker). + - Header chip row: `✓ {alive} live` (text-success) + `✕ {dead} dead` + (text-destructive, only when dead>0) + outline Badge "partial run" when + health.partial_run (tooltip: "This run never completed — artifacts may be + missing."). Skeleton/silent when health undefined (query in flight/disabled). + - Test: dead marker on matching card; summary chip counts; partial-run badge. + +Task 14 — MODIFY frontend/src/pages/showcase.tsx: + - State += pendingReplay: WorkspaceListItem | null. + - handleReplayWorkspace(ws) -> setPendingReplay(ws) (no start()). + - NEW executeReplay(ws): the post-E1 body (showcase.tsx:174-186 today — + setScenario first; E1 shifts these anchors and adds + replayed_from_workspace_id: ws.workspace_id, which executeReplay PRESERVES + — CONTRACT(E1)-5, preserve-not-add); clear pendingReplay. + - buildReplayRequest(ws): pure helper producing the DemoRunRequest preview + passed to the dialog AND used by executeReplay (single source — the diff + can never lie about what's sent). Export for unit testing. + - Mount <ReplayConfirmDialog workspace={pendingReplay} + requestPreview={pendingReplay ? buildReplayRequest(pendingReplay) : null} + onConfirm={() => pendingReplay && executeReplay(pendingReplay)} + onCancel={() => setPendingReplay(null)} /> + - Health: const health = useWorkspaceHealth(selectedWorkspaceId ?? '', + !!selectedWorkspaceId); pass health.data into WorkspaceArtifactsPanel. 
+ - Lineage: mount <WorkspaceLineageStrip workspaceId={selectedWorkspaceId} + onLoadAncestor={(id) => { /* fetch list item via detail + handleLoad */ }} /> + inside the loaded-workspace block (@448-450 region); simplest + onLoadAncestor: setSelectedWorkspaceId(id) + repopulate controls from the + lineage entry's detail (the strip's hook already has the details — pass + the full WorkspaceDetail up instead of just the id if cleaner). + - WorkspacePanel prop rename: onRequestReplay={handleReplayWorkspace}. + +Task 15 — CREATE frontend/src/pages/workspace-compare.tsx (+ test) + routing: + - MODIFY frontend/src/lib/constants.ts: SHOWCASE_COMPARE: '/showcase/compare' + (beside SHOWCASE @4). + - MODIFY frontend/src/App.tsx: lazy WorkspaceComparePage + its <Route path={ROUTES.SHOWCASE_COMPARE} …> (mirror + RunComparePage @21, @119-126). '/showcase/compare' and '/showcase' are + distinct paths — no nesting needed. + - Page mirrors run-compare.tsx: useSearchParams a/b (@87-109 pattern); + pickers = Select over useWorkspaces({limit: 100, include_archived: true}) + items (label: name ?? id.slice(0,8) · scenario · status); two + useWorkspace(a/b) detail queries; render: + * config table — seed/scenario/reset/skip_seed/name/tags; mismatch rows + highlighted (font-semibold) + * results table — winner_model_type, winner_wape (DeltaCell-style + sign-only delta — copy the component from run-compare.tsx:33-54 + file-locally), wall_clock_s + * created-objects matrix — union of soft-reference keys × (A: ✓/—, + B: ✓/—) + * lineage note — "B is a replay of A" (or inverse) when + replayed_from_workspace_id links them + * partial-run outline Badge per side when status !== 'completed' + Missing/invalid id -> that side renders the picker + muted "select a + workspace" (no crash; ApiError 404 -> same fallback). + - Test: renders diff for two mocked details; mismatch highlight; lineage + note; 404 side falls back to picker state. + +Task 16 — barrel + docs: + - MODIFY frontend/src/components/demo/index.ts — export the three new + components. 
+ - MODIFY docs/_base/API_CONTRACTS.md: + * GET /demo/workspaces row: append "E2 (#408) — `q` name search, `tags` + containment filter, `include_archived` (default false), allow-listed + `sort_by`/`sort_order`; pinned rows first; `total` respects filters" + * NEW row: | demo | GET | `/demo/workspaces/{workspace_id}/health` | + E2 (#408) — probe the workspace's soft references in-process; per-ref + alive/dead/unknown + counts + `partial_run`; `404` when missing | + - MODIFY docs/_base/RUNBOOKS.md § "Showcase workspace — …": + * item 1: replace "there is deliberately no confirm dialog" with the E2 + reality (every panel Replay confirms; reset=true gets destructive + copy; the DESTRUCTIVE row marker stays) + * item 3/4: one-line additions — multi-select delete = N metadata-only + singles; dead links now SURFACE via the health summary instead of + silently dangling + - VERIFY (not edit) DOMAIN_MODEL.md replayed_from note was updated by E1. + +Task 17 — gates, dogfood, commits, PR: + - Backend gates + integration suite (Validation Loop below). + - Frontend: cd frontend && pnpm lint && pnpm test --run. + - Browser dogfood via the webapp-testing skill (CLAUDE.md workflow step 4): + seeded stack -> save 3 workspaces (one reset=true, one tagged, one + replayed) -> search/sort/archive/pin -> replay with confirm (destructive + variant) -> lineage chain -> compare page -> delete a referenced scenario + plan -> reload workspace -> dead-link warning + health chip. + - git diff --stat (CRLF surgical-diff check on edited files). 
+ - COMMITS (reference #408, no AI trailer), e.g.: + feat(api): add workspace list filters and link-health endpoint (#408) + feat(ui): add replay confirmation with config diff to showcase (#408) + feat(ui): add workspace lifecycle controls and lineage rendering (#408) + feat(ui): add two-workspace compare page (#408) + test(api): cover workspace filters and link-health probes (#408) + docs(api): document workspace lifecycle and health contracts (#408) + - PR into dev; title `feat(api,ui): showcase-completion E2 — safe replay & + workspace lifecycle (#408)`; body notes the replay-policy-picker deferral + (Decision 1) + any CONTRACT(E1) reconciliation deltas. +``` + +### Integration Points + +```yaml +DATABASE: none in E2 — reads the E1-migrated table; NO new migration. + +CONFIG: none — no new settings or env vars (probe timeout is a module constant). + +ROUTES: existing demo router only (app/main.py wiring unchanged): extended GET + /demo/workspaces + new GET /demo/workspaces/{id}/health. PATCH is E1's. + +FRONTEND ROUTES: one new React Router page at ROUTES.SHOWCASE_COMPARE + ('/showcase/compare'); registered in App.tsx beside the existing pages. + +DOCS: API_CONTRACTS.md + RUNBOOKS.md (Task 16). Full doc sweep belongs to the + E7 release gate — keep E2's edits additive and minimal. +``` + +## Validation Loop + +### Level 1: Syntax & Style + +```bash +uv run ruff check . && uv run ruff format --check . +uv run mypy app/ && uv run pyright app/ +cd frontend && pnpm lint +# Expected: clean. Both Python type checkers are --strict and gate merge. +# (pnpm tsc --noEmit is vacuous; tsc -b fails with PRE-EXISTING errors — do +# not chase them. lint + vitest are the JS gates.) 
+``` + +### Level 2: Unit Tests (no DB) + +```bash +uv run pytest app/features/demo -v -m "not integration" +uv run pytest app/core/tests/test_strict_mode_policy.py -v # AST walker still green +cd frontend && pnpm test --run +# New/changed: test_link_health (stub-app probe classification), test_routes +# filter/health unit tests, use-workspaces hooks, ReplayConfirmDialog, +# WorkspaceEditDialog, WorkspaceLineageStrip, WorkspacePanel rework, +# WorkspaceArtifactsPanel health markers, workspace-compare page. +``` + +### Level 3: Integration (real Postgres) + +```bash +docker compose up -d && uv run alembic upgrade head +uv run pytest app/features/demo -v -m integration +# List filters against seeded rows (archived hidden / shown, q, tags, +# sort + pinned-first, filtered total) + health probe (real + bogus refs). +``` + +### Level 4: Manual smoke + browser dogfood (seeded local stack, uvicorn :8123) + +```bash +# 1. Filtered list + health round-trip +curl -s "http://localhost:8123/demo/workspaces?q=demo&sort_by=name&sort_order=asc" | python3 -m json.tool | head -30 +curl -s "http://localhost:8123/demo/workspaces?include_archived=true" | python3 -c "import sys,json; d=json.load(sys.stdin); print(d['total'])" +WS_ID=$(curl -s -X POST http://localhost:8123/demo/run -H 'Content-Type: application/json' \ + -d '{"skip_seed": true, "preservation": "keep", "workspace_name": "e2-smoke"}' \ + | python3 -c "import sys,json; print(json.load(sys.stdin)['workspace_id'])") +curl -s "http://localhost:8123/demo/workspaces/${WS_ID}/health" | python3 -m json.tool +curl -s -o /dev/null -w "%{http_code} %{content_type}\n" \ + http://localhost:8123/demo/workspaces/deadbeefdeadbeefdeadbeefdeadbeef/health # 404 problem+json + +# 2. 
Dead-link proof: delete a referenced scenario plan, re-probe +# (pick a scenario_plan_id from the workspace detail's created_objects) +curl -s -X DELETE "http://localhost:8123/scenarios/<scenario_plan_id>" -o /dev/null -w "%{http_code}\n" +curl -s "http://localhost:8123/demo/workspaces/${WS_ID}/health" \ + | python3 -c "import sys,json; print([r for r in json.load(sys.stdin)['references'] if r['status']=='dead'])" + +# 3. Browser dogfood (webapp-testing skill / agent-browser): +# /showcase -> save workspaces -> toolbar search/sort/show-archived -> +# pin (row jumps first) -> archive (vanishes until toggle) -> Edit details +# (rename + tags chips) -> Replay -> confirm dialog shows the diff table -> +# a reset=true workspace shows destructive copy + red button -> confirmed +# replay goes green, new row carries the "replay" badge -> Load it -> +# lineage strip shows the chain -> select 2 rows -> Compare page diff -> +# multi-select 2 -> Delete selected -> rows gone, created objects intact -> +# loaded workspace with the deleted plan shows the dead-link warning + chip. +``` + +## Final validation Checklist + +- [ ] All five gates green: `uv run ruff check . && uv run ruff format --check . && uv run mypy app/ && uv run pyright app/ && uv run pytest -v -m "not integration"` +- [ ] Integration suite green: `uv run pytest -v -m integration` (fresh docker-compose DB) +- [ ] Frontend gates green: `cd frontend && pnpm lint && pnpm test --run` +- [ ] No replay path bypasses the confirm dialog; reset=true shows destructive variant (vitest + dogfood) +- [ ] List filters: archived hidden by default, q/tags/sort behave, pinned-first, filtered total (route tests + curl) +- [ ] Health endpoint classifies alive/dead/unknown; dead-link warning + partial-run chip render (integration + dogfood step 2/3) +- [ ] Lineage chain renders incl. 
deleted-ancestor sentinel +- [ ] Compare page deep-links `?a=&b=` and degrades gracefully on bad ids +- [ ] Multi-select delete = N single DELETEs; **no new bulk endpoint in the diff** +- [ ] Legacy list calls + all pre-existing demo tests unchanged-green +- [ ] CONTRACT(E1) reconciliation notes in the PR body; replay-policy deferral noted +- [ ] `git diff --stat` surgical (no CRLF whole-file noise) +- [ ] docs/_base/API_CONTRACTS.md + RUNBOOKS.md updated additively +- [ ] Commits `type(scope): description (#408)`, no AI trailer; PR into dev; browser dogfood evidence per `.claude/rules/ui-design.md` + +--- + +## Anti-Patterns to Avoid + +- ❌ Don't start before E1 (#407) merges; don't re-implement E1 surface (migration, PATCH, provenance write). +- ❌ Don't import another feature slice from `app/features/demo/` — link health is in-process HTTP only. +- ❌ Don't add a bulk-delete endpoint or any "wipe everything" operation — N singles, period. +- ❌ Don't add a replay-policy picker (exact/safe-keep/modified) — explicitly deferred (Decision 1). +- ❌ Don't make health/response models strict — strict mode is request-body policy. +- ❌ Don't probe health for every list row — loaded workspace only. +- ❌ Don't let a probe exception 500 the health route — classify as `unknown`. +- ❌ Don't mutate the original workspace row on replay — replay still creates a NEW row (provenance points back). +- ❌ Don't duplicate the name pattern regex — share it between run controls and the edit dialog. +- ❌ Don't run `shadcn add` — every needed primitive is installed; don't use raw colors — semantic tokens only. +- ❌ Don't call `crypto.randomUUID` directly — `safeRandomUUID` (ESLint-enforced). +- ❌ Don't chase pre-existing `tsc -b` errors — lint + vitest are the JS gates. + +## Confidence Score + +**7.5/10** for one-pass implementation success. 
The backend half (list filters ++ health endpoint) is a composition of three verified in-repo precedents +(dimensions search/sort, scenarios tags containment, pipeline ASGITransport) +with clear test shapes. The deductions: (a) E2 is authored against a frozen +but UNMERGED E1 contract — seven CONTRACT(E1) points must reconcile against +E1's real merged shape, and any naming/shape divergence costs an adaptation +pass (mitigated by Task 1's reconciliation gate and verify-or-add fallbacks); +(b) the WorkspacePanel rework is the single largest UI delta of the showcase +initiative so far (toolbar + badges + dropdown + multi-select + confirm +rerouting in one component) where an interaction miss costs an iteration; and +(c) four parallel epics share `schemas.py` / `routes.py` / `showcase.tsx`, +so rebase friction is plausible even with additive-only edits. diff --git a/PRPs/PRP-showcase-completion-E3-seed-config-scope.md b/PRPs/PRP-showcase-completion-E3-seed-config-scope.md new file mode 100644 index 00000000..e5a0df6e --- /dev/null +++ b/PRPs/PRP-showcase-completion-E3-seed-config-scope.md @@ -0,0 +1,1080 @@ +name: "PRP — Showcase Completion E3: Advanced Seed Config MVP + Store/Product Scope Selection (issue #409)" +description: | + +## Purpose + +Implement Parallel epic E3 of the showcase-completion initiative (umbrella #406): +an additive, allow-listed nested override schema on the seeder HTTP contract +(7 curated knobs), an additive `seed_overrides` field on `DemoRunRequest` / the +WS start frame, a store/product focus-pair selector with pre-run preview on the +Showcase page, frontend + backend validation of every knob, and persistence of +overrides + user-selected scope into the workspace row (E1 #407 story slots) so +replay honors them verbatim. + +**Execution gate:** this epic is Parallel after Foundation — implementation +starts ONLY after E1 #407 merges to `dev` (its migration ships the +`seed_overrides` / `user_scope` JSONB story slots E3 writes into). 
Every +dependency on E1's surface is tagged `CONTRACT(E1):` below; re-verify each tag +against the merged E1 code before starting Task 1. + +## Core Principles + +1. **Context is King**: every file reference below was verified against the live code on 2026-06-12 (branch dev @ bdf85f6, post-E4/#404 merge — PRE-E1-#407; line numbers will drift slightly after E1 merges, re-anchor by symbol name). +2. **Validation Loops**: each level is executable as written. +3. **Information Dense**: patterns cite exact file:line (or symbol when post-E1 drift is likely). +4. **Progressive Success**: shared override schema → seeder contract → demo start frame → pipeline consumption → workspace persistence → frontend → docs → browser dogfood. +5. **Global rules**: follow CLAUDE.md / AGENTS.md; all five backend CI gates must pass; UI work follows `.claude/rules/ui-design.md` + `.claude/rules/shadcn-ui.md`. + +--- + +## Goal + +A user on `/showcase` ticking **Re-seed first** can open an **Advanced seed +config** panel and turn 7 curated knobs (store count, product count, window +days, sparsity, promotion intensity, stockout intensity, noise sigma) before +running; independently, the user can pick an explicit **store/product focus +pair** (with a pre-run preview of the selected entities and the seeded window) +that the pipeline models instead of the auto-discovered first pair. Both the +overrides and the scope persist into a kept workspace row and are re-submitted +verbatim on Replay. A start frame without the new fields behaves +byte-identically to today. + +**Deliverable** (all additive; ZERO migrations — E1 #407 owns the schema): + +- `app/shared/seeder/overrides.py` — NEW: `SeederOverrides` Pydantic model (the single shared allow-list, `extra="forbid"`), importable by both the seeder and demo slices through `app/shared/` (vertical-slice-legal). 
+- `app/features/seeder/schemas.py` — `GenerateParams.overrides: SeederOverrides | None = None` (additive nested optional object on the EXISTING endpoint — decision rationale below). +- `app/features/seeder/service.py` — `_apply_seed_overrides(config, overrides)` applied LAST in `_build_config_from_params` (wins over the legacy scalar `stores`/`products`/`sparsity`), mapping each knob onto its `SeederConfig` sub-dataclass via `dataclasses.replace`. +- `app/features/demo/schemas.py` — `DemoRunRequest.seed_overrides: SeederOverrides | None` + `DemoRunRequest.user_scope: UserScope | None` (NEW small model) + two cross-field validators. +- `app/features/demo/pipeline.py` — `DemoContext` carries both; `step_seed` forwards `overrides` to `POST /seeder/generate`; `step_status` honors `user_scope` (validate via `/dimensions/*/{id}`; warn + fallback to discovery on a dangling pair). +- `app/features/demo/workspace.py` — `create_workspace` writes the two E1 story slots; list/detail response schemas expose them (replay reads list rows). +- `frontend/src` — `SeedConfigPanel.tsx` + `ScopeSelector.tsx` (composed from installed shadcn primitives), `lib/workspace-replay.ts` pure replay-frame builder, `types/api.ts` additions, `showcase.tsx` wiring. +- Tests: seeder schema/route/service tests (incl. out-of-bounds 422 + unknown-knob 422), demo schema JSON-path tests, pipeline `_RecordingClient` forwarding tests, workspace slot persistence tests, replay-verbatim regression (backend integration + frontend pure-helper test), component vitests. +- Docs: `docs/_base/API_CONTRACTS.md` (3 rows), `docs/_base/RUNBOOKS.md` (new incident entry + workspace-section update), `docs/_base/DOMAIN_MODEL.md` (slot schema documentation). + +**Success definition**: all Success Criteria check off, the five backend gates + +frontend lint/test are green, and a real-browser dogfood shows: an +overridden re-seed run (e.g. 
8 stores × 20 products, promo 0.3) goes green with +the seed card echoing the overrides; a scope-selected run models the chosen +pair; a kept run replays both verbatim. + +## Why + +- Umbrella #406: today the showcase accepts only `seed`/`scenario`/`reset`/`skip_seed`; the preset's behavioral character (noise, promos, stockouts, sparsity) is take-it-or-leave-it, and the modeled grain is always the first discovered `(store, product)` pair (`app/features/demo/pipeline.py:582-631`) — the operator cannot tell the story of a specific SKU. +- The seeder HTTP contract already accepts 25+ FLAT scalar/flag fields (`app/features/seeder/schemas.py:78-298`) — the umbrella's top risk is that surface growing unbounded. A curated nested object with `extra="forbid"` is the documented mitigation: 7 knobs, mechanically allow-listed, everything else stays preset-driven. +- E1 #407 reserves `seed_overrides` + `user_scope` JSONB story slots on `showcase_workspace` precisely so this epic's config survives into Replay — without persistence, replay of an overridden run would silently regenerate different data. +- E3 is Parallel after Foundation: it can land independently of E2 #408 / E4 #410 / E5 #411 / E6 #412 (no shared files beyond additive edits to `showcase.tsx` / `workspace.py` — coordinate merge order if simultaneous). + +## What + +### Open question resolved — seeder override contract shape + +**DECISION: expand `GenerateParams` with an additive nested optional object +(`overrides: SeederOverrides | None = None`). NO new endpoint.** Rationale, +researched against the current code: + +1. **The layering already exists.** `_build_config_from_params` (`app/features/seeder/service.py:202-247`) is a layered override pipeline: preset → scalar dims/window/sparsity → `_apply_phase1_overrides` (:74-137) → `_apply_phase2_overrides` (:139-199). 
A `_apply_seed_overrides` applied last is a fourth layer in an established pattern — a new endpoint would have to reimplement or call into this exact function anyway. +2. **A new endpoint duplicates load-bearing guards.** `POST /seeder/generate` carries `_check_seeder_enabled()` (production guard, `routes.py:21-33`), the ValueError→400 / Exception→500 RFC 7807 envelope (`routes.py:114-136`), and the seeder-is-the-only-bulk-mutation-path invariant. A second generate-shaped endpoint doubles that audit surface for zero contract benefit. +3. **Back-compat is free.** Absent field = `None` = byte-identical behavior — the exact precedent the Phase 1/Phase 2 field comments in `schemas.py:121-123,175-177` already promise and test. +4. **Nested (not more flat scalars) is the allow-list mechanism.** `ConfigDict(extra="forbid")` on the nested model makes an unknown knob a 422 — the umbrella's "contract grows unbounded" mitigation becomes machine-enforced, and the 7 curated knobs stay visually distinct from the 25+ legacy scalars. +5. **One schema serves both slices.** The demo start frame forwards the same object verbatim; placing `SeederOverrides` in `app/shared/seeder/overrides.py` lets `app/features/seeder/schemas.py` and `app/features/demo/schemas.py` both import it without a cross-slice import (precedent: `demo/schemas.py:16` already imports `ScenarioPreset` from `app/shared/seeder/config`). + +Trade-off accepted: `extra="forbid"` means a FUTURE knob sent by a newer client +to an older backend errors loudly instead of being ignored. That asymmetry vs. +the top-level start frame (unknown TOP-LEVEL keys remain ignored) is +deliberate — silent knob-dropping would fake-honor a config the run never used. 
+ +### Allow-listed knob → config-field mapping (the complete MVP surface) + +| Knob (wire name) | Type / bounds | Maps to (via `dataclasses.replace`) | Preset reference values | +|---|---|---|---| +| `stores` | `int`, ge=1 le=100 | `config.dimensions.stores` (`DimensionConfig.stores`, `app/shared/seeder/config.py:118`) | demo profiles 3–5; scalar `GenerateParams.stores` caps 100 | +| `products` | `int`, ge=1 le=500 | `config.dimensions.products` (`DimensionConfig.products`, config.py:119) | demo profiles 10–25; scalar caps 500 | +| `window_days` | `int`, ge=75 le=365 | `config.start_date = config.end_date - timedelta(days=window_days)` (end_date untouched) | ≥75 keeps the `historical_backfill` gate clear (`pipeline.py` gate = `3*(14+1)+30 = 75`); ≤365 = `DEFAULT_SEED_SPAN_DAYS` | +| `sparsity` | `float`, ge=0.0 le=0.9 | `config.sparsity = replace(config.sparsity, missing_combinations_pct=v)` (`SparsityConfig.missing_combinations_pct`, config.py:141) — `replace` PRESERVES the preset's `random_gaps_*` fields | sparse preset uses 0.5; 1.0 would seed zero series (hard-fail), hence the 0.9 cap | +| `promotion_intensity` | `float`, ge=0.0 le=0.5 | `config.retail = replace(config.retail, promotion_probability=v)` (`RetailPatternConfig.promotion_probability`, config.py:101) | preset max 0.25 (holiday_rush); 0.5 cap = 2× headroom | +| `stockout_intensity` | `float`, ge=0.0 le=0.5 | `config.retail = replace(config.retail, stockout_probability=v)` (config.py:102) | preset max 0.25 (stockout_heavy); higher values risk NaN-WAPE (documented expected-fail, mirrors sparse) | +| `noise_sigma` | `float`, ge=0.0 le=0.5 | `config.time_series = replace(config.time_series, noise_sigma=v)` (`TimeSeriesConfig.noise_sigma`, config.py:72) | preset max 0.4 (high_variance) | + +Precedence (document in the field description AND a service test): nested +`overrides` is applied LAST in `_build_config_from_params` and therefore WINS +over the legacy scalar `stores` / `products` / `sparsity` when 
both are sent. +`window_days` recomputes `start_date` from the (scalar-or-default) `end_date`. +The pipeline keeps sending `sparsity=0.0` as the scalar (preserves preset +character per the `if params.sparsity > 0` guard at `service.py:225-226`); +`overrides.sparsity` is the only way the demo overrides sparsity. + +### `seed_overrides` / `user_scope` slot schemas (THIS PRP's contract to define) + +E1 #407 reserves the slots; the JSON inside them is defined HERE: + +```jsonc +// showcase_workspace.seed_overrides (JSONB; NULL when the run had none) +// = SeederOverrides.model_dump(mode="json", exclude_none=True) — SPARSE: +// only operator-set knobs appear; {} never stored (None instead). +{ + "stores": 8, // int 1..100, optional + "products": 20, // int 1..500, optional + "window_days": 120, // int 75..365, optional + "sparsity": 0.3, // float 0.0..0.9, optional + "promotion_intensity": 0.3, // float 0.0..0.5, optional + "stockout_intensity": 0.1, // float 0.0..0.5, optional + "noise_sigma": 0.25 // float 0.0..0.5, optional +} + +// showcase_workspace.user_scope (JSONB; NULL when no pair was picked) +// = UserScope.model_dump(mode="json") — both keys always present when non-null: +{ + "store_id": 12, // int ge=1 — REAL discovered id (sequences + "product_id": 47 // int ge=1 never reset; ids are NOT 1-based) +} +``` + +Replay semantics: the slots record the REQUESTED config (replay-verbatim +contract, mirrors the E1 seed/scenario/reset/skip_seed columns). The EFFECTIVE +grain a run actually modeled is already recorded separately by +`finalize_workspace` into the `store_id` / `product_id` columns +(`workspace.py:136-137`) — when a replayed `user_scope` dangles (warn+fallback, +below), the two will legitimately differ; that divergence is visible, not +hidden. 
+ +### User-visible behavior + +- **Advanced seed config panel** (`/showcase`): a collapsible "Advanced seed config" section appears under the run controls, enabled ONLY while **Re-seed first** is ticked (overrides are meaningless on `skip_seed=true` and the backend rejects the combination). 7 controls with the bounds above; a "live summary" line echoes the effective config (e.g. "8 stores × 20 products × 120 days · promo 0.30"); a caveat notes high sparsity/stockout values can legitimately fail the backtest (NaN WAPE — same documented semantics as the `sparse` preset). `window_days` control is disabled with an explanatory tooltip when the `holiday_rush` preset is selected (calendar-pinned window). +- **Store/product focus-pair selector**: two dropdowns (stores, products — fed by `GET /dimensions/stores` / `GET /dimensions/products`, `page_size=100`) plus a pre-run preview card showing the chosen store (code/name/region/type), product (sku/name/category/brand) and the currently seeded window (from `GET /seeder/status`). Works WITHOUT re-seeding (scope selection on the existing dataset is the primary use). Ticking **Reset database** clears the selection with a caveat ("a wipe re-issues ids — re-pick after the run"), because Postgres sequences never reset (memory anchor: seeder-does-not-reset-id-sequences). +- **Run**: the start frame carries `seed_overrides` (only when re-seeding and ≥1 knob set) and `user_scope` (when a pair is picked). The seed step card echoes the overridden dims; the status step card says "user-selected pair" vs "discovered pair". +- **Replay** of a kept run re-submits recorded `seed_overrides` + `user_scope` verbatim alongside the existing 4 config fields. Load repopulates the panel + selector. +- **Legacy behavior**: a start frame without the new fields is byte-identical to today (contract test). 
+ +### Technical requirements + +- All new request fields are additive `Optional` with `None` defaults; the WS start frame keeps ignoring unknown TOP-LEVEL keys (`DemoRunRequest` default `extra=ignore`); the nested models use `extra="forbid"` (allow-list enforcement). +- `SeederOverrides` and `UserScope` carry `ConfigDict(strict=True, extra="forbid")`. All fields are JSON-native (`int`/`float`) → NO `Field(strict=False)` override needed and the strict-mode AST policy test (`app/core/tests/test_strict_mode_policy.py`) stays green. Runtime-verified on pydantic 2.12.5: a nested-model field under a `strict=True` parent validates from the JSON-parsed dict (FastAPI's `validate_python` path) — see verification log. +- All config is start-frame-time. NOTHING is configurable mid-run — the pipeline is strictly linear under the module-level `asyncio.Lock` (design invariant from umbrella #406; do not add any mid-run mutation channel). +- The demo slice must not import `app/features/seeder/*` — `SeederOverrides` lives in `app/shared/seeder/overrides.py`; `UserScope` lives in `app/features/demo/schemas.py` (demo-only concept). `pipeline.py` may import both (`app.shared.*` + own-slice schemas are already imported at `pipeline.py:43-45`). +- The seeder stays the only bulk-mutation path; no new wipe semantics; `_check_seeder_enabled` untouched. +- E3 ships ZERO Alembic migrations. CONTRACT(E1): the `seed_overrides` + `user_scope` JSONB slots exist on `showcase_workspace` (E1 #407 migration) before this epic executes. + +### Success Criteria + +- [ ] `POST /seeder/generate` accepts `{"overrides": {"stores": 8, "promotion_intensity": 0.3}}` → 201, and the generated config reflects the knobs (service unit test); `{"overrides": {"stores": 0}}` → 422; `{"overrides": {"bogus_knob": 1}}` → 422; a body WITHOUT `overrides` produces a byte-identical `SeederConfig` to today (regression test). 
+- [ ] `DemoRunRequest.model_validate({...})` JSON-path tests: `seed_overrides` with `skip_seed=true` → ValidationError; `window_days` with `scenario="holiday_rush"` → ValidationError; legacy 4-field frame still validates; `user_scope` happy path. +- [ ] `step_seed` forwards `overrides` in the `/seeder/generate` POST body (`_RecordingClient` assertion); `step_status` uses a valid `user_scope` pair (asserts the GET-by-id calls + ctx fields), and WARNS + falls back to discovery on a 404 pair. +- [ ] A `preservation="keep"` run records `seed_overrides` + `user_scope` into the E1 story slots; `GET /demo/workspaces` list items AND `/{id}` detail expose both; the e2e replay regression (`tests/test_e2e_demo.py::test_demo_replay_same_config_twice` extended or sibling test) proves a replayed row carries identical slot JSON. +- [ ] Frontend: panel renders 7 bounded controls only when Re-seed is ticked; selector previews the chosen pair; `workspaceToRunRequest(ws)` unit test proves replay-verbatim including the new fields; `pnpm lint && pnpm test --run` green; no NEW `tsc -b` errors in touched files. +- [ ] Legacy start frames byte-identical (backend contract test + existing demo tests untouched-green). +- [ ] Backend gates green: `uv run ruff check . && uv run ruff format --check . && uv run mypy app/ && uv run pyright app/ && uv run pytest -v -m "not integration"`. +- [ ] Docs updated additively: API_CONTRACTS (seeder + demo + WS rows), RUNBOOKS (new showcase incident entry + workspace-section note), DOMAIN_MODEL (slot schemas under the `showcase_workspace` aggregate). +- [ ] Real-browser dogfood (Level 4) performed. + +## All Needed Context + +### Documentation & References + +```yaml +# MUST READ — codebase patterns (verified 2026-06-12, dev @ bdf85f6 — PRE-E1; +# re-anchor line numbers by symbol after E1 #407 merges) + +- file: app/features/seeder/schemas.py + why: | + GenerateParams at 78-298 — the contract to extend. 
Note the Phase 1 + comment block at 121-123 ("All flags default off so existing scenarios + remain byte-identical") — copy that promise onto the new field. The model + is plain BaseModel (NO ConfigDict(strict=True)) — do NOT add strict mode + to GenerateParams itself (it has date fields start_date/end_date; only + the NEW nested SeederOverrides model is strict). + ChangepointEventParam at 51-64 is the existing nested-model-in-params + precedent (list[ChangepointEventParam] at 153-156). + +- file: app/features/seeder/service.py + why: | + _build_config_from_params at 202-247 — THE integration point. Scalar + overrides at 218-226 (dataclasses.replace on dimensions; sparsity only + when > 0); _apply_phase1_overrides at 74-137 and _apply_phase2_overrides + at 139-199 are the mutate-config-in-place pattern to mirror for + _apply_seed_overrides. APPLY THE NEW LAYER LAST (after :241) so nested + wins over scalars. from dataclasses import replace already imported (:7). + +- file: app/shared/seeder/config.py + why: | + The override targets: TimeSeriesConfig.noise_sigma :72, + RetailPatternConfig.promotion_probability/stockout_probability :101-102, + DimensionConfig.stores/products :118-119, + SparsityConfig.missing_combinations_pct :141 (+ random_gaps fields to + PRESERVE via replace). ScenarioPreset :37-47. holiday_rush pinned window + :553-579 (the reason window_days is rejected for that preset). + DEFAULT_SEED_SPAN_DAYS=365 :10. NO Pydantic here — config.py stays + dataclasses; the new Pydantic model goes in a NEW sibling module + app/shared/seeder/overrides.py. + +- file: app/features/seeder/routes.py + why: | + POST /seeder/generate at 85-136 — NO route-code change needed (the body + model change flows through); read for the _check_seeder_enabled guard + (21-33) and the error envelope you must NOT duplicate (the + no-new-endpoint rationale). + +- file: app/features/demo/schemas.py + why: | + DemoRunRequest at 29-85 — the model to extend. 
The model_validator + _workspace_name_requires_keep at 80-85 is the EXACT cross-field-rule + pattern for the two new validators. The docstring at 30-38 explains the + strict-mode policy; scenario's strict=False override at 59-63 (enum) — + nested BaseModel fields need NO such override (runtime-verified). + WorkspaceListItem at 169-190 / WorkspaceDetailResponse at 192-203 — add + seed_overrides + user_scope to BOTH (replay reads LIST rows: + showcase.tsx:174-186). CONTRACT(E1): E1's PRP may already have surfaced + the story slots on these response models — if so, verify shape + (dict[str, Any] | None) and skip the duplicate edit. + +- file: app/features/demo/pipeline.py + why: | + DemoContext at 212-263 — add seed_overrides/user_scope fields (follow the + PRP-38/39/40 additive-Optional comment style). step_seed at 541-579 — + extend the POST body; _SCENARIO_SEED_PROFILE at 513-538 supplies the + defaults overrides partially replace. step_status at 582-631 — the + first-pair discovery to branch around for user_scope (its docstring + already states ids are NOT 1-based). run_pipeline ctx construction at + 2646-2651 — thread the two new req fields. StepStatus literal includes + "warn" (schemas.py:19) and only "fail" stops the run (:2729-2738) — the + warn+fallback path is safe. CRITICAL header rule :18-19: pipeline must + NOT import app.features.* outside its own slice — app.shared.* is fine. + +- file: app/features/demo/workspace.py + why: | + create_workspace at 46-79 — add the two slot writes on the + ShowcaseWorkspace(...) constructor; warn-and-continue contract at 10-13 + (a slot-write failure must never break the run — the try/except already + guarantees it). finalize_workspace at 106-155 — NO change for the slots + (recorded at create); note store_id/product_id columns at 136-137 record + the EFFECTIVE grain (divergence-visible design). + CONTRACT(E1): E1 refactors create_workspace to write its new columns — + rebase this edit onto E1's merged version. 
+ +- file: app/features/demo/models.py + why: | + ShowcaseWorkspace ORM — E3 does NOT edit this file. CONTRACT(E1): after + E1 merges it carries seed_overrides/user_scope as JSONB story slots; + verify the exact attribute names/types there before writing + workspace.py code. (Assumed shape: nullable JSONB columns mirroring the + created_objects precedent at 77-79.) + +- file: app/features/demo/tests/test_pipeline.py + why: | + _RecordingClient at 1025-1068 (records (method, path, json_body) per + call, canned responses keyed by (method, path-prefix)); _as_client cast + at 1070+. Reuse for: overrides-forwarding, user_scope GET-by-id calls, + warn+fallback (404 canned response). + +- file: app/features/demo/tests/test_schemas.py + why: | + The JSON-path test conventions: test_demo_run_request_json_path_keep_ + with_name :67, test_demo_run_request_legacy_frame_still_validates :75, + test_demo_run_request_workspace_name_requires_keep :83 — mirror all + three shapes for the new fields. + +- file: app/features/seeder/tests/test_routes.py + why: | + Route-test harness: client fixture :15 (TestClient + mocked settings, + seeder_allow_production=True), TestGenerate :96 — add overrides 201 / + 422-bounds / 422-unknown-knob cases here. test_generate_validation_error + :157 is the 422 pattern. + +- file: app/features/seeder/tests/test_service.py + why: | + Service-test patterns for _build_config_from_params — add: knob→field + mapping, precedence-over-scalars, window_days math, preset-character + preservation (e.g. sparse preset's random_gaps survive an overrides. + sparsity replace), and the no-overrides byte-identical regression. + +- file: tests/test_e2e_demo.py + why: | + test_demo_replay_same_config_twice at 561-609 — the replay-regression + guard to extend (or sibling): a keep-run with seed_overrides+user_scope, + replayed, must produce a second row with identical slot JSON. + +- file: frontend/src/pages/showcase.tsx + why: | + Wiring surface. 
handleRun start frame at 139-156 (conditional-spread + pattern for optional fields — reuse for seed_overrides/user_scope); + handleLoadWorkspace at 160-168 (repopulate panel+selector); + handleReplayWorkspace at 174-186 (REPLACE its inline object with the new + workspaceToRunRequest helper); controls block at 269-363 (panel + + selector land after the existing checkboxes); reset checkbox at 301-311 + (hook the scope-clearing caveat here). + +- file: frontend/src/types/api.ts + why: | + DemoRunRequest at 778-788 (+ seed_overrides?/user_scope?); + WorkspaceListItem at 806-816 and WorkspaceDetail at 819-825 (+ both + fields, `| null`); add SeedOverrides + UserScope interfaces near the + demo block. WARNING: MIXED CRLF/LF line endings — surgical edits only; + verify `git diff --stat` stays small. + +- file: frontend/src/hooks/use-stores.ts + why: | + useStores at 16-43 (TanStack Query over /dimensions/stores with + page/page_size/enabled) — the selector's data source; use-products.ts + mirrors it (useProducts :16, useProduct :45). page_size hard cap is 100 + (app/features/dimensions/routes.py:62,187). + +- file: frontend/src/hooks/use-seeder.ts + why: useSeederStatus :15 — the seeded-window source for the preview card. + +- file: frontend/src/hooks/use-demo-pipeline.ts + why: | + start(req) at 241-249 sends the req object as the WS start frame + verbatim — generic over the widened DemoRunRequest; NO change needed + (read to confirm). RunHistoryStrip replays stored req objects, so + localStorage replays inherit the new fields for free. + +- file: frontend/src/components/demo/ScenarioPicker.test.tsx + why: | + The vitest + @testing-library/react + afterEach(cleanup) harness pattern + for the two new component test files. + +- file: frontend/src/components/ui/ + why: | + Installed primitives: collapsible.tsx, select.tsx, slider.tsx, input.tsx, + badge.tsx, card.tsx, tooltip.tsx, checkbox.tsx — the panel + selector + compose from these; NO new shadcn install required. 
If one becomes + necessary anyway: pin `pnpm dlx shadcn@4.7.0 add ...` (5.x writes a stub + pnpm-workspace.yaml and skips the component) and use per-component + @radix-ui/react-X imports, never the radix barrel. + +- file: docs/_base/RUNBOOKS.md + why: | + "Showcase page (/showcase) pipeline fails at step X" — numbered entries + end at 28; append entry 29 (overrides/scope incident matrix) in the same + bold-trigger/Cause/Fix format. The "Showcase workspace — + preserve/restore/replay/delete semantics" section's "Explicitly out of + scope" list says advanced seed configuration is NOT implemented — E3 + DELIVERS it: rewrite that bullet (move seed_overrides/user_scope to the + documented surface; phase-level config stays out of scope). + +- file: docs/_base/API_CONTRACTS.md + why: | + Rows to extend additively: the /seeder/* row (mention the overrides + object on POST /seeder/generate), POST /demo/run, and the WS + /demo/stream start-frame bullet (E1/E2 notes were just added — append an + "E3 (#409)" note, don't disturb them). + +- file: docs/_base/DOMAIN_MODEL.md + why: | + showcase_workspace aggregate section — document the seed_overrides / + user_scope slot JSON schemas (the umbrella's "JSONB story slots become a + junk drawer" mitigation requires documented slot schemas here). + +- file: PRPs/PRP-showcase-workspace-E2-preset-exposure.md + why: | + Closest predecessor (preset exposure + seed profiles) — its gotcha block + (holiday_rush pinning, seeder precedence, sparse NaN-WAPE, frontend tsc + gate) all recur in E3; this PRP inherits and extends them. + +# Issue / initiative context +- url: https://github.com/w7-mgfcode/ForecastLabAI/issues/409 + why: The epic this PRP implements (Parallel after Foundation E1 #407). 
+- url: https://github.com/w7-mgfcode/ForecastLabAI/issues/406 + why: | + Umbrella — Approach ("all configuration is start-frame-time", "no new + router outside existing slices"), Risks table row 1 (the allow-list + mitigation this PRP implements), out-of-scope list (NO mid-run controls, + NO embedded scenario-builder). +- url: https://github.com/w7-mgfcode/ForecastLabAI/issues/407 + why: | + Foundation epic whose contract is GIVEN: JSONB story slots incl. + seed_overrides + user_scope; columns replayed_from_workspace_id / + archived / pinned / notes / tags / config_schema_version; PATCH + /demo/workspaces/{id}. E3 builds on, never re-decides, this surface. + +# External references +- url: https://docs.pydantic.dev/latest/concepts/strict_mode/ + why: | + Strict-mode semantics for nested models: a model-typed field validates + dict input using the NESTED model's own config — confirmed empirically + (verification log) so no doc-faith is required. NOTE: the docs site + 301-redirects and anchors have drifted; the runtime verification in the + Known Gotchas log is the authoritative claim, not this URL. +- url: https://docs.pydantic.dev/latest/api/config/#pydantic.config.ConfigDict.extra + why: extra="forbid" → unknown nested keys raise ValidationError (the 422 allow-list mechanism). 
+``` + +### Current Codebase tree (relevant subset, pre-E1) + +```bash +app/shared/seeder/ +├── config.py # dataclasses; override TARGETS (no Pydantic here) +├── core.py / generators/ # consume SeederConfig — untouched by E3 +app/features/seeder/ +├── schemas.py # GenerateParams @78 (25+ flat fields) +├── service.py # _build_config_from_params @202; _apply_phaseN @74/@139 +├── routes.py # POST /generate @85 (guard @21; no route change) +└── tests/ # test_routes.py, test_service.py, test_schemas.py +app/features/demo/ +├── schemas.py # DemoRunRequest @29; Workspace* responses @169 +├── pipeline.py # DemoContext @212; step_seed @541; step_status @582; run_pipeline @2618 +├── workspace.py # create_workspace @46; finalize_workspace @106 +├── models.py # ShowcaseWorkspace (E1 adds the story slots — not edited here) +└── tests/ # test_pipeline.py (_RecordingClient @1025), test_schemas.py, test_workspace.py +tests/test_e2e_demo.py # replay regression @561 +frontend/src/ +├── pages/showcase.tsx # handleRun @139; handleLoad @160; handleReplay @174; controls @269 +├── types/api.ts # DemoRunRequest @778; WorkspaceListItem @806 (MIXED CRLF/LF) +├── hooks/use-stores.ts, use-products.ts, use-seeder.ts, use-demo-pipeline.ts +└── components/demo/ # ScenarioPicker, WorkspacePanel, ... 
(+ index.ts barrel) +``` + +### Desired Codebase tree (files added/modified) + +```bash +app/shared/seeder/overrides.py # NEW — SeederOverrides (strict, extra=forbid, 7 knobs) +app/shared/seeder/tests/test_overrides.py # NEW — bounds, forbid, JSON-path, sparse-dump tests +app/features/seeder/schemas.py # MOD — GenerateParams.overrides: SeederOverrides | None +app/features/seeder/service.py # MOD — _apply_seed_overrides, wired LAST in _build_config_from_params +app/features/seeder/tests/test_service.py # MOD — mapping/precedence/window/byte-identical tests +app/features/seeder/tests/test_routes.py # MOD — 201-with-overrides, 422-bounds, 422-unknown-knob +app/features/demo/schemas.py # MOD — UserScope; DemoRunRequest fields + validators; Workspace* responses +app/features/demo/pipeline.py # MOD — DemoContext fields; step_seed forward; step_status scope branch +app/features/demo/workspace.py # MOD — create_workspace writes both slots +app/features/demo/tests/test_schemas.py # MOD — JSON-path + validator tests +app/features/demo/tests/test_pipeline.py # MOD — forwarding + scope + warn/fallback tests +app/features/demo/tests/test_workspace.py # MOD — slot persistence tests +tests/test_e2e_demo.py # MOD — replay-verbatim regression incl. 
slots (integration) +frontend/src/types/api.ts # MOD — SeedOverrides, UserScope, DemoRunRequest, Workspace* (surgical) +frontend/src/lib/workspace-replay.ts # NEW — workspaceToRunRequest(ws) pure helper +frontend/src/lib/workspace-replay.test.ts # NEW — replay-verbatim FE regression +frontend/src/components/demo/SeedConfigPanel.tsx # NEW — collapsible 7-knob panel +frontend/src/components/demo/SeedConfigPanel.test.tsx # NEW +frontend/src/components/demo/ScopeSelector.tsx # NEW — pair selector + preview card +frontend/src/components/demo/ScopeSelector.test.tsx # NEW +frontend/src/components/demo/index.ts # MOD — export the two new components (match barrel style) +frontend/src/pages/showcase.tsx # MOD — wiring (state, panel, selector, start frames) +docs/_base/API_CONTRACTS.md # MOD — seeder overrides + /demo/run + WS start-frame E3 notes +docs/_base/RUNBOOKS.md # MOD — showcase incident 29 + workspace-section scope update +docs/_base/DOMAIN_MODEL.md # MOD — slot schemas on the showcase_workspace aggregate +``` + +### Known Gotchas & Library Quirks + +```python +# CRITICAL — EXECUTION ORDER: do not start until E1 #407 is merged to dev. +# E3 writes JSONB slots that E1's migration creates. First action of Task 1: +# re-read app/features/demo/models.py + workspace.py on the post-E1 dev and +# re-anchor every CONTRACT(E1) tag in this PRP. 
+ +# CRITICAL — pydantic strict + nested models (runtime-verified 2026-06-12 on +# pydantic 2.12.5; re-run on lib upgrade): +# uv run python -c " +# from pydantic import BaseModel, ConfigDict, Field +# class N(BaseModel): +# model_config = ConfigDict(strict=True, extra='forbid') +# stores: int | None = Field(default=None, ge=1, le=100) +# class P(BaseModel): +# model_config = ConfigDict(strict=True) +# seed_overrides: N | None = None +# print(P.model_validate({'seed_overrides': {'stores': 5}})) # OK — dict→model under strict +# P.model_validate({'seed_overrides': {'stores': 999}}) # ValidationError (bounds) +# " +# and N.model_validate({'stores': 5, 'bogus': 1}) → ValidationError (forbid). +# Conclusions baked into the design: NO Field(strict=False) needed on the +# nested field; extra='forbid' IS the allow-list; FastAPI's validate_python +# path (the JSON dict) works. All knobs are int/float → the strict-mode AST +# policy test (app/core/tests/test_strict_mode_policy.py) does not fire. + +# CRITICAL — do NOT add ConfigDict(strict=True) to GenerateParams itself: it +# has date fields (start_date/end_date) and is deliberately non-strict today. +# Only the NEW nested models are strict. + +# CRITICAL — seeder override precedence (service.py:213-226 + the new layer): +# preset → scalar stores/products/window/sparsity → phase1 → phase2 → +# overrides (LAST, wins). Use dataclasses.replace for every sub-config so +# preset-customized sibling fields survive (e.g. sparse preset's +# random_gaps_per_series when overrides.sparsity is set; scenario-customized +# region/category lists when overrides.stores is set — same reason the +# existing scalar override at :218-222 uses replace). + +# CRITICAL — holiday_rush is CALENDAR-PINNED (config.py:553-579): its +# HolidayConfig spikes are fixed 2024 dates. 
seed_overrides.window_days on +# scenario='holiday_rush' must be REJECTED at DemoRunRequest validation +# (clear ValueError message), not silently ignored — a shifted window +# silently drops every holiday spike. Direct /seeder/generate callers who +# combine them are out of scope (the preset docstring already documents +# explicit-dates-to-shift). + +# CRITICAL — seed_overrides requires skip_seed=False. The seed step is skipped +# on skip_seed=true (pipeline.py:543-544) so overrides would be a silent +# no-op; reject in a model_validator (mirror _workspace_name_requires_keep, +# schemas.py:80-85). The frontend enforces the same by gating the panel on +# the Re-seed checkbox. + +# CRITICAL — ids are NOT 1-based (step_status docstring, pipeline.py:585-587; +# memory anchor seeder-does-not-reset-id-sequences). The scope selector MUST +# be fed from live /dimensions data, never synthesized ids. user_scope can +# dangle after reset+reseed → step_status WARN + fallback to discovery (the +# replay path of a reset=true workspace would otherwise hard-fail forever). +# "warn" does NOT stop the run (only "fail" does — pipeline.py:2729-2738). + +# CRITICAL — high stockout_intensity / sparsity overrides can legitimately +# FAIL the backtest (all-NaN WAPE → step_backtest FAIL by design; same +# semantics as the sparse preset, RUNBOOKS incident 28). Do NOT add a +# graceful-skip; ship the panel caveat + runbook entry 29 instead. + +# CRITICAL — workspace writes stay warn-and-continue (workspace.py:10-13). +# The slot writes go INSIDE the existing try/except in create_workspace; a +# failure yields workspace_id=None and a green run, never an exception. + +# GOTCHA — replay reads WorkspaceListItem (the LIST row — showcase.tsx:174): +# seed_overrides/user_scope must be on the LIST response, not detail-only. +# CONTRACT(E1): if E1 already exposed the slots detail-only, ADD them to the +# list item here (cheap; sparse JSONB). 
+ +# GOTCHA — frontend type gates: `pnpm tsc --noEmit` is vacuous (solution-style +# tsconfig) and `pnpm tsc -b` fails with ~24 PRE-EXISTING errors on dev, +# none in demo components. Gate on `pnpm lint && pnpm test --run` plus: +# cd frontend && pnpm tsc -b 2>&1 | grep -E "SeedConfigPanel|ScopeSelector|workspace-replay|types/api|pages/showcase" # expect empty + +# GOTCHA — frontend/src/types/api.ts has MIXED CRLF/LF line endings; repo-wide +# files are inconsistently CRLF/LF. Keep edits surgical; check +# `git diff --stat` before committing (Edit/Write emit LF — avoid whole-file +# noise diffs). + +# GOTCHA — shadcn: compose from INSTALLED primitives (collapsible, select, +# slider, input, badge, tooltip — frontend/src/components/ui/). Semantic +# tokens only (text-muted-foreground, border-primary, text-destructive for +# the reset caveat — mirrors showcase.tsx:309). Never raw colors. + +# GOTCHA — mypy --strict AND pyright --strict gate every backend edit. The +# DemoContext additions need full annotations (SeederOverrides | None); +# pipeline.py imports them from app.shared.seeder.overrides (NOT from the +# seeder feature slice — vertical-slice rule, pipeline.py:18-19). + +# GOTCHA — step_seed currently derives the detail line from profile dims +# (pipeline.py:577). With overrides, compute effective stores/products = +# override-or-profile for BOTH the POST scalars and the detail string so +# the card tells the truth; keep scalar sparsity=0.0 (preset-character +# guard); the nested object carries the operator's sparsity. 
+ +# CONVENTION — commits (every one references #409; no AI trailer; scopes from +# .claude/rules/commit-format.md — seeder slice ⊂ `data`, demo slice ⊂ `api`): +# feat(data): add allow-listed nested seed overrides to seeder contract (#409) +# feat(api): thread seed overrides and user scope through demo pipeline (#409) +# feat(ui): add advanced seed config panel and scope selector to showcase (#409) +# test(api): cover replay-verbatim seed overrides and scope slots (#409) +# docs(docs): document seed override contract and workspace slots (#409) +# docs(repo): track showcase completion e3 prp (#409) +# Branch off dev: feat/showcase-completion-e3-seed-config-scope (45 chars ≤ 50). + +# RUNTIME-VERIFICATION LOG (per prp-create step 3): +# - pydantic 2.12.5 nested-strict + extra=forbid + bounds behavior verified +# with the command in the CRITICAL block above (all four assertions pass). +# - Seeder precedence semantics read directly from service.py:202-247 (not +# inferred); the `if params.sparsity > 0` guard confirmed at :225-226. +# - dimensions page_size cap 100 confirmed at app/features/dimensions/ +# routes.py:62 and :187. +# - `pnpm tsc -b` pre-existing-failure state re-confirmed by the E2 PRP log +# (2026-06-12); no demo-component errors. +# - No other third-party API claims — everything else cites in-repo code. +``` + +## Implementation Blueprint + +### Data models and structure + +```python +# app/shared/seeder/overrides.py (NEW) +"""Curated, allow-listed seed-override schema (E3, issue #409). + +Shared between the seeder slice (GenerateParams.overrides) and the demo slice +(DemoRunRequest.seed_overrides) — app/shared is the sanctioned cross-slice +home (vertical-slice rule). extra='forbid' IS the allow-list: any knob not +listed here is a 422 at the HTTP boundary (umbrella #406 risk mitigation — +the full 25+ knob surface stays preset-driven). 
+""" +from pydantic import BaseModel, ConfigDict, Field + +class SeederOverrides(BaseModel): + # strict=True catches JSON-native coercion bugs ("5" → 5); every field is + # int/float so no Field(strict=False) override is needed (security-patterns.md). + model_config = ConfigDict(strict=True, extra="forbid") + + stores: int | None = Field(default=None, ge=1, le=100, description="Store count → DimensionConfig.stores; wins over the scalar `stores` param.") + products: int | None = Field(default=None, ge=1, le=500, description="Product count → DimensionConfig.products; wins over the scalar `products` param.") + window_days: int | None = Field(default=None, ge=75, le=365, description="Seeded window length; start_date = end_date - window_days. >=75 keeps the showcase historical_backfill gate clear. Rejected on the calendar-pinned holiday_rush preset (demo surface).") + sparsity: float | None = Field(default=None, ge=0.0, le=0.9, description="Missing (store,product) grain fraction → SparsityConfig.missing_combinations_pct; preserves the preset's gap config. 1.0 disallowed (zero series).") + promotion_intensity: float | None = Field(default=None, ge=0.0, le=0.5, description="→ RetailPatternConfig.promotion_probability (preset max 0.25).") + stockout_intensity: float | None = Field(default=None, ge=0.0, le=0.5, description="→ RetailPatternConfig.stockout_probability. High values can legitimately NaN-WAPE-fail the backtest (documented).") + noise_sigma: float | None = Field(default=None, ge=0.0, le=0.5, description="→ TimeSeriesConfig.noise_sigma (preset max 0.4).") + + def is_empty(self) -> bool: + """True when no knob is set ({} on the wire) — treated as None everywhere.""" + return not self.model_dump(exclude_none=True) +``` + +```python +# app/features/demo/schemas.py — additions (demo-only concept stays in-slice) +class UserScope(BaseModel): + """Operator-selected (store, product) focus pair (E3, issue #409). 
+ + Ids are REAL discovered ids (sequences never reset — ids are not 1-based); + step_status validates them and warn-falls-back to discovery when dangling. + """ + model_config = ConfigDict(strict=True, extra="forbid") + store_id: int = Field(..., ge=1) + product_id: int = Field(..., ge=1) + +# DemoRunRequest — two additive Optional fields + two validators: +# seed_overrides: SeederOverrides | None = None (import from app.shared.seeder.overrides) +# user_scope: UserScope | None = None +# +# @model_validator(mode="after") _seed_overrides_require_reseed: +# if self.seed_overrides is not None and not self.seed_overrides.is_empty() +# and self.skip_seed: +# raise ValueError("seed_overrides requires skip_seed=false (Re-seed first)") +# # normalize: an empty overrides object collapses to None +# if self.seed_overrides is not None and self.seed_overrides.is_empty(): +# self.seed_overrides = None # NOTE: model_validator(after) may mutate self +# +# @model_validator(mode="after") _window_days_forbidden_on_holiday_rush: +# if (self.seed_overrides is not None +# and self.seed_overrides.window_days is not None +# and self.scenario is ScenarioPreset.HOLIDAY_RUSH): +# raise ValueError("window_days cannot override the calendar-pinned holiday_rush window") +# +# WorkspaceListItem (+ WorkspaceDetailResponse inherits): +# seed_overrides: dict[str, Any] | None = Field(default=None, ...) +# user_scope: dict[str, Any] | None = Field(default=None, ...) +# (from_attributes=True already set — ORM JSONB maps straight through. +# CONTRACT(E1): skip if E1's PRP already added them; ensure LIST exposure.) +``` + +```python +# app/features/seeder/service.py — the new layer (mirror _apply_phase2_overrides) +def _apply_seed_overrides(config: SeederConfig, overrides: SeederOverrides | None) -> None: + """Apply the curated nested overrides LAST — wins over scalar params. 
+ + dataclasses.replace is field-precise: preset-customized sibling fields + (region/category lists, random_gaps_*) survive every knob. + """ + if overrides is None: + return + if overrides.stores is not None or overrides.products is not None: + config.dimensions = replace( + config.dimensions, + stores=overrides.stores if overrides.stores is not None else config.dimensions.stores, + products=overrides.products if overrides.products is not None else config.dimensions.products, + ) + if overrides.window_days is not None: + config.start_date = config.end_date - timedelta(days=overrides.window_days) + if overrides.sparsity is not None: + config.sparsity = replace(config.sparsity, missing_combinations_pct=overrides.sparsity) + if overrides.promotion_intensity is not None or overrides.stockout_intensity is not None: + config.retail = replace( + config.retail, + promotion_probability=(overrides.promotion_intensity + if overrides.promotion_intensity is not None + else config.retail.promotion_probability), + stockout_probability=(overrides.stockout_intensity + if overrides.stockout_intensity is not None + else config.retail.stockout_probability), + ) + if overrides.noise_sigma is not None: + config.time_series = replace(config.time_series, noise_sigma=overrides.noise_sigma) +# Wire-in (one line, AFTER _apply_phase2_overrides at :241): +# _apply_seed_overrides(config, params.overrides) +``` + +```python +# app/features/demo/pipeline.py — step changes (sketch) + +# DemoContext additions (after workspace_name, with an E3 #409 comment): +# seed_overrides: SeederOverrides | None = None +# user_scope: UserScope | None = None +# run_pipeline ctx construction: thread req.seed_overrides / req.user_scope. + +# step_seed — effective dims + verbatim forward: +# stores = ctx.seed_overrides.stores if (ctx.seed_overrides and ctx.seed_overrides.stores) else profile.stores +# products = ... same for products ... 
+# window: if ctx.seed_overrides and ctx.seed_overrides.window_days: +# seed_end = datetime.now(UTC).date(); seed_start = seed_end - timedelta(days=ctx.seed_overrides.window_days) +# elif profile.window is not None: ... (existing pinned branch; validator already +# guarantees window_days is never set on holiday_rush) +# json_body gains: **({"overrides": ctx.seed_overrides.model_dump(exclude_none=True)} +# if ctx.seed_overrides else {}) +# detail line + data echo the effective dims and "overrides" keys applied. + +# step_status — user-scope branch BEFORE first-pair discovery: +# if ctx.user_scope is not None: +# try: +# store_body = await client.request("status[scope-store]", "GET", +# f"/dimensions/stores/{ctx.user_scope.store_id}") +# product_body = await client.request("status[scope-product]", "GET", +# f"/dimensions/products/{ctx.user_scope.product_id}") +# except _StepError: +# scope_warn = ("user_scope (store=%d, product=%d) not found — fell back " +# "to discovered pair" % (...)) # WARN, never fail (replay safety) +# else: +# ctx.store_id, ctx.product_id = ctx.user_scope.store_id, ctx.user_scope.product_id +# -> return ("pass", f"... store_id={..} product_id={..} (user-selected)", +# {..., "user_scope_applied": True}) +# # fallback / no-scope path: existing discovery (582-631) unchanged; when the +# # scope dangled return ("warn", scope_warn + discovery detail, +# # {..., "user_scope_applied": False}). 
+``` + +```python +# app/features/demo/workspace.py — create_workspace constructor additions +# (INSIDE the existing try; attribute names per the merged E1 model — +# CONTRACT(E1): assumed `seed_overrides` / `user_scope` nullable JSONB): +# seed_overrides=(req.seed_overrides.model_dump(mode="json", exclude_none=True) +# if req.seed_overrides else None), +# user_scope=(req.user_scope.model_dump(mode="json") if req.user_scope else None), +``` + +```tsx +// frontend/src/lib/workspace-replay.ts (NEW) — replay-verbatim in ONE place +import type { DemoRunRequest, WorkspaceListItem } from '@/types/api' + +/** Build the verbatim replay start frame for a saved workspace (E4 semantics + * + E3 #409 slots). Omits absent optionals so legacy rows replay byte- + * identically to today. */ +export function workspaceToRunRequest(ws: WorkspaceListItem): DemoRunRequest { + return { + seed: ws.seed, + scenario: ws.scenario, + reset: ws.reset, + skip_seed: ws.skip_seed, + preservation: 'keep', + // CONTRACT(E1): replay provenance — post-E1, handleReplayWorkspace's inline + // object sends this field (an E1 frozen success criterion); this helper + // REPLACES that object and must preserve it or lineage silently regresses. + replayed_from_workspace_id: ws.workspace_id, + ...(ws.name ? { workspace_name: ws.name } : {}), + ...(ws.seed_overrides ? { seed_overrides: ws.seed_overrides } : {}), + ...(ws.user_scope ? 
{ user_scope: ws.user_scope } : {}), + } +} + +// types/api.ts additions (surgical): +// export interface SeedOverrides { stores?: number; products?: number; +// window_days?: number; sparsity?: number; promotion_intensity?: number; +// stockout_intensity?: number; noise_sigma?: number } +// export interface UserScope { store_id: number; product_id: number } +// DemoRunRequest += seed_overrides?: SeedOverrides; user_scope?: UserScope +// WorkspaceListItem += seed_overrides: SeedOverrides | null; user_scope: UserScope | null + +// SeedConfigPanel.tsx — props: { value: SeedOverrides | null; onChange(v: SeedOverrides | null): void; +// disabled?: boolean; windowLocked?: boolean /* holiday_rush */ } +// "Advanced seed config"; Inputs (stores 1..20 UI-range, products 1..50, +// window_days 75..365) + Sliders (sparsity 0..0.9 step .05, promo/stockout 0..0.5, +// noise 0..0.5); live summary line; NaN-WAPE caveat ; emits null when all unset. +// UI ranges are TIGHTER than the API bounds (laptop-scale); the API bounds are the law. + +// ScopeSelector.tsx — props: { value: UserScope | null; onChange(v: UserScope | null): void; +// disabled?: boolean } +// two shadcn