diff --git a/.env.example b/.env.example index 38ef75b4..62da51b9 100644 --- a/.env.example +++ b/.env.example @@ -29,6 +29,10 @@ FORECAST_ENABLE_LIGHTGBM=false # FORECAST_ENABLE_XGBOOST defaults to false (opt-in; install ml-xgboost extra) # FORECAST_ENABLE_RANDOM_FOREST=false # PRP-36 optional model — pure sklearn, no extra needed +# Demo / Showcase settings +# E6 (#412) — root for saved-workspace export bundles (manifest + checksums). +SHOWCASE_EXPORT_ROOT=./artifacts/showcase + # RAG Configuration # Embedding Provider: "openai" or "ollama" RAG_EMBEDDING_PROVIDER=openai diff --git a/PRPs/PRP-showcase-completion-E1-metadata-provenance-backbone.md b/PRPs/PRP-showcase-completion-E1-metadata-provenance-backbone.md new file mode 100644 index 00000000..101fdf00 --- /dev/null +++ b/PRPs/PRP-showcase-completion-E1-metadata-provenance-backbone.md @@ -0,0 +1,1031 @@ +name: "PRP — Showcase Completion E1: Workspace Metadata & Provenance Backbone (issue #407)" +description: | + +## Purpose + +Implement the Foundation epic of the showcase-completion initiative (umbrella #406): +one Alembic migration extends `showcase_workspace` with lifecycle + provenance columns +(`replayed_from_workspace_id`, `archived`, `pinned`, `notes`, `tags`, +`config_schema_version`) and six documented JSONB story-slot columns +(`seed_overrides`, `user_scope`, `approval_events`, `rag_events`, `job_ids`, +`phase_summaries`); a `PATCH /demo/workspaces/{id}` lifecycle endpoint +(rename/notes/tags/archive/pin) lands with its Pydantic schema surface; and Replay +writes `replayed_from_workspace_id`. Every Parallel epic (#408–#412) writes into or +reads from this surface, so it ships first. Blocks E2 #408, E3 #409, E4 #410, +E5 #411, E6 #412. + +## Core Principles + +1. **Context is King**: every reference below was verified against the live code on 2026-06-12 (branch `dev` @ `bdf85f6`). +2. **Validation Loops**: each level is executable as written. +3. **Information Dense**: patterns cite exact file:line. 
+4. **Progressive Success**: model+migration → schemas → service helpers → PATCH route → replay wiring → tests → docs. +5. **Global rules**: follow CLAUDE.md / AGENTS.md; all five CI gates must pass; all changes ADDITIVE. + +--- + +## Goal + +The `showcase_workspace` table gains the metadata + provenance backbone every other +epic of umbrella #406 consumes: + +- **Lifecycle columns**: `archived` (bool), `pinned` (bool), `notes` (free text), + `tags` (queryable JSONB string array, GIN-indexed — exact `scenario_plan.tags` + pattern), `config_schema_version` (int, schema-evolution marker). +- **Provenance column**: `replayed_from_workspace_id` — a SOFT reference (String(32), + indexed, deliberately **no ForeignKey**, not even self-referential) recorded when a + run is a Replay of a saved workspace. +- **Six documented JSONB story slots** as dedicated nullable JSONB columns: + `seed_overrides`, `user_scope`, `approval_events`, `rag_events`, `job_ids`, + `phase_summaries`. E1 ships the columns + the documented per-slot schema; E1 writes + NONE of them (all stay NULL) — E3 (#409) writes `seed_overrides` + `user_scope`, + E5 (#411) writes `approval_events` + `rag_events`, later parallel epics write + `job_ids` + `phase_summaries`. +- **`PATCH /demo/workspaces/{workspace_id}`** — partial-update lifecycle endpoint: + rename / notes / tags / archive / pin. Missing id → RFC 7807 404. Returns the + updated `WorkspaceDetailResponse`. +- **Replay provenance**: `DemoRunRequest` gains an additive Optional + `replayed_from_workspace_id` field; the frontend Replay handler sends the source + row's `workspace_id`; `create_workspace` records it on the NEW row. + +A run/request without any new field behaves **byte-identically to today** (legacy WS +start frames and HTTP bodies unchanged). One migration applies AND downgrades cleanly +on a fresh DB. 
+ +**Deliverable** (all additive): + +- `app/features/demo/models.py` — 12 new columns on `ShowcaseWorkspace` + tags GIN index + replayed-from index. +- `alembic/versions/_add_showcase_workspace_metadata_provenance.py` — `down_revision = "324a2fa37fcc"`; add-columns + indexes; clean downgrade. +- `app/features/demo/schemas.py` — `DemoRunRequest.replayed_from_workspace_id`; new `WorkspaceUpdateRequest`; `WorkspaceListItem` / `WorkspaceDetailResponse` additive response fields. +- `app/features/demo/workspace.py` — `create_workspace` records `replayed_from_workspace_id`; new `update_workspace` helper. +- `app/features/demo/routes.py` — `PATCH /demo/workspaces/{workspace_id}`. +- `frontend/src/types/api.ts` + `frontend/src/pages/showcase.tsx` — two-line additive Replay wiring (see "Why the (ui) sliver" below). +- Tests: schema unit tests, model constraint/roundtrip integration tests, workspace-helper integration tests, PATCH route tests (2xx + 404 + 422), migration up/down. +- Docs: `docs/_base/API_CONTRACTS.md` + `docs/_base/DOMAIN_MODEL.md` additive notes (the documented story-slot schema lives in DOMAIN_MODEL — umbrella #406 risk mitigation). + +**Success definition**: all Success Criteria below check off; the five CI gates are +green; integration suite green; a manual Replay from the `/showcase` Saved-workspaces +panel produces a new row whose `replayed_from_workspace_id` equals the source row's +`workspace_id`; `PATCH /demo/workspaces/{id}` round-trips rename/notes/tags/archive/pin. + +## Why + +- Umbrella #406: today workspaces cannot be renamed/archived/annotated/searched, the + row lacks replay lineage, seed overrides, user scope, approval history, and RAG + events. E1 is the Foundation — **every** Parallel epic writes into or reads from + the columns added here, so the frozen column/slot contract ships first. 
+- Replays are currently indistinguishable from fresh keep-runs except by + name/timestamp (documented gap, `docs/_base/RUNBOOKS.md` § Showcase workspace, + "Explicitly out of scope" — the `replayed_from` provenance column is this epic). +- The umbrella's junk-drawer risk ("JSONB story slots become a junk drawer") is + mitigated here by `config_schema_version` + a documented per-slot schema in + `docs/_base/DOMAIN_MODEL.md`. + +### Why the (ui) sliver in an (api,db) epic + +"Replay writes `replayed_from_workspace_id`" is a frozen epic-level success +criterion, and Replay is frontend-initiated: `handleReplayWorkspace` +(`frontend/src/pages/showcase.tsx:174-186`) re-submits the recorded config through +the WS start frame. Without the sender including the field, the backend has nothing +to record. The wiring is two additive lines (one TS interface field + one start-frame +key) — deliberately included here so the criterion is verifiable in E1; the lineage +*rendering* (badge + chain) stays in E2 (#408). + +## What + +### User-visible behavior + +- `PATCH /demo/workspaces/{workspace_id}` accepts a partial body of + `{name?, notes?, tags?, archived?, pinned?}`; only provided fields change; explicit + `null` clears `name` / `notes`. Missing id → `404 application/problem+json`. A + malformed body (bad name pattern, unknown key, >20 tags) → `422 + application/problem+json`. Empty body `{}` → `200` no-op returning the current row + (mirrors the `RunUpdate` precedent — see Decisions). +- `POST /demo/run` and the `WS /demo/stream` start frame accept an additive Optional + `replayed_from_workspace_id: str | null` (`^[0-9a-f]{32}$`); supplying it without + `preservation="keep"` is a 422 (a lineage pointer is meaningless when no row is + written — same validator pattern as `workspace_name`). +- Clicking **Replay** on the Saved-workspaces panel now records the source + `workspace_id` on the new row. The original row is never mutated (E4 #393 + invariant preserved). 
+- `GET /demo/workspaces` list items additively carry `archived`, `pinned`, `tags`, + `replayed_from_workspace_id`; the detail response additively carries those plus + `notes`, `config_schema_version`, and the six story slots. **List behavior is + otherwise unchanged in E1** — archived rows are still listed; default-filtering / + search / sort is E2 (#408). + +### Technical requirements + +- One Alembic migration off head `324a2fa37fcc` (verified `uv run alembic heads`, + 2026-06-12). Forward-only: a NEW revision — never edit + `324a2fa37fcc_create_showcase_workspace_table.py`. +- Every new column is nullable OR carries a `server_default` so the migration applies + on a table with existing rows; downgrade drops indexes then columns, cleanly. +- **No ForeignKeys anywhere** — `replayed_from_workspace_id` is an opaque soft + reference, consistent with the table-wide invariant + (`docs/_base/DOMAIN_MODEL.md` § `showcase_workspace`: "`created_objects` carries + SOFT references only — no ForeignKeys by design"). Even a *self-referential* FK is + ruled out: ancestor workspace rows must remain independently deletable + (metadata-only delete, #404) without cascading to or blocking descendants. State + this in the model docstring. +- `status` is NOT patchable — the pipeline finalize hook owns the + running/completed/failed lifecycle; `archived` is an orthogonal boolean so the + existing `ck_showcase_workspace_status` CHECK is untouched. +- Vertical slice: all backend changes inside `app/features/demo/` + + `alembic/versions/`; no cross-slice imports (demo imports only `app.core.*`, + `app.shared.*`, stdlib/3rd-party). +- RFC 7807 errors only — `NotFoundError` from `app/core/exceptions.py` (the demo + routes' existing pattern, `routes.py:134`), never bare `HTTPException`. +- Pydantic v2 `ConfigDict(strict=True)` on the new request body. 
All new fields are + JSON-native (`str`/`bool`/`list[str]`) → NO `Field(strict=False)` override needed; + the AST policy walker (`app/core/tests/test_strict_mode_policy.py`) only fires on + date/datetime/time/UUID/Decimal. +- Warn-and-continue invariant untouched: `create_workspace` / `finalize_workspace` + keep swallowing all DB errors. The new `update_workspace` helper is + request-scoped (caller-owned session, raises normally) — it backs an HTTP + endpoint, not the pipeline. + +### Success Criteria + +- [ ] Migration applies AND downgrades cleanly on a fresh DB (`upgrade head` → + `downgrade -1` → `upgrade head`); applies on a DB with pre-existing + `showcase_workspace` rows (server defaults backfill `archived=false`, + `pinned=false`, `tags=[]`, `config_schema_version=1`). +- [ ] `DemoRunRequest()` (no args) serializes identically to today plus + `replayed_from_workspace_id=None`; a legacy start frame (no new keys) validates; + `replayed_from_workspace_id` without `preservation="keep"` → 422; a non-32-hex + value → 422. +- [ ] A keep-run with `replayed_from_workspace_id="<32hex>"` produces a row whose + `replayed_from_workspace_id` column equals that value; the source row is unread + and unmodified (the value is recorded verbatim — no existence check, it is a soft + reference). +- [ ] Frontend Replay sends `replayed_from_workspace_id: ws.workspace_id`; + `pnpm tsc -b` introduces no NEW errors (see gotcha on the pre-existing-failure + baseline) and `pnpm test --run` green. +- [ ] `PATCH /demo/workspaces/{id}`: happy path updates exactly the provided fields + and returns the updated detail; `{}` is a 200 no-op; missing id → 404 + problem+json; bad name pattern / unknown key / 21 tags → 422 problem+json. +- [ ] `tags` round-trips as a JSONB string array and is GIN-indexed + (`ix_showcase_workspace_tags_gin`); a `.contains(["x"])` containment query works + (E2 will route it — E1 proves it in an integration test). 
+- [ ] All six story-slot columns exist, default NULL, and round-trip a JSONB payload + in an integration test; E1 production code writes none of them. +- [ ] `uv run ruff check . && uv run ruff format --check . && uv run mypy app/ && + uv run pyright app/ && uv run pytest -v -m "not integration"` all green; + integration suite green against docker-compose Postgres; + `test_strict_mode_policy.py` green. + +## Decisions (the open questions this PRP resolves) + +> These are FROZEN for the parallel epics. #408–#412 PRP authors: consume, don't re-decide. + +1. **`tags` representation — CONFIRMED: mirror `scenario_plan.tags` exactly.** + A dedicated JSONB string-array column, `nullable=False`, + `server_default=text("'[]'::jsonb")`, with a GIN index + (`ix_showcase_workspace_tags_gin`). Verified in code: + `app/features/scenarios/models.py:74-76,97` (column + index), migration + `alembic/versions/bb8c4587ef1d_add_scenario_library_columns.py:26-45` + (add_column + GIN), and the containment query + `app/features/scenarios/service.py:464` (`ScenarioPlan.tags.contains(tags)`). + No deviation: the pattern is proven, queryable, and E2's tag filter reuses the + same `.contains()` shape. Tags are free-text strings (scenario precedent has no + per-item pattern); the PATCH boundary caps the list at 20 items + (`Field(max_length=20)` — same cap as `ScenarioCreateRequest.tags`, + `app/features/scenarios/schemas.py:203-206`). + +2. **Story slots — six dedicated nullable JSONB columns** (NOT keys inside one + `story` blob, NOT keys inside `created_objects`). 
Rationale: the existing + precedent is purpose-named JSONB columns with documented internal schemas + (`created_objects`, `result_summary` — `app/features/demo/models.py:77-81`); + each slot has a different writer epic and a different write moment + (create-time vs mid-run append vs finalize), and separate columns keep each + write isolated, independently nullable (NULL = "never written", distinct from + empty), individually typed in the ORM (`dict[str, Any] | None` vs + `list[dict] | None`), and trivially additive in responses. A single `story` + column would force read-modify-write of one blob across four epics and would + itself need a documented sub-schema anyway — more coupling, zero benefit on a + low-cardinality audit table. Per-slot documented schema: see the Data-models + blueprint below + the DOMAIN_MODEL doc task. + +3. **`replayed_from_workspace_id` — SOFT reference, no FK, confirmed.** String(32) + nullable, btree index (`ix_showcase_workspace_replayed_from`), NO ForeignKey — + including no self-referential FK: `docs/_base/DOMAIN_MODEL.md` pins + "deletion in either direction never cascades", and an FK (even `ON DELETE SET + NULL`) would couple delete behavior to lineage. Dangling lineage pointers after + an ancestor delete are expected and harmless (same semantics as every + `created_objects` id). Recorded verbatim from the request — no existence + validation (a replay of a just-deleted workspace still records the id it came + from; E2's liveness check surfaces dangles). + +4. **PATCH semantics — `exclude_unset` partial update, `extra="forbid"`, empty body + = no-op 200.** `model_dump(exclude_unset=True)` distinguishes absent from + explicit-null (runtime-verified, see Gotchas); explicit `null` clears `name` / + `notes`; `extra="forbid"` catches typo'd field names (the `RunUpdate` precedent, + `app/features/registry/schemas.py:113-123`); an empty body is a valid no-op + (mirrors `RunUpdate`, which has no min-fields validator). 
`archived`/`pinned` + accept only `true`/`false` and `tags` accepts only a list (not null — all + three back NOT NULL columns; send `[]` to clear tags). Explicit `null` on any + of the three is rejected at the schema boundary (422), never reaching + `setattr` → IntegrityError 500. + +5. **E1 writes no story slot.** `seed_overrides`/`user_scope` writers land in E3 + (#409), `approval_events`/`rag_events` in E5 (#411), `job_ids`/ + `phase_summaries` in the remaining parallel epics (E2 #408 health summary / + E4 #410 run-config echo — whichever lands first follows the documented schema). + E1 ships columns + schema docs + roundtrip tests only. + +6. **`config_schema_version` starts at 1.** Integer NOT NULL, `server_default + text("1")`, ORM `default=1`. It versions the *workspace config + story-slot + schema* as a whole; any epic that changes a documented slot shape bumps the + ORM default and documents the delta in DOMAIN_MODEL. E1 does not branch on it. + +### Assumptions (explicit, decided without user input) + +- `notes` is `sa.Text()` in the DB with a 2000-char cap enforced at the Pydantic + boundary only (no DB CHECK) — matches the repo's boundary-validation style + (`RunUpdate.error_message` caps at the schema layer, `registry/schemas.py:123`). +- Renaming via PATCH uses the same `^[a-z0-9][a-z0-9\-_]*$` / ≤100 pattern as + `DemoRunRequest.workspace_name` (`demo/schemas.py:72-77`) — names stay + non-unique by design (E4 #393 invariant). +- The PATCH route reuses `WorkspaceDetailResponse` as its response model (the + updated row, full detail) rather than introducing a new response shape. +- Pin/archive carry NO behavioral semantics in E1 (no list reordering, no + default-filtering) — E2 (#408) wires the UX. E1 just persists the booleans. +- The umbrella's "destructive-replay confirmation" is E2 (#408) — NOT here. + E1's replay change is provenance-recording only. 
+- `replayed_from_workspace_id` requires `preservation="keep"`: a lineage pointer + on an ephemeral run has no row to land on. (The frontend Replay always sends + `preservation: 'keep'` — `showcase.tsx:179-185` — so this constraint is + invisible to the shipped UI.) + +## All Needed Context + +### Documentation & References + +```yaml +# MUST READ — codebase patterns (all verified 2026-06-12, branch dev @ bdf85f6) + +- file: app/features/demo/models.py + why: | + THE file you extend. ShowcaseWorkspace at line 37; status constants 32-34; + JSONB precedent created_objects/result_summary at 77-81; __table_args__ with + named CheckConstraint + composite index at 83-89. Module docstring documents + the no-FK soft-reference decision — extend that docstring for + replayed_from_workspace_id. GOTCHA in docstring: SQLAlchemy reserves the + attr name `metadata`. + +- file: alembic/versions/324a2fa37fcc_create_showcase_workspace_table.py + why: | + CURRENT HEAD (verified `uv run alembic heads` → 324a2fa37fcc). Your + down_revision. Header/docstring format, typing (`revision: str`, + `down_revision: str | None`), op.f() index-naming convention to mirror. + NEVER edit this file — forward-only. + +- file: alembic/versions/bb8c4587ef1d_add_scenario_library_columns.py + why: | + THE add-columns migration to mirror: op.add_column with JSONB + server_default text("'[]'::jsonb") (lines 26-34), GIN index creation + (39-45), downgrade drops index-then-columns (48-52) incl. the + postgresql_using='gin' kwarg on drop_index. + +- file: app/features/scenarios/models.py + why: | + tags JSONB-array pattern (lines 74-76: Mapped[list[str]], nullable=False, + default=list, server_default=text("'[]'::jsonb")) + GIN index in + __table_args__ (line 97). This is the tags representation E1 mirrors + verbatim (Decision 1). 
+ +- file: app/features/scenarios/service.py + why: | + Line 464: `ScenarioPlan.tags.contains(tags)` — the JSONB containment query + shape the tags column must support (prove it in an integration test; E2 + routes it). + +- file: app/features/demo/schemas.py + why: | + DemoRunRequest at 29-85: ConfigDict(strict=True) line 40; the + workspace_name pattern + model_validator _workspace_name_requires_keep + (72-85) — copy this exact validator shape for replayed_from_workspace_id. + WorkspaceListItem (169-189) / WorkspaceDetailResponse (192-203) / + WorkspaceListResponse (205-213) — the response models you extend + additively. Response models are plain BaseModel + from_attributes (NOT + strict) — keep that split. + +- file: app/features/demo/workspace.py + why: | + create_workspace (46-79): the insert you extend with one kwarg + (replayed_from_workspace_id=req.replayed_from_workspace_id). get_workspace + (158-171) — reuse inside update_workspace. delete_workspace (199-221) — + the caller-owned-session + commit + logger.info shape update_workspace + mirrors. NOTE the split: create/finalize open their OWN sessions + (pipeline-scoped, warn-and-continue); get/list/delete take a caller-owned + AsyncSession (request-scoped, raise normally) — update_workspace is the + second kind. + +- file: app/features/demo/routes.py + why: | + The router you extend. delete_showcase_workspace (138-163) — the exact + route shape for PATCH: Depends(get_db), NotFoundError on missing (RFC 7807 + via registered handler), docstring style. get_showcase_workspace (110-135) + — WorkspaceDetailResponse return shape. + +- file: app/features/registry/schemas.py + why: | + RunUpdate (113-123) — THE partial-update request precedent: + ConfigDict(extra="forbid"), all-Optional fields, no min-fields validator + (empty body = no-op). E1's WorkspaceUpdateRequest adds strict=True on top + (post-PRP-14 request-body policy; RunUpdate predates it). 
+ +- file: app/features/demo/pipeline.py + why: | + DemoContext workspace fields at 258-263; the keep-branch create hook at + 2652-2657; finalize hook at 2741-2746. E1 does NOT touch the pipeline — + create_workspace reads the new field straight off `req`. Read only to + confirm no hook change is needed. + +- file: app/core/exceptions.py + why: | + NotFoundError (line 72) → RFC 7807 404 via registered handler. The 422s + come FREE from Pydantic validation at the boundary (FastAPI → 422 + problem+json). + +- file: app/features/demo/tests/test_schemas.py + why: | + Existing DemoRunRequest tests INCLUDING the mandatory JSON-dict path + (Model.model_validate({...}) per .claude/rules/security-patterns.md + § strict mode). Extend for the new field + add a WorkspaceUpdateRequest + block. + +- file: app/features/demo/tests/test_workspace.py + why: | + Integration-test patterns for create/finalize/get/list/delete — session + fixture, @pytest.mark.integration, row-cleanup conventions. Extend with + update_workspace + replayed_from cases. + +- file: app/features/demo/tests/test_models.py + why: | + Constraint/roundtrip integration tests for ShowcaseWorkspace — extend with + new-column defaults, tags containment, story-slot roundtrip. + +- file: app/features/demo/tests/test_routes.py + why: | + Route-test conventions: ASGITransport client from conftest, workspace + module monkeypatched for unit-shaped route tests, integration-marked tests + for DB-backed paths. The DELETE 404 test is the template for PATCH 404. + +- file: frontend/src/pages/showcase.tsx + why: | + handleReplayWorkspace at 174-186 — the start() call that gains ONE key: + `replayed_from_workspace_id: ws.workspace_id`. handleLoadWorkspace + (160-168) stays untouched (Load is read-only). + +- file: frontend/src/types/api.ts + why: | + DemoRunRequest interface at 778-788 — add + `replayed_from_workspace_id?: string` with an `// E1 (#407)` comment in + the existing style. 
+ +- file: docs/_base/DOMAIN_MODEL.md + why: | + § showcase_workspace aggregate — additively document the new columns, the + six story-slot schemas, the config_schema_version semantics, and restate + that replayed_from_workspace_id is a soft reference (no FK). This is the + umbrella's junk-drawer risk mitigation — non-optional. + +- file: docs/_base/API_CONTRACTS.md + why: | + The /demo rows + "WebSocket Events (/demo/stream)" section — additive + notes for the PATCH endpoint, the new request field, and the response + additions, in the established "E1 (#407) — ..." style. + +# Issue / initiative context +- url: https://github.com/w7-mgfcode/ForecastLabAI/issues/407 + why: The epic this PRP implements (Foundation; frozen column/slot/endpoint contract). +- url: https://github.com/w7-mgfcode/ForecastLabAI/issues/406 + why: Umbrella — success criteria, out-of-scope list, risk table (junk-drawer mitigation = config_schema_version + documented slot schema). + +# Exemplar PRPs (style + validation-gate conventions) +- file: PRPs/PRP-showcase-workspace-E1-persistence-backbone.md + why: Closest analog — created the table this PRP extends; task style, gates, anti-patterns. +- file: PRPs/PRP-showcase-workspace-E4-restore-replay.md + why: Replay flow context — verbatim re-submission through the WS path; original row never mutated. 
+``` + +### Current Codebase tree (relevant subset) + +```bash +app/features/demo/ +├── models.py # ShowcaseWorkspace @37 (16 columns today) +├── workspace.py # create @46 / finalize @106 / get @158 / list @174 / delete @199 / count @224 +├── schemas.py # DemoRunRequest @29; WorkspaceListItem @169; WorkspaceDetailResponse @192 +├── routes.py # GET list @80; GET detail @110; DELETE @138; POST /run @51; WS @166 +├── pipeline.py # keep-branch create hook @2652; finalize hook @2741 (NO E1 changes) +├── service.py # (NO E1 changes) +└── tests/ # conftest, test_models, test_workspace, test_schemas, test_routes, test_pipeline +alembic/ +├── env.py # demo models import already present @19 +└── versions/ # head: 324a2fa37fcc +frontend/src/ +├── pages/showcase.tsx # handleReplayWorkspace @174 +└── types/api.ts # DemoRunRequest @778 +``` + +### Desired Codebase tree (files added/modified) + +```bash +app/features/demo/ +├── models.py # MOD — +12 columns, +2 indexes, extended docstring +├── schemas.py # MOD — DemoRunRequest +replayed_from_workspace_id (+validator); +│ # NEW WorkspaceUpdateRequest; ListItem/Detail additive fields +├── workspace.py # MOD — create_workspace records replayed_from; NEW update_workspace +├── routes.py # MOD — PATCH /demo/workspaces/{workspace_id} +└── tests/ + ├── test_schemas.py # MOD — new-field + WorkspaceUpdateRequest unit tests + ├── test_models.py # MOD — column defaults, tags containment, slot roundtrip (integration) + ├── test_workspace.py # MOD — replayed_from recording; update_workspace semantics (integration) + └── test_routes.py # MOD — PATCH 200/404/422 (+ list/detail field passthrough) +alembic/versions/_add_showcase_workspace_metadata_provenance.py # NEW +frontend/src/types/api.ts # MOD — +replayed_from_workspace_id?: string +frontend/src/pages/showcase.tsx # MOD — one start-frame key in handleReplayWorkspace +docs/_base/API_CONTRACTS.md # MOD — additive contract notes +docs/_base/DOMAIN_MODEL.md # MOD — columns + documented 
story-slot schemas +``` + +### Known Gotchas & Library Quirks + +```python +# CRITICAL — forward-only migrations: down_revision = "324a2fa37fcc" (verified +# `uv run alembic heads` → 324a2fa37fcc, 2026-06-12). NEVER edit the merged +# create-table migration. Revision ids are hand-written 12-hex continuing the +# chain (or keep an `alembic revision -m ...` generated id). + +# CRITICAL — every new NOT NULL column needs a server_default or the migration +# fails on tables with existing rows: archived/pinned text("false"), +# config_schema_version text("1"), tags text("'[]'::jsonb"). All six story +# slots + notes + replayed_from_workspace_id are nullable (no default needed). + +# CRITICAL — strict-mode policy: WorkspaceUpdateRequest and the new +# DemoRunRequest field are all JSON-native (str/bool/list[str]) → NO +# Field(strict=False) override. The AST walker +# (app/core/tests/test_strict_mode_policy.py) only fires on +# date/datetime/time/UUID/Decimal — nothing here triggers it. + +# CRITICAL — do NOT add extra="forbid" to DemoRunRequest (unknown-key tolerance +# is the WS forward/backward-compat contract, routes.py:182). DO add it to +# WorkspaceUpdateRequest (HTTP-only body; typo'd PATCH fields must 422, not +# silently no-op — RunUpdate precedent). + +# CRITICAL — JSONB change detection: always ASSIGN whole values +# (row.tags = [...]), never mutate in place (row.tags.append(...)) — in-place +# mutation is invisible to SQLAlchemy without flag_modified. The existing +# finalize_workspace assigns; keep that style in update_workspace. + +# GOTCHA — SQLAlchemy reserves the declarative attr name `metadata` +# (demo/models.py docstring). None of the new names collide — keep it that way. + +# GOTCHA — `status` stays out of WorkspaceUpdateRequest; the CHECK constraint +# ck_showcase_workspace_status is untouched. `archived` is orthogonal. + +# GOTCHA — update_workspace is caller-owned-session + raises normally (it backs +# an HTTP route). 
Do NOT wrap it in the warn-and-continue pattern — that +# contract is for the PIPELINE-scoped create/finalize only. + +# GOTCHA — repo has mixed CRLF/LF line endings; run `git diff --stat` before +# committing — Edit/Write emit LF, so verify schema/route/model diffs are +# surgical, not whole-file noise. + +# GOTCHA — frontend type gate: `pnpm tsc --noEmit` is vacuous (solution-style +# tsconfig checks zero files) and `pnpm tsc -b` already fails on dev with +# pre-existing errors. Gate on "no NEW errors vs the dev baseline" + +# `pnpm lint` + `pnpm test --run`. + +# GOTCHA — mypy --strict AND pyright --strict gate merge: full annotations incl. +# `-> None` on tests and typed fixtures. + +# CONVENTION — branch: feat/showcase-completion-e1-metadata-provenance (off dev). +# Commits reference #407, e.g. `feat(db): ... (#407)` for the migration, +# `feat(api): ... (#407)` for slice code, `feat(ui): ... (#407)` for the +# replay wiring (or `feat(api,ui)` if combined). NO AI trailer (hook-enforced). + +# RUNTIME-VERIFICATION LOG (per prp-create step 3 — re-run on library upgrade): +# 1. `uv run alembic heads` → 324a2fa37fcc (2026-06-12). +# 2. 
Pydantic exclude_unset distinguishes absent vs explicit-null, pattern +# constraint skips the None arm of `str | None`, extra="forbid" 422s +# unknown keys, strict=True accepts list[str] and rejects a bare str: +# uv run python -c " +# from pydantic import BaseModel, ConfigDict, Field +# class P(BaseModel): +# model_config = ConfigDict(strict=True, extra='forbid') +# name: str | None = Field(default=None, max_length=100, pattern=r'^[a-z0-9][a-z0-9\-_]*$') +# notes: str | None = Field(default=None, max_length=2000) +# tags: list[str] | None = Field(default=None, max_length=20) +# p = P.model_validate({'notes': None}); assert p.model_fields_set == {'notes'} +# assert p.model_dump(exclude_unset=True) == {'notes': None} +# assert P.model_validate({'name': None}).name is None # null clears +# assert P.model_validate({'tags': ['a','b']}).tags == ['a','b'] +# " +# → verified on pydantic in-repo (2026-06-12). +# 3. SQLAlchemy 2.0.46: Boolean/Integer/JSONB server_default DDL compiles as +# expected (`DEFAULT false NOT NULL`, `DEFAULT 1 NOT NULL`, +# `DEFAULT '[]'::jsonb NOT NULL`): +# uv run python -c "import sqlalchemy as sa; from sqlalchemy.dialects import postgresql; from sqlalchemy.schema import CreateTable; md=sa.MetaData(); t=sa.Table('x',md, sa.Column('archived',sa.Boolean(),nullable=False,server_default=sa.text('false')), sa.Column('v',sa.Integer(),nullable=False,server_default=sa.text('1')), sa.Column('tags',postgresql.JSONB(),nullable=False,server_default=sa.text(\"'[]'::jsonb\"))); print(CreateTable(t).compile(dialect=postgresql.dialect()))" +# → verified (2026-06-12). +# 4. JSONB .contains() containment is already production code in this repo +# (scenarios/service.py:464) — no external claim to probe. +``` + +## Implementation Blueprint + +### Data models and structure + +```python +# app/features/demo/models.py — ADD after result_summary (line 81), keep the +# existing __table_args__ entries and append the two new indexes. 
+ + # ── E1 (#407) — lifecycle metadata ──────────────────────────────────── + # Orthogonal to `status` (which the pipeline owns): archive/pin are + # operator curation flags, PATCH-mutable, default false. + archived: Mapped[bool] = mapped_column( + nullable=False, default=False, server_default=text("false") + ) + pinned: Mapped[bool] = mapped_column( + nullable=False, default=False, server_default=text("false") + ) + # Free-text operator annotation; length capped at the Pydantic boundary (2000). + notes: Mapped[str | None] = mapped_column(Text, nullable=True) + # Queryable JSONB string array — EXACT scenario_plan.tags pattern + # (app/features/scenarios/models.py:74-76); GIN-indexed below. + tags: Mapped[list[str]] = mapped_column( + JSONB, nullable=False, default=list, server_default=text("'[]'::jsonb") + ) + # Version of the workspace config + story-slot schema (umbrella #406 + # junk-drawer mitigation). Bump the ORM default when a slot shape changes. + config_schema_version: Mapped[int] = mapped_column( + Integer, nullable=False, default=1, server_default=text("1") + ) + + # ── E1 (#407) — replay provenance ───────────────────────────────────── + # SOFT reference to the workspace this run replayed (uuid4().hex of the + # source row). Deliberately NO ForeignKey — not even self-referential: + # ancestor rows must stay independently deletable (metadata-only delete), + # and dangling lineage pointers are expected, like every created_objects id. + replayed_from_workspace_id: Mapped[str | None] = mapped_column( + String(32), nullable=True + ) + + # ── E1 (#407) — documented JSONB story slots ────────────────────────── + # Six dedicated nullable JSONB columns (precedent: created_objects / + # result_summary). NULL = "slot never written" (distinct from empty). 
+ # E1 writes NONE of them; documented schema per slot (authoritative copy + # in docs/_base/DOMAIN_MODEL.md): + # seed_overrides (E3 #409 writes) — dict: the curated seeder-override + # payload from the start frame, stored verbatim + # (model_dump(mode="json")); replay echoes it. + # user_scope (E3 #409 writes) — dict: operator-selected focus, + # {"store_id": int, "product_id": int} (additive keys + # allowed later). + # approval_events (E5 #411 writes) — list[dict], append-only: + # {"action_id": str, "tool_name": str, + # "decision": "approved"|"rejected", + # "decided_at": iso8601-str, "session_id": str}. + # rag_events (E5 #411 writes) — list[dict], append-only: + # {"event": "index"|"retrieve"|"skip", "detail": str, + # "count": int, "occurred_at": iso8601-str}. + # job_ids (later parallel epic) — list[str]: job / batch + # sub-job ids the run submitted (soft references). + # phase_summaries (later parallel epic) — list[dict], one per phase: + # {"phase_name": str, "status": "pass"|"fail"|"warn"|"skip", + # "steps": int, "duration_ms": float}. + seed_overrides: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True) + user_scope: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True) + approval_events: Mapped[list[dict[str, Any]] | None] = mapped_column(JSONB, nullable=True) + rag_events: Mapped[list[dict[str, Any]] | None] = mapped_column(JSONB, nullable=True) + job_ids: Mapped[list[str] | None] = mapped_column(JSONB, nullable=True) + phase_summaries: Mapped[list[dict[str, Any]] | None] = mapped_column(JSONB, nullable=True) + + # __table_args__ — APPEND (keep existing CheckConstraint + composite index): + # Index("ix_showcase_workspace_tags_gin", "tags", postgresql_using="gin"), + # Index("ix_showcase_workspace_replayed_from", "replayed_from_workspace_id"), + # imports to extend: Text from sqlalchemy (others already imported). 
+``` + +```python +# app/features/demo/schemas.py — DemoRunRequest addition (after workspace_name, +# line 78) + validator extension. + + # E1 (#407): replay provenance. The frontend Replay handler sends the + # SOURCE row's workspace_id; create_workspace records it verbatim on the + # NEW row (soft reference — no existence check). JSON-native str → no + # Field(strict=False) needed. + replayed_from_workspace_id: str | None = Field( + default=None, + pattern=r"^[0-9a-f]{32}$", # uuid4().hex shape of workspace_id + description="workspace_id this run replays; requires preservation='keep'.", + ) + + @model_validator(mode="after") + def _replayed_from_requires_keep(self) -> DemoRunRequest: + """Reject a lineage pointer on a run that writes no workspace row.""" + if self.replayed_from_workspace_id is not None and self.preservation != "keep": + raise ValueError("replayed_from_workspace_id requires preservation='keep'") + return self + + +# NEW request model — place after DemoRunRequest. +# (add `field_validator` to the pydantic import at schemas.py:14 — the file +# currently imports only BaseModel/ConfigDict/Field/model_validator) +class WorkspaceUpdateRequest(BaseModel): + """Partial lifecycle update for PATCH /demo/workspaces/{workspace_id}. + + exclude_unset semantics: only fields present in the body are applied; + explicit ``null`` clears ``name`` / ``notes``. Explicit ``null`` on + ``archived`` / ``pinned`` / ``tags`` is rejected (422) — they back NOT NULL + columns; send ``[]`` to clear tags. ``extra="forbid"`` so a typo'd field + 422s instead of silently no-opping (RunUpdate precedent, + app/features/registry/schemas.py:113). All fields JSON-native -> the + model-level strict=True needs no per-field override. ``status`` is + deliberately absent — the pipeline owns the run lifecycle. 
+ """ + + model_config = ConfigDict(strict=True, extra="forbid") + + name: str | None = Field( + default=None, + max_length=100, + pattern=r"^[a-z0-9][a-z0-9\-_]*$", # same as workspace_name + description="Rename the workspace; explicit null clears the label.", + ) + notes: str | None = Field( + default=None, max_length=2000, + description="Free-text annotation; explicit null clears it.", + ) + tags: list[str] | None = Field( + default=None, max_length=20, + description="Replace the full tag list (not a merge).", + ) + archived: bool | None = Field(default=None, description="Archive flag.") + pinned: bool | None = Field(default=None, description="Pin flag.") + + @field_validator("archived", "pinned", "tags") + @classmethod + def _reject_explicit_null(cls, v: bool | list[str] | None) -> bool | list[str]: + # Fires only on explicitly provided values (pydantic skips validators for + # defaults unless validate_default=True), so absent stays None/unset while + # an explicit {"archived": null} / {"tags": null} 422s instead of reaching + # the NOT NULL column via exclude_unset -> setattr -> IntegrityError 500. + # tags: send [] to clear, never null. + if v is None: + raise ValueError( + "archived/pinned accept only true/false and tags accepts a list " + "(send [] to clear) — explicit null is not allowed" + ) + return v + + +# Response additions (additive — keep from_attributes, NOT strict): +# WorkspaceListItem += archived: bool, pinned: bool, tags: list[str] +# (default_factory=list), replayed_from_workspace_id: str | None +# WorkspaceDetailResponse += notes: str | None, config_schema_version: int, +# seed_overrides / user_scope: dict[str, Any] | None, +# approval_events / rag_events / phase_summaries: +# list[dict[str, Any]] | None, job_ids: list[str] | None +``` + +```python +# app/features/demo/workspace.py — update_workspace (NEW; caller-owned session, +# raises normally — this backs an HTTP route, NOT the pipeline). 
+async def update_workspace( + db: AsyncSession, + workspace_id: str, + update: WorkspaceUpdateRequest, +) -> ShowcaseWorkspace | None: + """Apply a partial lifecycle update; return the row or None when missing.""" + row = await get_workspace(db, workspace_id) + if row is None: + return None + changes = update.model_dump(exclude_unset=True) # absent != explicit null + for field, value in changes.items(): + setattr(row, field, value) # whole-value ASSIGNMENT (JSONB gotcha) + await db.commit() + await db.refresh(row) + logger.info("demo.workspace_updated", workspace_id=workspace_id, fields=sorted(changes)) + return row + +# create_workspace — ONE added kwarg in the ShowcaseWorkspace(...) constructor: +# replayed_from_workspace_id=req.replayed_from_workspace_id, +``` + +```python +# app/features/demo/routes.py — PATCH route (mirror the DELETE shape @138). +@router.patch( + "/workspaces/{workspace_id}", + response_model=WorkspaceDetailResponse, + summary="Update a saved showcase workspace's lifecycle metadata", + description=( + "Partial update: rename / notes / tags / archive / pin. Only fields " + "present in the body change; explicit null clears name/notes. The run " + "lifecycle status is not patchable." + ), +) +async def update_showcase_workspace( + workspace_id: str, + update: WorkspaceUpdateRequest, + db: AsyncSession = Depends(get_db), +) -> WorkspaceDetailResponse: + row = await workspace.update_workspace(db, workspace_id, update) + if row is None: + raise NotFoundError(message=f"Workspace not found: {workspace_id}") + return WorkspaceDetailResponse.model_validate(row) +``` + +### List of tasks (dependency order) + +```yaml +Task 1 — branch & issue hygiene: + RUN: git switch dev && git pull && git switch -c feat/showcase-completion-e1-metadata-provenance + VERIFY: gh issue view 407 --json state # open + NOTE: git status shows untracked docker-compose.lan.yml on this host — leave it alone. 
+ +Task 2 — MODIFY app/features/demo/models.py: + - ADD the 12 columns per the blueprint (lifecycle block, provenance column, six slots) + - ADD `Text` to the sqlalchemy import line (others already imported) + - APPEND the two indexes to __table_args__ (tags GIN + replayed_from btree) + - EXTEND the module docstring: replayed_from_workspace_id is a soft reference + (no FK, not even self-referential); story slots NULL until their writer epic lands + - PRESERVE: existing columns, constants, CheckConstraint, composite index — untouched + +Task 3 — CREATE alembic/versions/<revision>_add_showcase_workspace_metadata_provenance.py: + - down_revision = "324a2fa37fcc" + - MIRROR: bb8c4587ef1d_add_scenario_library_columns.py (add_column + GIN + downgrade order) + - upgrade(): op.add_column x12 (server_defaults: archived/pinned text("false"), + config_schema_version text("1"), tags text("'[]'::jsonb"); the rest nullable), + then op.create_index("ix_showcase_workspace_tags_gin", ..., postgresql_using="gin") + and op.create_index("ix_showcase_workspace_replayed_from", ...) + - downgrade(): drop the two indexes (GIN drop with postgresql_using="gin", + matching bb8c4587ef1d:50), then drop the 12 columns in reverse order + - VERIFY: docker compose up -d && + uv run alembic upgrade head && uv run alembic downgrade -1 && uv run alembic upgrade head + +Task 4 — MODIFY app/features/demo/schemas.py: + - ADD DemoRunRequest.replayed_from_workspace_id + _replayed_from_requires_keep + validator (blueprint); UPDATE the docstring sentence listing JSON-native fields + - ADD WorkspaceUpdateRequest (blueprint) — placed after DemoRunRequest + - EXTEND WorkspaceListItem (+archived/pinned/tags/replayed_from_workspace_id) + and WorkspaceDetailResponse (+notes/config_schema_version/six slots) additively + +Task 5 — MODIFY app/features/demo/workspace.py: + - create_workspace: add replayed_from_workspace_id=req.replayed_from_workspace_id + to the ShowcaseWorkspace(...) 
constructor (one line; warn-and-continue untouched) + - ADD update_workspace (blueprint) + the WorkspaceUpdateRequest import + - UPDATE module docstring routing note (PATCH now routed too) + +Task 6 — MODIFY app/features/demo/routes.py: + - ADD the PATCH route (blueprint) between GET detail and DELETE + - ADD WorkspaceUpdateRequest to the schemas import block + - UPDATE the module docstring endpoint list + +Task 7 — MODIFY frontend (two additive lines): + - frontend/src/types/api.ts DemoRunRequest (@778): add + `// E1 (#407) — replay provenance: the source workspace_id a Replay re-runs.` + `replayed_from_workspace_id?: string` + - frontend/src/pages/showcase.tsx handleReplayWorkspace start() call (@179-185): + add `replayed_from_workspace_id: ws.workspace_id,` + - DO NOT touch handleLoadWorkspace (Load is read-only) or WorkspacePanel + +Task 8 — tests (full matrix in Validation Loop): + - MODIFY tests/test_schemas.py (unit) + - MODIFY tests/test_models.py (@pytest.mark.integration) + - MODIFY tests/test_workspace.py (@pytest.mark.integration) + - MODIFY tests/test_routes.py (PATCH 200/404/422; unit-shaped via monkeypatched + workspace.update_workspace where the existing file does so, integration otherwise — + follow whichever convention the existing GET/DELETE tests use) + +Task 9 — docs (additive): + - docs/_base/API_CONTRACTS.md: + * NEW row: `demo | PATCH | /demo/workspaces/{workspace_id} | E1 (#407) — partial + lifecycle update (name/notes/tags/archived/pinned; exclude_unset, explicit null + clears name/notes; status NOT patchable); 404 problem+json when missing; 422 on + unknown keys / bad name pattern / >20 tags; empty body = 200 no-op` + * POST /demo/run row + WS /demo/stream section: additive Optional + `replayed_from_workspace_id` (`^[0-9a-f]{32}$`, requires preservation='keep'); + Replay now sends it; recorded verbatim as a soft reference + * GET /demo/workspaces rows: note the additive response fields + - docs/_base/DOMAIN_MODEL.md § showcase_workspace: 
+ * Stored metadata: add lifecycle columns + config_schema_version semantics + * JSONB fields: add the six story slots WITH their documented schemas (copy the + model-comment schemas verbatim — this is the authoritative copy) + * Invariants: replayed_from_workspace_id is a SOFT reference (no FK, dangles OK); + status not patchable; archived orthogonal to status + * Trim the "Out of scope" line that lists `replayed_from` as not-modeled (now shipped) + - docs/_base/RUNBOOKS.md § Showcase workspace: remove `replayed_from` from the + "Explicitly out of scope" list (one-line edit; the full runbook sweep is E7) + +Task 10 — gates, commit, PR: + - RUN the full Validation Loop (Levels 1-4) + - git diff --stat # surgical diffs only (CRLF noise check) + - COMMITS (reference #407, no AI trailer), e.g.: + feat(db): extend showcase_workspace with metadata and provenance columns (#407) + feat(api): add workspace patch lifecycle endpoint and replay provenance (#407) + feat(ui): send replayed_from_workspace_id on showcase replay (#407) + docs(repo): document workspace story slots and patch contract (#407) + - PR into dev; title `feat(api,db): showcase-completion E1 — workspace metadata & provenance backbone (#407)` +``` + +### Integration Points + +```yaml +DATABASE: + - migration: 12 add_column on showcase_workspace + ix_showcase_workspace_tags_gin (GIN) + + ix_showcase_workspace_replayed_from (btree); clean downgrade + - registration: alembic/env.py already imports demo models (line 19) — NO change + +CONFIG: none — no new settings, no env vars. + +ROUTES: PATCH /demo/workspaces/{workspace_id} on the existing demo router — no + app/main.py change (router already wired). + +PIPELINE: none — create_workspace reads the new field straight off req; the + keep-branch hook (pipeline.py:2652) and finalize hook (2741) are untouched. + +FRONTEND: two additive lines (Task 7). No new components; lineage badge/chain is E2. + +DOCS: API_CONTRACTS + DOMAIN_MODEL (+ one-line RUNBOOKS trim). 
Full sweep is E7. +``` + +## Validation Loop + +### Level 1: Syntax & Style + +```bash +uv run ruff check . && uv run ruff format --check . +uv run mypy app/ && uv run pyright app/ +# Expected: clean. Both type checkers are --strict and gate merge. +``` + +### Level 2: Unit Tests (no DB) + +```python +# tests/test_schemas.py — add: +def test_demo_run_request_replayed_from_default_none() -> None: ... + # DemoRunRequest() -> replayed_from_workspace_id is None; legacy frame + # model_validate({"seed": 7}) still validates + +def test_demo_run_request_replayed_from_json_path() -> None: ... + # MANDATORY json-dict path (security-patterns.md § strict mode): + # model_validate({"preservation": "keep", "replayed_from_workspace_id": "a"*32}) + +def test_demo_run_request_replayed_from_requires_keep() -> None: ... + # pytest.raises(ValidationError): model_validate({"replayed_from_workspace_id": "a"*32}) + +def test_demo_run_request_replayed_from_pattern_rejected() -> None: ... + # "not-hex!", "ABC..." (uppercase), 31-char and 33-char values all raise + +def test_workspace_update_request_partial_fields_set() -> None: ... + # model_validate({"notes": None}).model_dump(exclude_unset=True) == {"notes": None} + # model_validate({}).model_dump(exclude_unset=True) == {} + +def test_workspace_update_request_rejects_unknown_key() -> None: ... + # model_validate({"status": "archived"}) raises (extra="forbid" — status not patchable) + +def test_workspace_update_request_name_pattern_and_tags_cap() -> None: ... + # "Bad Name!" raises; 21 tags raises; ["workspace:x", "demo"] passes + +def test_workspace_update_request_rejects_explicit_null_flags() -> None: ... 
+ # pytest.raises(ValidationError): model_validate({"archived": None}) + # pytest.raises(ValidationError): model_validate({"pinned": None}) + # pytest.raises(ValidationError): model_validate({"tags": None}) + # model_validate({"tags": []}) passes (the sanctioned clear path) + # (NOT NULL columns — explicit null must 422, never reach setattr) + +# tests/test_routes.py — add (follow the file's existing GET/DELETE conventions): +async def test_patch_workspace_happy_path(...) -> None: ... + # PATCH {"name": "renamed", "pinned": true, "tags": ["t1"]} -> 200; response + # echoes the changes and the untouched fields +async def test_patch_workspace_missing_404_problem_json(...) -> None: ... + # status 404; content-type application/problem+json +async def test_patch_workspace_unknown_field_422(...) -> None: ... + # body {"bogus": 1} -> 422 problem+json +async def test_patch_workspace_explicit_null_archived_422(...) -> None: ... + # body {"archived": null} -> 422 problem+json (NOT NULL column guard) +async def test_patch_workspace_empty_body_noop_200(...) -> None: ... +async def test_run_demo_rejects_replayed_from_without_keep_422(...) -> None: ... 
+``` + +```bash +uv run pytest app/features/demo -v -m "not integration" +uv run pytest app/core/tests/test_strict_mode_policy.py -v # AST walker still green +``` + +### Level 3: Integration (real Postgres) + +```python +# tests/test_models.py — @pytest.mark.integration, extend: +# - insert with NO new kwargs -> archived=False, pinned=False, tags=[], +# config_schema_version=1, all six slots None, replayed_from None +# (server_default + ORM default agreement) +# - tags JSONB roundtrip + containment: insert tags=["workspace:x","demo"]; +# select(...).where(ShowcaseWorkspace.tags.contains(["demo"])) finds it +# (scenarios/service.py:464 query shape) +# - story-slot roundtrip: write a dict into seed_overrides and a list[dict] +# into approval_events; read back identical +# - status CHECK still enforced (regression — constraint untouched) + +# tests/test_workspace.py — @pytest.mark.integration, extend: +# - create_workspace with req.replayed_from_workspace_id set -> column recorded +# verbatim; without it -> None (legacy identical) +# - update_workspace partial: set name+pinned only -> other fields untouched; +# explicit name=None clears; tags replaced whole (not merged); +# missing workspace_id -> returns None (route maps to 404) +# - update_workspace empty request -> no-op, row returned +``` + +```bash +docker compose up -d +uv run alembic upgrade head +uv run alembic downgrade -1 && uv run alembic upgrade head # downgrade is clean +uv run pytest app/features/demo -v -m integration +``` + +### Level 4: Manual smoke (seeded local stack, uvicorn on :8123 + vite) + +```bash +# 1. 
Keep-run, then PATCH lifecycle round-trip: +curl -s -X POST http://localhost:8123/demo/run -H 'Content-Type: application/json' \ + -d '{"skip_seed": true, "preservation": "keep", "workspace_name": "e1-smoke"}' \ + | python3 -c "import sys,json; print(json.load(sys.stdin)['workspace_id'])" +WS=<workspace_id printed by the command above> +curl -s -X PATCH http://localhost:8123/demo/workspaces/$WS \ + -H 'Content-Type: application/json' \ + -d '{"name": "e1-renamed", "notes": "smoke", "tags": ["smoke"], "pinned": true}' | python3 -m json.tool +curl -s -X PATCH http://localhost:8123/demo/workspaces/deadbeef -H 'Content-Type: application/json' -d '{}' \ + | python3 -m json.tool # 404 problem+json + +# 2. Replay provenance (browser): /showcase -> Saved workspaces -> Replay on +# the e1-renamed row; after the run: +docker exec forecastlab-postgres psql -U forecastlab -d forecastlab -c \ + "SELECT workspace_id, name, replayed_from_workspace_id FROM showcase_workspace ORDER BY created_at DESC LIMIT 2;" +# Expect: newest row's replayed_from_workspace_id == $WS; the $WS row unchanged. + +# 3. Frontend gates: +cd frontend && pnpm lint && pnpm test --run +# pnpm tsc -b — confirm no NEW errors vs the dev baseline (gate is vacuous-aware, +# see Known Gotchas). +``` + +## Final validation Checklist + +- [ ] All five gates green: `uv run ruff check . && uv run ruff format --check . 
&& uv run mypy app/ && uv run pyright app/ && uv run pytest -v -m "not integration"` +- [ ] Integration suite green: `uv run pytest -v -m integration` (fresh docker-compose DB; reset first if the shared DB is polluted) +- [ ] Migration upgrade + downgrade clean on a fresh DB AND applies on a DB with existing workspace rows +- [ ] Legacy surfaces byte-identical: start frame without new keys, GET list/detail for old rows (new fields all default/null), `test_strict_mode_policy.py` green +- [ ] PATCH 200 / 404 / 422 paths verified (Level 2 + Level 4) +- [ ] Replay records `replayed_from_workspace_id`; source row untouched (Level 4 step 2) +- [ ] `git diff --stat` shows surgical diffs (no CRLF whole-file noise) +- [ ] docs/_base/API_CONTRACTS.md + DOMAIN_MODEL.md updated additively (slot schemas documented); RUNBOOKS out-of-scope line trimmed +- [ ] Commits `feat(db)/feat(api)/feat(ui)/docs(repo): ... (#407)`, no AI trailer; PR into dev + +--- + +## Anti-Patterns to Avoid + +- ❌ Don't add ANY ForeignKey — not even self-referential on `replayed_from_workspace_id`. Soft references only. +- ❌ Don't edit `324a2fa37fcc_create_showcase_workspace_table.py` — new revision off head `324a2fa37fcc`. +- ❌ Don't make `status` patchable or widen `ck_showcase_workspace_status` — `archived` is the orthogonal flag. +- ❌ Don't add `extra="forbid"` to `DemoRunRequest` (WS compat) — but DO add it to `WorkspaceUpdateRequest`. +- ❌ Don't write any story slot from E1 production code — columns + docs + roundtrip tests only. +- ❌ Don't validate that `replayed_from_workspace_id` points at an existing row — it's a soft reference; dangles are designed. +- ❌ Don't wrap `update_workspace` in warn-and-continue — that contract is pipeline-only; HTTP helpers raise. +- ❌ Don't add list filtering/sorting/search or archive-hiding — that's E2 (#408). +- ❌ Don't add a replay confirmation dialog or lineage UI — E2 (#408). +- ❌ Don't mutate JSONB values in place — always assign whole values. 
+- ❌ Don't import another feature slice from `app/features/demo/` — core/shared only. + +## Notes for parallel-epic PRP authors (#408–#412) + +- The column set, slot names, and per-slot schemas above are the frozen E1 contract. + `job_ids` / `phase_summaries` have a documented schema but NO assigned writer in + E1 — E2 (#408, health summary) and E4 (#410, config echo) should agree on which + populates which and follow the documented shapes. +- Slot writes that happen DURING a pipeline run inherit the warn-and-continue + invariant (extend `finalize_workspace` / add sibling helpers in `workspace.py`); + slot writes via HTTP go through caller-owned-session helpers like + `update_workspace`. +- Tag filtering on `GET /demo/workspaces` (E2) should reuse the + `ShowcaseWorkspace.tags.contains([...])` containment shape proven in E1's + integration test, mirroring `GET /scenarios?tags=` (scenarios/routes.py:180). +- A schema change to any slot bumps `config_schema_version` (ORM default) and + documents the delta in DOMAIN_MODEL. + +## Confidence Score + +**9/10** for one-pass implementation success. Every element has a verified in-repo +precedent: the add-columns+GIN migration (`bb8c4587ef1d`), the tags column +(`scenarios/models.py:74`), the partial-update schema (`registry RunUpdate`), the +404-on-missing route shape (the demo DELETE), and the request-field+validator pattern +(`workspace_name`, same file). The three judgment calls (tags representation, slot +shape, no-FK soft reference) are resolved and frozen above, and all changes are +additive — a wrong slot-schema guess costs a documented `config_schema_version` bump, +not a rework. The −1: the PATCH route tests must match whichever +unit-vs-integration convention `test_routes.py` currently uses for the workspace +GET/DELETE endpoints (read it first), and the frontend type-gate baseline is fuzzy +on this host (`tsc -b` has pre-existing dev failures — gate on "no NEW errors"). 
diff --git a/PRPs/PRP-showcase-completion-E2-safe-replay-lifecycle.md b/PRPs/PRP-showcase-completion-E2-safe-replay-lifecycle.md new file mode 100644 index 00000000..6adfe994 --- /dev/null +++ b/PRPs/PRP-showcase-completion-E2-safe-replay-lifecycle.md @@ -0,0 +1,1247 @@ +name: "PRP — Showcase Completion E2: Safe Replay & Workspace Lifecycle (issue #408)" +description: | + +## Purpose + +Implement the safe-replay + workspace-lifecycle epic of the showcase-completion +initiative (umbrella #406): an explicit confirmation step (with preview/diff) +before every replay — destructive copy when `reset=true` — lineage rendering of +the E1 `replayed_from_workspace_id` chain, full lifecycle management on the +saved-workspaces panel (rename / archive / pin / notes / tags / search / +filter / sort / multi-select delete), a two-workspace compare view, and the +folded-in ops slice: artifact-link liveness checks with dead-link warnings on +soft references plus a per-workspace health summary (partial-run warning +included). Parallel epic after Foundation E1 (#407) — **execution starts only +AFTER E1 merges**; this PRP treats E1's epic body as a frozen contract (every +dependency on it is tagged `CONTRACT(E1)` below). + +## Core Principles + +1. **Context is King**: every reference below was verified against the live code on 2026-06-12 (branch `dev`, post-#404/#405 merge — E1 #407 NOT yet merged; see the E1-reconciliation task). +2. **Validation Loops**: each level is executable as written. +3. **Information Dense**: patterns cite exact file:line. +4. **Progressive Success**: backend list-filters + health endpoint → frontend types/hooks → confirm/diff dialog → lifecycle panel rework → lineage → compare page → docs. +5. **Global rules**: follow CLAUDE.md / AGENTS.md; all five CI gates must pass; UI work follows `.claude/rules/ui-design.md` + `.claude/rules/shadcn-ui.md`. 
+ +--- + +## Goal + +An operator on `/showcase` can: + +- (a) **Replay safely** — clicking Replay opens a confirmation dialog showing a + preview/diff: the recorded config (seed / scenario / reset / skip_seed / + name) side-by-side with the exact `DemoRunRequest` about to be sent, any + divergence highlighted. When the recorded config has `reset=true`, the + dialog carries explicit destructive copy ("Replaying this workspace WIPES + the database") and a destructive-styled confirm button. No replay starts + without confirmation. +- (b) **See lineage** — a workspace created by a replay carries a "replay" + badge in the list; the loaded-workspace view renders the + `replayed_from_workspace_id` chain (newest → original), with dangling + ancestors (deleted rows) marked rather than erroring. +- (c) **Manage the library** — per-row actions: rename, edit notes, edit tags, + pin/unpin, archive/unarchive (all via the E1 `PATCH /demo/workspaces/{id}`), + plus the existing single delete. The list gains a search box (name), a + show-archived toggle (archived hidden by default), a tag filter, and an + allow-listed sort; pinned rows always sort first. +- (d) **Multi-select delete** — checkbox per row, "Delete selected (N)" behind + one confirmation dialog, implemented as N sequential single + `DELETE /demo/workspaces/{id}` calls. **No new bulk endpoint** (metadata-only + singles; vision-compatible — no "wipe everything" operation). +- (e) **Compare two workspaces** — select exactly two rows → Compare navigates + to a new deep-linkable page (`/showcase/compare?a=&b=`) mirroring the + run-compare two-picker pattern: config diff, result-summary diff (winner / + WAPE delta / wall-clock), created-objects presence matrix, lineage relation. 
+- (f) **See link health** — loading a workspace probes its soft references + (model runs, scenario plans, alias, batch, agent session, E1 `job_ids`) + through a new backend aggregation endpoint + `GET /demo/workspaces/{id}/health`; dead references render a warning marker + on the artifact cards and a per-workspace health summary chip shows + alive/dead counts plus a partial-run warning when the run never completed. + +**Deliverable** (all additive — no migration in E2; the schema delta is E1's): + +- `app/features/demo/workspace.py` — `list_workspaces` / `count_workspaces` + gain filter/sort parameters (`q`, `tags`, `include_archived`, `sort_by`, + `sort_order`; pinned-first ordering). +- `app/features/demo/link_health.py` — NEW: in-process soft-reference probe + module (httpx `ASGITransport`, mirroring `pipeline._Client`). +- `app/features/demo/schemas.py` — `WorkspaceRefHealth`, + `WorkspaceHealthResponse` response models (plain BaseModel, NOT strict). +- `app/features/demo/routes.py` — query params on `GET /demo/workspaces`; + NEW `GET /demo/workspaces/{workspace_id}/health`. +- `frontend/src/types/api.ts` — lifecycle fields on the workspace types + (verify-or-add per CONTRACT(E1)), health types, list-params type, + `WorkspaceUpdate` type. +- `frontend/src/hooks/use-workspaces.ts` — params-aware `useWorkspaces`, + `usePatchWorkspace`, `useWorkspaceHealth`, `useWorkspaceLineage`. +- `frontend/src/components/demo/ReplayConfirmDialog.tsx` — NEW confirm + + preview/diff dialog. +- `frontend/src/components/demo/WorkspaceEditDialog.tsx` — NEW + rename/notes/tags editor. +- `frontend/src/components/demo/WorkspaceLineageStrip.tsx` — NEW lineage chain. +- `frontend/src/components/demo/WorkspacePanel.tsx` — reworked: toolbar + (search / show-archived / sort), row badges (pinned, archived, replay), + per-row actions dropdown, multi-select + delete-selected + compare-selected. 
+- `frontend/src/components/demo/WorkspaceArtifactsPanel.tsx` — health-aware + cards (dead-link warnings) + health summary chip. +- `frontend/src/pages/workspace-compare.tsx` — NEW two-workspace compare page; + route + `ROUTES.SHOWCASE_COMPARE` constant. +- `frontend/src/pages/showcase.tsx` — replay-confirm flow, lineage strip + + health wiring, `replayed_from_workspace_id` on the replay start frame. +- Tests: backend route + module unit tests, integration tests for list filters + and health; frontend vitest for every new/changed component + hook. +- `docs/_base/API_CONTRACTS.md` + `docs/_base/RUNBOOKS.md` — additive updates + (incl. superseding the "deliberately no confirm dialog" note). + +**Success definition**: all Success Criteria below check off, the five backend +CI gates and the frontend gates are green, and a manual browser dogfood on a +seeded stack walks: save → search/sort → rename/pin/archive → replay (confirm +dialog with diff, destructive variant on a reset workspace) → lineage chain +visible → two-workspace compare → delete a referenced run → health shows the +dead link. + +## Why + +- Umbrella #406 success criteria commit: "a `reset=true` replay requires an + explicit confirmation step before it runs" and "Workspaces can be renamed, + archived, pinned, annotated (notes/tags), searched, filtered, sorted, and + multi-select-deleted (metadata-only) from the saved-workspaces panel". +- Today a replay of a `reset=true` workspace wipes the database with **no + confirmation** — documented designed behavior + (`docs/_base/RUNBOOKS.md` § "Showcase workspace", item 1: "there is + deliberately no confirm dialog") that #406 explicitly reverses. +- E1 (#407) ships the storage + PATCH surface but no UI consumes it; E2 is the + delivery surface that makes lifecycle, lineage, and provenance visible. 
+- `created_objects` ids are soft references by design — operator deletes leave + dangling deep links ("expected; the workspace row records what WAS created, + not what still exists", RUNBOOKS § Showcase workspace item 4). Link health + turns that silent staleness into a visible, per-workspace signal — the novel + ops slice #406 folded into this epic. + +## What + +### Decisions locked here (so implementation doesn't re-litigate) + +These were the open questions this PRP owns; the decisions below are final for E2. + +1. **Replay-policy picker (exact / safe-keep / modified): OUT OF SCOPE.** + Replay stays verbatim (`E4 #393` semantics). Rationale: the umbrella + commits only confirm + preview/diff; a "modified replay" already exists as + Load → edit controls → Run (the Load path repopulates every control); a + policy enum would add request-surface + backend validation for zero new + capability. The confirm dialog's footer carries a one-line hint — + "Want to change the config first? Use Load instead." Document the + deferral in the PR description. +2. **Confirmation applies to EVERY replay, not just `reset=true`.** The + preview/diff panel needs a pre-flight surface and a sometimes-there dialog + is worse UX than an always-there one. The `reset=true` variant escalates: + destructive copy + destructive-styled action button. This satisfies the + umbrella's "explicit confirmation before any reset=true replay" as a + strict superset. The direct Run button (operator-configured runs) is + unchanged — confirmation guards replays only. +3. **Link-health architecture: BACKEND aggregation endpoint** + (`GET /demo/workspaces/{id}/health`), implemented by probing the public + API **in-process** via `httpx.ASGITransport` — the exact mechanism + `pipeline._Client` already uses from inside a request context + (`app/features/demo/pipeline.py:141-148`; `POST /demo/run` passes + `request.app` into the pipeline at `routes.py:75`). 
Justification: + (a) the demo slice may NOT import registry/scenarios/jobs/agents services + (vertical-slice rule), and in-process HTTP through the public surface is + the slice's established cross-slice seam; (b) one workspace has up to ~10+ + references (3 runs + N plans + alias + batch + session + M jobs) — a + frontend-probed design costs 1+N browser round-trips per workspace and + duplicates existence semantics per artifact type; (c) a backend endpoint + gives the health summary a single testable contract and a place for the + partial-run flag. Probes run concurrently (`asyncio.gather`), classify + 2xx→`alive`, 404→`dead`, anything else→`unknown`, and are fetched + on-demand (loaded workspace only — never for every list row). +4. **Compare view: FRONTEND-ONLY page.** A workspace compare is a plain field + diff over two already-served `WorkspaceDetail` payloads — no new backend + endpoint (contrast: `GET /registry/compare/{a}/{b}` exists because metric + diffing has server-side logic). New page `/showcase/compare?a=&b=` + mirroring `frontend/src/pages/explorer/run-compare.tsx` (two `Select` + pickers + `useSearchParams` deep-linking). +5. **Multi-select delete = N sequential single DELETEs.** The existing + `DELETE /demo/workspaces/{id}` is called once per selected row behind one + confirmation dialog. NO new bulk endpoint — product-vision guardrail ("no + wipe-everything operations"); failures are collected and toasted, the list + refetches once at the end. +6. **Search/filter/sort: SERVER-SIDE additive query params** on + `GET /demo/workspaces`, mirroring established precedents: name search → + `dimensions` `search` ILIKE pattern (`app/features/dimensions/routes.py:65`), + tags → `scenarios` repeated-`tags` JSONB containment + (`app/features/scenarios/routes.py:180`, `service.py:462-465`), sort → + allow-listed `sort_by`/`sort_order` with silent fallback to default + (`dimensions/routes.py:70-75`). 
`include_archived=false` is the default + (archived rows hidden). Pinned rows always order first + (`ORDER BY pinned DESC, <sort_by> <sort_order>`). Server-side keeps the panel honest as + rows accumulate and gives the filter a route-test contract. + +### Frozen contract — CONTRACT(E1) (#407 ships these; E2 consumes, never re-decides) + +Every assumption below MUST be reconciled against the merged E1 diff before +implementation (Task 1). Where E1's PRP chose different names, adapt E2's code +to E1's names — never the reverse. + +- `CONTRACT(E1)-1` — `showcase_workspace` columns exist post-migration: + `replayed_from_workspace_id` (nullable String(32), soft reference — NO FK, + consistent with `models.py` no-FK doctrine), `archived` (bool, default + false), `pinned` (bool, default false), `notes` (nullable text), `tags` + (JSONB string array, default `[]`), `config_schema_version` (int). +- `CONTRACT(E1)-2` — `tags` representation is a JSONB string array with a GIN + index, mirroring `scenario_plan.tags` + (`app/features/scenarios/models.py:74,97`), so SQLAlchemy + `.contains([tag])` containment filtering works. +- `CONTRACT(E1)-3` — `PATCH /demo/workspaces/{workspace_id}` exists with an + all-Optional partial-update body (rename/notes/tags/archive/pin — assumed + schema name `WorkspaceUpdateRequest`, semantics mirroring registry + `RunUpdate`, `app/features/registry/schemas.py:113-121`: absent field = + unchanged), returns the updated workspace (assumed + `WorkspaceDetailResponse`), 404 problem+json on a missing id. +- `CONTRACT(E1)-4` — the GET list/detail response schemas expose the new + columns (`WorkspaceListItem` += `archived`, `pinned`, `tags`, + `replayed_from_workspace_id`; `WorkspaceDetailResponse` += `notes`, + `config_schema_version` and the JSONB story slots it serves). **Defensive + rule**: if E1 did NOT extend the GET responses, E2 adds the fields + additively in Task 3 (they are required reading surface for this epic).
+- `CONTRACT(E1)-5` — replay provenance mechanism: `DemoRunRequest` (and the + WS start frame) carries an additive Optional + `replayed_from_workspace_id: str | None` that `workspace.create_workspace` + persists onto the new row (E1's epic body: "Replay writes + `replayed_from_workspace_id`"). NOTE: E1's PRP itself wires the frontend + send (handleReplayWorkspace sends `ws.workspace_id` — an E1 success + criterion), so E2 PRESERVES the field through the executeReplay refactor + rather than adding it; if E1 instead derived it server-side, E2 adapts. +- `CONTRACT(E1)-6` — the `job_ids` JSONB story slot is a `list[str]` of job + ids; the health endpoint probes each via `GET /jobs/{job_id}` when the slot + is non-empty (and silently skips when absent/empty — pre-E1-backfill rows). +- `CONTRACT(E1)-7` — E1 does NOT add filtering/sorting to + `GET /demo/workspaces` (its scope is migration + PATCH + schemas); the list + query params are E2's to add. If E1's merged code already added any of + them, reuse instead of duplicating. + +### User-visible behavior + +- **Replay confirm/diff**: Replay button → dialog titled "Replay workspace + \"name\"?" with a two-column table (Recorded / Will send) over seed, + scenario, reset, skip_seed, workspace name, preservation (always `keep`), + replayed-from (the source workspace id). Rows where the two values differ + are highlighted (defensive — verbatim replay means they normally match). + `reset=true` → red warning block + destructive confirm button labeled + "Replay & wipe database"; otherwise a default confirm labeled "Replay". + Cancel never starts a run. +- **Lineage**: list rows with `replayed_from_workspace_id != null` show an + outline `Badge` "replay". The loaded-workspace view renders a breadcrumb + strip: `this ← parent ← grandparent …` (depth-capped at 5), each ancestor + clickable (loads it); a deleted ancestor renders as + "(original deleted)" — dangling soft references are expected, never errors. 
+- **Lifecycle panel**: toolbar = search `Input` (filters by name, + debounced/enter-applied), "Show archived" `Checkbox`, sort `Select` + (Newest / Oldest / Name / Status). Rows: pin icon (filled when pinned), + muted styling + "archived" badge on archived rows, tags rendered as small + chips (clicking a chip filters the list by that tag; an active tag filter + shows as a clearable chip in the toolbar). Per-row `DropdownMenu` (lucide + `MoreHorizontal`): Pin/Unpin, Archive/Unarchive, Edit details…, Delete…. + "Edit details…" opens `WorkspaceEditDialog` (name input with the + `^[a-z0-9][a-z0-9\-_]*$` client validation already used by the run controls, + notes `Textarea`, tags comma-separated input). +- **Multi-select**: leading `Checkbox` per row + header select-all; selection + shows "N selected" with **Delete selected** (AlertDialog: "Delete N + workspace records? Their created objects are NOT deleted.") and **Compare** + (enabled only when exactly 2 selected → navigates to the compare page). +- **Compare page** (`/showcase/compare?a=&b=`): back-link to `/showcase`, two + workspace `Select` pickers (deep-linkable URL params), then: config table + (seed/scenario/reset/skip_seed/name/tags, mismatches highlighted), + result-summary table (winner, WAPE with the `DeltaCell` sign-only + indicator, wall-clock), created-objects presence matrix (per soft-reference + key: recorded A / recorded B), lineage note when one side is a replay of + the other, partial-run badge per side when `status != "completed"`. +- **Link health**: loading a workspace fires + `GET /demo/workspaces/{id}/health`; the artifacts panel shows a summary + chip — `✓ N live · ✕ M dead` (plus "partial run" warning chip when the + row's status is not `completed`) — and each card whose reference probed + `dead` gets a lucide `AlertTriangle` + tooltip "This object no longer + exists — it was deleted after the run." `unknown` references render + without a marker (no false alarms on transient 5xx). 
+ +### Technical requirements + +- All five backend gates green; frontend `pnpm lint && pnpm test --run` green. +- New/changed endpoints: route tests covering 2xx + at least one error path + (`.claude/rules/test-requirements.md`). +- RFC 7807 for every error path (`NotFoundError` from `app/core/exceptions.py:72`). +- Response models stay plain `BaseModel` (+`from_attributes` where ORM-built) + — strict mode is request-body-only policy (`demo/schemas.py:88-95` precedent). +- The demo slice imports NO other feature slice — link health goes through + in-process HTTP (`request.app` + `ASGITransport`), never a service import. +- Frontend: TanStack Query for all IO; shadcn/ui new-york primitives only + (everything needed is already installed — see gotchas); lucide icons; + semantic tokens only (`text-destructive`, `bg-muted`, …) — no raw colors. +- Legacy behavior byte-identical: a client that never touches the new query + params / endpoints sees today's responses (new list params all default to + today's semantics EXCEPT archived-hidden — see gotcha on `include_archived`). + +### Success Criteria + +- [ ] Replay (panel button) always opens the confirm dialog with the + recorded-vs-sent preview; confirming a `reset=true` workspace requires + the destructive-styled button; Cancel starts nothing. No code path + starts a replay without the dialog. +- [ ] A confirmed replay sends the recorded config verbatim + + `preservation="keep"` + the recorded name + `replayed_from_workspace_id` + (CONTRACT(E1)-5); the new row carries the provenance id and the list + shows its "replay" badge; the loaded view renders the ancestor chain, + tolerating deleted ancestors. +- [ ] Rename / notes / tags / pin / archive each round-trip through + `PATCH /demo/workspaces/{id}` and re-render without a manual refresh + (query invalidation on list + detail). 
+- [ ] `GET /demo/workspaces` supports `q` (name ILIKE), `tags` (repeated, + containment), `include_archived` (default false), allow-listed + `sort_by`/`sort_order` (unknown → default `created_at desc`); pinned + rows order first; `total` respects the active filters; route tests + cover each param + the bad-param paths. +- [ ] Multi-select delete removes N metadata rows via N single DELETEs behind + one confirmation; created objects untouched; NO new bulk endpoint exists. +- [ ] `/showcase/compare?a=&b=` deep-links two workspaces and renders config + diff, result diff, created-objects matrix, lineage note, partial-run + badges; invalid/missing ids degrade to the picker (no crash). +- [ ] `GET /demo/workspaces/{id}/health` returns per-reference + `alive`/`dead`/`unknown` + counts + `partial_run`; 404 problem+json on a + missing workspace; integration test proves a bogus reference probes + `dead` and a real one probes `alive`. +- [ ] Loaded-workspace artifact cards show dead-link warnings + the health + summary chip; the partial-run warning renders for non-completed rows. +- [ ] Legacy list calls (no new params) return archived-free, pinned-first, + newest-first pages; all pre-existing demo tests still pass. +- [ ] `uv run ruff check . && uv run ruff format --check . && uv run mypy app/ + && uv run pyright app/ && uv run pytest -v -m "not integration"` green; + integration suite green; `cd frontend && pnpm lint && pnpm test --run` + green. + +## Assumptions (no user available — documented, not asked) + +1. E1 (#407) merges before E2 execution begins (implementation-order gate from + the umbrella). This PRP is authored against pre-E1 `dev`; Task 1 + reconciles every CONTRACT(E1) point against E1's actual merged shape. +2. Exact E1 schema/endpoint names (`WorkspaceUpdateRequest`, field names as + listed in CONTRACT(E1)) — adapt to E1's real names on divergence. +3. 
Archived-by-default-hidden is the correct list semantics (that is what + "archive" means for a library); the only consumer of `GET /demo/workspaces` + is the Showcase panel (verified — no other frontend or backend caller), so + the default-flip is safe. +4. Health probing is acceptable on-demand-only (loaded workspace), not for + every list row — probing N rows × M references on list render would be a + self-inflicted thundering herd through the in-process transport. +5. The lineage chain depth cap of 5 is sufficient (a replay-of-a-replay chain + deeper than 5 is a pathological case; the strip renders "…" beyond it). +6. `sonner` `toast` (already used by `WorkspacePanel.tsx:20`) is the + feedback surface for mutation success/failure — no new notification system. +7. Tag editing via a comma-separated text input is acceptable UX for a + single-operator tool (no tag-autocomplete component is installed; building + one is out of scope). + +## All Needed Context + +### Documentation & References + +```yaml +# MUST READ — issues (the contract stack) +- url: https://github.com/w7-mgfcode/ForecastLabAI/issues/408 + why: The epic this PRP implements — scope list is exhaustive (this PRP covers all of it). +- url: https://github.com/w7-mgfcode/ForecastLabAI/issues/406 + why: Umbrella — success criteria rows 2 & 3 are E2's acceptance bar; out-of-scope list (no replay-policy infra beyond confirm+diff). +- url: https://github.com/w7-mgfcode/ForecastLabAI/issues/407 + why: Foundation epic body = the frozen CONTRACT(E1) surface (columns, JSONB slots, PATCH endpoint, replay provenance write). +- file: PRPs/PRP-showcase-workspace-E4-restore-replay.md + why: Closest-analog predecessor PRP — the E4 restore/replay semantics E2 hardens; its "decisions locked" #2/#3 (no confirm dialog, no provenance) are the two designed behaviors #406/#407 now reverse. 
+ +# MUST READ — backend (verified 2026-06-12, dev pre-E1) +- file: app/features/demo/routes.py + why: | + Current surface: POST /run @51 (passes request.app into the pipeline @75 — + the request-context app handle the health route also needs), GET + /workspaces @80-107 (limit/offset only — EXTEND with filters), GET + /workspaces/{workspace_id} @110-135 (NotFoundError 404 pattern @133-134), + DELETE @138-163, WS /stream @166. Router prefix="/demo" @48. Health route + lands between the GET detail and DELETE. +- file: app/features/demo/workspace.py + why: | + list_workspaces @174-196 (order created_at.desc, id.desc @192) and + count_workspaces @224-234 — the two functions E2 extends with q/tags/ + include_archived/sort_by/sort_order. get_workspace @158, delete_workspace + @199. All take caller-owned AsyncSession. create_workspace @46 is E1's to + extend (replayed_from) — DO NOT touch unless E1 missed it. +- file: app/features/demo/models.py + why: | + ShowcaseWorkspace @37; current columns @59-81; CHECK + composite index + @83-89. E1 adds the lifecycle/provenance columns here — E2 reads them, + never migrates. No-FK doctrine in the module docstring @4-11 (the health + feature exists BECAUSE of this doctrine). +- file: app/features/demo/schemas.py + why: | + DemoRunRequest @29 (strict=True @40; preservation @68; workspace_name + pattern @72-78; requires-keep validator @80-85 — the model E1 extends with + replayed_from_workspace_id). Response-model non-strict precedent: StepEvent + docstring @88-95, WorkspaceListItem @169 (from_attributes @177), + WorkspaceDetailResponse @192, WorkspaceListResponse @205. Append the two + health models here. +- file: app/features/demo/pipeline.py + why: | + THE in-process probe mechanism to copy into link_health.py: _Client + @127-204 — httpx.AsyncClient(transport=httpx.ASGITransport(app=app, + raise_app_exceptions=False), base_url cosmetic, timeout @98) and + request() status handling @188-200. 
link_health needs a SIMPLER client: + status-code classification only, no _StepError. DO NOT modify pipeline.py + in E2 (E1 owns the provenance write; replay flows through unchanged). +- file: app/features/demo/tests/test_routes.py + why: | + Route-test conventions to extend: unit tests monkeypatch the workspace + module functions (list @236-251, pagination pass-through @253-276, 404 + @286-298, delete @324-347); integration tests @359+ use the db_session + fixture and seed real rows. New filter/health tests follow these shapes. +- file: app/features/demo/tests/conftest.py + why: client fixture (ASGITransport over app.main.app) + db_session fixture + (real Postgres, wipes showcase_workspace on teardown). +- file: app/features/scenarios/routes.py + why: | + Repeated-tags Query param precedent @168-195 (tags: list[str] | None = + Query(default=None)) — copy for the workspace list. GET detail 404 style + @198-223. +- file: app/features/scenarios/service.py + why: list_plans @436-472 — tags containment filter @462-465 + (stmt.where(ScenarioPlan.tags.contains(tags))) applied to BOTH count and + rows statements; total respects filters. Mirror exactly. +- file: app/features/scenarios/models.py + why: tags JSONB string-array column @70-74 + GIN index @97 — the + representation CONTRACT(E1)-2 assumes for workspace tags. +- file: app/features/dimensions/routes.py + why: | + search + allow-listed sort precedent @65-105 (search Query min-2-chars, + sort_by Query with allow-list note "unknown values use default order", + sort_order asc|desc). Mirror the docstring + silent-fallback style. +- file: app/features/registry/schemas.py + why: RunUpdate @113-121 — the all-Optional partial-update body shape + CONTRACT(E1)-3 assumes for WorkspaceUpdateRequest (extra="forbid"). +- file: app/features/registry/routes.py + why: | + PATCH precedent @235; probe targets for link health: GET /registry/runs/ + {run_id} @200-201, GET /registry/aliases/{alias_name} @503-504. 
+- file: app/features/jobs/routes.py + why: probe target GET /jobs/{job_id} @219-220. +- file: app/features/batch/routes.py + why: probe target GET /batch/{batch_id} @55-62 (NotFoundError on miss). +- file: app/features/agents/routes.py + why: probe target GET /agents/sessions/{session_id} @80-104 — 404 via plain + HTTPException (status code is all the probe reads; body shape irrelevant). +- file: app/core/exceptions.py + why: NotFoundError @72 (RFC 7807 404). No new exception classes needed. + +# MUST READ — frontend (verified 2026-06-12) +- file: frontend/src/pages/showcase.tsx + why: | + 453 lines. State block @118-131 (seed/keepWorkspace/workspaceName/ + selectedWorkspaceId + useWorkspace detail resolution @128-131); handleRun + @139-156; handleLoadWorkspace @160-168; handleReplayWorkspace @174-186 — + THE function the confirm dialog intercepts (today it calls start() + directly); WorkspacePanel mount @245-255; name-pattern client validation + @26 + @135-137 (reuse in WorkspaceEditDialog); WorkspaceArtifactsPanel + mount @448-450 (gets health props). +- file: frontend/src/components/demo/WorkspacePanel.tsx + why: | + 219 lines — the component this epic reworks. Props @37-48; statusClass + @50-59 (semantic-token status colors); DESTRUCTIVE marker @144-148 + (text-destructive span); per-row buttons @153-183; the AlertDialog + delete-confirm pattern @191-216 (open-state via pendingDelete, shared + one dialog for all rows, data-testid on the action) — COPY this pattern + for ReplayConfirmDialog + the multi-delete confirm; list invalidation + effect @106-110. +- file: frontend/src/components/demo/WorkspacePanel.test.tsx + why: vitest conventions for this component family (mock use-workspaces + hooks via vi.mock, fire dialog actions, assert mutation calls). +- file: frontend/src/components/demo/WorkspaceArtifactsPanel.tsx + why: | + 157 lines. 
ArtifactCard shape @15-20, buildCards key mapping @30-107 + (winning_run_id/v2_run_id/scenario_plan_ids/batch_id/alias/ + agent_session_id + grain), disabled-card opacity-50 + title tooltip + @128-149. Health markers extend buildCards: each card gains an optional + `dead: boolean` resolved from the health response keyed by reference id. +- file: frontend/src/hooks/use-workspaces.ts + why: | + 43 lines — extend in place. useWorkspaces @10-16 (queryKey ['workspaces', + {limit}] — params object grows), useWorkspace @19-25, useDeleteWorkspace + @33-42 (invalidate ['workspaces'] on success — same invalidation for + usePatchWorkspace). useWorkspaceHealth + useWorkspaceLineage are new + siblings here. +- file: frontend/src/pages/explorer/run-compare.tsx + why: | + THE compare-page pattern (370 lines): useSearchParams a/b @87-89, + selectRun setParams updater @103-109, RunPicker Select @56-84, DeltaCell + sign-only indicator @33-54, side-by-side Card/Table layout @114+. The + workspace compare page mirrors all of it with useWorkspace×2 instead of + useCompareRuns (frontend-only diff — Decision 4). +- file: frontend/src/lib/constants.ts + why: ROUTES.SHOWCASE='/showcase' @4, ROUTES.EXPLORER.RUN_COMPARE @20 — add + SHOWCASE_COMPARE='/showcase/compare' beside SHOWCASE. +- file: frontend/src/App.tsx + why: lazy-page + Suspense route registration pattern (ShowcasePage @12, + @54-61; RunComparePage @21, @119-126) — register WorkspaceComparePage + identically. +- file: frontend/src/lib/api.ts + why: api(endpoint, {params, method, body}) wrapper; ApiError carries + status (WorkspacePanel.tsx:97 shows instanceof usage); getErrorMessage. +- file: frontend/src/types/api.ts + why: workspace types block @806-831 (WorkspaceListItem @806, WorkspaceDetail + @819, WorkspaceListResponse @828); DemoRunRequest @778-787 — extend here. 
+- file: frontend/src/hooks/use-demo-pipeline.ts + why: start(req) signature + the picker-desync gotcha (start() does not sync + the scenario picker — Replay must setScenario first; already handled in + handleReplayWorkspace, keep that ordering inside the confirmed path). + +# Project docs to update (additive) +- file: docs/_base/API_CONTRACTS.md + why: GET /demo/workspaces row gains the filter params; new health-endpoint + row; WS section note for replayed_from (if E1 didn't already add it). +- file: docs/_base/RUNBOOKS.md + why: § "Showcase workspace — preserve/restore/replay/delete semantics" item 1 + says "there is deliberately no confirm dialog" — E2 supersedes this + (update the item; keep the DESTRUCTIVE-marker sentence). Items 2-4 gain + one-line pointers to lineage badges / metadata-only multi-delete / health. +- file: docs/_base/DOMAIN_MODEL.md + why: showcase_workspace § "Out of scope" lists the replayed_from column — + E1's PRP owns that doc edit; E2 only verifies it happened (do not double-edit). 
+``` + +### Current Codebase tree (relevant subset, pre-E1) + +```bash +app/features/demo/ +├── link_health.py # DOES NOT EXIST — E2 creates +├── models.py # ShowcaseWorkspace @37 (E1 extends; E2 reads) +├── pipeline.py # 2771 lines; _Client @127 — UNTOUCHED in E2 +├── routes.py # POST /run @51; GETs @80,@110; DELETE @138; WS @166 +├── schemas.py # 214 lines; workspace response models @169-213 +├── service.py # lock + PipelineBusyError — untouched +├── workspace.py # 235 lines; list @174 / count @224 — E2 extends +└── tests/ # conftest, test_{models,pipeline,routes,schemas,workspace}.py +frontend/src/ +├── pages/showcase.tsx # 453 lines +├── pages/explorer/run-compare.tsx # 370 lines — compare pattern +├── components/demo/WorkspacePanel.tsx # 219 lines — reworked in E2 +├── components/demo/WorkspaceArtifactsPanel.tsx # 157 lines — health-aware in E2 +├── hooks/use-workspaces.ts # 43 lines — extended in E2 +├── types/api.ts # workspace block @806-831 +└── components/ui/ # 27 primitives incl. 
alert-dialog, + # dialog, dropdown-menu, textarea, + # table, select, tooltip, badge +``` + +### Desired Codebase tree (files added/modified) + +```bash +app/features/demo/ +├── link_health.py # NEW — probe targets + probe_workspace_links() +├── schemas.py # MOD — +WorkspaceRefHealth +WorkspaceHealthResponse +├── workspace.py # MOD — list/count filters + sort +├── routes.py # MOD — list query params; +GET /workspaces/{id}/health +└── tests/ + ├── test_link_health.py # NEW — probe classification vs a stub ASGI app + ├── test_routes.py # MOD — filter/sort/health unit + integration tests + └── test_workspace.py # MOD — list/count filter unit coverage (db-less where possible) +frontend/src/ +├── types/api.ts # MOD — lifecycle fields (verify-or-add), health types, params, update type +├── hooks/use-workspaces.ts # MOD — params-aware list; +usePatchWorkspace +useWorkspaceHealth +useWorkspaceLineage +├── hooks/use-workspaces.test.ts # MOD — new hooks covered +├── components/demo/ReplayConfirmDialog.tsx # NEW (+ .test.tsx) +├── components/demo/WorkspaceEditDialog.tsx # NEW (+ .test.tsx) +├── components/demo/WorkspaceLineageStrip.tsx # NEW (+ .test.tsx) +├── components/demo/WorkspacePanel.tsx # MOD — toolbar/badges/dropdown/multi-select (+ test MOD) +├── components/demo/WorkspaceArtifactsPanel.tsx # MOD — health markers + summary chip (+ test MOD) +├── components/demo/index.ts # MOD — barrel exports +├── pages/workspace-compare.tsx # NEW (+ workspace-compare.test.tsx) +├── pages/showcase.tsx # MOD — confirm flow, lineage, health, provenance field +├── lib/constants.ts # MOD — ROUTES.SHOWCASE_COMPARE +└── App.tsx # MOD — compare route registration +docs/_base/API_CONTRACTS.md # MOD — list params + health endpoint +docs/_base/RUNBOOKS.md # MOD — supersede "no confirm dialog"; lifecycle notes +``` + +### Known Gotchas & Library Quirks + +```python +# CRITICAL — EXECUTION GATE: do not start until E1 (#407) is merged to dev. 
+# Task 1 reconciles every CONTRACT(E1) point against the real merged code +# (git log --oneline --grep "#407"; read the E1 PRP + diff). Adapt E2 to +# E1's names; flag (don't silently fix) any E1 contract gap in the PR body. + +# CRITICAL — NO migration, NO models.py edit, NO pipeline.py edit in E2. +# The schema delta and the provenance/PATCH plumbing are E1's. If a column +# you need is missing post-E1, STOP and surface it — don't ship a stealth +# migration under E2. + +# CRITICAL — no cross-slice imports from app/features/demo/. Link health MUST +# go through in-process HTTP (request.app + httpx.ASGITransport — precedent +# pipeline.py:141-148 driven from a request context via routes.py:75). +# Importing RegistryService/ScenarioService/etc. fails the architecture rule. + +# CRITICAL — health probe classification: 2xx -> "alive", 404 -> "dead", +# EVERYTHING else (5xx, timeout, transport error) -> "unknown". Never let a +# probe exception escape the endpoint (asyncio.gather(..., return_exceptions= +# True) or per-probe try/except) — a flaky slice must not 500 the health +# route. raise_app_exceptions=False is REQUIRED on the ASGITransport (an +# unhandled error in a probed endpoint must surface as a 500 *response*). + +# CRITICAL — multi-select delete is N SINGLE DELETEs (existing endpoint). +# Adding POST /demo/workspaces/bulk-delete or DELETE /demo/workspaces is a +# product-vision violation (no bulk-wipe operations) — do not create it. + +# CRITICAL — the `total` returned by the filtered list MUST respect the active +# filters (scenarios precedent: BOTH count_stmt and rows_stmt get the same +# .where chain, scenarios/service.py:462-465). A filter-blind total breaks +# the "showing X of Y" header. + +# GOTCHA — include_archived default false flips list semantics for archived +# rows. 
Pre-E1 rows have archived=false (E1 migration default), so legacy +# lists are unchanged; route tests must still pin: no-param call returns +# only archived=false rows, include_archived=true returns both. + +# GOTCHA — sort allow-list: {created_at, name, seed, status}; unknown sort_by +# silently falls back to created_at desc (dimensions precedent — NOT a 422). +# Pinned-first is unconditional: ORDER BY pinned DESC, <sort_by> <sort_order>, id DESC +# tiebreak. name sort: NULLS LAST (unnamed rows sink) — use +# sqlalchemy .nulls_last() on the asc/desc expression. + +# GOTCHA — tags Query param: list[str] | None = Query(default=None) gives +# repeated-param parsing (?tags=a&tags=b). JSONB containment via +# ShowcaseWorkspace.tags.contains(tags) requires CONTRACT(E1)-2 (JSONB array +# column). Frontend sends ONE tag at a time (chip filter) — a single +# `tags` param serializes fine through api()'s params. + +# GOTCHA — q search: mirror dimensions ILIKE (case-insensitive, escape % and _ +# if the precedent does; check dimensions/service.py before writing). +# Search NAME only (workspace_id prefixes are copy-paste handles, not search). + +# GOTCHA — strict-mode policy: the new health/response models are response +# models -> plain BaseModel, NO ConfigDict(strict=True). The AST walker +# (app/core/tests/test_strict_mode_policy.py) only inspects strict=True +# request models — keep it that way. + +# GOTCHA — agents GET /agents/sessions/{id} 404s via plain HTTPException (not +# NotFoundError) — irrelevant to the probe (status code only), but do NOT +# "fix" the agents slice as a drive-by. + +# GOTCHA — an EXPIRED-but-existing agent session returns 200 (row exists) -> +# "alive". That is correct link-health semantics (the row is the link +# target); the artifacts card blurb already says "the recorded session has +# likely expired".
+ +# GOTCHA — ReplayConfirmDialog destructive styling: AlertDialogAction renders +# buttonVariants default; pass +# className="bg-destructive text-destructive-foreground hover:bg-destructive/90" +# (semantic tokens — NEVER raw colors like bg-red-500). Copy the shared-dialog open-state pattern from +# WorkspacePanel.tsx:191-216 (pendingX state, one dialog for all rows). + +# GOTCHA — confirm-dialog flow ordering: the confirmed replay must run the +# EXISTING handleReplayWorkspace body (setScenario BEFORE start() — the +# picker-desync gotcha from E4 still applies). Refactor: handleReplayWorkspace +# becomes "setPendingReplay(ws)"; a new executeReplay(ws) holds the old body +# + the CONTRACT(E1)-5 replayed_from_workspace_id field. + +# GOTCHA — lineage walking: a deleted ancestor's GET returns 404 +# (ApiError.status === 404) — render "(original deleted)" and STOP the walk; never +# throw. Implement as one useQuery whose queryFn loops (await api(...) per +# ancestor, depth cap 5), queryKey ['workspaces', id, 'lineage'] — N +# serial fetches inside one query keep cache + loading states simple. + +# GOTCHA — useWorkspaces signature change (limit -> params object) touches its +# existing call sites + use-workspaces.test.ts — update them in the same +# commit; keep queryKey shape ['workspaces', paramsObject] so the blanket +# invalidateQueries({queryKey: ['workspaces']}) keeps matching everything. + +# GOTCHA — pnpm tsc --noEmit is VACUOUS (solution-style tsconfig, zero files) +# and `tsc -b` fails on dev with PRE-EXISTING errors (known issue — memory +# [[frontend-tsc-noemit-gate-vacuous]]). Do NOT chase those. JS gates that +# must be green: pnpm lint && pnpm test --run. Optionally verify ONLY your +# new files compile via their vitest imports. + +# GOTCHA — every shadcn primitive needed (alert-dialog, dialog, dropdown-menu, +# checkbox, input, textarea, select, table, tooltip, badge, card, button) is +# ALREADY in frontend/src/components/ui/ (verified 2026-06-12).
Do NOT run +# `shadcn add`. If you believe a new primitive is required, stop and recheck +# (.claude/rules/shadcn-ui.md; memory [[shadcn-cli-version-pin]]). + +# GOTCHA — never call crypto.randomUUID directly (issue #332; ESLint guard) — +# safeRandomUUID from @/lib/uuid-utils if any client id is needed. + +# GOTCHA — repo has mixed CRLF/LF; Write/Edit emit LF. New files fine; for +# showcase.tsx / WorkspacePanel.tsx / routes.py edits run `git diff --stat` +# and confirm surgical line counts before committing. + +# GOTCHA — mypy --strict AND pyright --strict gate merge: full annotations on +# the new probe module (TypedDict/dataclass or Pydantic for probe targets), +# `-> None` on tests, annotated fixtures. + +# COORDINATION — E3 (#409), E4 (#410), E5 (#411), E6 (#412) are open parallel +# epics. Shared-file risk: schemas.py / routes.py / showcase.tsx / +# API_CONTRACTS.md. Keep every edit additive + self-contained; rebase on dev +# before the PR. + +# RUNTIME-VERIFICATION LOG (per prp-create step 3): +# - demo routes/handlers + line refs — read routes.py (2026-06-12) +# - list/count signatures + ordering — read workspace.py:174-234 +# - ShowcaseWorkspace pre-E1 columns — read models.py:59-89 +# - response-model non-strict precedent — read schemas.py:88-95,169-213 +# - ASGITransport in-process pattern — read pipeline.py:127-204 +# - scenario tags containment + GIN — read scenarios/service.py:462-465, models.py:74,97 +# - dimensions search/sort params — grep dimensions/routes.py:65-105 +# - probe targets exist: /registry/runs/{run_id} (registry/routes.py:200), +# /registry/aliases/{alias_name} (:503), /jobs/{job_id} (jobs/routes.py:219), +# /batch/{batch_id} (batch/routes.py:55), /agents/sessions/{session_id} +# (agents/routes.py:80), /scenarios/{scenario_id} (scenarios/routes.py:198) +# - RunUpdate partial-update shape — read registry/schemas.py:113-121 +# - frontend: WorkspacePanel AlertDialog pattern (191-216), run-compare +# useSearchParams pattern (87-109), 
installed ui primitives (ls), api.ts +# ApiError usage (WorkspacePanel.tsx:97) +# - E1 #407 OPEN / unmerged as of 2026-06-12 — CONTRACT(E1) tags mark every +# dependency; no third-party API claims beyond in-repo working patterns +# (httpx ASGITransport, sqlalchemy .contains, TanStack useQuery/useMutation +# — all already exercised in this repo; .nulls_last is standard +# SQLAlchemy 2.0 API but has NO in-repo precedent — verify at impl time). +``` + +## Implementation Blueprint + +### Data models and structure + +```python +# app/features/demo/schemas.py — APPEND (response models; NOT strict) + +RefHealthStatus = Literal["alive", "dead", "unknown"] +RefType = Literal["model_run", "scenario_plan", "alias", "batch", "agent_session", "job"] + + +class WorkspaceRefHealth(BaseModel): + """Liveness of one soft reference recorded on a workspace (E2, #408).""" + + key: str = Field(..., description="created_objects key, e.g. 'winning_run_id' or 'scenario_plan_ids[0]'.") + ref_type: RefType = Field(..., description="Kind of referenced object.") + ref_id: str = Field(..., description="The recorded soft-reference id.") + status: RefHealthStatus = Field(..., description="alive (2xx) / dead (404) / unknown (other).") + probe_path: str = Field(..., description="The public API path probed.") + + +class WorkspaceHealthResponse(BaseModel): + """Per-workspace link-health summary (E2, #408).""" + + workspace_id: str + workspace_status: str = Field(..., description="running / completed / failed.") + partial_run: bool = Field(..., description="True when workspace_status != 'completed'.") + references: list[WorkspaceRefHealth] = Field(default_factory=list) + alive: int = Field(..., ge=0) + dead: int = Field(..., ge=0) + unknown: int = Field(..., ge=0) + checked_at: datetime = Field(default_factory=_utc_now) +``` + +```python +# app/features/demo/link_health.py — NEW (sketch; CRITICAL details only) + +@dataclass(frozen=True) +class _ProbeTarget: + key: str # e.g. 
"scenario_plan_ids[1]" + ref_type: str # RefType value + ref_id: str + probe_path: str # e.g. f"/registry/runs/{ref_id}" + +def build_probe_targets(ws: ShowcaseWorkspace) -> list[_ProbeTarget]: + # created_objects keys (workspace.py:_collect_created_objects:82-103): + # winning_run_id / v2_run_id / stale_alias_run_id -> /registry/runs/{id} + # scenario_plan_ids[i] -> /scenarios/{id} + # alias -> /registry/aliases/{name} + # batch_id -> /batch/{id} + # agent_session_id -> /agents/sessions/{id} + # CONTRACT(E1)-6: job_ids JSONB slot [i] -> /jobs/{id} + # NON-probeable keys (v2_model_path, scenario_artifact_key, + # train_model_types) are SKIPPED — no HTTP identity to check. + ... + +async def probe_workspace_links(app: FastAPI, ws: ShowcaseWorkspace) -> WorkspaceHealthResponse: + targets = build_probe_targets(ws) + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=app, raise_app_exceptions=False), + base_url="http://demo.internal", + timeout=httpx.Timeout(10.0, connect=5.0), + ) as client: + results = await asyncio.gather( + *(_probe_one(client, t) for t in targets), return_exceptions=False + ) # _probe_one NEVER raises: try/except httpx.HTTPError/OSError -> "unknown" + # classify: 200<=s<300 alive; s==404 dead; else unknown + # partial_run = ws.status != WORKSPACE_STATUS_COMPLETED + ... +``` + +```typescript +// frontend/src/types/api.ts — extend the workspace block (806-831) + +// CONTRACT(E1)-4 — verify E1 added these; add additively if not: +export interface WorkspaceListItem { + /* existing fields ... */ + archived: boolean + pinned: boolean + tags: string[] + replayed_from_workspace_id: string | null +} +export interface WorkspaceDetail extends WorkspaceListItem { + /* existing fields ... 
*/ + notes: string | null + config_schema_version: number +} + +// E2 (#408) — lifecycle PATCH body (CONTRACT(E1)-3 shape; adapt to E1 names): +export interface WorkspaceUpdate { + name?: string | null + notes?: string | null + tags?: string[] + archived?: boolean + pinned?: boolean +} + +export interface WorkspaceListParams { + limit?: number + offset?: number + q?: string + tags?: string + include_archived?: boolean + sort_by?: 'created_at' | 'name' | 'seed' | 'status' + sort_order?: 'asc' | 'desc' +} + +export type RefHealthStatus = 'alive' | 'dead' | 'unknown' +export interface WorkspaceRefHealth { + key: string + ref_type: 'model_run' | 'scenario_plan' | 'alias' | 'batch' | 'agent_session' | 'job' + ref_id: string + status: RefHealthStatus + probe_path: string +} +export interface WorkspaceHealth { + workspace_id: string + workspace_status: 'running' | 'completed' | 'failed' + partial_run: boolean + references: WorkspaceRefHealth[] + alive: number + dead: number + unknown: number + checked_at: string +} +``` + +### List of tasks (dependency order) + +```yaml +Task 1 — gate, branch & E1 reconciliation: + VERIFY: gh issue view 407 --json state -> MUST be closed (E1 merged) before continuing + RUN: git switch dev && git pull && git switch -c feat/showcase-completion-e2-safe-replay-lifecycle + VERIFY: gh issue view 408 --json state # open + RECONCILE every CONTRACT(E1) tag against the merged code: + - read app/features/demo/models.py -> column names (CONTRACT(E1)-1/-2) + - read app/features/demo/schemas.py -> PATCH body + GET response fields (CONTRACT(E1)-3/-4) + - read app/features/demo/routes.py -> PATCH route exists + - grep replayed_from app/features/demo/ -> provenance mechanism (CONTRACT(E1)-5) + - read PRPs/PRP-showcase-completion-E1-*.md (whatever E1's PRP file is named) + ADAPT all names below to E1's reality; note any E1 gap in the PR body. 
+ +Task 2 — MODIFY app/features/demo/workspace.py (filters + sort): + - EXTEND list_workspaces(db, *, limit=50, offset=0, q=None, tags=None, + include_archived=False, sort_by=None, sort_order="desc"): + # base stmt; if not include_archived: .where(~ShowcaseWorkspace.archived) + # if q: .where(ShowcaseWorkspace.name.ilike(f"%{q}%")) (name only) + # if tags: .where(ShowcaseWorkspace.tags.contains(tags)) (CONTRACT(E1)-2) + # sort: allow-list {created_at,name,seed,status}; unknown -> created_at + # desc; name uses .nulls_last(); ALWAYS ORDER BY pinned.desc() first, + # then the sort expr, then id.desc() tiebreak + - EXTEND count_workspaces(db, *, q=None, tags=None, include_archived=False) + # SAME where-chain as list (scenarios/service.py:462-465 precedent) — + # extract a shared _apply_filters(stmt, ...) helper to keep them in sync + - Update module docstring (E2 routes the filters). + +Task 3 — MODIFY app/features/demo/schemas.py: + - APPEND WorkspaceRefHealth + WorkspaceHealthResponse (blueprint above); + docstring notes: response models, NOT strict (StepEvent precedent @88-95). + - CONTRACT(E1)-4 defensive check: if E1 did not expose archived/pinned/tags/ + replayed_from_workspace_id on WorkspaceListItem (+notes/ + config_schema_version on WorkspaceDetailResponse), ADD them here + additively (from_attributes picks them up from the ORM row). + +Task 4 — CREATE app/features/demo/link_health.py: + - build_probe_targets(ws) + probe_workspace_links(app, ws) per the blueprint. + - MIRROR pipeline._Client transport flags exactly (raise_app_exceptions=False). + - _probe_one catches (httpx.HTTPError, OSError) -> "unknown"; NEVER raises. + - Full --strict annotations; module docstring states the no-cross-slice- + import rationale (Decision 3) and the 2xx/404/other classification table. 
+ +Task 5 — MODIFY app/features/demo/routes.py: + - EXTEND GET /workspaces signature with q / tags / include_archived / + sort_by / sort_order Query params (mirror dimensions/routes.py:65-75 + + scenarios/routes.py:180 styles; document the allow-list + silent fallback + in the docstring); pass through to workspace.list_workspaces / + count_workspaces (same filter args to BOTH). + - ADD GET /workspaces/{workspace_id}/health -> WorkspaceHealthResponse: + # async def get_workspace_health(workspace_id: str, request: Request, + # db: AsyncSession = Depends(get_db)): + # row = await workspace.get_workspace(db, workspace_id) + # if row is None: raise NotFoundError(message=f"Workspace not found: {workspace_id}") + # return await link_health.probe_workspace_links(request.app, row) + # Place between the GET detail (@110) and DELETE (@138). No path + # collision: /workspaces/{id}/health is more specific than /workspaces/{id}. + - Update the module docstring route inventory. + +Task 6 — backend tests: + - CREATE app/features/demo/tests/test_link_health.py (unit, no DB): + # build a THROWAWAY FastAPI stub app with routes returning 200 / 404 / + # 500 at the probed paths; construct a ShowcaseWorkspace instance + # in-memory (not persisted) with created_objects covering every key + + # job_ids slot; assert classification alive/dead/unknown + counts + + # partial_run on status='failed'; assert non-probeable keys skipped; + # assert empty created_objects -> empty references, partial_run logic. 
+ - MODIFY app/features/demo/tests/test_routes.py: + UNIT (monkeypatch app.features.demo.routes.workspace / .link_health): + - list passes q/tags/include_archived/sort args through (capture kwargs) + - list rejects bad limit (existing) — keep green + - health 404 on missing workspace (problem+json content-type) + - health 200 happy path (monkeypatched probe returns canned response) + INTEGRATION (@pytest.mark.integration, db_session): + - seed rows: named/unnamed, archived, pinned, tagged -> + default list hides archived; include_archived=true shows it; + q matches name substring case-insensitively; tags containment; + sort_by=name asc with NULLS LAST; pinned row first regardless of sort; + total respects filters + - health integration: insert a workspace whose created_objects carry + one REAL reference (insert a scenario_plan row via its ORM, or use a + bogus-vs-real registry pair) + one bogus id -> assert alive + dead + - MODIFY app/features/demo/tests/test_workspace.py: filter unit coverage of + _apply_filters where practical (or fold into the integration tests above). + +Task 7 — MODIFY frontend/src/types/api.ts: + - Lifecycle fields per CONTRACT(E1)-4 (verify-or-add), WorkspaceUpdate, + WorkspaceListParams, WorkspaceRefHealth/WorkspaceHealth (blueprint above). + - DemoRunRequest: verify E1 added replayed_from_workspace_id?: string + (CONTRACT(E1)-5); add if missing. 
+ +Task 8 — MODIFY frontend/src/hooks/use-workspaces.ts (+ test): + - useWorkspaces(params: WorkspaceListParams = {}, enabled = true): + queryKey ['workspaces', params]; api('/demo/workspaces', { params }) + # update existing call site: WorkspacePanel.tsx:77 (the sole useWorkspaces + # caller — showcase.tsx never calls it directly) + - ADD usePatchWorkspace(): + mutationFn: ({workspaceId, update}: {workspaceId: string; update: WorkspaceUpdate}) => + api(`/demo/workspaces/${workspaceId}`, { method: 'PATCH', body: update }) + onSuccess: invalidate ['workspaces'] # blanket key matches list+detail + - ADD useWorkspaceHealth(workspaceId: string, enabled = true): + queryKey ['workspaces', workspaceId, 'health']; staleTime 30_000 + - ADD useWorkspaceLineage(workspaceId: string | null): + one useQuery; queryFn walks replayed_from_workspace_id via sequential + api() calls, depth cap 5; a 404 (ApiError.status===404) + terminates the walk with a {deleted: true} sentinel entry; returns + Array<{workspace_id, name, deleted}> oldest-last. + - MODIFY use-workspaces.test.ts: params serialization, PATCH invalidation, + lineage walk incl. 404 termination (mock api module). + +Task 9 — CREATE frontend/src/components/demo/ReplayConfirmDialog.tsx (+ test): + - Props: { workspace: WorkspaceListItem | null, # null = closed + requestPreview: DemoRunRequest | null, # built by the page + onConfirm: () => void, onCancel: () => void } + - AlertDialog (open={workspace !== null}; onOpenChange close -> onCancel) — + copy the shared-dialog pattern from WorkspacePanel.tsx:191-216. + - Body: 3-column table (Field / Recorded / Will send) over seed, scenario, + reset, skip_seed, name, preservation, replayed_from; per-row mismatch + highlight (font-semibold text-destructive on the "Will send" cell when + values differ — defensive; verbatim replay normally matches). 
+ - reset=true -> warning block (AlertTriangle + "Replaying this workspace + WIPES the database and reseeds it.") + AlertDialogAction + className="bg-destructive text-destructive-foreground hover:bg-destructive/90" + label "Replay & wipe database"; else label "Replay". + - Footer hint: "Want to change the config first? Use Load instead." (muted). + - data-testid="replay-confirm" on the action (WorkspacePanel test precedent). + - Test: renders preview values; destructive copy/label only when reset; + confirm fires onConfirm once; cancel fires onCancel; mismatch highlight. + +Task 10 — CREATE frontend/src/components/demo/WorkspaceEditDialog.tsx (+ test): + - Props: { workspace: WorkspaceListItem | null, onClose: () => void } + - Dialog (ui/dialog.tsx — form dialog, not AlertDialog) with: name Input + (reuse WORKSPACE_NAME_PATTERN from showcase.tsx:26 — export it from a + shared location, e.g. components/demo/workspace-name.ts, instead of + duplicating), notes Textarea, tags Input (comma-separated -> trimmed + string[]; render current tags as chips above the input). + - Save -> usePatchWorkspace().mutate({workspaceId, update}); toast on + success/failure (sonner pattern WorkspacePanel.tsx:88-99); close on success. + - Send ONLY changed fields (partial update — CONTRACT(E1)-3 semantics). + - Test: pattern violation disables Save with inline hint; save sends only + dirty fields; success closes + toasts (mock usePatchWorkspace). + +Task 11 — CREATE frontend/src/components/demo/WorkspaceLineageStrip.tsx (+ test): + - Props: { workspaceId: string, onLoadAncestor: (id: string) => void } + - useWorkspaceLineage(workspaceId); render breadcrumb: current ← parent ← + … oldest; ancestors as Button variant="link" size="sm" (click -> + onLoadAncestor); deleted sentinel renders muted "(original deleted)"; + depth-cap overflow renders trailing "…". Render nothing (null) when the + workspace has no replayed_from_workspace_id. 
+ - Test: chain render order, deleted sentinel, null when no lineage. + +Task 12 — MODIFY frontend/src/components/demo/WorkspacePanel.tsx (+ test): + - Toolbar row above the list: search Input (icon lucide Search; applies as + `q` on Enter/debounce), "Show archived" Checkbox, sort Select + (Newest/Oldest/Name/Status -> sort_by+sort_order pairs), active-tag chip + (clearable) when a tag filter is set. + - Panel owns the list-params state and calls useWorkspaces(params). + - Row additions: leading multi-select Checkbox; Pin icon button (lucide Pin + / PinOff, fires usePatchWorkspace toggle); archived rows: opacity-60 + + outline Badge "archived"; replay Badge (outline, "replay") when + replayed_from_workspace_id != null; tags as clickable chips (sets the tag + filter); DropdownMenu (MoreHorizontal): Pin/Unpin, Archive/Unarchive, + Edit details…, Delete… (Delete keeps the existing pendingDelete dialog). + - Replay button now calls a NEW prop onRequestReplay(ws) (the page owns the + confirm dialog) — RENAME the old onReplay prop to make the break explicit. + - Selection footer: "N selected" + Delete selected (AlertDialog confirm -> + sequential `for (const id of selected) await deleteWorkspace.mutateAsync(id)` + with per-failure collection -> one summary toast; clear selection) + + Compare button (disabled unless exactly 2; useNavigate -> + `${ROUTES.SHOWCASE_COMPARE}?a=${id1}&b=${id2}`). + - Keep the component lean: extract WorkspaceToolbar + WorkspaceRow as + file-local components if the file passes ~300 lines. + - Tests: search/sort/archived params flow into useWorkspaces (mock + assert + last call args); multi-select count + delete-selected confirm calls N + mutateAsync; compare disabled at 1 and 3 selections; pin/archive fire + PATCH mutations; replay fires onRequestReplay (NOT start). 
+ +Task 13 — MODIFY frontend/src/components/demo/WorkspaceArtifactsPanel.tsx (+ test): + - Props += { health?: WorkspaceHealth | null } + - buildCards gains the refId per card; a card whose refId matches a + health.references entry with status==='dead' renders AlertTriangle + (h-3 w-3 text-destructive) beside the label + title tooltip "This object + no longer exists — it was deleted after the run." ('unknown' -> no marker). + - Header chip row: `✓ {alive} live` (text-success) + `✕ {dead} dead` + (text-destructive, only when dead>0) + outline Badge "partial run" when + health.partial_run (tooltip: "This run never completed — artifacts may be + missing."). Skeleton/silent when health undefined (query in flight/disabled). + - Test: dead marker on matching card; summary chip counts; partial-run badge. + +Task 14 — MODIFY frontend/src/pages/showcase.tsx: + - State += pendingReplay: WorkspaceListItem | null. + - handleReplayWorkspace(ws) -> setPendingReplay(ws) (no start()). + - NEW executeReplay(ws): the post-E1 body (showcase.tsx:174-186 today — + setScenario first; E1 shifts these anchors and adds + replayed_from_workspace_id: ws.workspace_id, which executeReplay PRESERVES + — CONTRACT(E1)-5, preserve-not-add); clear pendingReplay. + - buildReplayRequest(ws): pure helper producing the DemoRunRequest preview + passed to the dialog AND used by executeReplay (single source — the diff + can never lie about what's sent). Export for unit testing. + - Mount <ReplayConfirmDialog workspace={pendingReplay} + requestPreview={pendingReplay ? buildReplayRequest(pendingReplay) : null} + onConfirm={() => pendingReplay && executeReplay(pendingReplay)} + onCancel={() => setPendingReplay(null)} /> + - Health: const health = useWorkspaceHealth(selectedWorkspaceId ?? '', + !!selectedWorkspaceId); pass health.data into WorkspaceArtifactsPanel. 
+ - Lineage: mount <WorkspaceLineageStrip workspaceId={selectedWorkspaceId} + onLoadAncestor={(id) => { /* fetch list item via detail + handleLoad */ }} /> + inside the loaded-workspace block (@448-450 region); simplest + onLoadAncestor: setSelectedWorkspaceId(id) + repopulate controls from the + lineage entry's detail (the strip's hook already has the details — pass + the full WorkspaceDetail up instead of just the id if cleaner). + - WorkspacePanel prop rename: onRequestReplay={handleReplayWorkspace}. + +Task 15 — CREATE frontend/src/pages/workspace-compare.tsx (+ test) + routing: + - MODIFY frontend/src/lib/constants.ts: SHOWCASE_COMPARE: '/showcase/compare' + (beside SHOWCASE @4). + - MODIFY frontend/src/App.tsx: lazy WorkspaceComparePage + <Route path={ROUTES.SHOWCASE_COMPARE} …> (mirror + RunComparePage @21, @119-126). '/showcase/compare' and '/showcase' are + distinct paths — no nesting needed. + - Page mirrors run-compare.tsx: useSearchParams a/b (@87-109 pattern); + pickers = Select over useWorkspaces({limit: 100, include_archived: true}) + items (label: name ?? id.slice(0,8) · scenario · status); two + useWorkspace(a/b) detail queries; render: + * config table — seed/scenario/reset/skip_seed/name/tags; mismatch rows + highlighted (font-semibold) + * results table — winner_model_type, winner_wape (DeltaCell-style + sign-only delta — copy the component from run-compare.tsx:33-54 + file-locally), wall_clock_s + * created-objects matrix — union of soft-reference keys × (A: ✓/—, + B: ✓/—) + * lineage note — "B is a replay of A" (or inverse) when + replayed_from_workspace_id links them + * partial-run outline Badge per side when status !== 'completed' + Missing/invalid id -> that side renders the picker + muted "select a + workspace" (no crash; ApiError 404 -> same fallback). + - Test: renders diff for two mocked details; mismatch highlight; lineage + note; 404 side falls back to picker state. + +Task 16 — barrel + docs: + - MODIFY frontend/src/components/demo/index.ts — export the three new + components. 
+ - MODIFY docs/_base/API_CONTRACTS.md: + * GET /demo/workspaces row: append "E2 (#408) — `q` name search, `tags` + containment filter, `include_archived` (default false), allow-listed + `sort_by`/`sort_order`; pinned rows first; `total` respects filters" + * NEW row: | demo | GET | `/demo/workspaces/{workspace_id}/health` | + E2 (#408) — probe the workspace's soft references in-process; per-ref + alive/dead/unknown + counts + `partial_run`; `404` when missing | + - MODIFY docs/_base/RUNBOOKS.md § "Showcase workspace — …": + * item 1: replace "there is deliberately no confirm dialog" with the E2 + reality (every panel Replay confirms; reset=true gets destructive + copy; the DESTRUCTIVE row marker stays) + * item 3/4: one-line additions — multi-select delete = N metadata-only + singles; dead links now SURFACE via the health summary instead of + silently dangling + - VERIFY (not edit) DOMAIN_MODEL.md replayed_from note was updated by E1. + +Task 17 — gates, dogfood, commits, PR: + - Backend gates + integration suite (Validation Loop below). + - Frontend: cd frontend && pnpm lint && pnpm test --run. + - Browser dogfood via the webapp-testing skill (CLAUDE.md workflow step 4): + seeded stack -> save 3 workspaces (one reset=true, one tagged, one + replayed) -> search/sort/archive/pin -> replay with confirm (destructive + variant) -> lineage chain -> compare page -> delete a referenced scenario + plan -> reload workspace -> dead-link warning + health chip. + - git diff --stat (CRLF surgical-diff check on edited files). 
+ - COMMITS (reference #408, no AI trailer), e.g.: + feat(api): add workspace list filters and link-health endpoint (#408) + feat(ui): add replay confirmation with config diff to showcase (#408) + feat(ui): add workspace lifecycle controls and lineage rendering (#408) + feat(ui): add two-workspace compare page (#408) + test(api): cover workspace filters and link-health probes (#408) + docs(api): document workspace lifecycle and health contracts (#408) + - PR into dev; title `feat(api,ui): showcase-completion E2 — safe replay & + workspace lifecycle (#408)`; body notes the replay-policy-picker deferral + (Decision 1) + any CONTRACT(E1) reconciliation deltas. +``` + +### Integration Points + +```yaml +DATABASE: none in E2 — reads the E1-migrated table; NO new migration. + +CONFIG: none — no new settings or env vars (probe timeout is a module constant). + +ROUTES: existing demo router only (app/main.py wiring unchanged): extended GET + /demo/workspaces + new GET /demo/workspaces/{id}/health. PATCH is E1's. + +FRONTEND ROUTES: one new React Router page at ROUTES.SHOWCASE_COMPARE + ('/showcase/compare'); registered in App.tsx beside the existing pages. + +DOCS: API_CONTRACTS.md + RUNBOOKS.md (Task 16). Full doc sweep belongs to the + E7 release gate — keep E2's edits additive and minimal. +``` + +## Validation Loop + +### Level 1: Syntax & Style + +```bash +uv run ruff check . && uv run ruff format --check . +uv run mypy app/ && uv run pyright app/ +cd frontend && pnpm lint +# Expected: clean. Both Python type checkers are --strict and gate merge. +# (pnpm tsc --noEmit is vacuous; tsc -b fails with PRE-EXISTING errors — do +# not chase them. lint + vitest are the JS gates.) 
+``` + +### Level 2: Unit Tests (no DB) + +```bash +uv run pytest app/features/demo -v -m "not integration" +uv run pytest app/core/tests/test_strict_mode_policy.py -v # AST walker still green +cd frontend && pnpm test --run +# New/changed: test_link_health (stub-app probe classification), test_routes +# filter/health unit tests, use-workspaces hooks, ReplayConfirmDialog, +# WorkspaceEditDialog, WorkspaceLineageStrip, WorkspacePanel rework, +# WorkspaceArtifactsPanel health markers, workspace-compare page. +``` + +### Level 3: Integration (real Postgres) + +```bash +docker compose up -d && uv run alembic upgrade head +uv run pytest app/features/demo -v -m integration +# List filters against seeded rows (archived hidden / shown, q, tags, +# sort + pinned-first, filtered total) + health probe (real + bogus refs). +``` + +### Level 4: Manual smoke + browser dogfood (seeded local stack, uvicorn :8123) + +```bash +# 1. Filtered list + health round-trip +curl -s "http://localhost:8123/demo/workspaces?q=demo&sort_by=name&sort_order=asc" | python3 -m json.tool | head -30 +curl -s "http://localhost:8123/demo/workspaces?include_archived=true" | python3 -c "import sys,json; d=json.load(sys.stdin); print(d['total'])" +WS_ID=$(curl -s -X POST http://localhost:8123/demo/run -H 'Content-Type: application/json' \ + -d '{"skip_seed": true, "preservation": "keep", "workspace_name": "e2-smoke"}' \ + | python3 -c "import sys,json; print(json.load(sys.stdin)['workspace_id'])") +curl -s "http://localhost:8123/demo/workspaces/${WS_ID}/health" | python3 -m json.tool +curl -s -o /dev/null -w "%{http_code} %{content_type}\n" \ + http://localhost:8123/demo/workspaces/deadbeefdeadbeefdeadbeefdeadbeef/health # 404 problem+json + +# 2. 
Dead-link proof: delete a referenced scenario plan, re-probe +# (pick a scenario_plan_id from the workspace detail's created_objects) +curl -s -X DELETE http://localhost:8123/scenarios/<scenario_plan_id> -o /dev/null -w "%{http_code}\n" +curl -s "http://localhost:8123/demo/workspaces/${WS_ID}/health" \ + | python3 -c "import sys,json; print([r for r in json.load(sys.stdin)['references'] if r['status']=='dead'])" + +# 3. Browser dogfood (webapp-testing skill / agent-browser): +# /showcase -> save workspaces -> toolbar search/sort/show-archived -> +# pin (row jumps first) -> archive (vanishes until toggle) -> Edit details +# (rename + tags chips) -> Replay -> confirm dialog shows the diff table -> +# a reset=true workspace shows destructive copy + red button -> confirmed +# replay goes green, new row carries the "replay" badge -> Load it -> +# lineage strip shows the chain -> select 2 rows -> Compare page diff -> +# multi-select 2 -> Delete selected -> rows gone, created objects intact -> +# loaded workspace with the deleted plan shows the dead-link warning + chip. +``` + +## Final validation Checklist + +- [ ] All five gates green: `uv run ruff check . && uv run ruff format --check . && uv run mypy app/ && uv run pyright app/ && uv run pytest -v -m "not integration"` +- [ ] Integration suite green: `uv run pytest -v -m integration` (fresh docker-compose DB) +- [ ] Frontend gates green: `cd frontend && pnpm lint && pnpm test --run` +- [ ] No replay path bypasses the confirm dialog; reset=true shows destructive variant (vitest + dogfood) +- [ ] List filters: archived hidden by default, q/tags/sort behave, pinned-first, filtered total (route tests + curl) +- [ ] Health endpoint classifies alive/dead/unknown; dead-link warning + partial-run chip render (integration + dogfood step 2/3) +- [ ] Lineage chain renders incl. 
deleted-ancestor sentinel +- [ ] Compare page deep-links `?a=<id>&b=<id>` and degrades gracefully on bad ids +- [ ] Multi-select delete = N single DELETEs; **no new bulk endpoint in the diff** +- [ ] Legacy list calls + all pre-existing demo tests unchanged-green +- [ ] CONTRACT(E1) reconciliation notes in the PR body; replay-policy deferral noted +- [ ] `git diff --stat` surgical (no CRLF whole-file noise) +- [ ] docs/_base/API_CONTRACTS.md + RUNBOOKS.md updated additively +- [ ] Commits `type(scope): description (#408)`, no AI trailer; PR into dev; browser dogfood evidence per `.claude/rules/ui-design.md` + +--- + +## Anti-Patterns to Avoid + +- ❌ Don't start before E1 (#407) merges; don't re-implement E1 surface (migration, PATCH, provenance write). +- ❌ Don't import another feature slice from `app/features/demo/` — link health is in-process HTTP only. +- ❌ Don't add a bulk-delete endpoint or any "wipe everything" operation — N singles, period. +- ❌ Don't add a replay-policy picker (exact/safe-keep/modified) — explicitly deferred (Decision 1). +- ❌ Don't make health/response models strict — strict mode is request-body policy. +- ❌ Don't probe health for every list row — loaded workspace only. +- ❌ Don't let a probe exception 500 the health route — classify as `unknown`. +- ❌ Don't mutate the original workspace row on replay — replay still creates a NEW row (provenance points back). +- ❌ Don't duplicate the name pattern regex — share it between run controls and the edit dialog. +- ❌ Don't run `shadcn add` — every needed primitive is installed; don't use raw colors — semantic tokens only. +- ❌ Don't call `crypto.randomUUID` directly — `safeRandomUUID` (ESLint-enforced). +- ❌ Don't chase pre-existing `tsc -b` errors — lint + vitest are the JS gates. + +## Confidence Score + +**7.5/10** for one-pass implementation success. 
The backend half (list filters ++ health endpoint) is a composition of three verified in-repo precedents +(dimensions search/sort, scenarios tags containment, pipeline ASGITransport) +with clear test shapes. The deductions: (a) E2 is authored against a frozen +but UNMERGED E1 contract — seven CONTRACT(E1) points must reconcile against +E1's real merged shape, and any naming/shape divergence costs an adaptation +pass (mitigated by Task 1's reconciliation gate and verify-or-add fallbacks); +(b) the WorkspacePanel rework is the single largest UI delta of the showcase +initiative so far (toolbar + badges + dropdown + multi-select + confirm +rerouting in one component) where an interaction miss costs an iteration; and +(c) four parallel epics share `schemas.py` / `routes.py` / `showcase.tsx`, +so rebase friction is plausible even with additive-only edits. diff --git a/PRPs/PRP-showcase-completion-E3-seed-config-scope.md b/PRPs/PRP-showcase-completion-E3-seed-config-scope.md new file mode 100644 index 00000000..e5a0df6e --- /dev/null +++ b/PRPs/PRP-showcase-completion-E3-seed-config-scope.md @@ -0,0 +1,1080 @@ +name: "PRP — Showcase Completion E3: Advanced Seed Config MVP + Store/Product Scope Selection (issue #409)" +description: | + +## Purpose + +Implement Parallel epic E3 of the showcase-completion initiative (umbrella #406): +an additive, allow-listed nested override schema on the seeder HTTP contract +(7 curated knobs), an additive `seed_overrides` field on `DemoRunRequest` / the +WS start frame, a store/product focus-pair selector with pre-run preview on the +Showcase page, frontend + backend validation of every knob, and persistence of +overrides + user-selected scope into the workspace row (E1 #407 story slots) so +replay honors them verbatim. + +**Execution gate:** this epic is Parallel after Foundation — implementation +starts ONLY after E1 #407 merges to `dev` (its migration ships the +`seed_overrides` / `user_scope` JSONB story slots E3 writes into). 
Every +dependency on E1's surface is tagged `CONTRACT(E1):` below; re-verify each tag +against the merged E1 code before starting Task 1. + +## Core Principles + +1. **Context is King**: every file reference below was verified against the live code on 2026-06-12 (branch dev @ bdf85f6, post-E4/#404 merge — PRE-E1-#407; line numbers will drift slightly after E1 merges, re-anchor by symbol name). +2. **Validation Loops**: each level is executable as written. +3. **Information Dense**: patterns cite exact file:line (or symbol when post-E1 drift is likely). +4. **Progressive Success**: shared override schema → seeder contract → demo start frame → pipeline consumption → workspace persistence → frontend → docs → browser dogfood. +5. **Global rules**: follow CLAUDE.md / AGENTS.md; all five backend CI gates must pass; UI work follows `.claude/rules/ui-design.md` + `.claude/rules/shadcn-ui.md`. + +--- + +## Goal + +A user on `/showcase` ticking **Re-seed first** can open an **Advanced seed +config** panel and turn 7 curated knobs (store count, product count, window +days, sparsity, promotion intensity, stockout intensity, noise sigma) before +running; independently, the user can pick an explicit **store/product focus +pair** (with a pre-run preview of the selected entities and the seeded window) +that the pipeline models instead of the auto-discovered first pair. Both the +overrides and the scope persist into a kept workspace row and are re-submitted +verbatim on Replay. A start frame without the new fields behaves +byte-identically to today. + +**Deliverable** (all additive; ZERO migrations — E1 #407 owns the schema): + +- `app/shared/seeder/overrides.py` — NEW: `SeederOverrides` Pydantic model (the single shared allow-list, `extra="forbid"`), importable by both the seeder and demo slices through `app/shared/` (vertical-slice-legal). 
+- `app/features/seeder/schemas.py` — `GenerateParams.overrides: SeederOverrides | None = None` (additive nested optional object on the EXISTING endpoint — decision rationale below). +- `app/features/seeder/service.py` — `_apply_seed_overrides(config, overrides)` applied LAST in `_build_config_from_params` (wins over the legacy scalar `stores`/`products`/`sparsity`), mapping each knob onto its `SeederConfig` sub-dataclass via `dataclasses.replace`. +- `app/features/demo/schemas.py` — `DemoRunRequest.seed_overrides: SeederOverrides | None` + `DemoRunRequest.user_scope: UserScope | None` (NEW small model) + two cross-field validators. +- `app/features/demo/pipeline.py` — `DemoContext` carries both; `step_seed` forwards `overrides` to `POST /seeder/generate`; `step_status` honors `user_scope` (validate via `/dimensions/*/{id}`; warn + fallback to discovery on a dangling pair). +- `app/features/demo/workspace.py` — `create_workspace` writes the two E1 story slots; list/detail response schemas expose them (replay reads list rows). +- `frontend/src` — `SeedConfigPanel.tsx` + `ScopeSelector.tsx` (composed from installed shadcn primitives), `lib/workspace-replay.ts` pure replay-frame builder, `types/api.ts` additions, `showcase.tsx` wiring. +- Tests: seeder schema/route/service tests (incl. out-of-bounds 422 + unknown-knob 422), demo schema JSON-path tests, pipeline `_RecordingClient` forwarding tests, workspace slot persistence tests, replay-verbatim regression (backend integration + frontend pure-helper test), component vitests. +- Docs: `docs/_base/API_CONTRACTS.md` (3 rows), `docs/_base/RUNBOOKS.md` (new incident entry + workspace-section update), `docs/_base/DOMAIN_MODEL.md` (slot schema documentation). + +**Success definition**: all Success Criteria check off, the five backend gates + +frontend lint/test are green, and a real-browser dogfood shows: an +overridden re-seed run (e.g. 
8 stores × 20 products, promo 0.3) goes green with +the seed card echoing the overrides; a scope-selected run models the chosen +pair; a kept run replays both verbatim. + +## Why + +- Umbrella #406: today the showcase accepts only `seed`/`scenario`/`reset`/`skip_seed`; the preset's behavioral character (noise, promos, stockouts, sparsity) is take-it-or-leave-it, and the modeled grain is always the first discovered `(store, product)` pair (`app/features/demo/pipeline.py:582-631`) — the operator cannot tell the story of a specific SKU. +- The seeder HTTP contract already accepts 25+ FLAT scalar/flag fields (`app/features/seeder/schemas.py:78-298`) — the umbrella's top risk is that surface growing unbounded. A curated nested object with `extra="forbid"` is the documented mitigation: 7 knobs, mechanically allow-listed, everything else stays preset-driven. +- E1 #407 reserves `seed_overrides` + `user_scope` JSONB story slots on `showcase_workspace` precisely so this epic's config survives into Replay — without persistence, replay of an overridden run would silently regenerate different data. +- E3 is Parallel after Foundation: it can land independently of E2 #408 / E4 #410 / E5 #411 / E6 #412 (no shared files beyond additive edits to `showcase.tsx` / `workspace.py` — coordinate merge order if simultaneous). + +## What + +### Open question resolved — seeder override contract shape + +**DECISION: expand `GenerateParams` with an additive nested optional object +(`overrides: SeederOverrides | None = None`). NO new endpoint.** Rationale, +researched against the current code: + +1. **The layering already exists.** `_build_config_from_params` (`app/features/seeder/service.py:202-247`) is a layered override pipeline: preset → scalar dims/window/sparsity → `_apply_phase1_overrides` (:74-137) → `_apply_phase2_overrides` (:139-199). 
A `_apply_seed_overrides` applied last is a fourth layer in an established pattern — a new endpoint would have to reimplement or call into this exact function anyway. +2. **A new endpoint duplicates load-bearing guards.** `POST /seeder/generate` carries `_check_seeder_enabled()` (production guard, `routes.py:21-33`), the ValueError→400 / Exception→500 RFC 7807 envelope (`routes.py:114-136`), and the seeder-is-the-only-bulk-mutation-path invariant. A second generate-shaped endpoint doubles that audit surface for zero contract benefit. +3. **Back-compat is free.** Absent field = `None` = byte-identical behavior — the exact precedent the Phase 1/Phase 2 field comments in `schemas.py:121-123,175-177` already promise and test. +4. **Nested (not more flat scalars) is the allow-list mechanism.** `ConfigDict(extra="forbid")` on the nested model makes an unknown knob a 422 — the umbrella's "contract grows unbounded" mitigation becomes machine-enforced, and the 7 curated knobs stay visually distinct from the 25+ legacy scalars. +5. **One schema serves both slices.** The demo start frame forwards the same object verbatim; placing `SeederOverrides` in `app/shared/seeder/overrides.py` lets `app/features/seeder/schemas.py` and `app/features/demo/schemas.py` both import it without a cross-slice import (precedent: `demo/schemas.py:16` already imports `ScenarioPreset` from `app/shared/seeder/config`). + +Trade-off accepted: `extra="forbid"` means a FUTURE knob sent by a newer client +to an older backend errors loudly instead of being ignored. That asymmetry vs. +the top-level start frame (unknown TOP-LEVEL keys remain ignored) is +deliberate — silent knob-dropping would fake-honor a config the run never used. 
+ +### Allow-listed knob → config-field mapping (the complete MVP surface) + +| Knob (wire name) | Type / bounds | Maps to (via `dataclasses.replace`) | Preset reference values | +|---|---|---|---| +| `stores` | `int`, ge=1 le=100 | `config.dimensions.stores` (`DimensionConfig.stores`, `app/shared/seeder/config.py:118`) | demo profiles 3–5; scalar `GenerateParams.stores` caps 100 | +| `products` | `int`, ge=1 le=500 | `config.dimensions.products` (`DimensionConfig.products`, config.py:119) | demo profiles 10–25; scalar caps 500 | +| `window_days` | `int`, ge=75 le=365 | `config.start_date = config.end_date - timedelta(days=window_days)` (end_date untouched) | ≥75 keeps the `historical_backfill` gate clear (`pipeline.py` gate = `3*(14+1)+30 = 75`); ≤365 = `DEFAULT_SEED_SPAN_DAYS` | +| `sparsity` | `float`, ge=0.0 le=0.9 | `config.sparsity = replace(config.sparsity, missing_combinations_pct=v)` (`SparsityConfig.missing_combinations_pct`, config.py:141) — `replace` PRESERVES the preset's `random_gaps_*` fields | sparse preset uses 0.5; 1.0 would seed zero series (hard-fail), hence the 0.9 cap | +| `promotion_intensity` | `float`, ge=0.0 le=0.5 | `config.retail = replace(config.retail, promotion_probability=v)` (`RetailPatternConfig.promotion_probability`, config.py:101) | preset max 0.25 (holiday_rush); 0.5 cap = 2× headroom | +| `stockout_intensity` | `float`, ge=0.0 le=0.5 | `config.retail = replace(config.retail, stockout_probability=v)` (config.py:102) | preset max 0.25 (stockout_heavy); higher values risk NaN-WAPE (documented expected-fail, mirrors sparse) | +| `noise_sigma` | `float`, ge=0.0 le=0.5 | `config.time_series = replace(config.time_series, noise_sigma=v)` (`TimeSeriesConfig.noise_sigma`, config.py:72) | preset max 0.4 (high_variance) | + +Precedence (document in the field description AND a service test): nested +`overrides` is applied LAST in `_build_config_from_params` and therefore WINS +over the legacy scalar `stores` / `products` / `sparsity` when 
both are sent. +`window_days` recomputes `start_date` from the (scalar-or-default) `end_date`. +The pipeline keeps sending `sparsity=0.0` as the scalar (preserves preset +character per the `if params.sparsity > 0` guard at `service.py:225-226`); +`overrides.sparsity` is the only way the demo overrides sparsity. + +### `seed_overrides` / `user_scope` slot schemas (THIS PRP's contract to define) + +E1 #407 reserves the slots; the JSON inside them is defined HERE: + +```jsonc +// showcase_workspace.seed_overrides (JSONB; NULL when the run had none) +// = SeederOverrides.model_dump(mode="json", exclude_none=True) — SPARSE: +// only operator-set knobs appear; {} never stored (None instead). +{ + "stores": 8, // int 1..100, optional + "products": 20, // int 1..500, optional + "window_days": 120, // int 75..365, optional + "sparsity": 0.3, // float 0.0..0.9, optional + "promotion_intensity": 0.3, // float 0.0..0.5, optional + "stockout_intensity": 0.1, // float 0.0..0.5, optional + "noise_sigma": 0.25 // float 0.0..0.5, optional +} + +// showcase_workspace.user_scope (JSONB; NULL when no pair was picked) +// = UserScope.model_dump(mode="json") — both keys always present when non-null: +{ + "store_id": 12, // int ge=1 — REAL discovered id (sequences + "product_id": 47 // int ge=1 never reset; ids are NOT 1-based) +} +``` + +Replay semantics: the slots record the REQUESTED config (replay-verbatim +contract, mirrors the E1 seed/scenario/reset/skip_seed columns). The EFFECTIVE +grain a run actually modeled is already recorded separately by +`finalize_workspace` into the `store_id` / `product_id` columns +(`workspace.py:136-137`) — when a replayed `user_scope` dangles (warn+fallback, +below), the two will legitimately differ; that divergence is visible, not +hidden. 
+ +### User-visible behavior + +- **Advanced seed config panel** (`/showcase`): a collapsible "Advanced seed config" section appears under the run controls, enabled ONLY while **Re-seed first** is ticked (overrides are meaningless on `skip_seed=true` and the backend rejects the combination). 7 controls with the bounds above; a "live summary" line echoes the effective config (e.g. "8 stores × 20 products × 120 days · promo 0.30"); a caveat notes high sparsity/stockout values can legitimately fail the backtest (NaN WAPE — same documented semantics as the `sparse` preset). `window_days` control is disabled with an explanatory tooltip when the `holiday_rush` preset is selected (calendar-pinned window). +- **Store/product focus-pair selector**: two dropdowns (stores, products — fed by `GET /dimensions/stores` / `GET /dimensions/products`, `page_size=100`) plus a pre-run preview card showing the chosen store (code/name/region/type), product (sku/name/category/brand) and the currently seeded window (from `GET /seeder/status`). Works WITHOUT re-seeding (scope selection on the existing dataset is the primary use). Ticking **Reset database** clears the selection with a caveat ("a wipe re-issues ids — re-pick after the run"), because Postgres sequences never reset (memory anchor: seeder-does-not-reset-id-sequences). +- **Run**: the start frame carries `seed_overrides` (only when re-seeding and ≥1 knob set) and `user_scope` (when a pair is picked). The seed step card echoes the overridden dims; the status step card says "user-selected pair" vs "discovered pair". +- **Replay** of a kept run re-submits recorded `seed_overrides` + `user_scope` verbatim alongside the existing 4 config fields. Load repopulates the panel + selector. +- **Legacy behavior**: a start frame without the new fields is byte-identical to today (contract test). 
+ +### Technical requirements + +- All new request fields are additive `Optional` with `None` defaults; the WS start frame keeps ignoring unknown TOP-LEVEL keys (`DemoRunRequest` default `extra=ignore`); the nested models use `extra="forbid"` (allow-list enforcement). +- `SeederOverrides` and `UserScope` carry `ConfigDict(strict=True, extra="forbid")`. All fields are JSON-native (`int`/`float`) → NO `Field(strict=False)` override needed and the strict-mode AST policy test (`app/core/tests/test_strict_mode_policy.py`) stays green. Runtime-verified on pydantic 2.12.5: a nested-model field under a `strict=True` parent validates from the JSON-parsed dict (FastAPI's `validate_python` path) — see verification log. +- All config is start-frame-time. NOTHING is configurable mid-run — the pipeline is strictly linear under the module-level `asyncio.Lock` (design invariant from umbrella #406; do not add any mid-run mutation channel). +- The demo slice must not import `app/features/seeder/*` — `SeederOverrides` lives in `app/shared/seeder/overrides.py`; `UserScope` lives in `app/features/demo/schemas.py` (demo-only concept). `pipeline.py` may import both (`app.shared.*` + own-slice schemas are already imported at `pipeline.py:43-45`). +- The seeder stays the only bulk-mutation path; no new wipe semantics; `_check_seeder_enabled` untouched. +- E3 ships ZERO Alembic migrations. CONTRACT(E1): the `seed_overrides` + `user_scope` JSONB slots exist on `showcase_workspace` (E1 #407 migration) before this epic executes. + +### Success Criteria + +- [ ] `POST /seeder/generate` accepts `{"overrides": {"stores": 8, "promotion_intensity": 0.3}}` → 201, and the generated config reflects the knobs (service unit test); `{"overrides": {"stores": 0}}` → 422; `{"overrides": {"bogus_knob": 1}}` → 422; a body WITHOUT `overrides` produces a byte-identical `SeederConfig` to today (regression test). 
+- [ ] `DemoRunRequest.model_validate({...})` JSON-path tests: `seed_overrides` with `skip_seed=true` → ValidationError; `window_days` with `scenario="holiday_rush"` → ValidationError; legacy 4-field frame still validates; `user_scope` happy path. +- [ ] `step_seed` forwards `overrides` in the `/seeder/generate` POST body (`_RecordingClient` assertion); `step_status` uses a valid `user_scope` pair (asserts the GET-by-id calls + ctx fields), and WARNS + falls back to discovery on a 404 pair. +- [ ] A `preservation="keep"` run records `seed_overrides` + `user_scope` into the E1 story slots; `GET /demo/workspaces` list items AND `/{id}` detail expose both; the e2e replay regression (`tests/test_e2e_demo.py::test_demo_replay_same_config_twice` extended or sibling test) proves a replayed row carries identical slot JSON. +- [ ] Frontend: panel renders 7 bounded controls only when Re-seed is ticked; selector previews the chosen pair; `workspaceToRunRequest(ws)` unit test proves replay-verbatim including the new fields; `pnpm lint && pnpm test --run` green; no NEW `tsc -b` errors in touched files. +- [ ] Legacy start frames byte-identical (backend contract test + existing demo tests untouched-green). +- [ ] Backend gates green: `uv run ruff check . && uv run ruff format --check . && uv run mypy app/ && uv run pyright app/ && uv run pytest -v -m "not integration"`. +- [ ] Docs updated additively: API_CONTRACTS (seeder + demo + WS rows), RUNBOOKS (new showcase incident entry + workspace-section note), DOMAIN_MODEL (slot schemas under the `showcase_workspace` aggregate). +- [ ] Real-browser dogfood (Level 4) performed. + +## All Needed Context + +### Documentation & References + +```yaml +# MUST READ — codebase patterns (verified 2026-06-12, dev @ bdf85f6 — PRE-E1; +# re-anchor line numbers by symbol after E1 #407 merges) + +- file: app/features/seeder/schemas.py + why: | + GenerateParams at 78-298 — the contract to extend. 
Note the Phase 1 + comment block at 121-123 ("All flags default off so existing scenarios + remain byte-identical") — copy that promise onto the new field. The model + is plain BaseModel (NO ConfigDict(strict=True)) — do NOT add strict mode + to GenerateParams itself (it has date fields start_date/end_date; only + the NEW nested SeederOverrides model is strict). + ChangepointEventParam at 51-64 is the existing nested-model-in-params + precedent (list[ChangepointEventParam] at 153-156). + +- file: app/features/seeder/service.py + why: | + _build_config_from_params at 202-247 — THE integration point. Scalar + overrides at 218-226 (dataclasses.replace on dimensions; sparsity only + when > 0); _apply_phase1_overrides at 74-137 and _apply_phase2_overrides + at 139-199 are the mutate-config-in-place pattern to mirror for + _apply_seed_overrides. APPLY THE NEW LAYER LAST (after :241) so nested + wins over scalars. from dataclasses import replace already imported (:7). + +- file: app/shared/seeder/config.py + why: | + The override targets: TimeSeriesConfig.noise_sigma :72, + RetailPatternConfig.promotion_probability/stockout_probability :101-102, + DimensionConfig.stores/products :118-119, + SparsityConfig.missing_combinations_pct :141 (+ random_gaps fields to + PRESERVE via replace). ScenarioPreset :37-47. holiday_rush pinned window + :553-579 (the reason window_days is rejected for that preset). + DEFAULT_SEED_SPAN_DAYS=365 :10. NO Pydantic here — config.py stays + dataclasses; the new Pydantic model goes in a NEW sibling module + app/shared/seeder/overrides.py. + +- file: app/features/seeder/routes.py + why: | + POST /seeder/generate at 85-136 — NO route-code change needed (the body + model change flows through); read for the _check_seeder_enabled guard + (21-33) and the error envelope you must NOT duplicate (the + no-new-endpoint rationale). + +- file: app/features/demo/schemas.py + why: | + DemoRunRequest at 29-85 — the model to extend. 
The model_validator + _workspace_name_requires_keep at 80-85 is the EXACT cross-field-rule + pattern for the two new validators. The docstring at 30-38 explains the + strict-mode policy; scenario's strict=False override at 59-63 (enum) — + nested BaseModel fields need NO such override (runtime-verified). + WorkspaceListItem at 169-190 / WorkspaceDetailResponse at 192-203 — add + seed_overrides + user_scope to BOTH (replay reads LIST rows: + showcase.tsx:174-186). CONTRACT(E1): E1's PRP may already have surfaced + the story slots on these response models — if so, verify shape + (dict[str, Any] | None) and skip the duplicate edit. + +- file: app/features/demo/pipeline.py + why: | + DemoContext at 212-263 — add seed_overrides/user_scope fields (follow the + PRP-38/39/40 additive-Optional comment style). step_seed at 541-579 — + extend the POST body; _SCENARIO_SEED_PROFILE at 513-538 supplies the + defaults overrides partially replace. step_status at 582-631 — the + first-pair discovery to branch around for user_scope (its docstring + already states ids are NOT 1-based). run_pipeline ctx construction at + 2646-2651 — thread the two new req fields. StepStatus literal includes + "warn" (schemas.py:19) and only "fail" stops the run (:2729-2738) — the + warn+fallback path is safe. CRITICAL header rule :18-19: pipeline must + NOT import app.features.* outside its own slice — app.shared.* is fine. + +- file: app/features/demo/workspace.py + why: | + create_workspace at 46-79 — add the two slot writes on the + ShowcaseWorkspace(...) constructor; warn-and-continue contract at 10-13 + (a slot-write failure must never break the run — the try/except already + guarantees it). finalize_workspace at 106-155 — NO change for the slots + (recorded at create); note store_id/product_id columns at 136-137 record + the EFFECTIVE grain (divergence-visible design). + CONTRACT(E1): E1 refactors create_workspace to write its new columns — + rebase this edit onto E1's merged version. 
+ +- file: app/features/demo/models.py + why: | + ShowcaseWorkspace ORM — E3 does NOT edit this file. CONTRACT(E1): after + E1 merges it carries seed_overrides/user_scope as JSONB story slots; + verify the exact attribute names/types there before writing + workspace.py code. (Assumed shape: nullable JSONB columns mirroring the + created_objects precedent at 77-79.) + +- file: app/features/demo/tests/test_pipeline.py + why: | + _RecordingClient at 1025-1068 (records (method, path, json_body) per + call, canned responses keyed by (method, path-prefix)); _as_client cast + at 1070+. Reuse for: overrides-forwarding, user_scope GET-by-id calls, + warn+fallback (404 canned response). + +- file: app/features/demo/tests/test_schemas.py + why: | + The JSON-path test conventions: test_demo_run_request_json_path_keep_ + with_name :67, test_demo_run_request_legacy_frame_still_validates :75, + test_demo_run_request_workspace_name_requires_keep :83 — mirror all + three shapes for the new fields. + +- file: app/features/seeder/tests/test_routes.py + why: | + Route-test harness: client fixture :15 (TestClient + mocked settings, + seeder_allow_production=True), TestGenerate :96 — add overrides 201 / + 422-bounds / 422-unknown-knob cases here. test_generate_validation_error + :157 is the 422 pattern. + +- file: app/features/seeder/tests/test_service.py + why: | + Service-test patterns for _build_config_from_params — add: knob→field + mapping, precedence-over-scalars, window_days math, preset-character + preservation (e.g. sparse preset's random_gaps survive an overrides. + sparsity replace), and the no-overrides byte-identical regression. + +- file: tests/test_e2e_demo.py + why: | + test_demo_replay_same_config_twice at 561-609 — the replay-regression + guard to extend (or sibling): a keep-run with seed_overrides+user_scope, + replayed, must produce a second row with identical slot JSON. + +- file: frontend/src/pages/showcase.tsx + why: | + Wiring surface. 
handleRun start frame at 139-156 (conditional-spread + pattern for optional fields — reuse for seed_overrides/user_scope); + handleLoadWorkspace at 160-168 (repopulate panel+selector); + handleReplayWorkspace at 174-186 (REPLACE its inline object with the new + workspaceToRunRequest helper); controls block at 269-363 (panel + + selector land after the existing checkboxes); reset checkbox at 301-311 + (hook the scope-clearing caveat here). + +- file: frontend/src/types/api.ts + why: | + DemoRunRequest at 778-788 (+ seed_overrides?/user_scope?); + WorkspaceListItem at 806-816 and WorkspaceDetail at 819-825 (+ both + fields, `| null`); add SeedOverrides + UserScope interfaces near the + demo block. WARNING: MIXED CRLF/LF line endings — surgical edits only; + verify `git diff --stat` stays small. + +- file: frontend/src/hooks/use-stores.ts + why: | + useStores at 16-43 (TanStack Query over /dimensions/stores with + page/page_size/enabled) — the selector's data source; use-products.ts + mirrors it (useProducts :16, useProduct :45). page_size hard cap is 100 + (app/features/dimensions/routes.py:62,187). + +- file: frontend/src/hooks/use-seeder.ts + why: useSeederStatus :15 — the seeded-window source for the preview card. + +- file: frontend/src/hooks/use-demo-pipeline.ts + why: | + start(req) at 241-249 sends the req object as the WS start frame + verbatim — generic over the widened DemoRunRequest; NO change needed + (read to confirm). RunHistoryStrip replays stored req objects, so + localStorage replays inherit the new fields for free. + +- file: frontend/src/components/demo/ScenarioPicker.test.tsx + why: | + The vitest + @testing-library/react + afterEach(cleanup) harness pattern + for the two new component test files. + +- file: frontend/src/components/ui/ + why: | + Installed primitives: collapsible.tsx, select.tsx, slider.tsx, input.tsx, + badge.tsx, card.tsx, tooltip.tsx, checkbox.tsx — the panel + selector + compose from these; NO new shadcn install required. 
If one becomes + necessary anyway: pin `pnpm dlx shadcn@4.7.0 add ...` (5.x writes a stub + pnpm-workspace.yaml and skips the component) and use per-component + @radix-ui/react-X imports, never the radix barrel. + +- file: docs/_base/RUNBOOKS.md + why: | + "Showcase page (/showcase) pipeline fails at step X" — numbered entries + end at 28; append entry 29 (overrides/scope incident matrix) in the same + bold-trigger/Cause/Fix format. The "Showcase workspace — + preserve/restore/replay/delete semantics" section's "Explicitly out of + scope" list says advanced seed configuration is NOT implemented — E3 + DELIVERS it: rewrite that bullet (move seed_overrides/user_scope to the + documented surface; phase-level config stays out of scope). + +- file: docs/_base/API_CONTRACTS.md + why: | + Rows to extend additively: the /seeder/* row (mention the overrides + object on POST /seeder/generate), POST /demo/run, and the WS + /demo/stream start-frame bullet (E1/E2 notes were just added — append an + "E3 (#409)" note, don't disturb them). + +- file: docs/_base/DOMAIN_MODEL.md + why: | + showcase_workspace aggregate section — document the seed_overrides / + user_scope slot JSON schemas (the umbrella's "JSONB story slots become a + junk drawer" mitigation requires documented slot schemas here). + +- file: PRPs/PRP-showcase-workspace-E2-preset-exposure.md + why: | + Closest predecessor (preset exposure + seed profiles) — its gotcha block + (holiday_rush pinning, seeder precedence, sparse NaN-WAPE, frontend tsc + gate) all recur in E3; this PRP inherits and extends them. + +# Issue / initiative context +- url: https://github.com/w7-mgfcode/ForecastLabAI/issues/409 + why: The epic this PRP implements (Parallel after Foundation E1 #407). 
+- url: https://github.com/w7-mgfcode/ForecastLabAI/issues/406 + why: | + Umbrella — Approach ("all configuration is start-frame-time", "no new + router outside existing slices"), Risks table row 1 (the allow-list + mitigation this PRP implements), out-of-scope list (NO mid-run controls, + NO embedded scenario-builder). +- url: https://github.com/w7-mgfcode/ForecastLabAI/issues/407 + why: | + Foundation epic whose contract is GIVEN: JSONB story slots incl. + seed_overrides + user_scope; columns replayed_from_workspace_id / + archived / pinned / notes / tags / config_schema_version; PATCH + /demo/workspaces/{id}. E3 builds on, never re-decides, this surface. + +# External references +- url: https://docs.pydantic.dev/latest/concepts/strict_mode/ + why: | + Strict-mode semantics for nested models: a model-typed field validates + dict input using the NESTED model's own config — confirmed empirically + (verification log) so no doc-faith is required. NOTE: the docs site + 301-redirects and anchors have drifted; the runtime verification in the + Known Gotchas log is the authoritative claim, not this URL. +- url: https://docs.pydantic.dev/latest/api/config/#pydantic.config.ConfigDict.extra + why: extra="forbid" → unknown nested keys raise ValidationError (the 422 allow-list mechanism). 
+``` + +### Current Codebase tree (relevant subset, pre-E1) + +```bash +app/shared/seeder/ +├── config.py # dataclasses; override TARGETS (no Pydantic here) +├── core.py / generators/ # consume SeederConfig — untouched by E3 +app/features/seeder/ +├── schemas.py # GenerateParams @78 (25+ flat fields) +├── service.py # _build_config_from_params @202; _apply_phaseN @74/@139 +├── routes.py # POST /generate @85 (guard @21; no route change) +└── tests/ # test_routes.py, test_service.py, test_schemas.py +app/features/demo/ +├── schemas.py # DemoRunRequest @29; Workspace* responses @169 +├── pipeline.py # DemoContext @212; step_seed @541; step_status @582; run_pipeline @2618 +├── workspace.py # create_workspace @46; finalize_workspace @106 +├── models.py # ShowcaseWorkspace (E1 adds the story slots — not edited here) +└── tests/ # test_pipeline.py (_RecordingClient @1025), test_schemas.py, test_workspace.py +tests/test_e2e_demo.py # replay regression @561 +frontend/src/ +├── pages/showcase.tsx # handleRun @139; handleLoad @160; handleReplay @174; controls @269 +├── types/api.ts # DemoRunRequest @778; WorkspaceListItem @806 (MIXED CRLF/LF) +├── hooks/use-stores.ts, use-products.ts, use-seeder.ts, use-demo-pipeline.ts +└── components/demo/ # ScenarioPicker, WorkspacePanel, ... 
(+ index.ts barrel) +``` + +### Desired Codebase tree (files added/modified) + +```bash +app/shared/seeder/overrides.py # NEW — SeederOverrides (strict, extra=forbid, 7 knobs) +app/shared/seeder/tests/test_overrides.py # NEW — bounds, forbid, JSON-path, sparse-dump tests +app/features/seeder/schemas.py # MOD — GenerateParams.overrides: SeederOverrides | None +app/features/seeder/service.py # MOD — _apply_seed_overrides, wired LAST in _build_config_from_params +app/features/seeder/tests/test_service.py # MOD — mapping/precedence/window/byte-identical tests +app/features/seeder/tests/test_routes.py # MOD — 201-with-overrides, 422-bounds, 422-unknown-knob +app/features/demo/schemas.py # MOD — UserScope; DemoRunRequest fields + validators; Workspace* responses +app/features/demo/pipeline.py # MOD — DemoContext fields; step_seed forward; step_status scope branch +app/features/demo/workspace.py # MOD — create_workspace writes both slots +app/features/demo/tests/test_schemas.py # MOD — JSON-path + validator tests +app/features/demo/tests/test_pipeline.py # MOD — forwarding + scope + warn/fallback tests +app/features/demo/tests/test_workspace.py # MOD — slot persistence tests +tests/test_e2e_demo.py # MOD — replay-verbatim regression incl. 
slots (integration) +frontend/src/types/api.ts # MOD — SeedOverrides, UserScope, DemoRunRequest, Workspace* (surgical) +frontend/src/lib/workspace-replay.ts # NEW — workspaceToRunRequest(ws) pure helper +frontend/src/lib/workspace-replay.test.ts # NEW — replay-verbatim FE regression +frontend/src/components/demo/SeedConfigPanel.tsx # NEW — collapsible 7-knob panel +frontend/src/components/demo/SeedConfigPanel.test.tsx # NEW +frontend/src/components/demo/ScopeSelector.tsx # NEW — pair selector + preview card +frontend/src/components/demo/ScopeSelector.test.tsx # NEW +frontend/src/components/demo/index.ts # MOD — export the two new components (match barrel style) +frontend/src/pages/showcase.tsx # MOD — wiring (state, panel, selector, start frames) +docs/_base/API_CONTRACTS.md # MOD — seeder overrides + /demo/run + WS start-frame E3 notes +docs/_base/RUNBOOKS.md # MOD — showcase incident 29 + workspace-section scope update +docs/_base/DOMAIN_MODEL.md # MOD — slot schemas on the showcase_workspace aggregate +``` + +### Known Gotchas & Library Quirks + +```python +# CRITICAL — EXECUTION ORDER: do not start until E1 #407 is merged to dev. +# E3 writes JSONB slots that E1's migration creates. First action of Task 1: +# re-read app/features/demo/models.py + workspace.py on the post-E1 dev and +# re-anchor every CONTRACT(E1) tag in this PRP. 
+ +# CRITICAL — pydantic strict + nested models (runtime-verified 2026-06-12 on +# pydantic 2.12.5; re-run on lib upgrade): +# uv run python -c " +# from pydantic import BaseModel, ConfigDict, Field +# class N(BaseModel): +# model_config = ConfigDict(strict=True, extra='forbid') +# stores: int | None = Field(default=None, ge=1, le=100) +# class P(BaseModel): +# model_config = ConfigDict(strict=True) +# seed_overrides: N | None = None +# print(P.model_validate({'seed_overrides': {'stores': 5}})) # OK — dict→model under strict +# P.model_validate({'seed_overrides': {'stores': 999}}) # ValidationError (bounds) +# " +# and N.model_validate({'stores': 5, 'bogus': 1}) → ValidationError (forbid). +# Conclusions baked into the design: NO Field(strict=False) needed on the +# nested field; extra='forbid' IS the allow-list; FastAPI's validate_python +# path (the JSON dict) works. All knobs are int/float → the strict-mode AST +# policy test (app/core/tests/test_strict_mode_policy.py) does not fire. + +# CRITICAL — do NOT add ConfigDict(strict=True) to GenerateParams itself: it +# has date fields (start_date/end_date) and is deliberately non-strict today. +# Only the NEW nested models are strict. + +# CRITICAL — seeder override precedence (service.py:213-226 + the new layer): +# preset → scalar stores/products/window/sparsity → phase1 → phase2 → +# overrides (LAST, wins). Use dataclasses.replace for every sub-config so +# preset-customized sibling fields survive (e.g. sparse preset's +# random_gaps_per_series when overrides.sparsity is set; scenario-customized +# region/category lists when overrides.stores is set — same reason the +# existing scalar override at :218-222 uses replace). + +# CRITICAL — holiday_rush is CALENDAR-PINNED (config.py:553-579): its +# HolidayConfig spikes are fixed 2024 dates. 
seed_overrides.window_days on +# scenario='holiday_rush' must be REJECTED at DemoRunRequest validation +# (clear ValueError message), not silently ignored — a shifted window +# silently drops every holiday spike. Direct /seeder/generate callers who +# combine them are out of scope (the preset docstring already documents +# explicit-dates-to-shift). + +# CRITICAL — seed_overrides requires skip_seed=False. The seed step is skipped +# on skip_seed=true (pipeline.py:543-544) so overrides would be a silent +# no-op; reject in a model_validator (mirror _workspace_name_requires_keep, +# schemas.py:80-85). The frontend enforces the same by gating the panel on +# the Re-seed checkbox. + +# CRITICAL — ids are NOT 1-based (step_status docstring, pipeline.py:585-587; +# memory anchor seeder-does-not-reset-id-sequences). The scope selector MUST +# be fed from live /dimensions data, never synthesized ids. user_scope can +# dangle after reset+reseed → step_status WARN + fallback to discovery (the +# replay path of a reset=true workspace would otherwise hard-fail forever). +# "warn" does NOT stop the run (only "fail" does — pipeline.py:2729-2738). + +# CRITICAL — high stockout_intensity / sparsity overrides can legitimately +# FAIL the backtest (all-NaN WAPE → step_backtest FAIL by design; same +# semantics as the sparse preset, RUNBOOKS incident 28). Do NOT add a +# graceful-skip; ship the panel caveat + runbook entry 29 instead. + +# CRITICAL — workspace writes stay warn-and-continue (workspace.py:10-13). +# The slot writes go INSIDE the existing try/except in create_workspace; a +# failure yields workspace_id=None and a green run, never an exception. + +# GOTCHA — replay reads WorkspaceListItem (the LIST row — showcase.tsx:174): +# seed_overrides/user_scope must be on the LIST response, not detail-only. +# CONTRACT(E1): if E1 already exposed the slots detail-only, ADD them to the +# list item here (cheap; sparse JSONB). 
+ +# GOTCHA — frontend type gates: `pnpm tsc --noEmit` is vacuous (solution-style +# tsconfig) and `pnpm tsc -b` fails with ~24 PRE-EXISTING errors on dev, +# none in demo components. Gate on `pnpm lint && pnpm test --run` plus: +# cd frontend && pnpm tsc -b 2>&1 | grep -E "SeedConfigPanel|ScopeSelector|workspace-replay|types/api|pages/showcase" # expect empty + +# GOTCHA — frontend/src/types/api.ts has MIXED CRLF/LF line endings; repo-wide +# files are inconsistently CRLF/LF. Keep edits surgical; check +# `git diff --stat` before committing (Edit/Write emit LF — avoid whole-file +# noise diffs). + +# GOTCHA — shadcn: compose from INSTALLED primitives (collapsible, select, +# slider, input, badge, tooltip — frontend/src/components/ui/). Semantic +# tokens only (text-muted-foreground, border-primary, text-destructive for +# the reset caveat — mirrors showcase.tsx:309). Never raw colors. + +# GOTCHA — mypy --strict AND pyright --strict gate every backend edit. The +# DemoContext additions need full annotations (SeederOverrides | None); +# pipeline.py imports them from app.shared.seeder.overrides (NOT from the +# seeder feature slice — vertical-slice rule, pipeline.py:18-19). + +# GOTCHA — step_seed currently derives the detail line from profile dims +# (pipeline.py:577). With overrides, compute effective stores/products = +# override-or-profile for BOTH the POST scalars and the detail string so +# the card tells the truth; keep scalar sparsity=0.0 (preset-character +# guard); the nested object carries the operator's sparsity. 
+ +# CONVENTION — commits (every one references #409; no AI trailer; scopes from +# .claude/rules/commit-format.md — seeder slice ⊂ `data`, demo slice ⊂ `api`): +# feat(data): add allow-listed nested seed overrides to seeder contract (#409) +# feat(api): thread seed overrides and user scope through demo pipeline (#409) +# feat(ui): add advanced seed config panel and scope selector to showcase (#409) +# test(api): cover replay-verbatim seed overrides and scope slots (#409) +# docs(docs): document seed override contract and workspace slots (#409) +# docs(repo): track showcase completion e3 prp (#409) +# Branch off dev: feat/showcase-completion-e3-seed-config-scope (45 chars ≤ 50). + +# RUNTIME-VERIFICATION LOG (per prp-create step 3): +# - pydantic 2.12.5 nested-strict + extra=forbid + bounds behavior verified +# with the command in the CRITICAL block above (all four assertions pass). +# - Seeder precedence semantics read directly from service.py:202-247 (not +# inferred); the `if params.sparsity > 0` guard confirmed at :225-226. +# - dimensions page_size cap 100 confirmed at app/features/dimensions/ +# routes.py:62 and :187. +# - `pnpm tsc -b` pre-existing-failure state re-confirmed by the E2 PRP log +# (2026-06-12); no demo-component errors. +# - No other third-party API claims — everything else cites in-repo code. +``` + +## Implementation Blueprint + +### Data models and structure + +```python +# app/shared/seeder/overrides.py (NEW) +"""Curated, allow-listed seed-override schema (E3, issue #409). + +Shared between the seeder slice (GenerateParams.overrides) and the demo slice +(DemoRunRequest.seed_overrides) — app/shared is the sanctioned cross-slice +home (vertical-slice rule). extra='forbid' IS the allow-list: any knob not +listed here is a 422 at the HTTP boundary (umbrella #406 risk mitigation — +the full 25+ knob surface stays preset-driven). 
+""" +from pydantic import BaseModel, ConfigDict, Field + +class SeederOverrides(BaseModel): + # strict=True catches JSON-native coercion bugs ("5" → 5); every field is + # int/float so no Field(strict=False) override is needed (security-patterns.md). + model_config = ConfigDict(strict=True, extra="forbid") + + stores: int | None = Field(default=None, ge=1, le=100, description="Store count → DimensionConfig.stores; wins over the scalar `stores` param.") + products: int | None = Field(default=None, ge=1, le=500, description="Product count → DimensionConfig.products; wins over the scalar `products` param.") + window_days: int | None = Field(default=None, ge=75, le=365, description="Seeded window length; start_date = end_date - window_days. >=75 keeps the showcase historical_backfill gate clear. Rejected on the calendar-pinned holiday_rush preset (demo surface).") + sparsity: float | None = Field(default=None, ge=0.0, le=0.9, description="Missing (store,product) grain fraction → SparsityConfig.missing_combinations_pct; preserves the preset's gap config. 1.0 disallowed (zero series).") + promotion_intensity: float | None = Field(default=None, ge=0.0, le=0.5, description="→ RetailPatternConfig.promotion_probability (preset max 0.25).") + stockout_intensity: float | None = Field(default=None, ge=0.0, le=0.5, description="→ RetailPatternConfig.stockout_probability. High values can legitimately NaN-WAPE-fail the backtest (documented).") + noise_sigma: float | None = Field(default=None, ge=0.0, le=0.5, description="→ TimeSeriesConfig.noise_sigma (preset max 0.4).") + + def is_empty(self) -> bool: + """True when no knob is set ({} on the wire) — treated as None everywhere.""" + return not self.model_dump(exclude_none=True) +``` + +```python +# app/features/demo/schemas.py — additions (demo-only concept stays in-slice) +class UserScope(BaseModel): + """Operator-selected (store, product) focus pair (E3, issue #409). 
+ + Ids are REAL discovered ids (sequences never reset — ids are not 1-based); + step_status validates them and warn-falls-back to discovery when dangling. + """ + model_config = ConfigDict(strict=True, extra="forbid") + store_id: int = Field(..., ge=1) + product_id: int = Field(..., ge=1) + +# DemoRunRequest — two additive Optional fields + two validators: +# seed_overrides: SeederOverrides | None = None (import from app.shared.seeder.overrides) +# user_scope: UserScope | None = None +# +# @model_validator(mode="after") _seed_overrides_require_reseed: +# if self.seed_overrides is not None and not self.seed_overrides.is_empty() +# and self.skip_seed: +# raise ValueError("seed_overrides requires skip_seed=false (Re-seed first)") +# # normalize: an empty overrides object collapses to None +# if self.seed_overrides is not None and self.seed_overrides.is_empty(): +# self.seed_overrides = None # NOTE: model_validator(after) may mutate self +# +# @model_validator(mode="after") _window_days_forbidden_on_holiday_rush: +# if (self.seed_overrides is not None +# and self.seed_overrides.window_days is not None +# and self.scenario is ScenarioPreset.HOLIDAY_RUSH): +# raise ValueError("window_days cannot override the calendar-pinned holiday_rush window") +# +# WorkspaceListItem (+ WorkspaceDetailResponse inherits): +# seed_overrides: dict[str, Any] | None = Field(default=None, ...) +# user_scope: dict[str, Any] | None = Field(default=None, ...) +# (from_attributes=True already set — ORM JSONB maps straight through. +# CONTRACT(E1): skip if E1's PRP already added them; ensure LIST exposure.) +``` + +```python +# app/features/seeder/service.py — the new layer (mirror _apply_phase2_overrides) +def _apply_seed_overrides(config: SeederConfig, overrides: SeederOverrides | None) -> None: + """Apply the curated nested overrides LAST — wins over scalar params. 
+ + dataclasses.replace is field-precise: preset-customized sibling fields + (region/category lists, random_gaps_*) survive every knob. + """ + if overrides is None: + return + if overrides.stores is not None or overrides.products is not None: + config.dimensions = replace( + config.dimensions, + stores=overrides.stores if overrides.stores is not None else config.dimensions.stores, + products=overrides.products if overrides.products is not None else config.dimensions.products, + ) + if overrides.window_days is not None: + config.start_date = config.end_date - timedelta(days=overrides.window_days) + if overrides.sparsity is not None: + config.sparsity = replace(config.sparsity, missing_combinations_pct=overrides.sparsity) + if overrides.promotion_intensity is not None or overrides.stockout_intensity is not None: + config.retail = replace( + config.retail, + promotion_probability=(overrides.promotion_intensity + if overrides.promotion_intensity is not None + else config.retail.promotion_probability), + stockout_probability=(overrides.stockout_intensity + if overrides.stockout_intensity is not None + else config.retail.stockout_probability), + ) + if overrides.noise_sigma is not None: + config.time_series = replace(config.time_series, noise_sigma=overrides.noise_sigma) +# Wire-in (one line, AFTER _apply_phase2_overrides at :241): +# _apply_seed_overrides(config, params.overrides) +``` + +```python +# app/features/demo/pipeline.py — step changes (sketch) + +# DemoContext additions (after workspace_name, with an E3 #409 comment): +# seed_overrides: SeederOverrides | None = None +# user_scope: UserScope | None = None +# run_pipeline ctx construction: thread req.seed_overrides / req.user_scope. + +# step_seed — effective dims + verbatim forward: +# stores = ctx.seed_overrides.stores if (ctx.seed_overrides and ctx.seed_overrides.stores) else profile.stores +# products = ... same for products ... 
+# window: if ctx.seed_overrides and ctx.seed_overrides.window_days: +# seed_end = datetime.now(UTC).date(); seed_start = seed_end - timedelta(days=ctx.seed_overrides.window_days) +# elif profile.window is not None: ... (existing pinned branch; validator already +# guarantees window_days is never set on holiday_rush) +# json_body gains: **({"overrides": ctx.seed_overrides.model_dump(exclude_none=True)} +# if ctx.seed_overrides else {}) +# detail line + data echo the effective dims and "overrides" keys applied. + +# step_status — user-scope branch BEFORE first-pair discovery: +# if ctx.user_scope is not None: +# try: +# store_body = await client.request("status[scope-store]", "GET", +# f"/dimensions/stores/{ctx.user_scope.store_id}") +# product_body = await client.request("status[scope-product]", "GET", +# f"/dimensions/products/{ctx.user_scope.product_id}") +# except _StepError: +# scope_warn = ("user_scope (store=%d, product=%d) not found — fell back " +# "to discovered pair" % (...)) # WARN, never fail (replay safety) +# else: +# ctx.store_id, ctx.product_id = ctx.user_scope.store_id, ctx.user_scope.product_id +# -> return ("pass", f"... store_id={..} product_id={..} (user-selected)", +# {..., "user_scope_applied": True}) +# # fallback / no-scope path: existing discovery (582-631) unchanged; when the +# # scope dangled return ("warn", scope_warn + discovery detail, +# # {..., "user_scope_applied": False}). 
+``` + +```python +# app/features/demo/workspace.py — create_workspace constructor additions +# (INSIDE the existing try; attribute names per the merged E1 model — +# CONTRACT(E1): assumed `seed_overrides` / `user_scope` nullable JSONB): +# seed_overrides=(req.seed_overrides.model_dump(mode="json", exclude_none=True) +# if req.seed_overrides else None), +# user_scope=(req.user_scope.model_dump(mode="json") if req.user_scope else None), +``` + +```tsx +// frontend/src/lib/workspace-replay.ts (NEW) — replay-verbatim in ONE place +import type { DemoRunRequest, WorkspaceListItem } from '@/types/api' + +/** Build the verbatim replay start frame for a saved workspace (E4 semantics + * + E3 #409 slots). Omits absent optionals so legacy rows replay byte- + * identically to today. */ +export function workspaceToRunRequest(ws: WorkspaceListItem): DemoRunRequest { + return { + seed: ws.seed, + scenario: ws.scenario, + reset: ws.reset, + skip_seed: ws.skip_seed, + preservation: 'keep', + // CONTRACT(E1): replay provenance — post-E1, handleReplayWorkspace's inline + // object sends this field (an E1 frozen success criterion); this helper + // REPLACES that object and must preserve it or lineage silently regresses. + replayed_from_workspace_id: ws.workspace_id, + ...(ws.name ? { workspace_name: ws.name } : {}), + ...(ws.seed_overrides ? { seed_overrides: ws.seed_overrides } : {}), + ...(ws.user_scope ? 
{ user_scope: ws.user_scope } : {}), + } +} + +// types/api.ts additions (surgical): +// export interface SeedOverrides { stores?: number; products?: number; +// window_days?: number; sparsity?: number; promotion_intensity?: number; +// stockout_intensity?: number; noise_sigma?: number } +// export interface UserScope { store_id: number; product_id: number } +// DemoRunRequest += seed_overrides?: SeedOverrides; user_scope?: UserScope +// WorkspaceListItem += seed_overrides: SeedOverrides | null; user_scope: UserScope | null + +// SeedConfigPanel.tsx — props: { value: SeedOverrides | null; onChange(v: SeedOverrides | null): void; +// disabled?: boolean; windowLocked?: boolean /* holiday_rush */ } +// "Advanced seed config"; Inputs (stores 1..20 UI-range, products 1..50, +// window_days 75..365) + Sliders (sparsity 0..0.9 step .05, promo/stockout 0..0.5, +// noise 0..0.5); live summary line; NaN-WAPE caveat ; emits null when all unset. +// UI ranges are TIGHTER than the API bounds (laptop-scale); the API bounds are the law. + +// ScopeSelector.tsx — props: { value: UserScope | null; onChange(v: UserScope | null): void; +// disabled?: boolean } +// two shadcn patch({ metric: metric as DemoRankingMetric })} + disabled={disabled} + > + + + + + {RANKING_METRICS.map((metric) => ( + + {metric.label} + + ))} + + + + + patch({ horizon: Number(event.target.value) || 0 })} + /> + + + + + + + + +
+ + + + + patch({ n_splits: Number(event.target.value) || 0 })} + /> + + + patch({ min_train_size: Number(event.target.value) || 0 })} + /> + + + patch({ gap: Number(event.target.value) || 0 })} + /> + +
+
+
+ + {errors.length > 0 && ( +
    + {errors.map((error) => ( +
  • + {error} +
  • + ))} +
+ )} + + {fitWarning && ( +

+ {fitWarning} +

+ )} + + ) +} diff --git a/frontend/src/components/demo/ReplayConfirmDialog.test.tsx b/frontend/src/components/demo/ReplayConfirmDialog.test.tsx new file mode 100644 index 00000000..cee3981b --- /dev/null +++ b/frontend/src/components/demo/ReplayConfirmDialog.test.tsx @@ -0,0 +1,112 @@ +import { cleanup, fireEvent, render, screen } from '@testing-library/react' +import { afterEach, beforeAll, describe, expect, it, vi } from 'vitest' +import { ReplayConfirmDialog } from './ReplayConfirmDialog' +import { buildReplayRequest } from './replay-request' +import type { WorkspaceListItem } from '@/types/api' + +beforeAll(() => { + class ResizeObserverStub { + observe() {} + unobserve() {} + disconnect() {} + } + vi.stubGlobal('ResizeObserver', ResizeObserverStub) +}) + +afterEach(() => { + cleanup() + vi.clearAllMocks() +}) + +const baseItem: WorkspaceListItem = { + workspace_id: 'a'.repeat(32), + name: 'replay-me', + status: 'completed', + seed: 7, + scenario: 'demo_minimal', + reset: false, + skip_seed: true, + result_summary: null, + created_at: '2026-06-01T12:00:00Z', + archived: false, + pinned: false, + tags: [], + replayed_from_workspace_id: null, + seed_overrides: null, + user_scope: null, +} + +function renderDialog(workspace: WorkspaceListItem | null, handlers = {}) { + const onConfirm = vi.fn() + const onCancel = vi.fn() + render( + , + ) + return { onConfirm, onCancel } +} + +describe('ReplayConfirmDialog', () => { + it('renders nothing while no replay is pending', () => { + renderDialog(null) + expect(document.body.textContent).not.toContain('Replay workspace') + }) + + it('renders the recorded-vs-sent preview values', () => { + renderDialog(baseItem) + const copy = document.body.textContent ?? '' + expect(copy).toContain('Replay workspace “replay-me”?') + expect(copy).toContain('seed') + expect(copy).toContain('7') + expect(copy).toContain('demo_minimal') + expect(copy).toContain('keep') + // replayed_from points at the source row on both columns. 
+ expect(copy).toContain(baseItem.workspace_id) + // The verbatim-replay hint for operators who want a different config. + expect(copy).toContain('Use Load instead') + }) + + it('uses a plain confirm label on a non-destructive replay', () => { + renderDialog(baseItem) + const action = screen.getByTestId('replay-confirm') + expect(action.textContent).toBe('Replay') + expect(document.body.textContent).not.toContain('WIPES the database') + }) + + it('escalates to destructive copy + label when reset=true', () => { + renderDialog({ ...baseItem, reset: true }) + expect(document.body.textContent).toContain('WIPES the database') + const action = screen.getByTestId('replay-confirm') + expect(action.textContent).toBe('Replay & wipe database') + expect(action.className).toContain('bg-destructive') + }) + + it('confirm fires onConfirm once; cancel fires onCancel and never confirms', () => { + const { onConfirm } = renderDialog(baseItem) + fireEvent.click(screen.getByTestId('replay-confirm')) + expect(onConfirm).toHaveBeenCalledTimes(1) + cleanup() + const second = renderDialog(baseItem) + fireEvent.click(screen.getByText('Cancel')) + expect(second.onCancel).toHaveBeenCalledTimes(1) + expect(second.onConfirm).not.toHaveBeenCalled() + }) + + it('highlights a mismatching row (defensive — verbatim replays match)', () => { + render( + , + ) + const mismatched = document.querySelector('td.font-semibold.text-destructive') + expect(mismatched?.textContent).toBe('99') + }) +}) diff --git a/frontend/src/components/demo/ReplayConfirmDialog.tsx b/frontend/src/components/demo/ReplayConfirmDialog.tsx new file mode 100644 index 00000000..8395f446 --- /dev/null +++ b/frontend/src/components/demo/ReplayConfirmDialog.tsx @@ -0,0 +1,158 @@ +/** + * E2 (#408) — replay confirmation dialog with a recorded-vs-sent preview. + * + * Every panel Replay goes through this dialog (no code path starts a replay + * without it). 
The body renders a Field / Recorded / Will-send table; rows + * where the two values differ are highlighted (defensive — a verbatim replay + * normally matches). A reset=true workspace escalates: destructive warning + * copy + a destructive-styled confirm button ("Replay & wipe database"). + * + * Replay policy stays verbatim by design — operators who want a different + * config use Load (which repopulates every control) and Run instead. + */ + +import { AlertTriangle } from 'lucide-react' +import { + AlertDialog, + AlertDialogAction, + AlertDialogCancel, + AlertDialogContent, + AlertDialogDescription, + AlertDialogFooter, + AlertDialogHeader, + AlertDialogTitle, +} from '@/components/ui/alert-dialog' +import { + Table, + TableBody, + TableCell, + TableHead, + TableHeader, + TableRow, +} from '@/components/ui/table' +import { cn } from '@/lib/utils' +import type { DemoRunRequest, WorkspaceListItem } from '@/types/api' + +interface ReplayConfirmDialogProps { + /** The workspace pending replay — null keeps the dialog closed. */ + workspace: WorkspaceListItem | null + /** The exact request the confirmed replay will send (single source). */ + requestPreview: DemoRunRequest | null + onConfirm: () => void + onCancel: () => void +} + +function fmt(value: unknown): string { + if (value === undefined || value === null || value === '') return '—' + return String(value) +} + +interface PreviewRow { + field: string + recorded: unknown + willSend: unknown +} + +function buildRows(ws: WorkspaceListItem, req: DemoRunRequest): PreviewRow[] { + return [ + { field: 'seed', recorded: ws.seed, willSend: req.seed }, + { field: 'scenario', recorded: ws.scenario, willSend: req.scenario }, + { field: 'reset', recorded: ws.reset, willSend: req.reset }, + { field: 'skip_seed', recorded: ws.skip_seed, willSend: req.skip_seed }, + { field: 'name', recorded: ws.name, willSend: req.workspace_name ?? 
null }, + { field: 'preservation', recorded: 'keep', willSend: req.preservation }, + { + field: 'replayed_from', + recorded: ws.workspace_id, + willSend: req.replayed_from_workspace_id, + }, + ] +} + +export function ReplayConfirmDialog({ + workspace, + requestPreview, + onConfirm, + onCancel, +}: ReplayConfirmDialogProps) { + const rows = + workspace && requestPreview ? buildRows(workspace, requestPreview) : [] + const destructive = workspace?.reset === true + const label = workspace?.name ?? workspace?.workspace_id.slice(0, 8) ?? '' + + return ( + { + if (!open) onCancel() + }} + > + + + Replay workspace “{label}”? + + The recorded config is re-submitted verbatim as a new kept run — + the original workspace row is never changed. + + + + {destructive && ( +
+ + + Replaying this workspace WIPES the database and + reseeds it from scratch. + +
+ )} + + + + + Field + Recorded + Will send + + + + {rows.map((row) => { + const mismatch = fmt(row.recorded) !== fmt(row.willSend) + return ( + + {row.field} + {fmt(row.recorded)} + + {fmt(row.willSend)} + + + ) + })} + +
+ +

+ Want to change the config first? Use Load instead. +

+ + + Cancel + + {destructive ? 'Replay & wipe database' : 'Replay'} + + +
+
+ ) +} diff --git a/frontend/src/components/demo/RunConfigPanel.test.tsx b/frontend/src/components/demo/RunConfigPanel.test.tsx new file mode 100644 index 00000000..b58bc934 --- /dev/null +++ b/frontend/src/components/demo/RunConfigPanel.test.tsx @@ -0,0 +1,102 @@ +import { afterEach, beforeAll, describe, expect, it, vi } from 'vitest' +import { cleanup, fireEvent, render, screen } from '@testing-library/react' +import type { ModelCatalogResponse } from '@/types/api' + +// Radix primitives need a couple of layout APIs jsdom lacks. +beforeAll(() => { + class ResizeObserverStub { + observe() {} + unobserve() {} + disconnect() {} + } + vi.stubGlobal('ResizeObserver', ResizeObserverStub) + if (!Element.prototype.hasPointerCapture) { + Element.prototype.hasPointerCapture = () => false + } + if (!Element.prototype.scrollIntoView) { + Element.prototype.scrollIntoView = () => {} + } +}) + +// A catalog with one DISABLED opt-in model (lightgbm) — the picker must hide it. +const CATALOG: ModelCatalogResponse = { + models: [ + { + model_type: 'naive', + label: 'Naive', + family: 'baseline', + feature_aware: false, + requires_extra: false, + default_params: {}, + supports_auto_predict: true, + description: 'baseline', + enabled: true, + }, + { + model_type: 'lightgbm', + label: 'LightGBM', + family: 'tree', + feature_aware: true, + requires_extra: true, + default_params: {}, + supports_auto_predict: false, + description: 'opt-in', + enabled: false, + }, + ], + default_candidate_model_types: ['naive'], +} + +vi.mock('@/hooks/use-model-selection', () => ({ + useModelCatalog: () => ({ data: CATALOG, isLoading: false, isError: false, error: null }), +})) + +import { RunConfigPanel } from './RunConfigPanel' +import { DEFAULT_BACKTEST, DEFAULT_TRAIN_MODELS } from './run-config-utils' + +afterEach(cleanup) + +function renderPanel(overrides: Partial> = {}) { + const onSelectionChange = vi.fn() + const onBacktestChange = vi.fn() + render( + , + ) + // The panel is collapsed by default; 
open it to render the inner controls. + fireEvent.click(screen.getByTestId('run-config-toggle')) + return { onSelectionChange, onBacktestChange } +} + +describe('RunConfigPanel', () => { + it('hides opt-in models whose flag is off (enabled=false)', () => { + renderPanel() + expect(screen.getByTestId('candidate-model-naive')).toBeTruthy() + expect(screen.queryByTestId('candidate-model-lightgbm')).toBeNull() + }) + + it('appends prophet_like (V2) to the preview only on showcase_rich', () => { + renderPanel({ scenario: 'showcase_rich', selection: ['naive'] }) + expect(screen.getByTestId('preview-chip-naive')).toBeTruthy() + expect(screen.getByTestId('preview-chip-prophet_like')).toBeTruthy() + }) + + it('does not append prophet_like on demo_minimal', () => { + renderPanel({ scenario: 'demo_minimal', selection: ['naive'] }) + expect(screen.getByTestId('preview-chip-naive')).toBeTruthy() + expect(screen.queryByTestId('preview-chip-prophet_like')).toBeNull() + }) + + it('reset restores the default selection + backtest', () => { + const { onSelectionChange, onBacktestChange } = renderPanel({ selection: ['naive'] }) + fireEvent.click(screen.getByTestId('run-config-reset')) + expect(onSelectionChange).toHaveBeenCalledWith(DEFAULT_TRAIN_MODELS) + expect(onBacktestChange).toHaveBeenCalledWith(DEFAULT_BACKTEST) + }) +}) diff --git a/frontend/src/components/demo/RunConfigPanel.tsx b/frontend/src/components/demo/RunConfigPanel.tsx new file mode 100644 index 00000000..c36a18b6 --- /dev/null +++ b/frontend/src/components/demo/RunConfigPanel.tsx @@ -0,0 +1,158 @@ +import { useMemo, useState } from 'react' +import { ChevronDown, RotateCcw, SlidersHorizontal } from 'lucide-react' +import { CandidateModelPicker } from '@/components/champion-selector/candidate-model-picker' +import { useModelCatalog } from '@/hooks/use-model-selection' +import { Badge } from '@/components/ui/badge' +import { Button } from '@/components/ui/button' +import { + Collapsible, + CollapsibleContent, + 
CollapsibleTrigger, +} from '@/components/ui/collapsible' +import { cn } from '@/lib/utils' +import type { + DemoBacktestConfig, + ModelCatalogResponse, + ModelFamily, + ScenarioPreset, +} from '@/types/api' +import { DemoBacktestSettingsForm } from './DemoBacktestSettingsForm' +import { + DEFAULT_BACKTEST, + DEFAULT_TRAIN_MODELS, + buildTrainPlan, + isDefaultBacktest, + isDefaultSelection, +} from './run-config-utils' + +interface RunConfigPanelProps { + scenario: ScenarioPreset + disabled?: boolean + selection: string[] + onSelectionChange: (models: string[]) => void + backtest: DemoBacktestConfig + onBacktestChange: (cfg: DemoBacktestConfig) => void +} + +/** + * E4 (#410) — collapsible "Run configuration (advanced)" section on /showcase. + * + * Composes the reused CandidateModelPicker (fed an enabled-filtered catalog so + * disabled opt-in models are hidden), the DemoBacktestSettingsForm, a Reset + * button, and a train-candidate preview chip list. Collapsed by default so an + * untouched run sends a byte-identical legacy frame (the dirty-only rule lives + * in showcase.tsx). + */ +export function RunConfigPanel({ + scenario, + disabled = false, + selection, + onSelectionChange, + backtest, + onBacktestChange, +}: RunConfigPanelProps) { + const [open, setOpen] = useState(false) + const { data: catalog, isLoading } = useModelCatalog() + + // Hide opt-in models whose forecast_enable_* flag is off (catalog.enabled). + const enabledCatalog: ModelCatalogResponse | undefined = useMemo(() => { + if (!catalog) return undefined + return { ...catalog, models: catalog.models.filter((m) => m.enabled) } + }, [catalog]) + + const familyByType = useMemo(() => { + const map: Record = {} + for (const m of catalog?.models ?? 
[]) map[m.model_type] = m.family + return map + }, [catalog]) + + const plan = useMemo( + () => buildTrainPlan(selection, scenario, familyByType), + [selection, scenario, familyByType], + ) + + const isCustomized = !isDefaultSelection(selection) || !isDefaultBacktest(backtest) + + function reset() { + onSelectionChange([...DEFAULT_TRAIN_MODELS]) + onBacktestChange({ ...DEFAULT_BACKTEST }) + } + + return ( + + + + + +
+
+

Models to train

+ +
+ +
+ +
+

Backtest settings

+ +
+ +
+

+ Will train {plan.length} model{plan.length === 1 ? '' : 's'} +

+
+ {plan.map((entry) => ( + + {entry.model_type} + {entry.v2 ? ' (V2)' : ''} + {entry.family ? ` · ${entry.family}` : ''} + + ))} +
+
+
+
+ ) +} diff --git a/frontend/src/components/demo/ScopeSelector.test.tsx b/frontend/src/components/demo/ScopeSelector.test.tsx new file mode 100644 index 00000000..643dc057 --- /dev/null +++ b/frontend/src/components/demo/ScopeSelector.test.tsx @@ -0,0 +1,109 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' +import { cleanup, fireEvent, render, screen } from '@testing-library/react' +import { ScopeSelector } from './ScopeSelector' + +afterEach(cleanup) + +vi.mock('@/hooks/use-stores', () => ({ + useStores: () => ({ + data: { + stores: [ + { + id: 12, + code: 'S012', + name: 'Riverside', + region: 'North', + city: null, + store_type: 'supermarket', + created_at: '', + updated_at: '', + }, + { + id: 13, + code: 'S013', + name: 'Hilltop', + region: 'South', + city: null, + store_type: 'express', + created_at: '', + updated_at: '', + }, + ], + }, + isLoading: false, + }), +})) + +vi.mock('@/hooks/use-products', () => ({ + useProducts: () => ({ + data: { + products: [ + { + id: 47, + sku: 'SKU-047', + name: 'Oat Milk', + category: 'Dairy', + brand: 'BrandA', + base_price: null, + base_cost: null, + created_at: '', + updated_at: '', + }, + ], + }, + isLoading: false, + }), +})) + +vi.mock('@/hooks/use-seeder', () => ({ + useSeederStatus: () => ({ + data: { + date_range_start: '2026-01-01', + date_range_end: '2026-03-31', + }, + }), +})) + +describe('ScopeSelector', () => { + it('renders the two dropdowns with auto-discover placeholders', () => { + render( undefined} />) + expect(screen.getByText('Auto-discover first store')).toBeTruthy() + expect(screen.getByText('Auto-discover first product')).toBeTruthy() + // No preview card while nothing is selected. 
+ expect(screen.queryByText('Focus pair')).toBeNull() + }) + + it('previews the selected pair with names, traits, and the seeded window', () => { + render( + undefined} /> + ) + expect(screen.getByText('Focus pair')).toBeTruthy() + expect(screen.getByText('S012 · Riverside (North, supermarket)')).toBeTruthy() + expect(screen.getByText('SKU-047 · Oat Milk (Dairy, BrandA)')).toBeTruthy() + expect(screen.getByText(/2026-01-01 → 2026-03-31/)).toBeTruthy() + }) + + it('falls back to raw ids when the pair is not in the loaded page', () => { + render( + undefined} /> + ) + expect(screen.getByText('store #999')).toBeTruthy() + expect(screen.getByText('product #888')).toBeTruthy() + }) + + it('clears the selection via the Clear focus button', () => { + const onChange = vi.fn() + render() + fireEvent.click(screen.getByText('Clear focus')) + expect(onChange).toHaveBeenCalledWith(null) + }) + + it('hides the Clear button and disables triggers when disabled', () => { + render( + undefined} disabled /> + ) + const storeTrigger = screen.getByLabelText('Focus store') as HTMLButtonElement + expect(storeTrigger.disabled).toBe(true) + expect((screen.getByText('Clear focus') as HTMLButtonElement).disabled).toBe(true) + }) +}) diff --git a/frontend/src/components/demo/ScopeSelector.tsx b/frontend/src/components/demo/ScopeSelector.tsx new file mode 100644 index 00000000..1ed1b1e8 --- /dev/null +++ b/frontend/src/components/demo/ScopeSelector.tsx @@ -0,0 +1,143 @@ +import { Crosshair } from 'lucide-react' +import { Button } from '@/components/ui/button' +import { Card, CardContent } from '@/components/ui/card' +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from '@/components/ui/select' +import { useProducts } from '@/hooks/use-products' +import { useSeederStatus } from '@/hooks/use-seeder' +import { useStores } from '@/hooks/use-stores' +import type { UserScope } from '@/types/api' + +/** + * E3 (#409) — store/product focus-pair selector with a 
pre-run preview. + * + * Fed from live /dimensions data (NEVER synthesized ids — Postgres sequences + * don't reset, so ids are not 1-based). Works without re-seeding: scope + * selection on the existing dataset is the primary use. The status step + * validates the pair server-side and warn-falls-back to discovery when it + * dangles (e.g. after a reset re-issued ids). + */ +interface ScopeSelectorProps { + value: UserScope | null + onChange: (value: UserScope | null) => void + disabled?: boolean +} + +// page_size hard cap on /dimensions endpoints is 100. +const PAGE_SIZE = 100 + +/** "S001 · Main St (North, supermarket)" — label + non-null traits. */ +function describeEntity(label: string, traits: Array): string { + const present = traits.filter((t): t is string => t !== null && t !== '') + return present.length > 0 ? `${label} (${present.join(', ')})` : label +} + +export function ScopeSelector({ value, onChange, disabled = false }: ScopeSelectorProps) { + const storesQuery = useStores({ page: 1, pageSize: PAGE_SIZE }) + const productsQuery = useProducts({ page: 1, pageSize: PAGE_SIZE }) + const { data: seederStatus } = useSeederStatus() + + const stores = storesQuery.data?.stores ?? [] + const products = productsQuery.data?.products ?? [] + const selectedStore = stores.find((s) => s.id === value?.store_id) ?? null + const selectedProduct = products.find((p) => p.id === value?.product_id) ?? null + + return ( +
+
+ + + + + {value !== null && ( + + )} +
+ + {value !== null && ( + + + + + Focus pair + + + {selectedStore + ? describeEntity(`${selectedStore.code} · ${selectedStore.name}`, [ + selectedStore.region, + selectedStore.store_type, + ]) + : `store #${value.store_id}`} + + + {selectedProduct + ? describeEntity(`${selectedProduct.sku} · ${selectedProduct.name}`, [ + selectedProduct.category, + selectedProduct.brand, + ]) + : `product #${value.product_id}`} + + {seederStatus?.date_range_start && seederStatus.date_range_end && ( + + seeded window {seederStatus.date_range_start} → {seederStatus.date_range_end} + + )} + + + )} +
+ ) +} diff --git a/frontend/src/components/demo/SeedConfigPanel.test.tsx b/frontend/src/components/demo/SeedConfigPanel.test.tsx new file mode 100644 index 00000000..c81d8bed --- /dev/null +++ b/frontend/src/components/demo/SeedConfigPanel.test.tsx @@ -0,0 +1,95 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' +import { cleanup, fireEvent, render, screen } from '@testing-library/react' +import { SeedConfigPanel } from './SeedConfigPanel' +import type { SeedOverrides } from '@/types/api' + +// jsdom lacks ResizeObserver; the radix Slider requires it (no vitest setup +// file exists in this project — the stub stays local to this suite). +class ResizeObserverStub { + observe() {} + unobserve() {} + disconnect() {} +} +globalThis.ResizeObserver = globalThis.ResizeObserver ?? (ResizeObserverStub as never) + +afterEach(cleanup) + +function openPanel(value: SeedOverrides | null = null, props = {}) { + const onChange = vi.fn() + render() + fireEvent.click(screen.getByText('Advanced seed config')) + return onChange +} + +describe('SeedConfigPanel', () => { + it('renders all 7 knob controls when expanded', () => { + openPanel() + // 3 int inputs + for (const label of ['Stores', 'Products', 'Window (days)']) { + expect(screen.getByLabelText(label)).toBeTruthy() + } + // 4 float sliders + for (const label of [ + 'Sparsity', + 'Promotion intensity', + 'Stockout intensity', + 'Noise sigma', + ]) { + expect(screen.getAllByLabelText(label).length).toBeGreaterThan(0) + } + }) + + it('emits a sparse object containing only the touched knob', () => { + const onChange = openPanel() + fireEvent.change(screen.getByLabelText('Stores'), { target: { value: '8' } }) + expect(onChange).toHaveBeenCalledWith({ stores: 8 }) + }) + + it('merges a new knob into the existing sparse object', () => { + const onChange = openPanel({ stores: 8 }) + fireEvent.change(screen.getByLabelText('Products'), { target: { value: '20' } }) + expect(onChange).toHaveBeenCalledWith({ stores: 8, 
products: 20 }) + }) + + it('emits null when the last knob is cleared', () => { + const onChange = openPanel({ stores: 8 }) + fireEvent.change(screen.getByLabelText('Stores'), { target: { value: '' } }) + expect(onChange).toHaveBeenCalledWith(null) + }) + + it('emits null via the Clear overrides button', () => { + const onChange = openPanel({ stores: 8, noise_sigma: 0.25 }) + fireEvent.click(screen.getByText('Clear overrides')) + expect(onChange).toHaveBeenCalledWith(null) + }) + + it('disables every control when disabled', () => { + openPanel({ stores: 8 }, { disabled: true }) + expect((screen.getByLabelText('Stores') as HTMLInputElement).disabled).toBe(true) + expect((screen.getByLabelText('Products') as HTMLInputElement).disabled).toBe(true) + }) + + it('locks only the window control when windowLocked (holiday_rush)', () => { + openPanel(null, { windowLocked: true }) + expect((screen.getByLabelText('Window (days)') as HTMLInputElement).disabled).toBe(true) + expect((screen.getByLabelText('Stores') as HTMLInputElement).disabled).toBe(false) + expect(screen.getByText('pinned window (holiday_rush)')).toBeTruthy() + }) + + it('shows the NaN-WAPE caveat at high stockout intensity', () => { + openPanel({ stockout_intensity: 0.4 }) + expect( + screen.getByText(/can legitimately fail the backtest/i) + ).toBeTruthy() + }) + + it('hides the caveat at tame values', () => { + openPanel({ stockout_intensity: 0.1, sparsity: 0.2 }) + expect(screen.queryByText(/can legitimately fail the backtest/i)).toBeNull() + }) + + it('echoes the live summary of set knobs', () => { + openPanel({ stores: 8, products: 20, promotion_intensity: 0.3 }) + expect(screen.getByText('8 stores · 20 products · promo 0.30')).toBeTruthy() + }) +}) diff --git a/frontend/src/components/demo/SeedConfigPanel.tsx b/frontend/src/components/demo/SeedConfigPanel.tsx new file mode 100644 index 00000000..6eed23e2 --- /dev/null +++ b/frontend/src/components/demo/SeedConfigPanel.tsx @@ -0,0 +1,213 @@ +import { 
useState } from 'react' +import { ChevronsUpDown, AlertTriangle } from 'lucide-react' +import { Badge } from '@/components/ui/badge' +import { Button } from '@/components/ui/button' +import { + Collapsible, + CollapsibleContent, + CollapsibleTrigger, +} from '@/components/ui/collapsible' +import { Input } from '@/components/ui/input' +import { Slider } from '@/components/ui/slider' +import type { SeedOverrides } from '@/types/api' + +/** + * E3 (#409) — advanced seed config panel: the 7 curated, allow-listed knobs. + * + * Emits a SPARSE object (only operator-touched knobs) and null when nothing + * is set, so legacy start frames stay byte-identical. The UI int ranges are + * deliberately TIGHTER than the API bounds (laptop-scale demo data); the API + * bounds are the law and the backend rejects anything outside them. + */ +interface SeedConfigPanelProps { + value: SeedOverrides | null + onChange: (value: SeedOverrides | null) => void + /** Disable every control (run in flight / Re-seed unticked). */ + disabled?: boolean + /** holiday_rush is calendar-pinned — the window control locks. */ + windowLocked?: boolean +} + +// UI input ranges (int knobs). API bounds: stores 1..100, products 1..500, +// window_days 75..365 — the inputs clamp to demo-scale subsets. +const INT_KNOBS = [ + { key: 'stores', label: 'Stores', min: 1, max: 20, placeholder: 'preset' }, + { key: 'products', label: 'Products', min: 1, max: 50, placeholder: 'preset' }, + { key: 'window_days', label: 'Window (days)', min: 75, max: 365, placeholder: 'preset' }, +] as const + +// Float knobs rendered as sliders. API bounds are the slider ranges. 
+const FLOAT_KNOBS = [ + { key: 'sparsity', label: 'Sparsity', max: 0.9 }, + { key: 'promotion_intensity', label: 'Promotion intensity', max: 0.5 }, + { key: 'stockout_intensity', label: 'Stockout intensity', max: 0.5 }, + { key: 'noise_sigma', label: 'Noise sigma', max: 0.5 }, +] as const + +/** Thresholds above which the NaN-WAPE caveat shows (mirrors the sparse + * preset's documented expected-fail semantics, RUNBOOKS incident 28). */ +const RISKY_SPARSITY = 0.4 +const RISKY_STOCKOUT = 0.25 + +function setKnob( + value: SeedOverrides | null, + key: keyof SeedOverrides, + knobValue: number | undefined +): SeedOverrides | null { + const next: SeedOverrides = { ...(value ?? {}) } + if (knobValue === undefined) { + delete next[key] + } else { + next[key] = knobValue + } + return Object.keys(next).length > 0 ? next : null +} + +export function SeedConfigPanel({ + value, + onChange, + disabled = false, + windowLocked = false, +}: SeedConfigPanelProps) { + const [open, setOpen] = useState(false) + + const touched = value !== null && Object.keys(value).length > 0 + const risky = + (value?.sparsity ?? 0) > RISKY_SPARSITY || (value?.stockout_intensity ?? 
0) > RISKY_STOCKOUT + + const summaryParts: string[] = [] + if (value?.stores !== undefined) summaryParts.push(`${value.stores} stores`) + if (value?.products !== undefined) summaryParts.push(`${value.products} products`) + if (value?.window_days !== undefined) summaryParts.push(`${value.window_days} days`) + if (value?.sparsity !== undefined) summaryParts.push(`sparsity ${value.sparsity.toFixed(2)}`) + if (value?.promotion_intensity !== undefined) + summaryParts.push(`promo ${value.promotion_intensity.toFixed(2)}`) + if (value?.stockout_intensity !== undefined) + summaryParts.push(`stockout ${value.stockout_intensity.toFixed(2)}`) + if (value?.noise_sigma !== undefined) + summaryParts.push(`noise ${value.noise_sigma.toFixed(2)}`) + + return ( + + + {/* The trigger stays clickable while disabled so the operator can + still INSPECT the config mid-run; only the controls lock. */} + + + +
+
+ {INT_KNOBS.map((knob) => { + const locked = knob.key === 'window_days' && windowLocked + return ( + + ) + })} +
+ +
+ {FLOAT_KNOBS.map((knob) => { + const knobValue = value?.[knob.key] + return ( +
+ + {knob.label}:{' '} + + {knobValue !== undefined ? knobValue.toFixed(2) : 'preset'} + + + { + const v = vals[0] + // 0 from an untouched slider means "preset" — only an + // explicit non-zero (or a previously set knob) registers. + if (v === 0 && knobValue === undefined) return + onChange(setKnob(value, knob.key, v === 0 ? undefined : v)) + }} + /> +
+ ) + })} +
+ +
+ {touched ? ( + <> +

{summaryParts.join(' · ')}

+ + + ) : ( +

+ No overrides — the scenario preset drives every knob. +

+ )} + {risky && ( + + + high sparsity/stockout can legitimately fail the backtest (NaN WAPE) + + )} +
+
+
+
+ ) +} diff --git a/frontend/src/components/demo/WorkspaceArtifactsPanel.test.tsx b/frontend/src/components/demo/WorkspaceArtifactsPanel.test.tsx index 8d1e60ce..fa8b0ab7 100644 --- a/frontend/src/components/demo/WorkspaceArtifactsPanel.test.tsx +++ b/frontend/src/components/demo/WorkspaceArtifactsPanel.test.tsx @@ -1,8 +1,8 @@ -import { cleanup, render } from '@testing-library/react' +import { cleanup, render, screen } from '@testing-library/react' import { afterEach, describe, expect, it } from 'vitest' import { MemoryRouter } from 'react-router-dom' import { WorkspaceArtifactsPanel } from './WorkspaceArtifactsPanel' -import type { WorkspaceDetail } from '@/types/api' +import type { WorkspaceDetail, WorkspaceHealth } from '@/types/api' afterEach(() => cleanup()) @@ -28,12 +28,23 @@ const fullWorkspace: WorkspaceDetail = { agent_session_id: 'sess-1', scenario_plan_ids: ['sp-1', 'sp-2'], }, + archived: false, + pinned: false, + tags: [], + replayed_from_workspace_id: null, + seed_overrides: null, + user_scope: null, + run_config: null, + notes: null, + config_schema_version: 1, + approval_events: null, + rag_events: null, } -function renderPanel(workspace: WorkspaceDetail) { +function renderPanel(workspace: WorkspaceDetail, health: WorkspaceHealth | null = null) { return render( - + , ) } @@ -90,3 +101,75 @@ describe('WorkspaceArtifactsPanel', () => { expect(hrefs).toContain('/visualize/forecast?store_id=3&product_id=7') }) }) + +// ============================================================================= +// E2 (#408) — link-health markers + summary chip +// ============================================================================= + +const baseHealth: WorkspaceHealth = { + workspace_id: fullWorkspace.workspace_id, + workspace_status: 'completed', + partial_run: false, + references: [], + alive: 0, + dead: 0, + unknown: 0, + checked_at: '2026-06-13T00:00:00Z', +} + +describe('WorkspaceArtifactsPanel — health', () => { + it('renders the summary chip with 
alive/dead counts', () => { + const health: WorkspaceHealth = { ...baseHealth, alive: 5, dead: 2 } + renderPanel(fullWorkspace, health) + const chip = screen.getByTestId('workspace-health-summary') + expect(chip.textContent).toContain('5 live') + expect(chip.textContent).toContain('2 dead') + }) + + it('hides the dead count at zero and the chip without health data', () => { + const { container, unmount } = renderPanel(fullWorkspace, { ...baseHealth, alive: 3 }) + expect(container.textContent).toContain('3 live') + expect(container.textContent).not.toContain('dead') + unmount() + renderPanel(fullWorkspace, null) + expect(screen.queryByTestId('workspace-health-summary')).toBeNull() + }) + + it('marks a card whose reference probed dead — unknown gets no marker', () => { + const health: WorkspaceHealth = { + ...baseHealth, + alive: 4, + dead: 1, + unknown: 1, + references: [ + { + key: 'scenario_plan_ids[0]', + ref_type: 'scenario_plan', + ref_id: 'sp-1', + status: 'dead', + probe_path: '/scenarios/sp-1', + }, + { + key: 'batch_id', + ref_type: 'batch', + ref_id: 'batch-1', + status: 'unknown', + probe_path: '/batch/batch-1', + }, + ], + } + renderPanel(fullWorkspace, health) + expect(screen.getByTestId('dead-link-sp-1')).toBeTruthy() + expect(screen.queryByTestId('dead-link-batch-1')).toBeNull() + }) + + it('renders the partial-run badge for a never-completed row', () => { + const health: WorkspaceHealth = { + ...baseHealth, + workspace_status: 'failed', + partial_run: true, + } + const { container } = renderPanel({ ...fullWorkspace, status: 'failed' }, health) + expect(container.textContent).toContain('partial run') + }) +}) diff --git a/frontend/src/components/demo/WorkspaceArtifactsPanel.tsx b/frontend/src/components/demo/WorkspaceArtifactsPanel.tsx index 255d62fa..0246f7da 100644 --- a/frontend/src/components/demo/WorkspaceArtifactsPanel.tsx +++ b/frontend/src/components/demo/WorkspaceArtifactsPanel.tsx @@ -4,23 +4,33 @@ * Mirrors InspectArtifactsPanel's card 
shape but reads the persisted * `created_objects` soft references + grain columns from the workspace row * instead of live step.data — the run is long gone; the row is the memory. + * + * E2 (#408) — health-aware: cards whose soft reference probed `dead` carry a + * warning marker, and a summary chip row shows alive/dead counts plus a + * partial-run warning for rows whose pipeline never completed. `unknown` + * references render without a marker (no false alarms on transient 5xx). */ import { Link } from 'react-router-dom' -import { ArrowUpRight } from 'lucide-react' +import { AlertTriangle, ArrowUpRight } from 'lucide-react' +import { Badge } from '@/components/ui/badge' import { Card, CardContent } from '@/components/ui/card' import { ROUTES } from '@/lib/constants' -import type { WorkspaceDetail } from '@/types/api' +import type { WorkspaceDetail, WorkspaceHealth } from '@/types/api' interface ArtifactCard { label: string blurb: string href: string | null disabledReason?: string + /** E2 (#408) — the soft-reference id backing this card, when probeable. */ + refId?: string } interface WorkspaceArtifactsPanelProps { workspace: WorkspaceDetail + /** E2 (#408) — link-health result; undefined while loading / not probed. */ + health?: WorkspaceHealth | null } function asString(value: unknown): string | null { @@ -46,18 +56,21 @@ function buildCards(ws: WorkspaceDetail): ArtifactCard[] { blurb: 'Registry detail for the run this workspace promoted.', href: winningRunId ? `${ROUTES.EXPLORER.RUNS}/${winningRunId}` : null, disabledReason: 'The run never registered a winner.', + refId: winningRunId ?? undefined, }) cards.push({ label: 'V2 feature-frame run', blurb: 'The prophet_like V2 run with feature groups + safety classes.', href: v2RunId ? `${ROUTES.EXPLORER.RUNS}/${v2RunId}` : null, disabledReason: 'No V2 run recorded (demo_minimal flow or v2_train skipped).', + refId: v2RunId ?? 
undefined, }) planIds.forEach((planId, index) => { cards.push({ label: `Scenario plan ${index + 1}`, blurb: 'Saved what-if plan from the planning phase.', href: `${ROUTES.VISUALIZE.PLANNER}?scenario_id=${planId}`, + refId: planId, }) }) if (planIds.length === 0) { @@ -73,12 +86,14 @@ function buildCards(ws: WorkspaceDetail): ArtifactCard[] { blurb: 'Run-by-run results for the batch preset sweep.', href: batchId ? `${ROUTES.VISUALIZE.BATCH}/${batchId}` : null, disabledReason: 'No batch recorded (demo_minimal flow or batch skipped).', + refId: batchId ?? undefined, }) cards.push({ label: 'Deployment alias', blurb: alias ? `Ops view of the ${alias} alias.` : 'Ops view of aliases.', href: alias ? ROUTES.OPS : null, disabledReason: 'No alias recorded.', + refId: alias ?? undefined, }) cards.push({ label: 'Forecast on grain', @@ -101,22 +116,53 @@ function buildCards(ws: WorkspaceDetail): ArtifactCard[] { blurb: 'The chat surface — the recorded session has likely expired.', href: sessionId ? ROUTES.CHAT : null, disabledReason: 'No agent session recorded (no LLM key or step skipped).', + refId: sessionId ?? undefined, }) return cards } -export function WorkspaceArtifactsPanel({ workspace }: WorkspaceArtifactsPanelProps) { +const DEAD_LINK_TOOLTIP = 'This object no longer exists — it was deleted after the run.' + +export function WorkspaceArtifactsPanel({ workspace, health }: WorkspaceArtifactsPanelProps) { const cards = buildCards(workspace) + // E2 (#408) — ref_id -> status lookup; only `dead` produces a marker. + const deadRefIds = new Set( + (health?.references ?? []) + .filter((ref) => ref.status === 'dead') + .map((ref) => ref.ref_id) + ) return ( -

- Workspace artifacts - - {workspace.name ?? workspace.workspace_id.slice(0, 8)} - -

+
+

+ Workspace artifacts + + {workspace.name ?? workspace.workspace_id.slice(0, 8)} + +

+ {health && ( +
+ ✓ {health.alive} live + {health.dead > 0 && ( + ✕ {health.dead} dead + )} + {health.partial_run && ( + + partial run + + )} +
+ )} +

Everything this kept run created, re-attached from its workspace row. Cards greyed out when the run did not record the matching object. @@ -124,26 +170,38 @@ export function WorkspaceArtifactsPanel({ workspace }: WorkspaceArtifactsPanelPr

{cards.map((card) => { const isActive = typeof card.href === 'string' && card.href.length > 0 + const isDead = card.refId !== undefined && deadRefIds.has(card.refId) + const cardTitle = ( +
+ + {card.label} + {isDead && ( + + )} + + {isActive && } +
+ ) return (
{isActive ? ( -
- {card.label} - -
+ {cardTitle}

{card.blurb}

) : (
-
{card.label}
+ {cardTitle}

{card.blurb}

)} diff --git a/frontend/src/components/demo/WorkspaceEditDialog.test.tsx b/frontend/src/components/demo/WorkspaceEditDialog.test.tsx new file mode 100644 index 00000000..476d120a --- /dev/null +++ b/frontend/src/components/demo/WorkspaceEditDialog.test.tsx @@ -0,0 +1,142 @@ +import { cleanup, fireEvent, render, screen } from '@testing-library/react' +import { afterEach, beforeAll, beforeEach, describe, expect, it, vi } from 'vitest' +import { toast } from 'sonner' +import { WorkspaceEditDialog } from './WorkspaceEditDialog' +import type { WorkspaceDetail, WorkspaceListItem } from '@/types/api' + +beforeAll(() => { + class ResizeObserverStub { + observe() {} + unobserve() {} + disconnect() {} + } + vi.stubGlobal('ResizeObserver', ResizeObserverStub) +}) + +afterEach(() => { + cleanup() + vi.clearAllMocks() +}) + +const baseItem: WorkspaceListItem = { + workspace_id: 'a'.repeat(32), + name: 'edit-me', + status: 'completed', + seed: 7, + scenario: 'demo_minimal', + reset: false, + skip_seed: true, + result_summary: null, + created_at: '2026-06-01T12:00:00Z', + archived: false, + pinned: false, + tags: ['smoke'], + replayed_from_workspace_id: null, + seed_overrides: null, + user_scope: null, +} + +let mockDetail: { + data: Partial | undefined + isSuccess: boolean + isError: boolean +} = { data: undefined, isSuccess: false, isError: false } + +let mockPatchResult: { mutate: ReturnType; isPending: boolean } = { + mutate: vi.fn(), + isPending: false, +} + +vi.mock('@/hooks/use-workspaces', () => ({ + useWorkspace: () => mockDetail, + usePatchWorkspace: () => mockPatchResult, +})) + +vi.mock('sonner', () => ({ + toast: { success: vi.fn(), error: vi.fn() }, +})) + +beforeEach(() => { + mockDetail = { + data: { ...baseItem, notes: 'old notes' }, + isSuccess: true, + isError: false, + } + mockPatchResult = { mutate: vi.fn(), isPending: false } +}) + +function renderDialog(workspace: WorkspaceListItem | null = baseItem) { + const onClose = vi.fn() + render() + return { 
onClose } +} + +describe('WorkspaceEditDialog', () => { + it('renders nothing when closed', () => { + renderDialog(null) + expect(document.body.textContent).not.toContain('Edit workspace details') + }) + + it('primes the form from the row + detail notes', () => { + renderDialog() + expect((screen.getByLabelText('Name') as HTMLInputElement).value).toBe('edit-me') + expect((screen.getByLabelText('Notes') as HTMLTextAreaElement).value).toBe('old notes') + expect( + (screen.getByLabelText(/Tags/) as HTMLInputElement).value, + ).toBe('smoke') + }) + + it('disables Save with an inline hint on a pattern violation', () => { + renderDialog() + fireEvent.change(screen.getByLabelText('Name'), { target: { value: 'Bad Name!' } }) + expect(document.body.textContent).toContain('Lowercase letters/digits only') + expect((screen.getByTestId('workspace-edit-save') as HTMLButtonElement).disabled).toBe(true) + expect(mockPatchResult.mutate).not.toHaveBeenCalled() + }) + + it('sends ONLY dirty fields (partial-update semantics)', () => { + renderDialog() + fireEvent.change(screen.getByLabelText(/Tags/), { target: { value: 'smoke, e2' } }) + fireEvent.click(screen.getByTestId('workspace-edit-save')) + expect(mockPatchResult.mutate).toHaveBeenCalledTimes(1) + const [payload] = mockPatchResult.mutate.mock.calls[0] as [ + { workspaceId: string; update: Record }, + unknown, + ] + expect(payload.workspaceId).toBe(baseItem.workspace_id) + expect(payload.update).toEqual({ tags: ['smoke', 'e2'] }) + }) + + it('clearing the name sends an explicit null', () => { + renderDialog() + fireEvent.change(screen.getByLabelText('Name'), { target: { value: '' } }) + fireEvent.click(screen.getByTestId('workspace-edit-save')) + const [payload] = mockPatchResult.mutate.mock.calls[0] as [ + { update: Record }, + unknown, + ] + expect(payload.update).toEqual({ name: null }) + }) + + it('a clean save (no changes) just closes without a mutation', () => { + const { onClose } = renderDialog() + 
fireEvent.click(screen.getByTestId('workspace-edit-save')) + expect(mockPatchResult.mutate).not.toHaveBeenCalled() + expect(onClose).toHaveBeenCalledTimes(1) + }) + + it('success toasts and closes; failure toasts an error', () => { + const { onClose } = renderDialog() + fireEvent.change(screen.getByLabelText('Name'), { target: { value: 'renamed' } }) + fireEvent.click(screen.getByTestId('workspace-edit-save')) + const [, options] = mockPatchResult.mutate.mock.calls[0] as [ + unknown, + { onSuccess: () => void; onError: (error: unknown) => void }, + ] + options.onSuccess() + expect(toast.success).toHaveBeenCalledWith('Workspace updated.') + expect(onClose).toHaveBeenCalled() + options.onError(new Error('boom')) + expect(toast.error).toHaveBeenCalledWith(expect.stringContaining('Update failed')) + }) +}) diff --git a/frontend/src/components/demo/WorkspaceEditDialog.tsx b/frontend/src/components/demo/WorkspaceEditDialog.tsx new file mode 100644 index 00000000..82367182 Binary files /dev/null and b/frontend/src/components/demo/WorkspaceEditDialog.tsx differ diff --git a/frontend/src/components/demo/WorkspaceLineageStrip.test.tsx b/frontend/src/components/demo/WorkspaceLineageStrip.test.tsx new file mode 100644 index 00000000..27e4e3da --- /dev/null +++ b/frontend/src/components/demo/WorkspaceLineageStrip.test.tsx @@ -0,0 +1,89 @@ +import { cleanup, fireEvent, render, screen } from '@testing-library/react' +import { afterEach, describe, expect, it, vi } from 'vitest' +import { WorkspaceLineageStrip } from './WorkspaceLineageStrip' +import type { WorkspaceLineage } from '@/hooks/use-workspaces' +import type { WorkspaceDetail } from '@/types/api' + +afterEach(() => { + cleanup() + vi.clearAllMocks() +}) + +let mockLineage: { data: WorkspaceLineage | undefined } = { data: undefined } + +vi.mock('@/hooks/use-workspaces', () => ({ + useWorkspaceLineage: () => mockLineage, +})) + +const detailOf = (id: string, name: string | null): WorkspaceDetail => + ({ workspace_id: id, 
name }) as WorkspaceDetail + +function renderStrip(onLoadAncestor = vi.fn()) { + render() + return onLoadAncestor +} + +describe('WorkspaceLineageStrip', () => { + it('renders nothing when the workspace has no lineage', () => { + mockLineage = { + data: { + entries: [ + { workspace_id: 'a'.repeat(32), name: 'solo', deleted: false, detail: detailOf('a'.repeat(32), 'solo') }, + ], + truncated: false, + }, + } + renderStrip() + expect(screen.queryByTestId('workspace-lineage')).toBeNull() + }) + + it('renders the chain newest → original with clickable ancestors', () => { + const parentDetail = detailOf('b'.repeat(32), 'parent') + mockLineage = { + data: { + entries: [ + { workspace_id: 'a'.repeat(32), name: 'child', deleted: false, detail: detailOf('a'.repeat(32), 'child') }, + { workspace_id: 'b'.repeat(32), name: 'parent', deleted: false, detail: parentDetail }, + { workspace_id: 'c'.repeat(32), name: 'origin', deleted: false, detail: detailOf('c'.repeat(32), 'origin') }, + ], + truncated: false, + }, + } + const onLoadAncestor = renderStrip() + const strip = screen.getByTestId('workspace-lineage') + const text = strip.textContent ?? '' + // Order: current first, then parents. 
+ expect(text.indexOf('child')).toBeLessThan(text.indexOf('parent')) + expect(text.indexOf('parent')).toBeLessThan(text.indexOf('origin')) + fireEvent.click(screen.getByText('parent')) + expect(onLoadAncestor).toHaveBeenCalledWith(parentDetail) + }) + + it('renders the deleted-ancestor sentinel without erroring', () => { + mockLineage = { + data: { + entries: [ + { workspace_id: 'a'.repeat(32), name: 'child', deleted: false, detail: detailOf('a'.repeat(32), 'child') }, + { workspace_id: 'b'.repeat(32), name: null, deleted: true, detail: null }, + ], + truncated: false, + }, + } + renderStrip() + expect(screen.getByTestId('workspace-lineage').textContent).toContain('(original deleted)') + }) + + it('renders a trailing ellipsis when the chain is depth-capped', () => { + mockLineage = { + data: { + entries: [ + { workspace_id: 'a'.repeat(32), name: 'child', deleted: false, detail: detailOf('a'.repeat(32), 'child') }, + { workspace_id: 'b'.repeat(32), name: 'parent', deleted: false, detail: detailOf('b'.repeat(32), 'parent') }, + ], + truncated: true, + }, + } + renderStrip() + expect(screen.getByTestId('workspace-lineage').textContent).toContain('…') + }) +}) diff --git a/frontend/src/components/demo/WorkspaceLineageStrip.tsx b/frontend/src/components/demo/WorkspaceLineageStrip.tsx new file mode 100644 index 00000000..c405fdcc --- /dev/null +++ b/frontend/src/components/demo/WorkspaceLineageStrip.tsx @@ -0,0 +1,64 @@ +/** + * E2 (#408) — replay lineage breadcrumb for the loaded workspace. + * + * Renders the replayed_from_workspace_id chain newest → original: + * `this ← parent ← grandparent …` (depth-capped). Ancestors are clickable + * (loads them); a deleted ancestor renders as "(original deleted)" — dangling + * soft references are designed, never an error. Renders nothing when the + * loaded workspace is not a replay. 
+ */ + +import { Fragment } from 'react' +import { Button } from '@/components/ui/button' +import { useWorkspaceLineage } from '@/hooks/use-workspaces' +import type { WorkspaceDetail } from '@/types/api' + +interface WorkspaceLineageStripProps { + workspaceId: string + /** Load an ancestor into the page (full detail — the walk already has it). */ + onLoadAncestor: (ws: WorkspaceDetail) => void +} + +function labelOf(workspaceId: string, name: string | null): string { + return name ?? workspaceId.slice(0, 8) +} + +export function WorkspaceLineageStrip({ workspaceId, onLoadAncestor }: WorkspaceLineageStripProps) { + const { data } = useWorkspaceLineage(workspaceId) + const entries = data?.entries ?? [] + + // No lineage to show: still walking, or the loaded row is not a replay. + if (entries.length < 2) return null + + return ( +
+ Replay lineage: + {entries.map((entry, index) => ( + + {index > 0 && } + {entry.deleted ? ( + (original deleted) + ) : index === 0 ? ( + // The loaded workspace itself — not a link. + + {labelOf(entry.workspace_id, entry.name)} + + ) : ( + + )} + + ))} + {data?.truncated && } +
+ ) +} diff --git a/frontend/src/components/demo/WorkspacePanel.test.tsx b/frontend/src/components/demo/WorkspacePanel.test.tsx index 2d08aa40..dffb3ce0 100644 --- a/frontend/src/components/demo/WorkspacePanel.test.tsx +++ b/frontend/src/components/demo/WorkspacePanel.test.tsx @@ -1,8 +1,27 @@ import { QueryClient, QueryClientProvider } from '@tanstack/react-query' -import { cleanup, fireEvent, render } from '@testing-library/react' -import { afterEach, describe, expect, it, vi } from 'vitest' +import { cleanup, fireEvent, render, screen, waitFor } from '@testing-library/react' +import { MemoryRouter } from 'react-router-dom' +import { afterEach, beforeAll, beforeEach, describe, expect, it, vi } from 'vitest' +import { toast } from 'sonner' import { WorkspacePanel } from './WorkspacePanel' -import type { WorkspaceListItem, WorkspaceListResponse } from '@/types/api' +import { ApiError } from '@/lib/api' +import type { WorkspaceListItem, WorkspaceListParams, WorkspaceListResponse } from '@/types/api' + +beforeAll(() => { + // Radix AlertDialog/DropdownMenu need these in jsdom. 
+ class ResizeObserverStub { + observe() {} + unobserve() {} + disconnect() {} + } + vi.stubGlobal('ResizeObserver', ResizeObserverStub) + if (!Element.prototype.hasPointerCapture) { + Element.prototype.hasPointerCapture = () => false + } + if (!Element.prototype.scrollIntoView) { + Element.prototype.scrollIntoView = () => {} + } +}) afterEach(() => { cleanup() @@ -19,6 +38,19 @@ const baseItem: WorkspaceListItem = { skip_seed: true, result_summary: { winner_model_type: 'seasonal_naive' }, created_at: '2026-06-01T12:00:00Z', + archived: false, + pinned: false, + tags: [], + replayed_from_workspace_id: null, + seed_overrides: null, + user_scope: null, + run_config: null, +} + +const secondItem: WorkspaceListItem = { + ...baseItem, + workspace_id: 'b'.repeat(32), + name: 'second', } let mockResponse: { data: WorkspaceListResponse | undefined; isLoading: boolean } = { @@ -26,25 +58,78 @@ let mockResponse: { data: WorkspaceListResponse | undefined; isLoading: boolean isLoading: false, } +let lastListParams: WorkspaceListParams | undefined + +let mockDeleteResult: { + mutate: ReturnType + mutateAsync: ReturnType + isPending: boolean +} = { mutate: vi.fn(), mutateAsync: vi.fn(), isPending: false } + +let mockPatchResult: { mutate: ReturnType; isPending: boolean } = { + mutate: vi.fn(), + isPending: false, +} + +let mockExportResult: { mutate: ReturnType; isPending: boolean } = { + mutate: vi.fn(), + isPending: false, +} + +const mockNavigate = vi.fn() + vi.mock('@/hooks/use-workspaces', () => ({ - useWorkspaces: () => mockResponse, + useWorkspaces: (params: WorkspaceListParams) => { + lastListParams = params + return mockResponse + }, + // WorkspaceEditDialog dependencies (mounted closed by the panel). 
+ useWorkspace: () => ({ data: undefined, isSuccess: false, isError: false }), + useDeleteWorkspace: () => mockDeleteResult, + usePatchWorkspace: () => mockPatchResult, + useExportWorkspace: () => mockExportResult, +})) + +vi.mock('react-router-dom', async (importOriginal) => { + const actual = await importOriginal() + return { ...actual, useNavigate: () => mockNavigate } +}) + +vi.mock('sonner', () => ({ + toast: { success: vi.fn(), error: vi.fn() }, })) +beforeEach(() => { + lastListParams = undefined + mockDeleteResult = { mutate: vi.fn(), mutateAsync: vi.fn(), isPending: false } + mockPatchResult = { mutate: vi.fn(), isPending: false } + mockExportResult = { mutate: vi.fn(), isPending: false } +}) + function renderPanel(props: Partial[0]> = {}) { const queryClient = new QueryClient({ defaultOptions: { queries: { retry: false } } }) return render( - {}} - onReplay={() => {}} - isRunning={false} - lastWorkspaceId={null} - {...props} - /> + + {}} + onRequestReplay={() => {}} + isRunning={false} + lastWorkspaceId={null} + {...props} + /> + , ) } +/** Open a Radix dropdown/select (pattern: model-family-tabs.test.tsx). */ +function radixOpen(target: HTMLElement) { + fireEvent.pointerDown(target, { button: 0, ctrlKey: false }) + fireEvent.mouseDown(target, { button: 0 }) + fireEvent.click(target) +} + describe('WorkspacePanel', () => { it('renders the discoverable empty state (panel never hidden)', () => { mockResponse = { data: { workspaces: [], total: 0 }, isLoading: false } @@ -61,7 +146,6 @@ describe('WorkspacePanel', () => { expect(container.textContent).toContain('seed 7') expect(container.textContent).toContain('COMPLETED') expect(container.textContent).toContain('winner seasonal_naive') - // No destructive badge on a reset=false row. 
expect(container.textContent).not.toContain('DESTRUCTIVE') }) @@ -74,32 +158,340 @@ describe('WorkspacePanel', () => { expect(container.textContent).toContain('DESTRUCTIVE') }) - it('falls back to the workspace_id slice when the row is unnamed', () => { + it('renders the custom-config badge only when run_config is set (E4 #410)', () => { + // Default-config row: no badge. + mockResponse = { data: { workspaces: [baseItem], total: 1 }, isLoading: false } + const plain = renderPanel() + expect(plain.container.querySelector('[data-testid="run-config-summary-badge"]')).toBeNull() + cleanup() + + // Custom-config row: badge with the compact summary. mockResponse = { - data: { workspaces: [{ ...baseItem, name: null }], total: 1 }, + data: { + workspaces: [ + { + ...baseItem, + run_config: { + train_model_types: ['naive', 'regression', 'prophet_like', 'seasonal_average'], + backtest: { horizon: 21, n_splits: 4, metric: 'rmse' }, + }, + }, + ], + total: 1, + }, isLoading: false, } - const { container } = renderPanel() - expect(container.textContent).toContain('aaaaaaaa') + const custom = renderPanel() + const badge = custom.container.querySelector('[data-testid="run-config-summary-badge"]') + expect(badge).not.toBeNull() + expect(badge!.textContent).toContain('4 models') + expect(badge!.textContent).toContain('rmse') + expect(badge!.textContent).toContain('4×h21') }) - it('invokes onLoad / onReplay with the list item', () => { + it('invokes onLoad / onRequestReplay with the list item — replay never starts here', () => { mockResponse = { data: { workspaces: [baseItem], total: 1 }, isLoading: false } const onLoad = vi.fn() - const onReplay = vi.fn() - const { container } = renderPanel({ onLoad, onReplay }) + const onRequestReplay = vi.fn() + const { container } = renderPanel({ onLoad, onRequestReplay }) const buttons = Array.from(container.querySelectorAll('button')) fireEvent.click(buttons.find((b) => (b.textContent ?? '').includes('Load'))!) 
expect(onLoad).toHaveBeenCalledWith(baseItem) fireEvent.click(buttons.find((b) => (b.textContent ?? '').includes('Replay'))!) - expect(onReplay).toHaveBeenCalledWith(baseItem) + expect(onRequestReplay).toHaveBeenCalledWith(baseItem) }) - it('disables both actions while a run is in flight', () => { + it('disables row actions while a run is in flight', () => { mockResponse = { data: { workspaces: [baseItem], total: 1 }, isLoading: false } - const { container } = renderPanel({ isRunning: true }) - const buttons = Array.from(container.querySelectorAll('button')) - expect(buttons.length).toBeGreaterThanOrEqual(2) - expect(buttons.every((b) => b.disabled)).toBe(true) + renderPanel({ isRunning: true }) + const labels = ['Load', 'Replay', 'Export'] + for (const label of labels) { + const button = screen + .getAllByRole('button') + .find((b) => (b.textContent ?? '').includes(label))! as HTMLButtonElement + expect(button.disabled).toBe(true) + } + }) +}) + +describe('WorkspacePanel — E6 export', () => { + function findExportButton(container: HTMLElement) { + return Array.from(container.querySelectorAll('button')).find((b) => + (b.textContent ?? '').includes('Export') + )! 
+ } + + it('renders an Export button per row', () => { + mockResponse = { data: { workspaces: [baseItem], total: 1 }, isLoading: false } + const { container } = renderPanel() + expect(findExportButton(container)).toBeTruthy() + }) + + it('fires the export mutation with the row id and toasts the bundle path', () => { + mockResponse = { data: { workspaces: [baseItem], total: 1 }, isLoading: false } + const { container } = renderPanel() + fireEvent.click(findExportButton(container)) + + expect(mockExportResult.mutate).toHaveBeenCalledTimes(1) + const [workspaceId, options] = mockExportResult.mutate.mock.calls[0] as [ + string, + { onSuccess: (r: unknown) => void; onError: (error: unknown) => void }, + ] + expect(workspaceId).toBe(baseItem.workspace_id) + + options.onSuccess({ + workspace_id: baseItem.workspace_id, + bundle_path: `artifacts/showcase/${baseItem.workspace_id}`, + bundle_format_version: 1, + exported_at: '2026-06-12T14:00:00Z', + files: [ + { path: 'manifest.json', sha256: 'a', size_bytes: 1 }, + { path: 'checksums.sha256', sha256: 'b', size_bytes: 1 }, + ], + scenario_plans_exported: 0, + model_runs_referenced: 0, + unresolved_references: [], + validated: true, + }) + expect(toast.success).toHaveBeenCalledWith(expect.stringContaining('Bundle written to')) + expect(toast.success).toHaveBeenCalledWith(expect.stringContaining('checksums verified')) + }) + + it('notes dangling references in the success toast', () => { + mockResponse = { data: { workspaces: [baseItem], total: 1 }, isLoading: false } + const { container } = renderPanel() + fireEvent.click(findExportButton(container)) + const [, options] = mockExportResult.mutate.mock.calls[0] as [ + string, + { onSuccess: (r: unknown) => void; onError: (error: unknown) => void }, + ] + options.onSuccess({ + workspace_id: baseItem.workspace_id, + bundle_path: `artifacts/showcase/${baseItem.workspace_id}`, + bundle_format_version: 1, + exported_at: '2026-06-12T14:00:00Z', + files: [{ path: 'manifest.json', 
sha256: 'a', size_bytes: 1 }], + scenario_plans_exported: 0, + model_runs_referenced: 0, + unresolved_references: [{ key: 'scenario_plan_ids', ref_id: 'gone', reason: 'HTTP 404' }], + validated: true, + }) + expect(toast.success).toHaveBeenCalledWith(expect.stringContaining('1 unresolved reference')) + }) + + it('surfaces an export failure via the error toast', () => { + mockResponse = { data: { workspaces: [baseItem], total: 1 }, isLoading: false } + const { container } = renderPanel() + fireEvent.click(findExportButton(container)) + const [, options] = mockExportResult.mutate.mock.calls[0] as [ + string, + { onSuccess: (r: unknown) => void; onError: (error: unknown) => void }, + ] + options.onError(new ApiError('Export bundle write failed: disk full', 500)) + expect(toast.error).toHaveBeenCalledWith(expect.stringContaining('Export failed')) + }) +}) + +describe('WorkspacePanel — E2 lifecycle badges + toolbar params', () => { + it('renders pinned / archived / replay badges', () => { + mockResponse = { + data: { + workspaces: [ + { + ...baseItem, + pinned: true, + archived: true, + replayed_from_workspace_id: 'c'.repeat(32), + }, + ], + total: 1, + }, + isLoading: false, + } + const { container } = renderPanel() + expect(container.textContent).toContain('archived') + expect(container.textContent).toContain('replay') + expect(screen.getByLabelText('Unpin e4-panel')).toBeTruthy() + }) + + it('flows the debounced search into the q list param (min 2 chars)', async () => { + mockResponse = { data: { workspaces: [baseItem], total: 1 }, isLoading: false } + renderPanel() + fireEvent.change(screen.getByLabelText('Search workspaces by name'), { + target: { value: 'demo' }, + }) + await waitFor(() => expect(lastListParams?.q).toBe('demo')) + }) + + it('flows the show-archived toggle into include_archived', () => { + mockResponse = { data: { workspaces: [baseItem], total: 1 }, isLoading: false } + const { container } = renderPanel() + 
expect(lastListParams?.include_archived).toBeUndefined() + const checkbox = Array.from(container.querySelectorAll('button[role="checkbox"]')).find( + (el) => el.parentElement?.textContent?.includes('Show archived'), + )! + fireEvent.click(checkbox) + expect(lastListParams?.include_archived).toBe(true) + }) + + it('flows the sort select into sort_by/sort_order', () => { + mockResponse = { data: { workspaces: [baseItem], total: 1 }, isLoading: false } + renderPanel() + radixOpen(screen.getByLabelText('Sort workspaces')) + fireEvent.click(screen.getByText('Name')) + expect(lastListParams?.sort_by).toBe('name') + expect(lastListParams?.sort_order).toBe('asc') + }) + + it('clicking a tag chip filters by that tag; the toolbar chip clears it', () => { + mockResponse = { + data: { workspaces: [{ ...baseItem, tags: ['smoke'] }], total: 1 }, + isLoading: false, + } + renderPanel() + fireEvent.click(screen.getByLabelText('Filter by tag smoke')) + expect(lastListParams?.tags).toBe('smoke') + fireEvent.click(screen.getByLabelText('Clear tag filter smoke')) + expect(lastListParams?.tags).toBeUndefined() + }) + + it('pin toggle fires the PATCH mutation', () => { + mockResponse = { data: { workspaces: [baseItem], total: 1 }, isLoading: false } + renderPanel() + fireEvent.click(screen.getByLabelText('Pin e4-panel')) + expect(mockPatchResult.mutate).toHaveBeenCalledWith( + { workspaceId: baseItem.workspace_id, update: { pinned: true } }, + expect.anything(), + ) + }) + + it('archive action in the dropdown fires the PATCH mutation', () => { + mockResponse = { data: { workspaces: [baseItem], total: 1 }, isLoading: false } + renderPanel() + radixOpen(screen.getByLabelText('More actions for e4-panel')) + fireEvent.click(screen.getByText('Archive')) + expect(mockPatchResult.mutate).toHaveBeenCalledWith( + { workspaceId: baseItem.workspace_id, update: { archived: true } }, + expect.anything(), + ) + }) +}) + +describe('WorkspacePanel — multi-select', () => { + function selectBoth() { + 
mockResponse = { data: { workspaces: [baseItem, secondItem], total: 2 }, isLoading: false } + const result = renderPanel({ onDeleted: vi.fn() }) + fireEvent.click(screen.getByLabelText('Select workspace e4-panel')) + fireEvent.click(screen.getByLabelText('Select workspace second')) + return result + } + + it('shows the selection footer with the count', () => { + const { container } = selectBoth() + expect(container.textContent).toContain('2 selected') + }) + + it('Compare is enabled only at exactly two selections', () => { + mockResponse = { data: { workspaces: [baseItem, secondItem], total: 2 }, isLoading: false } + renderPanel() + fireEvent.click(screen.getByLabelText('Select workspace e4-panel')) + const compare = () => + screen + .getAllByRole('button') + .find((b) => (b.textContent ?? '') === 'Compare')! as HTMLButtonElement + expect(compare().disabled).toBe(true) + fireEvent.click(screen.getByLabelText('Select workspace second')) + expect(compare().disabled).toBe(false) + fireEvent.click(compare()) + expect(mockNavigate).toHaveBeenCalledWith( + `/showcase/compare?a=${baseItem.workspace_id}&b=${secondItem.workspace_id}`, + ) + }) + + it('delete-selected confirms once then issues N sequential single deletes', async () => { + mockDeleteResult.mutateAsync.mockResolvedValue(undefined) + selectBoth() + fireEvent.click( + screen.getAllByRole('button').find((b) => (b.textContent ?? '').includes('Delete selected'))!, + ) + // Nothing deleted before the confirmation. 
+ expect(mockDeleteResult.mutateAsync).not.toHaveBeenCalled() + expect(document.body.textContent).toContain('Delete 2 workspace records?') + fireEvent.click(screen.getByTestId('workspace-multi-delete-confirm')) + await waitFor(() => expect(mockDeleteResult.mutateAsync).toHaveBeenCalledTimes(2)) + expect(mockDeleteResult.mutateAsync).toHaveBeenNthCalledWith(1, baseItem.workspace_id) + expect(mockDeleteResult.mutateAsync).toHaveBeenNthCalledWith(2, secondItem.workspace_id) + await waitFor(() => + expect(toast.success).toHaveBeenCalledWith(expect.stringContaining('2 workspace records')), + ) + }) + + it('collects multi-delete failures into one error toast', async () => { + mockDeleteResult.mutateAsync + .mockResolvedValueOnce(undefined) + .mockRejectedValueOnce(new ApiError('Workspace not found', 404)) + selectBoth() + fireEvent.click( + screen.getAllByRole('button').find((b) => (b.textContent ?? '').includes('Delete selected'))!, + ) + fireEvent.click(screen.getByTestId('workspace-multi-delete-confirm')) + await waitFor(() => + expect(toast.error).toHaveBeenCalledWith(expect.stringContaining('Some deletes failed')), + ) + }) +}) + +describe('WorkspacePanel — single delete', () => { + function openDeleteDialog() { + mockResponse = { data: { workspaces: [baseItem], total: 1 }, isLoading: false } + const result = renderPanel({ onDeleted: vi.fn() }) + radixOpen(screen.getByLabelText('More actions for e4-panel')) + fireEvent.click(screen.getByText('Delete…')) + return result + } + + it('shows a confirmation whose copy makes metadata-only deletion clear', () => { + openDeleteDialog() + expect(mockDeleteResult.mutate).not.toHaveBeenCalled() + const copy = document.body.textContent ?? 
'' + expect(copy).toContain('Delete workspace "e4-panel"?') + expect(copy).toContain('only the saved workspace record') + expect(copy).toContain('NOT deleted') + }) + + it('confirming deletes the row and notifies the page on success', () => { + const onDeleted = vi.fn() + mockResponse = { data: { workspaces: [baseItem], total: 1 }, isLoading: false } + renderPanel({ onDeleted }) + radixOpen(screen.getByLabelText('More actions for e4-panel')) + fireEvent.click(screen.getByText('Delete…')) + fireEvent.click(screen.getByTestId('workspace-delete-confirm')) + + expect(mockDeleteResult.mutate).toHaveBeenCalledTimes(1) + const [workspaceId, options] = mockDeleteResult.mutate.mock.calls[0] as [ + string, + { onSuccess: () => void; onError: (error: unknown) => void }, + ] + expect(workspaceId).toBe(baseItem.workspace_id) + options.onSuccess() + expect(onDeleted).toHaveBeenCalledWith(baseItem.workspace_id) + expect(toast.success).toHaveBeenCalledWith(expect.stringContaining('were kept')) + }) + + it('cancelling the dialog never fires the mutation', () => { + openDeleteDialog() + fireEvent.click(screen.getByText('Keep workspace')) + expect(mockDeleteResult.mutate).not.toHaveBeenCalled() + }) + + it('surfaces a failed delete via the error toast', () => { + openDeleteDialog() + fireEvent.click(screen.getByTestId('workspace-delete-confirm')) + const [, options] = mockDeleteResult.mutate.mock.calls[0] as [ + string, + { onSuccess: () => void; onError: (error: unknown) => void }, + ] + options.onError(new ApiError('Workspace not found: ' + 'a'.repeat(32), 404)) + expect(toast.error).toHaveBeenCalledWith(expect.stringContaining('Delete failed')) }) }) diff --git a/frontend/src/components/demo/WorkspacePanel.tsx b/frontend/src/components/demo/WorkspacePanel.tsx index 6638b597..b3cdcd89 100644 --- a/frontend/src/components/demo/WorkspacePanel.tsx +++ b/frontend/src/components/demo/WorkspacePanel.tsx @@ -1,35 +1,107 @@ /** - * E4 (#393) — server-backed saved-workspaces panel for the 
Showcase page. + * E4 (#393) / E2 (#408) — server-backed saved-workspaces panel for the + * Showcase page. * - * Lists `showcase_workspace` rows (newest first) with two actions per row: - * - Load — re-attach: the page repopulates the run controls + renders the - * artifact deep-link cards. Read-only; no run starts. - * - Replay — re-run: the page re-submits the recorded config verbatim through - * the existing WS run path with preservation="keep". + * Lists `showcase_workspace` rows with lifecycle management (E2 #408): + * - Toolbar: name search, show-archived toggle, allow-listed sort, active + * tag-filter chip. The panel owns the list params; filtering/sorting is + * server-side (pinned rows always order first). + * - Per-row: Load (restore config, read-only), Replay (routes through the + * page's confirm dialog via onRequestReplay — NO replay starts here), + * pin toggle, actions dropdown (pin / archive / edit details / delete), + * pinned/archived/replay badges, clickable tag chips. + * - Multi-select: per-row checkboxes; Delete selected (N sequential single + * DELETEs behind one confirmation — deliberately NO bulk endpoint) and + * Compare (exactly 2 → /showcase/compare?a=&b=). * - * The panel stays dumb: it hands the LIST item to the page callbacks; detail - * fetching (created_objects) lives in the page via useWorkspace. + * Deletes remove the workspace METADATA row only — created objects are soft + * references and stay intact. 
*/ -import { useEffect } from 'react' +import { useEffect, useMemo, useState } from 'react' +import { useNavigate } from 'react-router-dom' import { useQueryClient } from '@tanstack/react-query' -import { FolderOpen, Play } from 'lucide-react' +import { + Archive, + ArchiveRestore, + FileDown, + FolderOpen, + MoreHorizontal, + Pencil, + Pin, + PinOff, + Play, + Search, + Trash2, + X, +} from 'lucide-react' +import { toast } from 'sonner' +import { + AlertDialog, + AlertDialogAction, + AlertDialogCancel, + AlertDialogContent, + AlertDialogDescription, + AlertDialogFooter, + AlertDialogHeader, + AlertDialogTitle, +} from '@/components/ui/alert-dialog' +import { Badge } from '@/components/ui/badge' import { Button } from '@/components/ui/button' import { Card, CardContent } from '@/components/ui/card' -import { useWorkspaces } from '@/hooks/use-workspaces' -import type { WorkspaceListItem } from '@/types/api' +import { Checkbox } from '@/components/ui/checkbox' +import { + DropdownMenu, + DropdownMenuContent, + DropdownMenuItem, + DropdownMenuTrigger, +} from '@/components/ui/dropdown-menu' +import { Input } from '@/components/ui/input' +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from '@/components/ui/select' +import { + useDeleteWorkspace, + useExportWorkspace, + usePatchWorkspace, + useWorkspaces, +} from '@/hooks/use-workspaces' +import { ApiError, getErrorMessage } from '@/lib/api' +import { ROUTES } from '@/lib/constants' +import { cn } from '@/lib/utils' +import type { WorkspaceListItem, WorkspaceListParams } from '@/types/api' +import { WorkspaceEditDialog } from './WorkspaceEditDialog' +import { parseRunConfig } from './run-config-utils' interface WorkspacePanelProps { /** Called when the operator clicks Load — restore config + artifacts, no run. */ onLoad: (ws: WorkspaceListItem) => void - /** Called when the operator clicks Replay — re-run the recorded config. 
*/ - onReplay: (ws: WorkspaceListItem) => void - /** Disables both actions while a pipeline run is in flight. */ + /** + * E2 (#408) — called when the operator clicks Replay. The PAGE owns the + * confirmation dialog; the panel never starts a replay itself. + */ + onRequestReplay: (ws: WorkspaceListItem) => void + /** Called after a workspace row was deleted — lets the page drop a loaded one. */ + onDeleted?: (workspaceId: string) => void + /** Disables all actions while a pipeline run is in flight. */ isRunning: boolean /** summary.workspaceId of the latest kept run — triggers a list refetch. */ lastWorkspaceId: string | null } +type SortKey = 'newest' | 'oldest' | 'name' | 'status' + +const SORT_PARAMS: Record> = { + newest: {}, + oldest: { sort_by: 'created_at', sort_order: 'asc' }, + name: { sort_by: 'name', sort_order: 'asc' }, + status: { sort_by: 'status', sort_order: 'asc' }, +} + function statusClass(status: WorkspaceListItem['status']): string { switch (status) { case 'completed': @@ -46,9 +118,152 @@ function winnerOf(ws: WorkspaceListItem): string | null { return typeof winner === 'string' ? winner : null } -export function WorkspacePanel({ onLoad, onReplay, isRunning, lastWorkspaceId }: WorkspacePanelProps) { - const { data, isLoading } = useWorkspaces() +function labelOf(ws: WorkspaceListItem): string { + return ws.name ?? ws.workspace_id.slice(0, 8) +} + +// E4 (#410) — compact run-config summary, e.g. "custom: 4 models · rmse · 4×h21". +// Null when the row used default config (run_config null) — no badge rendered. 
+function runConfigSummary(ws: WorkspaceListItem): string | null { + const parsed = parseRunConfig(ws.run_config) + if (!parsed) return null + const { trainModels, backtest } = parsed + return `${trainModels.length} models · ${backtest.metric} · ${backtest.n_splits}×h${backtest.horizon}` +} + +export function WorkspacePanel({ + onLoad, + onRequestReplay, + onDeleted, + isRunning, + lastWorkspaceId, +}: WorkspacePanelProps) { + // ── E2 (#408) — server-side list params ───────────────────────────────── + const [search, setSearch] = useState('') + const [appliedQ, setAppliedQ] = useState('') + const [showArchived, setShowArchived] = useState(false) + const [sortKey, setSortKey] = useState('newest') + const [tagFilter, setTagFilter] = useState(null) + + // Debounced search — the q param needs >= 2 chars (server min_length). + useEffect(() => { + const handle = window.setTimeout(() => setAppliedQ(search.trim()), 300) + return () => window.clearTimeout(handle) + }, [search]) + + const params = useMemo( + () => ({ + ...(appliedQ.length >= 2 ? { q: appliedQ } : {}), + ...(tagFilter ? { tags: tagFilter } : {}), + ...(showArchived ? 
{ include_archived: true } : {}), + ...SORT_PARAMS[sortKey], + }), + [appliedQ, tagFilter, showArchived, sortKey] + ) + + const { data, isLoading } = useWorkspaces(params) const queryClient = useQueryClient() + const deleteWorkspace = useDeleteWorkspace() + const patchWorkspace = usePatchWorkspace() + const exportWorkspace = useExportWorkspace() + + // ── dialogs + selection state ──────────────────────────────────────────── + const [pendingDelete, setPendingDelete] = useState(null) + const [pendingEdit, setPendingEdit] = useState(null) + const [confirmMultiDelete, setConfirmMultiDelete] = useState(false) + const [selected, setSelected] = useState>(new Set()) + const navigate = useNavigate() + + const handleConfirmDelete = () => { + const ws = pendingDelete + if (!ws) return + setPendingDelete(null) + deleteWorkspace.mutate(ws.workspace_id, { + onSuccess: () => { + toast.success( + `Workspace "${labelOf(ws)}" deleted — its model runs, scenarios, and artifacts were kept.` + ) + onDeleted?.(ws.workspace_id) + }, + onError: (error) => { + toast.error(`Delete failed: ${getErrorMessage(error)}`) + // A 404 means the row is already gone server-side — drop the stale entry. + if (error instanceof ApiError && error.status === 404) { + void queryClient.invalidateQueries({ queryKey: ['workspaces'] }) + } + }, + }) + } + + // E2 (#408) — multi-select delete: N sequential SINGLE deletes (no bulk + // endpoint by design); failures collect into one summary toast. + const handleConfirmDeleteSelected = async () => { + const ids = Array.from(selected) + setConfirmMultiDelete(false) + const failures: string[] = [] + for (const id of ids) { + try { + await deleteWorkspace.mutateAsync(id) + onDeleted?.(id) + } catch (error) { + failures.push(`${id.slice(0, 8)}: ${getErrorMessage(error)}`) + } + } + setSelected(new Set()) + if (failures.length === 0) { + toast.success( + `Deleted ${ids.length} workspace record${ids.length === 1 ? 
'' : 's'} — created objects were kept.` + ) + } else { + toast.error(`Some deletes failed: ${failures.join('; ')}`) + } + } + + const handleTogglePin = (ws: WorkspaceListItem) => { + patchWorkspace.mutate( + { workspaceId: ws.workspace_id, update: { pinned: !ws.pinned } }, + { onError: (error) => toast.error(`Update failed: ${getErrorMessage(error)}`) } + ) + } + + const handleToggleArchive = (ws: WorkspaceListItem) => { + patchWorkspace.mutate( + { workspaceId: ws.workspace_id, update: { archived: !ws.archived } }, + { + onSuccess: () => { + toast.success(ws.archived ? 'Workspace unarchived.' : 'Workspace archived.') + }, + onError: (error) => toast.error(`Update failed: ${getErrorMessage(error)}`), + } + ) + } + + // E6 (#412) — non-destructive export; no confirmation dialog. Success toast + // surfaces the bundle path + file count + checksum state + any dangling refs. + const handleExport = (ws: WorkspaceListItem) => { + exportWorkspace.mutate(ws.workspace_id, { + onSuccess: (result) => { + const fileCount = `${result.files.length} file${result.files.length === 1 ? '' : 's'}` + const checksums = result.validated ? 'verified' : 'FAILED' + const unresolved = result.unresolved_references.length + ? ` ${result.unresolved_references.length} unresolved reference(s).` + : '' + toast.success( + `Bundle written to ${result.bundle_path} — ${fileCount}, checksums ${checksums}.${unresolved}` + ) + }, + onError: (error) => toast.error(`Export failed: ${getErrorMessage(error)}`), + }) + } + + const toggleSelected = (workspaceId: string) => { + setSelected((prev) => { + const next = new Set(prev) + if (next.has(workspaceId)) next.delete(workspaceId) + else next.add(workspaceId) + return next + }) + } // Refetch the list once the latest kept run settles — syncing React state to // an external system (the server-backed list) is the sanctioned effect use. 
@@ -59,6 +274,9 @@ export function WorkspacePanel({ onLoad, onReplay, isRunning, lastWorkspaceId }: }, [lastWorkspaceId, queryClient]) const items = data?.workspaces ?? [] + const allSelected = items.length > 0 && items.every((ws) => selected.has(ws.workspace_id)) + const selectedIds = Array.from(selected) + const hasActiveFilter = appliedQ.length >= 2 || tagFilter !== null || showArchived return ( @@ -71,59 +289,302 @@ export function WorkspacePanel({ onLoad, onReplay, isRunning, lastWorkspaceId }: )}
+ + {/* E2 (#408) — toolbar: search / show-archived / sort / tag chip. */} +
+
+ + setSearch(e.target.value)} + aria-label="Search workspaces by name" + /> +
+ + + {tagFilter && ( + + tag: {tagFilter} + + + )} +
+ {items.length === 0 ? (

{isLoading ? 'Loading workspaces…' - : 'No saved workspaces yet — tick "Save as workspace" before a run to keep it.'} + : hasActiveFilter + ? 'No workspaces match the active filters.' + : 'No saved workspaces yet — tick "Save as workspace" before a run to keep it.'}

) : ( -
    - {items.map((ws) => ( -
  • -
    - {ws.name ?? ws.workspace_id.slice(0, 8)} - {ws.scenario} - seed {ws.seed} - {ws.status.toUpperCase()} - {winnerOf(ws) && winner {winnerOf(ws)}} - {ws.reset && ( - - DESTRUCTIVE (replay wipes all data) - + <> + +
      + {items.map((ws) => ( +
    • - {new Date(ws.created_at).toLocaleString()} - -
    -
    - - -
    -
  • - ))} -
+ > +
+ toggleSelected(ws.workspace_id)} + aria-label={`Select workspace ${labelOf(ws)}`} + /> + + {labelOf(ws)} + {ws.archived && archived} + {ws.replayed_from_workspace_id && replay} + {runConfigSummary(ws) && ( + + custom: {runConfigSummary(ws)} + + )} + {ws.scenario} + seed {ws.seed} + {ws.status.toUpperCase()} + {winnerOf(ws) && winner {winnerOf(ws)}} + {ws.reset && ( + + DESTRUCTIVE (replay wipes all data) + + )} + {ws.tags.map((tag) => ( + + ))} + + {new Date(ws.created_at).toLocaleString()} + +
+
+ + + {/* E6 (#412) — export a checksum-validated bundle. Self- + contained block (survives an E2 row restyle / rebase). */} + + + + + + + handleTogglePin(ws)}> + {ws.pinned ? ( + + ) : ( + + )} + {ws.pinned ? 'Unpin' : 'Pin'} + + handleToggleArchive(ws)}> + {ws.archived ? ( + + ) : ( + + )} + {ws.archived ? 'Unarchive' : 'Archive'} + + setPendingEdit(ws)}> + + Edit details… + + setPendingDelete(ws)} + > + + Delete… + + + +
+ + ))} + + + {/* E2 (#408) — selection footer. */} + {selectedIds.length > 0 && ( +
+ {selectedIds.length} selected + + +
+ )} + )} + + {/* Shared confirmation dialog for the row pending deletion. */} + { + if (!open) setPendingDelete(null) + }} + > + + + + Delete workspace {pendingDelete ? `"${labelOf(pendingDelete)}"` : ''}? + + + This removes only the saved workspace record — its replay config + and artifact links. The model runs, scenario plans, aliases, jobs, + and artifacts the run created are NOT deleted and remain available + elsewhere in the app. This cannot be undone. + + + + Keep workspace + + Delete workspace + + + + + + {/* E2 (#408) — one confirmation for the whole selection. */} + { + if (!open) setConfirmMultiDelete(false) + }} + > + + + Delete {selectedIds.length} workspace records? + + Their created objects are NOT deleted — model runs, scenario + plans, aliases, jobs, and artifacts stay available elsewhere in + the app. This cannot be undone. + + + + Keep workspaces + void handleConfirmDeleteSelected()} + data-testid="workspace-multi-delete-confirm" + > + Delete selected + + + + + + {/* E2 (#408) — rename / notes / tags editor. */} + setPendingEdit(null)} /> ) } diff --git a/frontend/src/components/demo/WorkspaceStoryPanel.test.tsx b/frontend/src/components/demo/WorkspaceStoryPanel.test.tsx new file mode 100644 index 00000000..9de64bef --- /dev/null +++ b/frontend/src/components/demo/WorkspaceStoryPanel.test.tsx @@ -0,0 +1,121 @@ +/** + * E5 (#411) — render tests for WorkspaceStoryPanel: approval history, + * knowledge events, reproduction markers, and the legacy self-hide path. 
+ */ + +import { cleanup, render, screen } from '@testing-library/react' +import { afterEach, describe, expect, it } from 'vitest' +import { WorkspaceStoryPanel } from './WorkspaceStoryPanel' +import type { WorkspaceDetail } from '@/types/api' + +afterEach(() => cleanup()) + +const baseWorkspace: WorkspaceDetail = { + workspace_id: 'a'.repeat(32), + name: 'e5-story', + status: 'completed', + seed: 42, + scenario: 'showcase_rich', + reset: false, + skip_seed: true, + result_summary: null, + created_at: '2026-06-01T12:00:00Z', + store_id: 3, + product_id: 7, + date_start: '2026-01-01', + date_end: '2026-03-31', + created_objects: {}, + archived: false, + pinned: false, + tags: [], + replayed_from_workspace_id: null, + seed_overrides: null, + user_scope: null, + run_config: null, + notes: null, + config_schema_version: 2, + approval_events: null, + rag_events: null, +} + +function renderPanel(workspace: WorkspaceDetail) { + return render() +} + +describe('WorkspaceStoryPanel', () => { + it('renders nothing for a legacy row with no slots and no reproduction', () => { + const { container } = renderPanel(baseWorkspace) + expect(container.firstChild).toBeNull() + expect(screen.queryByTestId('workspace-story-panel')).toBeNull() + }) + + it('renders approval history with a decision badge, tool, and transcript snippet', () => { + const workspace: WorkspaceDetail = { + ...baseWorkspace, + approval_events: [ + { + action_id: 'act-1', + tool_name: 'save_scenario', + decision: 'rejected', + decided_at: '2026-06-01T12:05:00Z', + session_id: 'sess-1', + auto_approved: false, + reason: 'not now', + execution_status: 'rejected', + transcript_summary: 'I would like to save this scenario plan.', + tokens_used: 240, + tool_calls_count: 1, + }, + ], + } + const { container } = renderPanel(workspace) + expect(screen.getByTestId('workspace-story-panel')).toBeTruthy() + expect(container.textContent).toContain('rejected') + expect(container.textContent).toContain('save_scenario') + 
expect(container.textContent).toContain('I would like to save this scenario plan.') + expect(container.textContent).toContain('reason: not now') + }) + + it('renders knowledge events with event/status/provider/count', () => { + const workspace: WorkspaceDetail = { + ...baseWorkspace, + rag_events: [ + { + event: 'index', + status: 'pass', + detail: 'indexed 5 files', + count: 42, + occurred_at: '2026-06-01T12:03:00Z', + provider: 'openai', + reachable: null, + }, + ], + } + const { container } = renderPanel(workspace) + expect(container.textContent).toContain('index') + expect(container.textContent).toContain('pass') + expect(container.textContent).toContain('openai') + expect(container.textContent).toContain('count: 42') + }) + + it('renders reproduction markers only when story_reproduction is present', () => { + const workspace: WorkspaceDetail = { + ...baseWorkspace, + result_summary: { + story_reproduction: { + agent: 'reproduced', + knowledge: 'not_reproduced', + source_workspace_id: 'b'.repeat(32), + }, + }, + } + renderPanel(workspace) + const marker = screen.getByTestId('story-reproduction') + expect(marker.textContent).toContain('agent') + expect(marker.textContent).toContain('reproduced') + expect(marker.textContent).toContain('knowledge') + expect(marker.textContent).toContain('not reproduced') + // source_workspace_id is not rendered as a verdict chip. + expect(marker.textContent).not.toContain('source_workspace_id') + }) +}) diff --git a/frontend/src/components/demo/WorkspaceStoryPanel.tsx b/frontend/src/components/demo/WorkspaceStoryPanel.tsx new file mode 100644 index 00000000..cc98b420 --- /dev/null +++ b/frontend/src/components/demo/WorkspaceStoryPanel.tsx @@ -0,0 +1,193 @@ +/** + * E5 (#411) — render the agent/HITL + RAG story captured on a LOADED + * workspace row. Three sections: + * - Approval history: each approval_events entry (decision badge + tool + + * transcript snippet + when). 
+ * - Knowledge events: each rag_events entry (event/status/provider/count). + * - Reproduction markers: result_summary.story_reproduction chips (replay + * rows only — rendered only when present). + * + * Renders NOTHING for legacy rows that carry neither slot nor a reproduction + * marker. Reads the row only — the run is long gone, the row is the memory + * (same contract as WorkspaceArtifactsPanel). + */ + +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' +import { StatusBadge } from '@/components/common/status-badge' +import type { + ApprovalEventDetail, + RagEventDetail, + WorkspaceDetail, +} from '@/types/api' + +interface WorkspaceStoryPanelProps { + workspace: WorkspaceDetail +} + +/** Format an ISO timestamp for display; '—' when null. */ +function formatWhen(value: string | null | undefined): string { + if (!value) return '—' + const parsed = new Date(value) + return Number.isNaN(parsed.getTime()) ? value : parsed.toLocaleString() +} + +/** Decision → StatusBadge variant. */ +function decisionVariant( + decision: string | null, +): 'success' | 'error' | 'warning' | 'default' { + if (decision === 'approved') return 'success' + if (decision === 'rejected') return 'error' + if (decision === 'timed_out') return 'warning' + return 'default' +} + +/** rag_events status → StatusBadge variant. */ +function ragStatusVariant(status: string): 'success' | 'warning' | 'pending' | 'default' { + if (status === 'pass') return 'success' + if (status === 'warn') return 'warning' + if (status === 'skip') return 'pending' + return 'default' +} + +/** story_reproduction verdict → StatusBadge variant. 
*/ +function verdictVariant(verdict: string): 'success' | 'error' | 'pending' | 'default' { + if (verdict === 'reproduced') return 'success' + if (verdict === 'not_reproduced') return 'error' + if (verdict === 'not_applicable' || verdict === 'unknown') return 'pending' + return 'default' +} + +/** Read result_summary.story_reproduction as a tolerant map of string verdicts. */ +function readReproduction( + summary: Record | null, +): Record | null { + if (!summary || typeof summary !== 'object') return null + const raw = (summary as Record).story_reproduction + if (!raw || typeof raw !== 'object') return null + const out: Record = {} + for (const [key, value] of Object.entries(raw as Record)) { + if (typeof value === 'string') out[key] = value + } + return Object.keys(out).length > 0 ? out : null +} + +export function WorkspaceStoryPanel({ workspace }: WorkspaceStoryPanelProps) { + const approvalEvents: ApprovalEventDetail[] = workspace.approval_events ?? [] + const ragEvents: RagEventDetail[] = workspace.rag_events ?? [] + const reproduction = readReproduction(workspace.result_summary) + + // Legacy rows: nothing captured -> render nothing. + if (approvalEvents.length === 0 && ragEvents.length === 0 && reproduction === null) { + return null + } + + return ( + + + Run story + + The agent/HITL approval and knowledge moments this run captured — + replayed from the workspace row. + + + + {/* Reproduction markers — replay rows only. */} + {reproduction && ( +
+

Replay reproduction

+
+ {Object.entries(reproduction) + .filter(([key]) => key !== 'source_workspace_id') + .map(([key, verdict]) => ( + + {key} + + {verdict.replace(/_/g, ' ')} + + + ))} +
+
+ )} + + {/* Approval history. */} +
+

Approval history

+ {approvalEvents.length === 0 ? ( +

No approval events recorded.

+ ) : ( +
    + {approvalEvents.map((event, index) => ( +
  • +
    + + {event.decision ?? 'unknown'} + + {event.tool_name && ( + + {event.tool_name} + + )} + {event.auto_approved === true && ( + (auto) + )} + + {formatWhen(event.decided_at)} + +
    + {event.transcript_summary && ( +

    + {event.transcript_summary} +

    + )} + {event.reason && ( +

    + reason: {event.reason} +

    + )} +
  • + ))} +
+ )} +
+ + {/* Knowledge events. */} +
+

Knowledge events

+ {ragEvents.length === 0 ? ( +

No knowledge events recorded.

+ ) : ( +
    + {ragEvents.map((event, index) => ( +
  • + {event.event} + + {event.status} + + {event.provider && ( + + {event.provider} + + )} + count: {event.count} + {event.detail && ( + {event.detail} + )} + + {formatWhen(event.occurred_at)} + +
  • + ))} +
+ )} +
+
+
+ ) +} diff --git a/frontend/src/components/demo/demo-step-card.test.tsx b/frontend/src/components/demo/demo-step-card.test.tsx index eac4112d..9e0b5d6d 100644 --- a/frontend/src/components/demo/demo-step-card.test.tsx +++ b/frontend/src/components/demo/demo-step-card.test.tsx @@ -3,13 +3,16 @@ * and the Inspect deep-link hrefs they expose. */ -import { afterEach, describe, expect, it } from 'vitest' -import { cleanup, render, screen } from '@testing-library/react' +import { afterEach, describe, expect, it, vi } from 'vitest' +import { cleanup, fireEvent, render, screen, waitFor } from '@testing-library/react' import { MemoryRouter } from 'react-router-dom' import type { DemoStep } from '@/hooks/use-demo-pipeline' import { DemoStepCard } from './demo-step-card' -afterEach(cleanup) +afterEach(() => { + cleanup() + vi.unstubAllGlobals() +}) function makeStep( name: string, @@ -144,28 +147,101 @@ describe('DemoStepCard PRP-39 mini-summaries', () => { expect(text).toContain('approval=executed') }) - it('agent_hitl_flow — running + awaiting_approval=true surfaces the Approve button', () => { + it('agent_hitl_flow — running + awaiting_approval=true surfaces Approve and Reject', () => { const step = makeStep('agent_hitl_flow', 'running', { session_id: 'sess-x', awaiting_approval: true, action_id: 'act-y', - approval_url: '/agents/sessions/sess-x/approve', + decision_window_s: 10, }) const { container } = renderCard(step, null) const buttons = Array.from(container.querySelectorAll('button')).map((b) => b.textContent) expect(buttons).toContain('Approve') + expect(buttons).toContain('Reject') }) - it('agent_hitl_flow — terminal status hides the Approve button', () => { + it('agent_hitl_flow — terminal status hides the decision buttons', () => { const step = makeStep('agent_hitl_flow', 'pass', { session_id: 'sess-x', awaiting_approval: true, // stale flag from intermediate event action_id: 'act-y', - approval_url: '/agents/sessions/sess-x/approve', + decision_window_s: 10, }) 
const { container } = renderCard(step, null) const buttons = Array.from(container.querySelectorAll('button')).map((b) => b.textContent) expect(buttons).not.toContain('Approve') + expect(buttons).not.toContain('Reject') + }) + + it('agent_hitl_flow — countdown reads data.decision_window_s', () => { + const step = makeStep('agent_hitl_flow', 'running', { + session_id: 'sess-x', + awaiting_approval: true, + action_id: 'act-y', + decision_window_s: 7, + }) + const { container } = renderCard(step, null) + expect(container.textContent).toContain('auto-approve in 7s') + }) + + it('agent_hitl_flow — Approve POSTs the demo relay with the approved decision', async () => { + const fetchMock = vi.fn().mockResolvedValue(new Response(null, { status: 204 })) + vi.stubGlobal('fetch', fetchMock) + const step = makeStep('agent_hitl_flow', 'running', { + session_id: 'sess-x', + awaiting_approval: true, + action_id: 'act-y', + decision_window_s: 10, + }) + renderCard(step, null) + fireEvent.click(screen.getByRole('button', { name: 'Approve' })) + await waitFor(() => expect(fetchMock).toHaveBeenCalledTimes(1)) + const call = fetchMock.mock.calls[0]! + expect(String(call[0])).toContain('/demo/hitl-decision') + const init = call[1] as RequestInit + expect(init.method).toBe('POST') + expect(JSON.parse(String(init.body))).toEqual({ action_id: 'act-y', decision: 'approved' }) + // Both buttons disable after a click. 
+ expect(screen.getByRole('button', { name: 'Approving…' })).toBeTruthy() + expect((screen.getByRole('button', { name: 'Reject' }) as HTMLButtonElement).disabled).toBe(true) + }) + + it('agent_hitl_flow — Reject POSTs the demo relay with the rejected decision', async () => { + const fetchMock = vi.fn().mockResolvedValue(new Response(null, { status: 204 })) + vi.stubGlobal('fetch', fetchMock) + const step = makeStep('agent_hitl_flow', 'running', { + session_id: 'sess-x', + awaiting_approval: true, + action_id: 'act-z', + decision_window_s: 10, + }) + renderCard(step, null) + fireEvent.click(screen.getByRole('button', { name: 'Reject' })) + await waitFor(() => expect(fetchMock).toHaveBeenCalledTimes(1)) + const init = fetchMock.mock.calls[0]![1] as RequestInit + expect(JSON.parse(String(init.body))).toEqual({ action_id: 'act-z', decision: 'rejected' }) + expect(screen.getByRole('button', { name: 'Rejecting…' })).toBeTruthy() + }) + + it('agent_hitl_flow — absorbs a 404 (auto-approve raced) without surfacing an error', async () => { + const problem = JSON.stringify({ status: 404, detail: 'No pending HITL action' }) + const fetchMock = vi.fn().mockResolvedValue( + new Response(problem, { + status: 404, + headers: { 'content-type': 'application/problem+json' }, + }), + ) + vi.stubGlobal('fetch', fetchMock) + const step = makeStep('agent_hitl_flow', 'running', { + session_id: 'sess-x', + awaiting_approval: true, + action_id: 'act-y', + decision_window_s: 10, + }) + const { container } = renderCard(step, null) + fireEvent.click(screen.getByRole('button', { name: 'Approve' })) + await waitFor(() => expect(fetchMock).toHaveBeenCalledTimes(1)) + expect(container.textContent).not.toMatch(/decision failed/) }) it('ops_snapshot — renders the 5-tile mini grid with values', () => { diff --git a/frontend/src/components/demo/demo-step-card.tsx b/frontend/src/components/demo/demo-step-card.tsx index b2b1379b..9fef1529 100644 --- a/frontend/src/components/demo/demo-step-card.tsx +++ 
b/frontend/src/components/demo/demo-step-card.tsx @@ -4,6 +4,7 @@ import { Link } from 'react-router-dom' import type { DemoStep, DemoStepUiStatus } from '@/hooks/use-demo-pipeline' import { Button } from '@/components/ui/button' import { Card } from '@/components/ui/card' +import { api, ApiError } from '@/lib/api' import { cn } from '@/lib/utils' import { HorizonBucketsMini } from './HorizonBucketsMini' @@ -361,58 +362,77 @@ function OpsSnapshotMiniGrid({ data }: { data: Record }) { } /** - * PRP-41 — one-click Approve button rendered on the HITL step card when - * the backend has emitted `awaiting_approval=true` + `status='running'`. + * E5 (#411) — Approve / Reject buttons rendered on the HITL step card while + * the backend awaits a decision (`awaiting_approval=true` + `status='running'`). * - * Clicking POSTs `{action_id, approved: true}` to the captured approval_url. - * Optimistic disable on click; the backend's auto-approve absorbs a 400 - * "No pending action" if the auto-approve fires first (Task 1 contract probe). + * Either click relays the operator's intent to the DEMO slice via + * `POST /demo/hitl-decision` (through `lib/api.ts` `api()` — API_BASE_URL + * prefixed, never bare `fetch`, so it works off-origin). The pipeline is the + * sole caller of the agents approve endpoint. Both buttons disable after + * either click. A live "auto-approve in Ns" countdown reads the backend's + * `decision_window_s` (fallback 10) — never hardcoded, never derived from the + * 90 s hard timeout. 404/409 are absorbed silently (the auto-approve raced); + * only 5xx surfaces an inline error. 
*/ -function ApproveButton({ - approvalUrl, +function HitlDecisionButtons({ actionId, + decisionWindowS, }: { - approvalUrl: string actionId: string + decisionWindowS: number }) { - const [clicked, setClicked] = useState(false) + const [pending, setPending] = useState<'approved' | 'rejected' | null>(null) const [error, setError] = useState(null) - const [waitingMs, setWaitingMs] = useState(0) + const [remaining, setRemaining] = useState(Math.max(0, Math.ceil(decisionWindowS))) useEffect(() => { - if (clicked) return - const startedAt = Date.now() - const id = setInterval(() => setWaitingMs(Date.now() - startedAt), 1000) + if (pending) return + const id = setInterval(() => { + setRemaining((prev) => (prev > 0 ? prev - 1 : 0)) + }, 1000) return () => clearInterval(id) - }, [clicked]) + }, [pending]) - const onClick = async () => { - if (clicked || !approvalUrl || !actionId) return - setClicked(true) + const decide = async (decision: 'approved' | 'rejected') => { + if (pending || !actionId) return + setPending(decision) try { - const res = await fetch(approvalUrl, { + await api('/demo/hitl-decision', { method: 'POST', - headers: { 'content-type': 'application/json' }, - body: JSON.stringify({ action_id: actionId, approved: true }), + body: { action_id: actionId, decision }, }) - // Absorb 4xx absorptions silently — the auto-approve already landed - // and the next StepEvent will surface the terminal status. - if (!res.ok && res.status >= 500) { - setError(`approve failed (${res.status})`) - } } catch (err) { - setError(err instanceof Error ? err.message : 'approve failed') + // Absorb 404 (no pending action) / 409 (already decided) — the + // auto-approve or a prior click raced. Surface only 5xx. + if (err instanceof ApiError && err.status >= 500) { + setError(`decision failed (${err.status})`) + } else if (!(err instanceof ApiError)) { + setError(err instanceof Error ? err.message : 'decision failed') + } } } return ( -
- + - {!clicked && waitingMs > 30_000 && ( + {!pending && ( - Still waiting — auto-approve in {Math.max(0, Math.ceil((90_000 - waitingMs) / 1000))}s + auto-approve in {remaining}s )} {error && {error}} @@ -493,14 +513,18 @@ export function DemoStepCard({ step, index, inspectHref }: DemoStepCardProps) { {/* PRP-41 — agents (HITL) + ops snapshot mini-summaries. */} {step.name === 'agent_hitl_flow' && } {step.name === 'ops_snapshot' && } - {/* PRP-41 — one-click Approve only while awaiting (status==running). */} + {/* E5 (#411) — Approve / Reject only while awaiting (status==running); + countdown reads data.decision_window_s (fallback 10). */} {step.data.awaiting_approval === true && step.status === 'running' && - typeof step.data.approval_url === 'string' && typeof step.data.action_id === 'string' && ( - )} {showInspect && ( diff --git a/frontend/src/components/demo/index.ts b/frontend/src/components/demo/index.ts index ccfe7b71..0b1e7cfa 100644 --- a/frontend/src/components/demo/index.ts +++ b/frontend/src/components/demo/index.ts @@ -2,3 +2,13 @@ export * from './demo-step-card' // E4 (#393) — showcase workspace restore/replay panels. export * from './WorkspacePanel' export * from './WorkspaceArtifactsPanel' +// E2 (#408) — safe replay + lifecycle + lineage. +export * from './ReplayConfirmDialog' +export * from './WorkspaceEditDialog' +export * from './WorkspaceLineageStrip' +export * from './workspace-name' +// E3 (#409) — advanced seed config + focus-pair selection. +export * from './SeedConfigPanel' +export * from './ScopeSelector' +// E5 (#411) — agent/HITL + RAG story capture panel. 
+export * from './WorkspaceStoryPanel' diff --git a/frontend/src/components/demo/replay-request.test.ts b/frontend/src/components/demo/replay-request.test.ts new file mode 100644 index 00000000..c375ece4 --- /dev/null +++ b/frontend/src/components/demo/replay-request.test.ts @@ -0,0 +1,91 @@ +import { describe, expect, it } from 'vitest' +import { buildReplayRequest } from './replay-request' +import type { WorkspaceListItem } from '@/types/api' + +const baseItem: WorkspaceListItem = { + workspace_id: 'a'.repeat(32), + name: 'replayable', + status: 'completed', + seed: 7, + scenario: 'showcase_rich', + reset: true, + skip_seed: false, + result_summary: null, + created_at: '2026-06-01T12:00:00Z', + archived: false, + pinned: false, + tags: [], + replayed_from_workspace_id: null, + seed_overrides: null, + user_scope: null, + run_config: null, +} + +describe('buildReplayRequest', () => { + it('re-submits the recorded config verbatim with keep + provenance', () => { + expect(buildReplayRequest(baseItem)).toEqual({ + seed: 7, + scenario: 'showcase_rich', + reset: true, + skip_seed: false, + preservation: 'keep', + replayed_from_workspace_id: baseItem.workspace_id, + workspace_name: 'replayable', + }) + }) + + it('omits workspace_name on an unnamed row (names stay optional)', () => { + const request = buildReplayRequest({ ...baseItem, name: null }) + expect('workspace_name' in request).toBe(false) + expect(request.preservation).toBe('keep') + }) + + // E3 (#409) — replay-verbatim covers the recorded story slots. + it('omits the E3 keys on a legacy row (null slots) — byte-identical frame', () => { + const request = buildReplayRequest(baseItem) + expect('seed_overrides' in request).toBe(false) + expect('user_scope' in request).toBe(false) + }) + + // E4 (#410) — replay-verbatim covers the recorded run config. 
+ it('omits run-config keys on a default-config row (null run_config)', () => { + const request = buildReplayRequest(baseItem) + expect('train_model_types' in request).toBe(false) + expect('backtest' in request).toBe(false) + }) + + it('re-submits recorded run_config (model set + backtest) verbatim', () => { + const configured: WorkspaceListItem = { + ...baseItem, + run_config: { + train_model_types: ['naive', 'seasonal_average'], + backtest: { horizon: 21, n_splits: 4, metric: 'rmse' }, + }, + } + const request = buildReplayRequest(configured) + expect(request.train_model_types).toEqual(['naive', 'seasonal_average']) + expect(request.backtest?.horizon).toBe(21) + expect(request.backtest?.n_splits).toBe(4) + expect(request.backtest?.metric).toBe('rmse') + // Missing knobs are filled from the defaults (verbatim-complete frame). + expect(request.backtest?.strategy).toBe('expanding') + expect(request.backtest?.min_train_size).toBe(30) + }) + + it('re-submits recorded seed_overrides and user_scope verbatim', () => { + const slotted: WorkspaceListItem = { + ...baseItem, + seed_overrides: { stores: 8, products: 20, promotion_intensity: 0.3 }, + user_scope: { store_id: 12, product_id: 47 }, + } + const request = buildReplayRequest(slotted) + expect(request.seed_overrides).toEqual({ + stores: 8, + products: 20, + promotion_intensity: 0.3, + }) + expect(request.user_scope).toEqual({ store_id: 12, product_id: 47 }) + // Lineage stays intact when the slots ride along (E1 frozen criterion). 
+ expect(request.replayed_from_workspace_id).toBe(baseItem.workspace_id) + }) +}) diff --git a/frontend/src/components/demo/replay-request.ts b/frontend/src/components/demo/replay-request.ts new file mode 100644 index 00000000..aab00732 --- /dev/null +++ b/frontend/src/components/demo/replay-request.ts @@ -0,0 +1,30 @@ +import type { DemoRunRequest, WorkspaceListItem } from '@/types/api' +import { parseRunConfig } from './run-config-utils' + +/** + * E2 (#408) — the EXACT request a confirmed replay sends. Single source for + * the confirm dialog's "Will send" column AND the page's executeReplay, so + * the preview can never lie about what goes on the wire. + */ +export function buildReplayRequest(ws: WorkspaceListItem): DemoRunRequest { + // E4 (#410) — replay-verbatim covers the recorded run config; null on + // default-config rows, so their replay frame stays byte-identical. + const runConfig = parseRunConfig(ws.run_config) + return { + seed: ws.seed, + scenario: ws.scenario, + reset: ws.reset, + skip_seed: ws.skip_seed, + preservation: 'keep', + // E1 (#407) — record replay lineage on the NEW row (soft reference). + replayed_from_workspace_id: ws.workspace_id, + ...(ws.name ? { workspace_name: ws.name } : {}), + // E3 (#409) — replay-verbatim covers the recorded slots; omitted on + // legacy rows (null) so their replay frame stays byte-identical. + ...(ws.seed_overrides ? { seed_overrides: ws.seed_overrides } : {}), + ...(ws.user_scope ? { user_scope: ws.user_scope } : {}), + ...(runConfig + ? 
{ train_model_types: runConfig.trainModels, backtest: runConfig.backtest } + : {}), + } +} diff --git a/frontend/src/components/demo/run-config-utils.test.ts b/frontend/src/components/demo/run-config-utils.test.ts new file mode 100644 index 00000000..9bee6f14 --- /dev/null +++ b/frontend/src/components/demo/run-config-utils.test.ts @@ -0,0 +1,115 @@ +import { describe, expect, it } from 'vitest' +import { + DEFAULT_BACKTEST, + DEFAULT_TRAIN_MODELS, + buildTrainPlan, + isDefaultBacktest, + isDefaultSelection, + parseRunConfig, + splitFitWarning, + windowDaysFor, +} from './run-config-utils' +import type { DemoBacktestConfig } from '@/types/api' + +describe('isDefaultSelection', () => { + it('is true for the default trio regardless of order', () => { + expect(isDefaultSelection([...DEFAULT_TRAIN_MODELS])).toBe(true) + expect(isDefaultSelection(['moving_average', 'naive', 'seasonal_naive'])).toBe(true) + }) + + it('is false for any other selection', () => { + expect(isDefaultSelection(['naive'])).toBe(false) + expect(isDefaultSelection(['naive', 'seasonal_naive', 'regression'])).toBe(false) + }) +}) + +describe('isDefaultBacktest', () => { + it('is true for the default config', () => { + expect(isDefaultBacktest({ ...DEFAULT_BACKTEST })).toBe(true) + }) + + it('is false when any knob differs', () => { + expect(isDefaultBacktest({ ...DEFAULT_BACKTEST, metric: 'rmse' })).toBe(false) + expect(isDefaultBacktest({ ...DEFAULT_BACKTEST, horizon: 21 })).toBe(false) + }) +}) + +describe('buildTrainPlan', () => { + it('returns the selection verbatim on non-showcase scenarios', () => { + const plan = buildTrainPlan(['naive', 'seasonal_average'], 'demo_minimal') + expect(plan.map((p) => p.model_type)).toEqual(['naive', 'seasonal_average']) + expect(plan.some((p) => p.v2)).toBe(false) + }) + + it('appends prophet_like (V2) on showcase_rich when absent', () => { + const plan = buildTrainPlan(['naive'], 'showcase_rich') + expect(plan.map((p) => p.model_type)).toEqual(['naive', 
'prophet_like']) + expect(plan[1].v2).toBe(true) + }) + + it('does not double-append prophet_like when already selected', () => { + const plan = buildTrainPlan(['prophet_like', 'naive'], 'showcase_rich') + expect(plan.map((p) => p.model_type)).toEqual(['prophet_like', 'naive']) + }) + + it('tags each chip with its family from the catalog map', () => { + const plan = buildTrainPlan(['naive'], 'demo_minimal', { naive: 'baseline' }) + expect(plan[0].family).toBe('baseline') + }) +}) + +describe('windowDaysFor', () => { + it('returns 92 for the short-window presets', () => { + expect(windowDaysFor('demo_minimal')).toBe(92) + expect(windowDaysFor('sparse')).toBe(92) + expect(windowDaysFor('holiday_rush')).toBe(92) + }) + + it('returns 180 for the rich-window presets', () => { + expect(windowDaysFor('showcase_rich')).toBe(180) + expect(windowDaysFor('retail_standard')).toBe(180) + }) +}) + +describe('splitFitWarning', () => { + it('returns null when the split fits the window', () => { + expect(splitFitWarning({ ...DEFAULT_BACKTEST }, 'demo_minimal')).toBeNull() + }) + + it('warns when the split exceeds the seeded window', () => { + const aggressive: DemoBacktestConfig = { + ...DEFAULT_BACKTEST, + horizon: 28, + n_splits: 5, + min_train_size: 60, + } + const warning = splitFitWarning(aggressive, 'demo_minimal') + expect(warning).toContain('demo_minimal') + }) +}) + +describe('parseRunConfig', () => { + it('returns null for a null/empty config', () => { + expect(parseRunConfig(null)).toBeNull() + expect(parseRunConfig(undefined)).toBeNull() + }) + + it('parses train_model_types + backtest, defaulting missing knobs', () => { + const parsed = parseRunConfig({ + train_model_types: ['naive', 'regression'], + backtest: { horizon: 21, metric: 'rmse' }, + }) + expect(parsed).not.toBeNull() + expect(parsed!.trainModels).toEqual(['naive', 'regression']) + expect(parsed!.backtest.horizon).toBe(21) + expect(parsed!.backtest.metric).toBe('rmse') + // Missing knobs fall back to the 
defaults. + expect(parsed!.backtest.n_splits).toBe(DEFAULT_BACKTEST.n_splits) + expect(parsed!.backtest.strategy).toBe(DEFAULT_BACKTEST.strategy) + }) + + it('falls back to the default trio when models are malformed', () => { + const parsed = parseRunConfig({ train_model_types: 'oops', backtest: {} }) + expect(parsed!.trainModels).toEqual(DEFAULT_TRAIN_MODELS) + }) +}) diff --git a/frontend/src/components/demo/run-config-utils.ts b/frontend/src/components/demo/run-config-utils.ts new file mode 100644 index 00000000..9264b12d --- /dev/null +++ b/frontend/src/components/demo/run-config-utils.ts @@ -0,0 +1,153 @@ +import type { + DemoBacktestConfig, + DemoRankingMetric, + ModelFamily, + ScenarioPreset, +} from '@/types/api' + +/** + * E4 (#410) — pure helpers for the showcase run-config panel. Kept in a `.ts` + * module (not a `.tsx`) so the `react-refresh/only-export-components` lint rule + * stays happy and the logic is unit-testable without rendering. + */ + +// The legacy demo trio — the default selection (and the byte-compat baseline). +export const DEFAULT_TRAIN_MODELS = ['naive', 'seasonal_naive', 'moving_average'] + +// The legacy demo split + ranking metric. Mirrors the backend defaults +// (DEMO_HORIZON=14, DEMO_BACKTEST_SPLITS=3, DEMO_MIN_TRAIN_SIZE=30, gap=0, +// strategy 'expanding', metric 'wape'). +export const DEFAULT_BACKTEST: DemoBacktestConfig = { + horizon: 14, + strategy: 'expanding', + n_splits: 3, + min_train_size: 30, + gap: 0, + metric: 'wape', +} + +// The V2 feature-aware model appended to a custom selection on showcase_rich +// (the v2_train step trains/registers it unconditionally; see pipeline.py). +export const SHOWCASE_V2_MODEL = 'prophet_like' + +/** True when `models` equals the default trio (order-insensitive). 
*/ +export function isDefaultSelection(models: string[]): boolean { + if (models.length !== DEFAULT_TRAIN_MODELS.length) return false + const a = [...models].sort() + const b = [...DEFAULT_TRAIN_MODELS].sort() + return a.every((m, i) => m === b[i]) +} + +/** True when every backtest knob equals its default. */ +export function isDefaultBacktest(cfg: DemoBacktestConfig): boolean { + return ( + cfg.horizon === DEFAULT_BACKTEST.horizon && + cfg.strategy === DEFAULT_BACKTEST.strategy && + cfg.n_splits === DEFAULT_BACKTEST.n_splits && + cfg.min_train_size === DEFAULT_BACKTEST.min_train_size && + cfg.gap === DEFAULT_BACKTEST.gap && + cfg.metric === DEFAULT_BACKTEST.metric + ) +} + +export interface TrainPlanEntry { + model_type: string + family?: ModelFamily + /** Appended V2 entry (prophet_like on showcase_rich) — not operator-picked. */ + v2?: boolean +} + +/** + * The exact models the pipeline will train, in display order. On showcase_rich + * `prophet_like (V2)` is appended (unless already selected) because the + * v2_train step registers it unconditionally — it stays in the competition. + * The `families` map (model_type → family, from the catalog) tags each chip. + */ +export function buildTrainPlan( + models: string[], + scenario: ScenarioPreset, + families: Record = {}, +): TrainPlanEntry[] { + const plan: TrainPlanEntry[] = models.map((m) => ({ + model_type: m, + family: families[m], + })) + if (scenario === 'showcase_rich' && !models.includes(SHOWCASE_V2_MODEL)) { + plan.push({ model_type: SHOWCASE_V2_MODEL, family: families[SHOWCASE_V2_MODEL], v2: true }) + } + return plan +} + +/** + * The seeded window (days) for a scenario. SOURCE OF TRUTH: + * pipeline.py `_SCENARIO_SEED_PROFILE` (demo_minimal / sparse / holiday_rush = + * 92-day window, every other preset = 180). Keep in sync. 
+ */ +export function windowDaysFor(scenario: ScenarioPreset): number { + if (scenario === 'demo_minimal' || scenario === 'sparse' || scenario === 'holiday_rush') { + return 92 + } + return 180 +} + +/** + * A soft (non-blocking) warning when the split cannot fit the seeded window: + * `min_train_size + n_splits * (horizon + gap) > windowDays`. The backend does + * NOT clamp — an over-aggressive split fails honestly at backtest (sparse-preset + * precedent), so the UI warns ahead of time. Returns null when the split fits. + */ +export function splitFitWarning( + cfg: DemoBacktestConfig, + scenario: ScenarioPreset, +): string | null { + const windowDays = windowDaysFor(scenario) + const required = cfg.min_train_size + cfg.n_splits * (cfg.horizon + cfg.gap) + if (required > windowDays) { + return ( + `This split needs ~${required} days but ${scenario} seeds ~${windowDays}. ` + + 'The backtest may produce NaN / too-few-folds and fail — reduce horizon, splits, or min train.' + ) + } + return null +} + +/** + * Parse a stored `run_config` (Record from a workspace row) + * into the typed pieces Load/Replay repopulate. Returns null when absent or + * shapeless. Missing knobs fall back to the defaults so a partial stored config + * still yields a complete backtest object. + */ +export function parseRunConfig( + raw: Record | null | undefined, +): { trainModels: string[]; backtest: DemoBacktestConfig } | null { + if (!raw || typeof raw !== 'object') return null + const rawModels = (raw as { train_model_types?: unknown }).train_model_types + const trainModels = + Array.isArray(rawModels) && rawModels.every((m) => typeof m === 'string') + ? 
(rawModels as string[]) + : DEFAULT_TRAIN_MODELS + const rawBacktest = (raw as { backtest?: unknown }).backtest + const backtest = parseBacktest(rawBacktest) + return { trainModels, backtest } +} + +function parseBacktest(raw: unknown): DemoBacktestConfig { + if (!raw || typeof raw !== 'object') return { ...DEFAULT_BACKTEST } + const obj = raw as Record + const num = (key: keyof DemoBacktestConfig, fallback: number): number => + typeof obj[key] === 'number' ? (obj[key] as number) : fallback + const strategy = obj.strategy === 'sliding' ? 'sliding' : DEFAULT_BACKTEST.strategy + const metricRaw = obj.metric + const metric: DemoRankingMetric = + metricRaw === 'mae' || metricRaw === 'rmse' || metricRaw === 'wape' + ? metricRaw + : DEFAULT_BACKTEST.metric + return { + horizon: num('horizon', DEFAULT_BACKTEST.horizon), + strategy, + n_splits: num('n_splits', DEFAULT_BACKTEST.n_splits), + min_train_size: num('min_train_size', DEFAULT_BACKTEST.min_train_size), + gap: num('gap', DEFAULT_BACKTEST.gap), + metric, + } +} diff --git a/frontend/src/components/demo/workspace-name.ts b/frontend/src/components/demo/workspace-name.ts new file mode 100644 index 00000000..cd14aa34 --- /dev/null +++ b/frontend/src/components/demo/workspace-name.ts @@ -0,0 +1,8 @@ +// E2 (#408) — single source for the workspace-name client validation, +// shared by the showcase run controls and the WorkspaceEditDialog. Mirrors +// the backend DemoRunRequest.workspace_name pattern (app/features/demo/ +// schemas.py): lowercase letters/digits, then -/_ allowed; ≤100 chars. +export const WORKSPACE_NAME_PATTERN = /^[a-z0-9][a-z0-9\-_]*$/ + +export const WORKSPACE_NAME_HINT = + 'Lowercase letters/digits only, then “-” or “_” (must not start with either).' 
diff --git a/frontend/src/hooks/index.ts b/frontend/src/hooks/index.ts
index fb3e6aa7..9b31a266 100644
--- a/frontend/src/hooks/index.ts
+++ b/frontend/src/hooks/index.ts
@@ -15,3 +15,4 @@ export * from './use-websocket'
 export * from './use-seeder'
 export * from './use-demo-pipeline'
 export * from './use-workspaces'
+export * from './use-approval-events'
diff --git a/frontend/src/hooks/use-approval-events.test.ts b/frontend/src/hooks/use-approval-events.test.ts
new file mode 100644
index 00000000..0aeb0cc7
--- /dev/null
+++ b/frontend/src/hooks/use-approval-events.test.ts
@@ -0,0 +1,86 @@
+/**
+ * E5 (#411) — unit tests for useApprovalEvents. Stubs fetch to assert the
+ * hook calls GET /demo/approval-events with the limit param and surfaces the
+ * flattened response (pattern: use-workspaces.test.ts).
+ */
+import { QueryClient, QueryClientProvider } from '@tanstack/react-query'
+import { renderHook, waitFor } from '@testing-library/react'
+import { afterEach, describe, expect, it, vi } from 'vitest'
+import { createElement, type ReactNode } from 'react'
+
+import { useApprovalEvents } from './use-approval-events'
+
+function makeWrapper(client: QueryClient) { // renderHook wrapper factory: supplies `client` via QueryClientProvider
+  return function Wrapper({ children }: { children: ReactNode }) {
+    return createElement(QueryClientProvider, { client }, children)
+  }
+}
+
+function jsonResponse(body: unknown, status = 200): Response { // JSON-encoded Response for the fetch stub to resolve with
+  return new Response(JSON.stringify(body), {
+    status,
+    headers: { 'content-type': 'application/json' },
+  })
+}
+
+afterEach(() => {
+  vi.unstubAllGlobals() // undo the per-test vi.stubGlobal('fetch', …)
+})
+
+describe('useApprovalEvents', () => {
+  it('GETs /demo/approval-events with the limit param and returns the events', async () => {
+    const body = {
+      events: [
+        {
+          workspace_id: 'a'.repeat(32),
+          workspace_name: 'e5-story',
+          action_id: 'act-1',
+          tool_name: 'save_scenario',
+          decision: 'approved',
+          decided_at: '2026-06-01T12:05:00Z',
+          session_id: 'sess-1',
+          auto_approved: false,
+          reason: null,
+          execution_status: 'executed',
+          transcript_summary: 'save it',
+        },
+      ],
+      total: 1,
+    }
+    const fetchMock = vi.fn().mockResolvedValue(jsonResponse(body))
+    vi.stubGlobal('fetch', fetchMock)
+
+    const client = new QueryClient({ defaultOptions: { queries: { retry: false } } }) // retry off: a failing fetch surfaces at once
+    const { result } = renderHook(() => useApprovalEvents(25), {
+      wrapper: makeWrapper(client),
+    })
+    await waitFor(() => expect(result.current.isSuccess).toBe(true))
+
+    const url = String(fetchMock.mock.calls[0]![0]) // first positional argument of the first fetch call
+    expect(url).toContain('/demo/approval-events')
+    expect(url).toContain('limit=25')
+    expect(result.current.data?.total).toBe(1)
+    expect(result.current.data?.events[0]?.tool_name).toBe('save_scenario')
+  })
+
+  it('defaults the limit to 50', async () => {
+    const fetchMock = vi.fn().mockResolvedValue(jsonResponse({ events: [], total: 0 }))
+    vi.stubGlobal('fetch', fetchMock)
+
+    const client = new QueryClient({ defaultOptions: { queries: { retry: false } } })
+    const { result } = renderHook(() => useApprovalEvents(), {
+      wrapper: makeWrapper(client),
+    })
+    await waitFor(() => expect(result.current.isSuccess).toBe(true))
+
+    expect(String(fetchMock.mock.calls[0]![0])).toContain('limit=50')
+  })
+
+  it('stays disabled when enabled=false', () => {
+    const fetchMock = vi.fn()
+    vi.stubGlobal('fetch', fetchMock)
+    const client = new QueryClient({ defaultOptions: { queries: { retry: false } } })
+    renderHook(() => useApprovalEvents(50, false), { wrapper: makeWrapper(client) })
+    expect(fetchMock).not.toHaveBeenCalled() // asserted synchronously, right after the hook mounts
+  })
+})
diff --git a/frontend/src/hooks/use-approval-events.ts b/frontend/src/hooks/use-approval-events.ts
new file mode 100644
index 00000000..2b4cd9ea
--- /dev/null
+++ b/frontend/src/hooks/use-approval-events.ts
@@ -0,0 +1,19 @@
+import { useQuery } from '@tanstack/react-query'
+import { api } from '@/lib/api'
+import type { ApprovalEventsResponse } from '@/types/api'
+
+/**
+ * E5 (#411) — recent HITL approval events flattened across saved showcase
+ * workspaces, newest-first.
Deliberately NOT polled: the table only changes
+ * when a showcase run finishes capturing a decision, so refetch-on-mount is
+ * sufficient (mirrors useRetrainingCandidates). `limit` (default 50) is sent as a query param and is part
+ * of the queryKey, so distinct caps cache independently; `enabled=false` suppresses the request.
+ */
+export function useApprovalEvents(limit = 50, enabled = true) {
+  return useQuery({
+    queryKey: ['demo', 'approval-events', limit],
+    queryFn: () =>
+      api<ApprovalEventsResponse>('/demo/approval-events', { params: { limit } }), // typed response: callers read data.events / data.total
+    enabled,
+  })
+}
diff --git a/frontend/src/hooks/use-model-selection.test.ts b/frontend/src/hooks/use-model-selection.test.ts
index 5074351b..df08674a 100644
--- a/frontend/src/hooks/use-model-selection.test.ts
+++ b/frontend/src/hooks/use-model-selection.test.ts
@@ -48,6 +48,7 @@ const CATALOG: ModelCatalogResponse = {
       default_params: {},
       supports_auto_predict: true,
       description: 'Repeats the last observed value.',
+      enabled: true,
     },
   ],
   default_candidate_model_types: ['naive', 'seasonal_naive', 'moving_average'],
diff --git a/frontend/src/hooks/use-workspaces.test.ts b/frontend/src/hooks/use-workspaces.test.ts
new file mode 100644
index 00000000..66825d61
--- /dev/null
+++ b/frontend/src/hooks/use-workspaces.test.ts
@@ -0,0 +1,296 @@
+/**
+ * Unit tests for the use-workspaces hooks.
+ *
+ * Stubs ``fetch`` to assert each hook hits the expected /demo/workspaces
+ * endpoint (list params, DELETE, PATCH, health, lineage walk) and that the mutations invalidate the list; no real backend
+ * is exercised (pattern: ``use-batches.test.ts``).
+ */
+import { QueryClient, QueryClientProvider } from '@tanstack/react-query'
+import { act, renderHook, waitFor } from '@testing-library/react'
+import { afterEach, describe, expect, it, vi } from 'vitest'
+import { createElement, type ReactNode } from 'react'
+
+import {
+  useDeleteWorkspace,
+  usePatchWorkspace,
+  useWorkspaceHealth,
+  useWorkspaceLineage,
+  useWorkspaces,
+} from './use-workspaces'
+import { ApiError } from '@/lib/api'
+
+function makeWrapper(client: QueryClient) { // renderHook wrapper factory: supplies `client` via QueryClientProvider
+  return function Wrapper({ children }: { children: ReactNode }) {
+    return createElement(QueryClientProvider, { client }, children)
+  }
+}
+
+afterEach(() => {
+  vi.unstubAllGlobals() // undo the per-test vi.stubGlobal('fetch', …)
+})
+
+describe('useDeleteWorkspace', () => {
+  it('issues a DELETE to /demo/workspaces/{id} and invalidates the list', async () => {
+    const fetchMock = vi.fn().mockResolvedValue(new Response(null, { status: 204 }))
+    vi.stubGlobal('fetch', fetchMock)
+
+    const client = new QueryClient({
+      defaultOptions: { queries: { retry: false } },
+    })
+    const invalidateSpy = vi.spyOn(client, 'invalidateQueries')
+    const { result } = renderHook(() => useDeleteWorkspace(), {
+      wrapper: makeWrapper(client),
+    })
+
+    const workspaceId = 'a'.repeat(32)
+    await act(async () => {
+      result.current.mutate(workspaceId)
+    })
+    await waitFor(() => expect(result.current.isSuccess).toBe(true))
+
+    expect(fetchMock).toHaveBeenCalledTimes(1)
+    const call = fetchMock.mock.calls[0]! // [url, init] of the single fetch call
+    expect(String(call[0])).toContain(`/demo/workspaces/${workspaceId}`)
+    expect((call[1] as RequestInit).method).toBe('DELETE')
+
+    // Success invalidates every ['workspaces', ...] query — the panel list
+    // refetches and the deleted row disappears.
+    expect(invalidateSpy).toHaveBeenCalledWith({ queryKey: ['workspaces'] })
+  })
+
+  it('surfaces an RFC 7807 404 as ApiError on the mutation', async () => {
+    const problem = {
+      type: '/errors/not-found',
+      title: 'Not Found',
+      status: 404,
+      detail: 'Workspace not found: ' + 'f'.repeat(32),
+      code: 'NOT_FOUND',
+    }
+    vi.stubGlobal(
+      'fetch',
+      vi.fn().mockResolvedValue(
+        new Response(JSON.stringify(problem), {
+          status: 404,
+          headers: { 'content-type': 'application/problem+json' },
+        }),
+      ),
+    )
+
+    const client = new QueryClient({
+      defaultOptions: { queries: { retry: false } },
+    })
+    const { result } = renderHook(() => useDeleteWorkspace(), {
+      wrapper: makeWrapper(client),
+    })
+
+    await act(async () => {
+      result.current.mutate('f'.repeat(32))
+    })
+    await waitFor(() => expect(result.current.isError).toBe(true))
+
+    const error = result.current.error
+    expect(error).toBeInstanceOf(ApiError)
+    expect((error as ApiError).status).toBe(404)
+    expect((error as ApiError).message).toContain('Workspace not found') // the problem `detail` text reaches the error message
+  })
+})
+
+// =============================================================================
+// E2 (#408) — params-aware list + PATCH + health + lineage
+// =============================================================================
+
+function jsonResponse(body: unknown, status = 200): Response { // JSON-encoded Response for the fetch stub to resolve with
+  return new Response(JSON.stringify(body), {
+    status,
+    headers: { 'content-type': 'application/json' },
+  })
+}
+
+function problemResponse(detail: string, status: number): Response { // problem+json error body carrying `detail` and `status`
+  return new Response(
+    JSON.stringify({ type: '/errors/not-found', title: 'Not Found', status, detail }),
+    { status, headers: { 'content-type': 'application/problem+json' } },
+  )
+}
+
+describe('useWorkspaces (E2 params)', () => {
+  it('serializes the list params onto the query string', async () => {
+    const fetchMock = vi.fn().mockResolvedValue(jsonResponse({ workspaces: [], total: 0 }))
+    vi.stubGlobal('fetch', fetchMock)
+
+    const client = new QueryClient({ defaultOptions: { queries: { retry: false } } })
+    const { result } = renderHook(
+      () =>
+        useWorkspaces({
+          q: 'demo',
+          tags: 'smoke',
+          include_archived: true,
+          sort_by: 'name',
+          sort_order: 'asc',
+        }),
+      { wrapper: makeWrapper(client) },
+    )
+    await waitFor(() => expect(result.current.isSuccess).toBe(true))
+
+    const url = String(fetchMock.mock.calls[0]![0])
+    expect(url).toContain('/demo/workspaces')
+    expect(url).toContain('q=demo')
+    expect(url).toContain('tags=smoke')
+    expect(url).toContain('include_archived=true')
+    expect(url).toContain('sort_by=name')
+    expect(url).toContain('sort_order=asc')
+  })
+
+  it('omits unset params (legacy URL shape preserved)', async () => {
+    const fetchMock = vi.fn().mockResolvedValue(jsonResponse({ workspaces: [], total: 0 }))
+    vi.stubGlobal('fetch', fetchMock)
+
+    const client = new QueryClient({ defaultOptions: { queries: { retry: false } } })
+    const { result } = renderHook(() => useWorkspaces(), { wrapper: makeWrapper(client) })
+    await waitFor(() => expect(result.current.isSuccess).toBe(true))
+
+    const url = String(fetchMock.mock.calls[0]![0])
+    expect(url).toContain('limit=20') // the default cap is the only param on the URL
+    expect(url).not.toContain('q=')
+    expect(url).not.toContain('include_archived')
+    expect(url).not.toContain('sort_by')
+  })
+})
+
+describe('usePatchWorkspace', () => {
+  it('issues a PATCH with the partial body and invalidates the list', async () => {
+    const workspaceId = 'a'.repeat(32)
+    const fetchMock = vi
+      .fn()
+      .mockResolvedValue(jsonResponse({ workspace_id: workspaceId, pinned: true }))
+    vi.stubGlobal('fetch', fetchMock)
+
+    const client = new QueryClient({ defaultOptions: { queries: { retry: false } } })
+    const invalidateSpy = vi.spyOn(client, 'invalidateQueries')
+    const { result } = renderHook(() => usePatchWorkspace(), {
+      wrapper: makeWrapper(client),
+    })
+
+    await act(async () => {
+      result.current.mutate({ workspaceId, update: { pinned: true } })
+    })
+    await waitFor(() => expect(result.current.isSuccess).toBe(true))
+
+    const call = fetchMock.mock.calls[0]!
+    expect(String(call[0])).toContain(`/demo/workspaces/${workspaceId}`)
+    const init = call[1] as RequestInit
+    expect(init.method).toBe('PATCH')
+    expect(JSON.parse(String(init.body))).toEqual({ pinned: true }) // only the provided field is sent
+    expect(invalidateSpy).toHaveBeenCalledWith({ queryKey: ['workspaces'] })
+  })
+})
+
+describe('useWorkspaceHealth', () => {
+  it('fetches the health endpoint for the loaded workspace', async () => {
+    const workspaceId = 'a'.repeat(32)
+    const health = {
+      workspace_id: workspaceId,
+      workspace_status: 'completed',
+      partial_run: false,
+      references: [],
+      alive: 0,
+      dead: 0,
+      unknown: 0,
+      checked_at: '2026-06-13T00:00:00Z',
+    }
+    const fetchMock = vi.fn().mockResolvedValue(jsonResponse(health))
+    vi.stubGlobal('fetch', fetchMock)
+
+    const client = new QueryClient({ defaultOptions: { queries: { retry: false } } })
+    const { result } = renderHook(() => useWorkspaceHealth(workspaceId), {
+      wrapper: makeWrapper(client),
+    })
+    await waitFor(() => expect(result.current.isSuccess).toBe(true))
+    expect(String(fetchMock.mock.calls[0]![0])).toContain(
+      `/demo/workspaces/${workspaceId}/health`,
+    )
+    expect(result.current.data).toEqual(health)
+  })
+
+  it('stays disabled without a workspace id', () => {
+    const fetchMock = vi.fn()
+    vi.stubGlobal('fetch', fetchMock)
+    const client = new QueryClient({ defaultOptions: { queries: { retry: false } } })
+    renderHook(() => useWorkspaceHealth(''), { wrapper: makeWrapper(client) })
+    expect(fetchMock).not.toHaveBeenCalled() // empty id: no request is issued
+  })
+})
+
+describe('useWorkspaceLineage', () => {
+  const idA = 'a'.repeat(32)
+  const idB = 'b'.repeat(32)
+  const idC = 'c'.repeat(32)
+
+  function detailBody(id: string, name: string | null, parent: string | null) { // detail payload stub; `parent` is the replay source id
+    return {
+      workspace_id: id,
+      name,
+      replayed_from_workspace_id: parent,
+      tags: [],
+      archived: false,
+      pinned: false,
+    }
+  }
+
+  it('walks the chain newest → original and stops at the root', async () => {
+    const fetchMock = vi
+      .fn()
+      .mockResolvedValueOnce(jsonResponse(detailBody(idA, 'child', idB)))
+      .mockResolvedValueOnce(jsonResponse(detailBody(idB, 'origin', null)))
+    vi.stubGlobal('fetch', fetchMock)
+
+    const client = new QueryClient({ defaultOptions: { queries: { retry: false } } })
+    const { result } = renderHook(() => useWorkspaceLineage(idA), {
+      wrapper: makeWrapper(client),
+    })
+    await waitFor(() => expect(result.current.isSuccess).toBe(true))
+
+    const lineage = result.current.data!
+    expect(lineage.entries.map((e) => e.workspace_id)).toEqual([idA, idB])
+    expect(lineage.entries.map((e) => e.deleted)).toEqual([false, false])
+    expect(lineage.truncated).toBe(false)
+    expect(fetchMock).toHaveBeenCalledTimes(2) // one fetch per row; none after the root's null parent
+  })
+
+  it('terminates the walk with a deleted sentinel on a 404 ancestor', async () => {
+    const fetchMock = vi
+      .fn()
+      .mockResolvedValueOnce(jsonResponse(detailBody(idA, 'child', idC)))
+      .mockResolvedValueOnce(problemResponse(`Workspace not found: ${idC}`, 404))
+    vi.stubGlobal('fetch', fetchMock)
+
+    const client = new QueryClient({ defaultOptions: { queries: { retry: false } } })
+    const { result } = renderHook(() => useWorkspaceLineage(idA), {
+      wrapper: makeWrapper(client),
+    })
+    await waitFor(() => expect(result.current.isSuccess).toBe(true))
+
+    const lineage = result.current.data!
+    expect(lineage.entries).toHaveLength(2)
+    expect(lineage.entries[1]).toMatchObject({ workspace_id: idC, deleted: true, detail: null })
+    expect(lineage.truncated).toBe(false)
+  })
+
+  it('caps the walk depth and flags truncation', async () => {
+    // Every row points at another parent — an unbounded chain.
+    const fetchMock = vi.fn().mockImplementation((url: unknown) => {
+      const id = String(url).split('/').pop()! // last path segment is the requested workspace id
+      return Promise.resolve(jsonResponse(detailBody(id, null, 'f'.repeat(32))))
+    })
+    vi.stubGlobal('fetch', fetchMock)
+
+    const client = new QueryClient({ defaultOptions: { queries: { retry: false } } })
+    const { result } = renderHook(() => useWorkspaceLineage(idA), {
+      wrapper: makeWrapper(client),
+    })
+    await waitFor(() => expect(result.current.isSuccess).toBe(true))
+
+    expect(result.current.data!.entries).toHaveLength(5)
+    expect(result.current.data!.truncated).toBe(true)
+  })
+})
diff --git a/frontend/src/hooks/use-workspaces.ts b/frontend/src/hooks/use-workspaces.ts
index 8fc02054..7e7c6a7d 100644
--- a/frontend/src/hooks/use-workspaces.ts
+++ b/frontend/src/hooks/use-workspaces.ts
@@ -1,16 +1,36 @@
-import { useQuery } from '@tanstack/react-query'
-import { api } from '@/lib/api'
-import type { WorkspaceDetail, WorkspaceListResponse } from '@/types/api'
+import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query'
+import { api, ApiError } from '@/lib/api'
+import type {
+  WorkspaceDetail,
+  WorkspaceExportResult,
+  WorkspaceHealth,
+  WorkspaceListParams,
+  WorkspaceListResponse,
+  WorkspaceUpdate,
+} from '@/types/api'
 
 /**
- * E4 (#393) — list saved showcase workspaces, newest first. Server-backed
- * source of truth for `preservation="keep"` runs (the localStorage
- * RunHistoryStrip stays ephemeral-only).
+ * E4 (#393) — list saved showcase workspaces. Server-backed source of truth
+ * for `preservation="keep"` runs (the localStorage RunHistoryStrip stays
+ * ephemeral-only). E2 (#408) — params-aware: q name search, single-tag
+ * filter, include_archived (server default hides archived), allow-listed
+ * sort_by/sort_order. Pinned rows always order first server-side.
*/
-export function useWorkspaces(limit = 20, enabled = true) {
+export function useWorkspaces(params: WorkspaceListParams = {}, enabled = true) {
   return useQuery({
-    queryKey: ['workspaces', { limit }],
-    queryFn: () => api('/demo/workspaces', { params: { limit } }),
+    queryKey: ['workspaces', params], // the whole params object is the cache identity
+    queryFn: () =>
+      api<WorkspaceListResponse>('/demo/workspaces', {
+        params: {
+          limit: params.limit ?? 20, // the only param with a client-side default; the rest pass through as given
+          offset: params.offset,
+          q: params.q,
+          tags: params.tags,
+          include_archived: params.include_archived,
+          sort_by: params.sort_by,
+          sort_order: params.sort_order,
+        },
+      }),
     enabled,
   })
 }
@@ -23,3 +43,121 @@ export function useWorkspace(workspaceId: string, enabled = true) {
     enabled: enabled && !!workspaceId,
   })
 }
+
+/**
+ * Delete a saved workspace METADATA row; invalidates the workspaces list on
+ * success. Server-side this removes only the `showcase_workspace` record —
+ * the run's created objects (model runs, scenario plans, aliases, jobs,
+ * artifacts) are soft references and stay untouched.
+ */
+export function useDeleteWorkspace() {
+  const queryClient = useQueryClient()
+  return useMutation({
+    mutationFn: (workspaceId: string) =>
+      api(`/demo/workspaces/${workspaceId}`, { method: 'DELETE' }),
+    onSuccess: () => {
+      void queryClient.invalidateQueries({ queryKey: ['workspaces'] }) // fire-and-forget: the returned promise is deliberately not awaited
+    },
+  })
+}
+
+/**
+ * E2 (#408) — partial lifecycle update (rename / notes / tags / pin /
+ * archive) through the E1 PATCH endpoint. Only provided fields change.
+ * Invalidates the blanket ['workspaces'] key so list + detail + lineage
+ * queries all refetch.
+ */
+export function usePatchWorkspace() {
+  const queryClient = useQueryClient()
+  return useMutation({
+    mutationFn: ({ workspaceId, update }: { workspaceId: string; update: WorkspaceUpdate }) =>
+      api<WorkspaceDetail>(`/demo/workspaces/${workspaceId}`, { // typed as the updated workspace detail
+        method: 'PATCH',
+        body: update,
+      }),
+    onSuccess: () => {
+      void queryClient.invalidateQueries({ queryKey: ['workspaces'] }) // fire-and-forget: the returned promise is deliberately not awaited
+    },
+  })
+}
+
+/**
+ * E2 (#408) — soft-reference link health for the LOADED workspace only
+ * (never probed per list row — the backend fans out one in-process probe
+ * per reference). Disabled while `workspaceId` is empty; the 30 s staleTime keeps reloads from hammering the probe fan-out.
+ */
+export function useWorkspaceHealth(workspaceId: string, enabled = true) {
+  return useQuery({
+    queryKey: ['workspaces', workspaceId, 'health'],
+    queryFn: () => api<WorkspaceHealth>(`/demo/workspaces/${workspaceId}/health`),
+    enabled: enabled && !!workspaceId,
+    staleTime: 30_000,
+  })
+}
+
+/**
+ * E6 (#412) — export a saved workspace to a checksum-validated bundle on disk
+ * (a per-workspace directory under artifacts/showcase/). Export is stateless and re-runnable: it writes no
+ * server-side row, so it does NOT invalidate the workspaces list.
+ */
+export function useExportWorkspace() {
+  return useMutation({
+    mutationFn: (workspaceId: string) =>
+      api<WorkspaceExportResult>(`/demo/workspaces/${workspaceId}/export`, { method: 'POST' }),
+  })
+}
+
+/** One ancestor entry in a workspace's replay lineage chain (newest first). */
+export interface LineageEntry {
+  workspace_id: string
+  name: string | null
+  /** True when the ancestor row was deleted — dangling pointers are designed. */
+  deleted: boolean
+  detail: WorkspaceDetail | null // null exactly when `deleted` is true (the 404 sentinel carries no payload)
+}
+
+export interface WorkspaceLineage {
+  entries: LineageEntry[]
+  /** True when the chain continues past the depth cap. */
+  truncated: boolean
+}
+
+// A replay-of-a-replay chain deeper than this is pathological; the strip
+// renders a trailing ellipsis instead of walking forever.
+const LINEAGE_DEPTH_CAP = 5
+
+/**
+ * E2 (#408) — walk the replayed_from_workspace_id chain (newest → original)
+ * as ONE query of serial fetches, at most LINEAGE_DEPTH_CAP rows. A 404 ancestor terminates the walk with a
+ * deleted sentinel — dangling lineage is expected, never an error; any other failure rejects the query.
+ */
+export function useWorkspaceLineage(workspaceId: string | null) {
+  return useQuery({
+    queryKey: ['workspaces', workspaceId, 'lineage'],
+    enabled: !!workspaceId,
+    queryFn: async (): Promise<WorkspaceLineage> => {
+      const entries: LineageEntry[] = []
+      let current: string | null = workspaceId
+      for (let depth = 0; depth < LINEAGE_DEPTH_CAP && current; depth += 1) {
+        try {
+          const detail = await api<WorkspaceDetail>(`/demo/workspaces/${current}`)
+          entries.push({
+            workspace_id: current,
+            name: detail.name,
+            deleted: false,
+            detail,
+          })
+          current = detail.replayed_from_workspace_id ?? null // a row that omits the field is a root, not a truncation
+        } catch (error) {
+          if (error instanceof ApiError && error.status === 404) {
+            entries.push({ workspace_id: current, name: null, deleted: true, detail: null })
+            current = null
+          } else {
+            throw error
+          }
+        }
+      }
+      return { entries, truncated: !!current } // truncated only when a parent id is still pending after the cap
+    },
+  })
+}
diff --git a/frontend/src/lib/constants.ts b/frontend/src/lib/constants.ts
index 95cb28b8..68de8031 100644
--- a/frontend/src/lib/constants.ts
+++ b/frontend/src/lib/constants.ts
@@ -2,6 +2,8 @@
 export const ROUTES = {
   DASHBOARD: '/',
   SHOWCASE: '/showcase',
+  // E2 (#408) — two-workspace compare; deep-linkable via ?a=&b=.
+ SHOWCASE_COMPARE: '/showcase/compare', OPS: '/ops', EXPLORER: { SALES: '/explorer/sales', diff --git a/frontend/src/pages/ops.tsx b/frontend/src/pages/ops.tsx index 233c8ef5..86455136 100644 --- a/frontend/src/pages/ops.tsx +++ b/frontend/src/pages/ops.tsx @@ -3,6 +3,7 @@ import { useNavigate, Link } from 'react-router-dom' import { Activity, AlertTriangle, CheckCircle2, Clock, Download, RefreshCw } from 'lucide-react' import { toast } from 'sonner' import { useModelHealth, useOpsSummary, useRetrainingCandidates } from '@/hooks/use-ops' +import { useApprovalEvents } from '@/hooks/use-approval-events' import { useProviderHealth } from '@/hooks/use-config' import { useCreateJob } from '@/hooks/use-jobs' import { useCreateAlias, useRun, useAliases } from '@/hooks/use-runs' @@ -97,6 +98,8 @@ export default function OpsPage() { const summaryQuery = useOpsSummary() const candidatesQuery = useRetrainingCandidates() const modelHealthQuery = useModelHealth() + // E5 (#411) — recent HITL approval events flattened across saved workspaces. + const approvalEventsQuery = useApprovalEvents() const providerQuery = useProviderHealth() const aliasesQuery = useAliases() const createJob = useCreateJob() @@ -238,6 +241,18 @@ export default function OpsPage() { setPromoteTarget(null) } + const approvalEvents = approvalEventsQuery.data?.events ?? [] + + /** E5 (#411) — approval decision → StatusBadge variant. */ + function decisionBadgeVariant( + decision: string | null, + ): 'success' | 'error' | 'warning' | 'default' { + if (decision === 'approved') return 'success' + if (decision === 'rejected') return 'error' + if (decision === 'timed_out') return 'warning' + return 'default' + } + /** PRP-36 enum → human-readable reason chip label. */ function staleReasonLabel(reason: string | null): string { if (reason === null) return '—' @@ -445,6 +460,65 @@ export default function OpsPage() { + {/* E5 (#411) — Approval History. 
Recent HITL approval decisions + captured on saved showcase workspaces (demo slice endpoint; + frontend-only surface). */} + + + Approval History + + Recent human-in-the-loop approval decisions captured on saved + showcase workspaces — approve, reject, or window-lapse auto-approve. + + + + {approvalEventsQuery.isLoading ? ( + + ) : approvalEvents.length === 0 ? ( +

+ No approval events yet — run a showcase pipeline with the HITL + step to capture one. +

+ ) : ( + + + + Decision + Tool + Workspace + Transcript + When + + + + {approvalEvents.map((event, index) => ( + + + + {event.decision ?? 'unknown'} + {event.auto_approved === true ? ' (auto)' : ''} + + + + {event.tool_name ?? '—'} + + + {event.workspace_name ?? event.workspace_id.slice(0, 8)} + + + {event.transcript_summary ?? '—'} + + + {formatWhen(event.decided_at)} + + + ))} + +
+ )} +
+
+ {/* PRP-37 — Stale aliases. Surfaces the new feature_frame_version_mismatch reason chip (PRP-36) alongside the existing newer-run / artifact-not-verified / run-not-success diff --git a/frontend/src/pages/showcase.tsx b/frontend/src/pages/showcase.tsx index 61d5b947..9330bf17 100644 --- a/frontend/src/pages/showcase.tsx +++ b/frontend/src/pages/showcase.tsx @@ -3,28 +3,44 @@ import { Play, Loader2, Trophy, AlertTriangle, ArrowRight, Square } from 'lucide import { useState } from 'react' import { useDemoPipeline } from '@/hooks/use-demo-pipeline' import type { DemoStep } from '@/hooks/use-demo-pipeline' -import { useWorkspace } from '@/hooks/use-workspaces' +import { useWorkspace, useWorkspaceHealth } from '@/hooks/use-workspaces' import { DemoPhasePanel } from '@/components/demo/DemoPhasePanel' import { ScenarioPicker } from '@/components/demo/ScenarioPicker' import { ShowcaseKpiStrip } from '@/components/demo/ShowcaseKpiStrip' import { InspectArtifactsPanel } from '@/components/demo/InspectArtifactsPanel' import { RunHistoryStrip } from '@/components/demo/RunHistoryStrip' +import { ReplayConfirmDialog } from '@/components/demo/ReplayConfirmDialog' +import { WorkspaceLineageStrip } from '@/components/demo/WorkspaceLineageStrip' import { WorkspacePanel } from '@/components/demo/WorkspacePanel' import { WorkspaceArtifactsPanel } from '@/components/demo/WorkspaceArtifactsPanel' +import { WorkspaceStoryPanel } from '@/components/demo/WorkspaceStoryPanel' +import { SeedConfigPanel } from '@/components/demo/SeedConfigPanel' +import { ScopeSelector } from '@/components/demo/ScopeSelector' +import { RunConfigPanel } from '@/components/demo/RunConfigPanel' +import { + DEFAULT_BACKTEST, + DEFAULT_TRAIN_MODELS, + isDefaultBacktest, + isDefaultSelection, + parseRunConfig, +} from '@/components/demo/run-config-utils' +import { buildReplayRequest } from '@/components/demo/replay-request' +import { WORKSPACE_NAME_PATTERN } from '@/components/demo/workspace-name' import { Button 
} from '@/components/ui/button' import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' import { Checkbox } from '@/components/ui/checkbox' import { Input } from '@/components/ui/input' import { ROUTES } from '@/lib/constants' import { cn } from '@/lib/utils' -import type { WorkspaceListItem } from '@/types/api' +import type { + DemoBacktestConfig, + SeedOverrides, + UserScope, + WorkspaceListItem, +} from '@/types/api' const TERMINAL_STATUSES = new Set(['pass', 'fail', 'skip', 'warn']) -// E4 (#393) — mirrors the backend DemoRunRequest.workspace_name pattern -// (schemas.py): lowercase letters/digits, then -/_ allowed; ≤100 chars. -const WORKSPACE_NAME_PATTERN = /^[a-z0-9][a-z0-9\-_]*$/ - /** * PRP-38 / PRP-39 / PRP-40 — resolve the per-step Inspect deep link. * @@ -122,6 +138,16 @@ export default function ShowcasePage() { const [keepWorkspace, setKeepWorkspace] = useState(false) const [workspaceName, setWorkspaceName] = useState('') const [selectedWorkspaceId, setSelectedWorkspaceId] = useState(null) + // E2 (#408) — the workspace awaiting replay confirmation (null = no dialog). + const [pendingReplay, setPendingReplay] = useState(null) + // E3 (#409) — advanced seed config (sparse; null = preset-driven) and the + // operator-selected focus pair (null = auto-discover first pair). + const [seedOverrides, setSeedOverrides] = useState(null) + const [userScope, setUserScope] = useState(null) + // E4 (#410) — run-config phase controls. Default = the legacy trio + split; + // the dirty-only rule (below) omits both keys from the frame when untouched. + const [trainModels, setTrainModels] = useState([...DEFAULT_TRAIN_MODELS]) + const [backtestCfg, setBacktestCfg] = useState({ ...DEFAULT_BACKTEST }) // The page (not the panel) resolves the loaded workspace's detail — the // artifacts panel needs detail-only created_objects. @@ -129,6 +155,11 @@ export default function ShowcasePage() { selectedWorkspaceId ?? 
'', !!selectedWorkspaceId ) + // E2 (#408) — probe the LOADED workspace's soft references (never per row). + const { data: workspaceHealth } = useWorkspaceHealth( + selectedWorkspaceId ?? '', + !!selectedWorkspaceId + ) const completed = steps.filter((s) => TERMINAL_STATUSES.has(s.status)).length @@ -152,6 +183,15 @@ export default function ShowcasePage() { ...(trimmedName ? { workspace_name: trimmedName } : {}), } : {}), + // E3 (#409) — overrides only ride a re-seed run (the backend rejects + // them on skip_seed=true); omit both keys for legacy byte-compat. + ...(reseed && seedOverrides ? { seed_overrides: seedOverrides } : {}), + ...(userScope ? { user_scope: userScope } : {}), + // E4 (#410) — dirty-only inclusion: omit train_model_types / backtest + // when they equal the defaults, so untouched controls send a + // byte-identical legacy frame (umbrella criterion). + ...(isDefaultSelection(trainModels) ? {} : { train_model_types: trainModels }), + ...(isDefaultBacktest(backtestCfg) ? {} : { backtest: backtestCfg }), }) } @@ -164,25 +204,35 @@ export default function ShowcasePage() { setResetDb(ws.reset) setKeepWorkspace(true) setWorkspaceName(ws.name ?? '') + // E3 (#409) — repopulate the seed-config panel + scope selector. + setSeedOverrides(ws.seed_overrides ?? null) + setUserScope(ws.user_scope ?? null) + // E4 (#410) — repopulate the run-config panel; reset to defaults when the + // row carried no custom config (null run_config). + const runConfig = parseRunConfig(ws.run_config) + setTrainModels(runConfig ? runConfig.trainModels : [...DEFAULT_TRAIN_MODELS]) + setBacktestCfg(runConfig ? runConfig.backtest : { ...DEFAULT_BACKTEST }) setSelectedWorkspaceId(ws.workspace_id) } - // E4 (#393) — Replay: Load, then re-submit the recorded config VERBATIM - // through the existing WS run path with preservation='keep' (a replay is - // itself a workspace run). setScenario runs first (picker-desync gotcha: - // start() does not sync the picker state). 
+ // E2 (#408) — Replay request: every replay first opens the confirmation + // dialog (recorded-vs-sent preview; destructive variant on reset=true). + // NO code path starts a replay without it. const handleReplayWorkspace = (ws: WorkspaceListItem) => { + setPendingReplay(ws) + } + + // E4 (#393) / E2 (#408) — the CONFIRMED replay: Load, then re-submit the + // recorded config VERBATIM through the existing WS run path with + // preservation='keep' (a replay is itself a workspace run). setScenario + // runs first via handleLoadWorkspace (picker-desync gotcha: start() does + // not sync the picker state). + const executeReplay = (ws: WorkspaceListItem) => { handleLoadWorkspace(ws) // The re-run's live cards take over; the original row stays untouched. setSelectedWorkspaceId(null) - start({ - seed: ws.seed, - scenario: ws.scenario, - reset: ws.reset, - skip_seed: ws.skip_seed, - preservation: 'keep', - ...(ws.name ? { workspace_name: ws.name } : {}), - }) + start(buildReplayRequest(ws)) + setPendingReplay(null) } // For the Inspect link to surface store_id/product_id on the train/backtest @@ -241,10 +291,16 @@ export default function ShowcasePage() { scenario={scenario} /> - {/* E4 (#393) — server-backed saved workspaces (Load + Replay). */} + {/* E4 (#393) / E2 (#408) — server-backed saved workspaces (lifecycle + panel; Replay routes through the confirm dialog below). */} { + // Deleting the currently loaded workspace detaches its artifacts + // panel — the metadata row backing it is gone (created objects stay). + if (selectedWorkspaceId === workspaceId) setSelectedWorkspaceId(null) + }} isRunning={isRunning} lastWorkspaceId={summary?.workspaceId ?? null} /> @@ -264,7 +320,11 @@ export default function ShowcasePage() {
-
+ {/* E3 (#409) — advanced seed config, only meaningful on a re-seed run. */} + {reseed && ( + + )} + + {/* E3 (#409) — focus-pair selection works on the EXISTING dataset + (no re-seed needed); a Reset run clears it (ids re-issued). */} +
+ + {resetDb && ( +

+ Reset database re-issues entity ids — re-pick the focus pair after the run. +

+ )} +
+ + {/* E4 (#410) — run-config phase controls (model set + backtest + + preview). Collapsed by default; untouched sends a legacy frame. */} + + {phase === 'running' && (

Step {completed} of {steps.length} complete… @@ -439,10 +543,30 @@ export default function ShowcasePage() { )} {/* E4 (#393) — re-attached artifacts of a LOADED workspace. Any started - run detaches it (selectedWorkspaceId cleared) so live cards take over. */} + run detaches it (selectedWorkspaceId cleared) so live cards take over. + E2 (#408) — lineage strip + link-health markers ride along. */} {phase !== 'running' && loadedWorkspace && ( - +

+ handleLoadWorkspace(ancestor)} + /> + + {/* E5 (#411) — captured agent/HITL + RAG story; self-hides on legacy rows. */} + +
)} + + {/* E2 (#408) — replay confirmation with the recorded-vs-sent preview. */} + pendingReplay && executeReplay(pendingReplay)} + onCancel={() => setPendingReplay(null)} + />
) } diff --git a/frontend/src/pages/visualize/champion.test.tsx b/frontend/src/pages/visualize/champion.test.tsx index 2ae297ca..691a8b19 100644 --- a/frontend/src/pages/visualize/champion.test.tsx +++ b/frontend/src/pages/visualize/champion.test.tsx @@ -29,6 +29,7 @@ const CATALOG: ModelCatalogResponse = { default_params: {}, supports_auto_predict: true, description: 'Repeats the last observed value.', + enabled: true, }, { model_type: 'regression', @@ -39,6 +40,7 @@ const CATALOG: ModelCatalogResponse = { default_params: {}, supports_auto_predict: false, description: 'Histogram gradient boosting.', + enabled: true, }, ], default_candidate_model_types: ['naive', 'regression'], diff --git a/frontend/src/pages/workspace-compare.test.tsx b/frontend/src/pages/workspace-compare.test.tsx new file mode 100644 index 00000000..ea8a352c --- /dev/null +++ b/frontend/src/pages/workspace-compare.test.tsx @@ -0,0 +1,159 @@ +import { cleanup, render } from '@testing-library/react' +import { MemoryRouter, Route, Routes } from 'react-router-dom' +import { afterEach, beforeAll, beforeEach, describe, expect, it, vi } from 'vitest' +import WorkspaceComparePage from './workspace-compare' +import type { WorkspaceDetail } from '@/types/api' + +beforeAll(() => { + class ResizeObserverStub { + observe() {} + unobserve() {} + disconnect() {} + } + vi.stubGlobal('ResizeObserver', ResizeObserverStub) +}) + +afterEach(() => { + cleanup() + vi.clearAllMocks() +}) + +const idA = 'a'.repeat(32) +const idB = 'b'.repeat(32) + +function makeDetail(overrides: Partial): WorkspaceDetail { + return { + workspace_id: idA, + name: 'ws-a', + status: 'completed', + seed: 42, + scenario: 'demo_minimal', + reset: false, + skip_seed: true, + result_summary: { + winner_model_type: 'seasonal_naive', + winner_wape: 0.15, + wall_clock_s: 12, + }, + created_at: '2026-06-01T12:00:00Z', + archived: false, + pinned: false, + tags: [], + replayed_from_workspace_id: null, + seed_overrides: null, + user_scope: null, + 
store_id: 3, + product_id: 7, + date_start: '2026-01-01', + date_end: '2026-03-31', + created_objects: { winning_run_id: 'run-1', alias: 'demo-production' }, + notes: null, + config_schema_version: 1, + ...overrides, + } +} + +let details: Record = {} + +vi.mock('@/hooks/use-workspaces', () => ({ + useWorkspaces: () => ({ + data: { + workspaces: Object.values(details).filter(Boolean), + total: Object.keys(details).length, + }, + isLoading: false, + }), + useWorkspace: (workspaceId: string, enabled = true) => { + if (!enabled || !workspaceId) return { data: undefined, isLoading: false, error: null } + const detail = details[workspaceId] + return detail + ? { data: detail, isLoading: false, error: null } + : { data: undefined, isLoading: false, error: new Error('not found') } + }, +})) + +beforeEach(() => { + details = { + [idA]: makeDetail({}), + [idB]: makeDetail({ + workspace_id: idB, + name: 'ws-b', + seed: 99, + status: 'failed', + replayed_from_workspace_id: idA, + result_summary: { + winner_model_type: 'naive', + winner_wape: 0.25, + wall_clock_s: 20, + }, + created_objects: { winning_run_id: 'run-2', batch_id: 'batch-1' }, + }), + } +}) + +function renderPage(query = `?a=${idA}&b=${idB}`) { + return render( + + + } /> + + , + ) +} + +describe('WorkspaceComparePage', () => { + it('renders the config diff for two deep-linked workspaces', () => { + const { container } = renderPage() + const copy = container.textContent ?? '' + expect(copy).toContain('ws-a') + expect(copy).toContain('ws-b') + expect(copy).toContain('42') + expect(copy).toContain('99') + // Mismatching seed rows are emphasized. + const bolded = Array.from(container.querySelectorAll('td.font-semibold')).map( + (el) => el.textContent, + ) + expect(bolded).toContain('42') + expect(bolded).toContain('99') + }) + + it('renders the result diff with the sign-only WAPE delta', () => { + const { container } = renderPage() + const copy = container.textContent ?? 
'' + expect(copy).toContain('seasonal_naive') + expect(copy).toContain('0.1500') + expect(copy).toContain('0.2500') + expect(copy).toContain('0.1000') // 0.25 - 0.15 + }) + + it('renders the created-objects presence matrix over the key union', () => { + const { container } = renderPage() + const copy = container.textContent ?? '' + expect(copy).toContain('winning_run_id') + expect(copy).toContain('alias') + expect(copy).toContain('batch_id') + }) + + it('renders the lineage note when one side replays the other', () => { + const { container } = renderPage() + expect(container.textContent).toContain('Workspace B is a replay of workspace A.') + }) + + it('renders the partial-run badge on a failed side', () => { + const { container } = renderPage() + expect(container.textContent).toContain('partial run') + }) + + it('degrades to the picker when an id no longer resolves (no crash)', () => { + details[idB] = undefined + const { container } = renderPage() + expect(container.textContent).toContain('no longer exists') + // The diff sections never render half-ready. + expect(container.textContent).not.toContain('Created objects') + }) + + it('prompts for selection when ids are missing', () => { + const { container } = renderPage('') + expect(container.textContent).toContain('Select two workspaces') + }) +}) diff --git a/frontend/src/pages/workspace-compare.tsx b/frontend/src/pages/workspace-compare.tsx new file mode 100644 index 00000000..f6e8b785 --- /dev/null +++ b/frontend/src/pages/workspace-compare.tsx @@ -0,0 +1,394 @@ +/** + * E2 (#408) — two-workspace compare page (/showcase/compare?a=&b=). + * + * Mirrors the run-compare two-picker pattern (pages/explorer/run-compare.tsx) + * but the diff is FRONTEND-ONLY: a workspace compare is a plain field diff + * over two already-served WorkspaceDetail payloads — no backend endpoint. 
+ * Renders: config table (mismatches highlighted), result-summary diff + * (WAPE delta is sign-only), created-objects presence matrix, lineage note + * when one side replays the other, and partial-run badges. Invalid/missing + * ids degrade to the picker — never a crash. + */ + +import { Link, useSearchParams } from 'react-router-dom' +import { ArrowDown, ArrowLeft, ArrowUp } from 'lucide-react' +import { useWorkspace, useWorkspaces } from '@/hooks/use-workspaces' +import { Badge } from '@/components/ui/badge' +import { Button } from '@/components/ui/button' +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from '@/components/ui/select' +import { + Table, + TableBody, + TableCell, + TableHead, + TableHeader, + TableRow, +} from '@/components/ui/table' +import { formatNumber } from '@/lib/api' +import { ROUTES } from '@/lib/constants' +import { cn } from '@/lib/utils' +import type { WorkspaceDetail, WorkspaceListItem } from '@/types/api' + +/** Neutral delta indicator — sign only, no better/worse colour-coding. */ +function DeltaCell({ diff }: { diff: number | null }) { + if (diff == null) { + return + } + if (diff > 0) { + return ( + + + {formatNumber(diff, 4)} + + ) + } + if (diff < 0) { + return ( + + + {formatNumber(diff, 4)} + + ) + } + return {formatNumber(diff, 4)} +} + +function labelOf(ws: WorkspaceListItem): string { + return ws.name ?? ws.workspace_id.slice(0, 8) +} + +function WorkspacePicker({ + label, + value, + workspaces, + onSelect, +}: { + label: string + value: string + workspaces: WorkspaceListItem[] + onSelect: (workspaceId: string) => void +}) { + return ( +
+ {label} + +
+ ) +} + +function summaryNumber(ws: WorkspaceDetail, key: string): number | null { + const value = ws.result_summary?.[key] + return typeof value === 'number' ? value : null +} + +function summaryString(ws: WorkspaceDetail, key: string): string | null { + const value = ws.result_summary?.[key] + return typeof value === 'string' ? value : null +} + +interface ConfigRow { + field: string + a: string + b: string +} + +function buildConfigRows(a: WorkspaceDetail, b: WorkspaceDetail): ConfigRow[] { + const fmt = (value: unknown): string => + value === null || value === undefined || value === '' ? '—' : String(value) + return [ + { field: 'seed', a: fmt(a.seed), b: fmt(b.seed) }, + { field: 'scenario', a: fmt(a.scenario), b: fmt(b.scenario) }, + { field: 'reset', a: fmt(a.reset), b: fmt(b.reset) }, + { field: 'skip_seed', a: fmt(a.skip_seed), b: fmt(b.skip_seed) }, + { field: 'name', a: fmt(a.name), b: fmt(b.name) }, + { field: 'tags', a: fmt(a.tags.join(', ')), b: fmt(b.tags.join(', ')) }, + ] +} + +/** Union of soft-reference keys recorded on either side. */ +function objectKeys(a: WorkspaceDetail, b: WorkspaceDetail): string[] { + return Array.from( + new Set([...Object.keys(a.created_objects), ...Object.keys(b.created_objects)]) + ).sort() +} + +function lineageNote(a: WorkspaceDetail, b: WorkspaceDetail): string | null { + if (b.replayed_from_workspace_id === a.workspace_id) { + return 'Workspace B is a replay of workspace A.' + } + if (a.replayed_from_workspace_id === b.workspace_id) { + return 'Workspace A is a replay of workspace B.' + } + return null +} + +function SideStatus({ ws }: { ws: WorkspaceDetail }) { + return ( + + {ws.status} + {ws.status !== 'completed' && ( + + partial run + + )} + + ) +} + +export default function WorkspaceComparePage() { + const [params, setParams] = useSearchParams() + const a = params.get('a') ?? '' + const b = params.get('b') ?? '' + + // Pickers include archived rows — comparing an archived run is legitimate. 
+ const listQuery = useWorkspaces({ limit: 100, include_archived: true }) + const detailA = useWorkspace(a, !!a) + const detailB = useWorkspace(b, !!b) + + function selectWorkspace(slot: 'a' | 'b', workspaceId: string) { + setParams((prev) => { + const next = new URLSearchParams(prev) + next.set(slot, workspaceId) + return next + }) + } + + const workspaces = listQuery.data?.workspaces ?? [] + const wsA = detailA.data + const wsB = detailB.data + // A 404 (deleted id in the URL) degrades to the picker — never a crash. + const bothReady = !!wsA && !!wsB + + const wapeA = wsA ? summaryNumber(wsA, 'winner_wape') : null + const wapeB = wsB ? summaryNumber(wsB, 'winner_wape') : null + const note = bothReady ? lineageNote(wsA, wsB) : null + + return ( +
+
+ +

Compare workspaces

+

+ Pick two saved showcase workspaces to compare their replay config, + results, and recorded objects side by side. +

+
+ + + + Select workspaces + + The comparison is deep-linkable — the URL carries the two workspace ids. + + + + selectWorkspace('a', id)} + /> + selectWorkspace('b', id)} + /> + + + + {(!a || !b || detailA.error || detailB.error || !bothReady) && ( + + + {detailA.error || detailB.error + ? 'One of the selected workspaces no longer exists — select another above.' + : detailA.isLoading || detailB.isLoading + ? 'Loading workspaces…' + : 'Select two workspaces above to see the comparison.'} + + + )} + + {bothReady && ( + <> + {note && ( + + + {note} + + + )} + + + + Config + + Recorded replay config — mismatching rows are highlighted. + + + + + + + Field + Workspace A + Workspace B + + + + + Workspace ID + + {wsA.workspace_id} + + + {wsB.workspace_id} + + + + Status + + + + + + + + {buildConfigRows(wsA, wsB).map((row) => { + const mismatch = row.a !== row.b + return ( + + {row.field} + + {row.a} + + + {row.b} + + + ) + })} + +
+
+
+ + + + Results + + Δ is Workspace B minus Workspace A — sign only, not a quality judgement. + + + + + + + Metric + Workspace A + Workspace B + Δ + + + + + Winner + {summaryString(wsA, 'winner_model_type') ?? '—'} + {summaryString(wsB, 'winner_model_type') ?? '—'} + + + + + + Winner WAPE + {wapeA != null ? formatNumber(wapeA, 4) : '—'} + {wapeB != null ? formatNumber(wapeB, 4) : '—'} + + + + + + Wall-clock (s) + + {summaryNumber(wsA, 'wall_clock_s') != null + ? formatNumber(summaryNumber(wsA, 'wall_clock_s')!, 1) + : '—'} + + + {summaryNumber(wsB, 'wall_clock_s') != null + ? formatNumber(summaryNumber(wsB, 'wall_clock_s')!, 1) + : '—'} + + + + + + +
+
+
+ + + + Created objects + + Which soft references each run recorded (✓ recorded / — absent). + + + + {objectKeys(wsA, wsB).length === 0 ? ( +

+ Neither workspace recorded any created objects. +

+ ) : ( + + + + Object + Workspace A + Workspace B + + + + {objectKeys(wsA, wsB).map((key) => ( + + {key} + {key in wsA.created_objects ? '✓' : '—'} + {key in wsB.created_objects ? '✓' : '—'} + + ))} + +
+ )} +
+
+ + )} +
+ ) +} diff --git a/frontend/src/types/api.ts b/frontend/src/types/api.ts index 93de98cc..a817bcc4 100644 --- a/frontend/src/types/api.ts +++ b/frontend/src/types/api.ts @@ -774,6 +774,42 @@ export interface StepEvent { phase_total?: number | null } +// E3 (#409) — curated, allow-listed seed overrides (7 knobs; unknown keys 422 +// server-side via extra='forbid'). Requires skip_seed=false on the start frame. +export interface SeedOverrides { + stores?: number + products?: number + window_days?: number + sparsity?: number + promotion_intensity?: number + stockout_intensity?: number + noise_sigma?: number +} + +// E3 (#409) — operator-selected focus pair (REAL ids from /dimensions — +// sequences never reset, so ids are not 1-based). +export interface UserScope { + store_id: number + product_id: number +} + +// E4 (#410) — winner-ranking metric for the showcase backtest. Overlaps with +// RankingMetric (the champion selector's wape/smape/mae/bias) but adds rmse — +// issue #410 names exactly WAPE/MAE/RMSE, all lower-is-better. +export type DemoRankingMetric = 'wape' | 'mae' | 'rmse' + +// E4 (#410) — showcase backtest config. Mirrors the backend +// app/features/demo/schemas.py:DemoBacktestConfig (which itself mirrors +// SplitConfig bounds; the demo n_splits default is 3, not SplitConfig's 5). +export interface DemoBacktestConfig { + horizon: number // 1..90, def 14; must be > gap + strategy: SplitStrategy // def 'expanding' + n_splits: number // 2..20, def 3 + min_train_size: number // >= 7, def 30 + gap: number // 0..30, def 0 + metric: DemoRankingMetric // def 'wape' +} + // Start frame for WS /demo/stream and request body for POST /demo/run. export interface DemoRunRequest { seed?: number @@ -785,6 +821,16 @@ export interface DemoRunRequest { // Omit both to keep the legacy ephemeral behavior byte-identical. preservation?: 'ephemeral' | 'keep' workspace_name?: string + // E1 (#407) — replay provenance: the source workspace_id a Replay re-runs. 
+ replayed_from_workspace_id?: string + // E3 (#409) — advanced seed config + focus pair; omit both for legacy runs. + seed_overrides?: SeedOverrides + user_scope?: UserScope + // E4 (#410) — run-config phase controls. Omit both (dirty-only rule) to keep + // the legacy frame byte-identical; None server-side → the legacy baseline + // trio + default split. + train_model_types?: string[] + backtest?: DemoBacktestConfig } // Aggregate result returned by the synchronous POST /demo/run. @@ -813,6 +859,18 @@ export interface WorkspaceListItem { skip_seed: boolean result_summary: Record | null created_at: string + // E1 (#407) — lifecycle + provenance fields (consumed by E2 #408). + archived: boolean + pinned: boolean + tags: string[] + replayed_from_workspace_id: string | null + // E3 (#409) — replay-relevant story slots (on the LIST item: replay reads + // list rows); null on runs without them. + seed_overrides: SeedOverrides | null + user_scope: UserScope | null + // E4 (#410) — replay-input run config (model set + backtest); null on + // default-config / pre-E4 rows. Replay rebuilds the start frame from it. + run_config: Record | null } // Full row from GET /demo/workspaces/{workspace_id}. @@ -822,6 +880,13 @@ export interface WorkspaceDetail extends WorkspaceListItem { date_start: string | null date_end: string | null created_objects: Record + // E1 (#407) — operator annotation + schema version. + notes: string | null + config_schema_version: number + // E5 (#411) -- story slots: agent/HITL approval + RAG knowledge events. + // null until E5 writes them; legacy rows stay null. + approval_events: ApprovalEventDetail[] | null + rag_events: RagEventDetail[] | null } // Page shape of GET /demo/workspaces. @@ -830,6 +895,130 @@ export interface WorkspaceListResponse { total: number } +// === Showcase story capture (E5, #411) === + +// One approval_events entry on WorkspaceDetail (built from JSONB; tolerant). 
+export interface ApprovalEventDetail { + action_id: string | null + tool_name: string | null + decision: 'approved' | 'rejected' | 'timed_out' | string | null + decided_at: string | null + session_id: string | null + auto_approved?: boolean | null + reason?: string | null + execution_status?: string | null + tool_call_summary?: { description?: string; arguments_keys?: string[] } | null + transcript_summary?: string | null + tokens_used?: number | null + tool_calls_count?: number | null +} + +// One rag_events entry on WorkspaceDetail (built from JSONB; tolerant). +export interface RagEventDetail { + event: 'probe' | 'index' | 'retrieve' | 'skip' | string + status: 'pass' | 'warn' | 'skip' | string + detail: string + count: number + occurred_at: string + provider?: string | null + reachable?: boolean | null +} + +// One flattened row from GET /demo/approval-events (workspace-tagged). +export interface ApprovalEventItem { + workspace_id: string + workspace_name: string | null + action_id: string | null + tool_name: string | null + decision: string | null + decided_at: string | null + session_id: string | null + auto_approved: boolean | null + reason: string | null + execution_status: string | null + transcript_summary: string | null +} + +// Page shape of GET /demo/approval-events. +export interface ApprovalEventsResponse { + events: ApprovalEventItem[] + total: number +} + +// E2 (#408) — partial-update body for PATCH /demo/workspaces/{workspace_id} +// (E1 endpoint). Absent field = unchanged; explicit null clears name/notes. +export interface WorkspaceUpdate { + name?: string | null + notes?: string | null + tags?: string[] + archived?: boolean + pinned?: boolean +} + +// E2 (#408) — query params for GET /demo/workspaces. Archived rows are +// hidden unless include_archived; unknown sort_by falls back server-side. 
+export interface WorkspaceListParams { + limit?: number + offset?: number + q?: string + tags?: string + include_archived?: boolean + sort_by?: 'created_at' | 'name' | 'seed' | 'status' + sort_order?: 'asc' | 'desc' +} + +// E2 (#408) — link-health classification of one probed soft reference. +export type RefHealthStatus = 'alive' | 'dead' | 'unknown' + +export interface WorkspaceRefHealth { + key: string + ref_type: 'model_run' | 'scenario_plan' | 'alias' | 'batch' | 'agent_session' | 'job' + ref_id: string + status: RefHealthStatus + probe_path: string +} + +// E2 (#408) — GET /demo/workspaces/{workspace_id}/health response. +export interface WorkspaceHealth { + workspace_id: string + workspace_status: 'running' | 'completed' | 'failed' + partial_run: boolean + references: WorkspaceRefHealth[] + alive: number + dead: number + unknown: number + checked_at: string +} + +// === Showcase Workspace Export (E6, #412) === + +// One file inside an exported workspace bundle. +export interface ExportFileEntry { + path: string + sha256: string + size_bytes: number +} + +// A soft reference that could not be resolved during export. +export interface UnresolvedReference { + key: string + ref_id: string + reason: string +} + +// Result of POST /demo/workspaces/{workspace_id}/export. +export interface WorkspaceExportResult { + workspace_id: string + bundle_path: string + bundle_format_version: number + exported_at: string + files: ExportFileEntry[] + scenario_plans_exported: number + model_runs_referenced: number + unresolved_references: UnresolvedReference[] + validated: boolean +} + // === AI Model Configuration (/config) === // Presence + masked preview of one provider API key (never the raw value). @@ -1287,6 +1476,9 @@ export interface CandidateModelInfo { /** false for feature-aware models (the predict path rejects them). */ supports_auto_predict: boolean description: string + // E4 (#410) — runtime forecast_enable_* overlay (service-set). 
False exactly + // when the matching opt-in flag is off; the showcase picker hides those. + enabled: boolean } export interface ModelCatalogResponse { diff --git a/tests/test_e2e_demo.py b/tests/test_e2e_demo.py index 5ef406ff..3323c524 100644 --- a/tests/test_e2e_demo.py +++ b/tests/test_e2e_demo.py @@ -614,6 +614,62 @@ def test_demo_replay_same_config_twice( assert row["status"] == "completed" +@pytest.mark.integration +def test_demo_replay_preserves_seed_overrides_and_scope( + uvicorn_subprocess: subprocess.Popen[bytes], +) -> None: + """E3 (#409) — replayed runs carry identical seed_overrides/user_scope slots. + + The replay-verbatim contract: the slots record the REQUESTED config, so two + runs of the same body must produce two workspace rows with identical slot + JSON. ``user_scope`` is deliberately a (1, 1) pair that almost certainly + dangles (sequences never reset) — the status step must WARN + fall back, + the run must still pass, and the slot must still record the request + verbatim (requested-vs-effective divergence stays visible: the row's + store_id/product_id columns carry the discovered grain). + """ + import json + + body_dict: dict[str, object] = { + "seed": 42, + "reset": True, + "skip_seed": False, + "scenario": "demo_minimal", + "preservation": "keep", + "workspace_name": "e3-replay-slots", + # Smallest overrides to keep wall-clock sane (matches demo_minimal dims). 
+ "seed_overrides": {"stores": 3, "products": 10}, + "user_scope": {"store_id": 1, "product_id": 1}, + } + + first = _post_demo_run(body_dict, REPLAY_RUN_TIMEOUT_S) + assert first["overall_status"] == "pass", ( + f"first run did not pass: " + f"steps={[(s['step_name'], s['status'], s['detail']) for s in first['steps']]}" # type: ignore[index] + ) + second = _post_demo_run(body_dict, REPLAY_RUN_TIMEOUT_S) + assert second["overall_status"] == "pass", ( + f"replay did not pass: " + f"steps={[(s['step_name'], s['status'], s['detail']) for s in second['steps']]}" # type: ignore[index] + ) + + with urllib.request.urlopen( # noqa: S310 — http://127.0.0.1 internal URL + f"{DEMO_API_URL}/demo/workspaces?limit=100", timeout=10.0 + ) as resp: + assert resp.status == 200 + page = json.loads(resp.read()) + rows_by_id = {w["workspace_id"]: w for w in page["workspaces"]} + first_row = rows_by_id[first["workspace_id"]] + second_row = rows_by_id[second["workspace_id"]] + + # The slots are exposed on the LIST item (replay reads list rows) and are + # identical across the original and the replay. + assert first_row["seed_overrides"] == {"stores": 3, "products": 10} + assert first_row["user_scope"] == {"store_id": 1, "product_id": 1} + assert second_row["seed_overrides"] == first_row["seed_overrides"] + assert second_row["user_scope"] == first_row["user_scope"] + + @pytest.mark.integration def test_run_demo_precondition_failure_exits_2() -> None: """A bogus API URL surfaces as a precondition failure with exit 2. diff --git a/uv.lock b/uv.lock index 61725802..f7cf5bdf 100644 --- a/uv.lock +++ b/uv.lock @@ -821,7 +821,7 @@ wheels = [ [[package]] name = "forecastlabai" -version = "0.2.21" +version = "0.2.22" source = { editable = "." } dependencies = [ { name = "alembic" },