From 06712644704280f84eaac1d3e6a25ad60e3c7f1e Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Sat, 13 Jun 2026 09:44:26 +0200 Subject: [PATCH 1/3] feat(api): add showcase workspace export bundle endpoint (#412) POST /demo/workspaces/{id}/export writes a checksum-validated bundle (manifest.json + scenario-plan snapshots + sha256sum-compatible checksums.sha256) under artifacts/showcase/{workspace_id}/, validating every checksum before returning. Soft references resolve over in-process HTTP; model artifacts are referenced (uri + registry hash + live verify), never copied. Dangling refs are reported, not fatal; 404 missing, 409 while running, deterministic overwrite on re-export. Stateless: no migration, no DB writes. Traversal guard + chunked sha256 mirror registry/storage.py (pattern, not import). --- .env.example | 4 + app/core/config.py | 4 + app/features/demo/export.py | 387 +++++++++++++++++++++++++ app/features/demo/routes.py | 55 +++- app/features/demo/schemas.py | 57 ++++ app/features/demo/tests/test_export.py | 362 +++++++++++++++++++++++ app/features/demo/tests/test_routes.py | 78 ++++- 7 files changed, 944 insertions(+), 3 deletions(-) create mode 100644 app/features/demo/export.py create mode 100644 app/features/demo/tests/test_export.py diff --git a/.env.example b/.env.example index 38ef75b4..62da51b9 100644 --- a/.env.example +++ b/.env.example @@ -29,6 +29,10 @@ FORECAST_ENABLE_LIGHTGBM=false # FORECAST_ENABLE_XGBOOST defaults to false (opt-in; install ml-xgboost extra) # FORECAST_ENABLE_RANDOM_FOREST=false # PRP-36 optional model — pure sklearn, no extra needed +# Demo / Showcase settings +# E6 (#412) — root for saved-workspace export bundles (manifest + checksums).
+SHOWCASE_EXPORT_ROOT=./artifacts/showcase + # RAG Configuration # Embedding Provider: "openai" or "ollama" RAG_EMBEDDING_PROVIDER=openai diff --git a/app/core/config.py b/app/core/config.py index 033c77d9..6cedfd37 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -129,6 +129,10 @@ class Settings(BaseSettings): registry_artifact_root: str = "./artifacts/registry" registry_duplicate_policy: Literal["allow", "deny", "detect"] = "detect" + # Demo / Showcase + # E6 (#412) — root for workspace export bundles (manifest + checksums). + showcase_export_root: str = "./artifacts/showcase" + # Analytics analytics_max_rows: int = 10000 analytics_max_date_range_days: int = 730 diff --git a/app/features/demo/export.py b/app/features/demo/export.py new file mode 100644 index 00000000..fc5090f9 --- /dev/null +++ b/app/features/demo/export.py @@ -0,0 +1,387 @@ +"""Workspace export-bundle writer (E6, issue #412). + +Write a self-describing, checksum-validated bundle for a saved showcase +workspace under ``{export_root}/{workspace_id}/``:: + + manifest.json versioned snapshot + references + scenario_plans/{scenario_id}.json one per resolvable scenario plan + checksums.sha256 sha256sum-compatible; covers every other file + +Frozen decisions (see ``PRPs/PRP-showcase-completion-E6-export-bundle.md``): + +1. One directory per ``workspace_id`` (unique uuid4 hex), keyed off the DB row. +2. Re-export is a deterministic overwrite -- the existing guarded bundle + directory is removed and rewritten; ``exported_at`` records the moment. +3. Soft references resolve over the public HTTP surface IN-PROCESS + (``httpx.ASGITransport``) -- the demo slice may not import the registry / + scenarios slices (vertical-slice rule). Any non-2xx -> an + ``unresolved_references`` entry (or ``artifact_verified=None``), never a + failed export. +4. Model artifacts are REFERENCED (uri + registry hash + live verify result), + never copied. +5. Stateless -- export writes NOTHING to the database (no row, no story slot). +6.
``failed`` workspaces are exportable; ``running`` ones are a 409. +7. ``checksums.sha256`` excludes itself (a self-referencing checksum file is a + bootstrap hole) and uses the two-space ``sha256sum`` separator. + +The traversal guard (:func:`_resolve_bundle_dir`) and chunked SHA-256 +(:func:`_compute_sha256`) MIRROR ``app/features/registry/storage.py`` +(``LocalFSProvider._resolve_path`` / ``AbstractStorageProvider.compute_hash``) +-- the vertical-slice rule forbids importing that module, so the ~10-line +pattern is reimplemented here. Reference resolution uses the same in-process +``httpx`` client ``app/features/demo/link_health.py`` uses. +""" + +from __future__ import annotations + +import hashlib +import json +import shutil +from datetime import UTC, datetime +from pathlib import Path +from typing import TYPE_CHECKING, Any + +import httpx +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.config import get_settings +from app.core.exceptions import ConflictError, ForecastLabError, NotFoundError +from app.core.logging import get_logger +from app.features.demo import workspace +from app.features.demo.models import WORKSPACE_STATUS_RUNNING +from app.features.demo.schemas import ( + BUNDLE_FORMAT_VERSION, + ExportFileEntry, + UnresolvedReference, + WorkspaceDetailResponse, + WorkspaceExportResult, +) + +if TYPE_CHECKING: + from fastapi import FastAPI + +logger = get_logger(__name__) + +_MANIFEST = "manifest.json" +_CHECKSUMS = "checksums.sha256" +_PLANS_DIR = "scenario_plans" +# created_objects run-id keys whose registry runs the manifest references. +_RUN_KEYS = ("winning_run_id", "v2_run_id", "stale_alias_run_id") +# Generous in-process budget (no real network); a hung driven endpoint surfaces +# as a response under raise_app_exceptions=False, not a hang. 
+_EXPORT_TIMEOUT = httpx.Timeout(30.0, connect=5.0) + + +def _compute_sha256(path: Path) -> str: + """Chunked SHA-256 of a file (mirror ``registry/storage.py:compute_hash``).""" + digest = hashlib.sha256() + with path.open("rb") as handle: + for chunk in iter(lambda: handle.read(8192), b""): + digest.update(chunk) + return digest.hexdigest() + + +def _resolve_bundle_dir(root: Path, workspace_id: str) -> Path: + """Resolve ``/``, guarding against path traversal. + + Mirrors ``registry/storage.py:LocalFSProvider._resolve_path`` -- ``resolve()`` + then ``relative_to(root)``. A ``workspace_id`` that escapes the root raises + ``ValueError`` BEFORE any disk I/O. ``root`` must already be resolved. The + id always comes from the DB row (uuid4 hex), never raw from the URL path, so + this is defense in depth. + """ + bundle_dir = (root / workspace_id).resolve() + try: + bundle_dir.relative_to(root) + except ValueError: + logger.warning( + "demo.export_path_traversal_attempt", + workspace_id=workspace_id, + root=str(root), + ) + raise + return bundle_dir + + +def _write_json(path: Path, payload: dict[str, Any]) -> int: + """Write deterministic JSON (sorted keys, 2-space indent, trailing newline). + + ``sort_keys`` makes the bytes order-independent so unchanged state + re-exports to identical bytes (stable checksums). Returns the byte size. + """ + data = (json.dumps(payload, indent=2, sort_keys=True) + "\n").encode("utf-8") + path.write_bytes(data) + return len(data) + + +def _root_relative(root: Path) -> str: + """Repo-root-relative POSIX string for display (no absolute-path leak).""" + try: + return root.relative_to(Path.cwd()).as_posix() + except ValueError: + return root.as_posix() + + +def _open_client(app: FastAPI) -> httpx.AsyncClient: + """In-process client over ``ASGITransport`` (pattern: ``link_health.py``). 
+ + ``raise_app_exceptions=False`` is load-bearing: a driven endpoint's failure + becomes a 5xx *response* (-> ``unresolved_references`` / ``artifact_verified + =None``), never a re-raised exception inside the export. ``base_url`` is + cosmetic but required by httpx. + """ + return httpx.AsyncClient( + transport=httpx.ASGITransport(app=app, raise_app_exceptions=False), + base_url="http://demo.internal", + timeout=_EXPORT_TIMEOUT, + ) + + +async def _resolve_model_runs( + client: httpx.AsyncClient, + created: dict[str, Any], +) -> tuple[list[dict[str, Any]], list[UnresolvedReference]]: + """Resolve the run-id soft references to manifest model-run references. + + A run that resolves (2xx) is referenced (uri + registry hash + a live + ``artifact_verified`` from the verify endpoint when both uri and hash are + present). A non-2xx run is an ``unresolved_references`` entry. A failed + artifact *verify* on a resolved run is NOT unresolved -- the run resolved; + only its artifact check did not (``artifact_verified=None``). 
+ """ + model_runs: list[dict[str, Any]] = [] + unresolved: list[UnresolvedReference] = [] + for key in _RUN_KEYS: + run_id = created.get(key) + if not isinstance(run_id, str) or not run_id: + continue + resp = await client.get(f"/registry/runs/{run_id}") + if resp.status_code != 200: + reason = f"HTTP {resp.status_code}" + unresolved.append(UnresolvedReference(key=key, ref_id=run_id, reason=reason)) + logger.warning( + "demo.export_unresolved_reference", key=key, ref_id=run_id, reason=reason + ) + continue + body = resp.json() + artifact_uri = body.get("artifact_uri") + artifact_hash = body.get("artifact_hash") + verified: bool | None = None + if artifact_uri and artifact_hash: + vresp = await client.get(f"/registry/runs/{run_id}/verify") + if vresp.status_code == 200: + raw = vresp.json().get("verified") + verified = raw if isinstance(raw, bool) else None + model_runs.append( + { + "key": key, + "run_id": run_id, + "model_type": body.get("model_type"), + "status": body.get("status"), + "artifact_uri": artifact_uri, + "artifact_hash": artifact_hash, + "artifact_verified": verified, + "metrics": body.get("metrics"), + } + ) + return model_runs, unresolved + + +async def _resolve_scenario_plans( + client: httpx.AsyncClient, + created: dict[str, Any], + plans_dir: Path, +) -> tuple[list[dict[str, Any]], list[tuple[str, int]], list[UnresolvedReference]]: + """Write a JSON snapshot per resolvable scenario plan; report dangles. + + Returns ``(manifest plan entries, written (relpath, size) pairs, + unresolved)``. The plan body is stored verbatim -- its ``run_id`` is the + forecast ARTIFACT key, not a registry ``model_run.run_id`` (different id + spaces; memory anchor ``scenario-run-id-vs-registry-run-id``), so it is + never joined against the registry. 
+ """ + plan_entries: list[dict[str, Any]] = [] + file_entries: list[tuple[str, int]] = [] + unresolved: list[UnresolvedReference] = [] + # JSONB types this list[str], but nothing enforces it at runtime -- treat + # entries as untrusted (mirrors link_health's created_objects guards). + raw_plan_ids = created.get("scenario_plan_ids") + plan_ids: list[Any] = raw_plan_ids if isinstance(raw_plan_ids, list) else [] + for scenario_id in plan_ids: + if not isinstance(scenario_id, str) or not scenario_id: + continue + resp = await client.get(f"/scenarios/{scenario_id}") + if resp.status_code != 200: + reason = f"HTTP {resp.status_code}" + unresolved.append( + UnresolvedReference(key="scenario_plan_ids", ref_id=scenario_id, reason=reason) + ) + logger.warning( + "demo.export_unresolved_reference", + key="scenario_plan_ids", + ref_id=scenario_id, + reason=reason, + ) + continue + body = resp.json() + rel = f"{_PLANS_DIR}/{scenario_id}.json" + size = _write_json(plans_dir / f"{scenario_id}.json", body) + plan_entries.append( + { + "scenario_id": scenario_id, + "file": rel, + "name": body.get("name") if isinstance(body, dict) else None, + } + ) + file_entries.append((rel, size)) + return plan_entries, file_entries, unresolved + + +def _validate_checksums(bundle_dir: Path) -> bool: + """Re-read ``checksums.sha256``, recompute every listed hash, compare. + + Returns ``False`` (the caller logs it) rather than raising on any mismatch + or parse issue -- a failed validation is reported honestly in the response. + """ + checksums_path = bundle_dir / _CHECKSUMS + try: + content = checksums_path.read_text(encoding="utf-8") + except OSError: + return False + for line in content.splitlines(): + if not line.strip(): + continue + # sha256sum format: " " (two-space separator). 
+ expected, _, rel = line.partition(" ") + if not rel: + return False + target = bundle_dir / rel + try: + actual = _compute_sha256(target) + except OSError: + return False + if actual != expected: + return False + return True + + +async def export_workspace( + db: AsyncSession, + app: FastAPI, + workspace_id: str, + *, + export_root: str | Path | None = None, +) -> WorkspaceExportResult: + """Export a saved workspace to a checksum-validated bundle on disk. + + Re-queries the row via :func:`workspace.get_workspace` so the function is + independently callable/testable; the route's 404/409 pre-guard fires before + any export work begins. + + Args: + db: Caller-owned async session (used only to load the row). + app: The live FastAPI app for in-process soft-reference resolution. + workspace_id: External id of the workspace to export. + export_root: Override the configured ``showcase_export_root`` (tests). + + Returns: + The export result (bundle path, file inventory, counts, unresolved + references, checksum-validation flag). + + Raises: + NotFoundError: When no workspace matches ``workspace_id`` (404). + ConflictError: When the workspace run is still ``running`` (409). + ForecastLabError: When the bundle cannot be written to disk (500). + """ + row = await workspace.get_workspace(db, workspace_id) + if row is None: + raise NotFoundError(message=f"Workspace not found: {workspace_id}") + if row.status == WORKSPACE_STATUS_RUNNING: + raise ConflictError( + "Cannot export while the run is still in progress; retry after the run settles." + ) + + snapshot = WorkspaceDetailResponse.model_validate(row).model_dump(mode="json") + created = row.created_objects or {} + + root = Path(export_root or get_settings().showcase_export_root).resolve() + root.mkdir(parents=True, exist_ok=True) + # GUARD before any rmtree / mkdir / write -- the rmtree target is the + # guarded resolution only, never a raw request value. 
+ bundle_dir = _resolve_bundle_dir(root, row.workspace_id) + + exported_at = datetime.now(UTC) + try: + if bundle_dir.exists(): + shutil.rmtree(bundle_dir) # Decision 2 -- deterministic overwrite. + plans_dir = bundle_dir / _PLANS_DIR + plans_dir.mkdir(parents=True) + + async with _open_client(app) as client: + model_runs, run_unresolved = await _resolve_model_runs(client, created) + plan_entries, plan_files, plan_unresolved = await _resolve_scenario_plans( + client, created, plans_dir + ) + unresolved = [*run_unresolved, *plan_unresolved] + + manifest = { + "bundle_format_version": BUNDLE_FORMAT_VERSION, + "exported_at": exported_at.isoformat(), + "workspace": snapshot, + "model_runs": model_runs, + "scenario_plans": plan_entries, + "unresolved_references": [ref.model_dump() for ref in unresolved], + # Paths + sizes so a consumer can sanity-check without parsing the + # hash file; hashes live ONLY in checksums.sha256 (Decision 7). + "files": [{"path": rel, "size_bytes": size} for rel, size in plan_files], + } + _write_json(bundle_dir / _MANIFEST, manifest) + + # checksums.sha256 -- every bundle file except itself, sorted, two-space + # sha256sum format, bundle-relative POSIX paths. 
+ checksum_lines = [ + f"{_compute_sha256(path)} {path.relative_to(bundle_dir).as_posix()}" + for path in sorted(bundle_dir.rglob("*")) + if path.is_file() and path.name != _CHECKSUMS + ] + (bundle_dir / _CHECKSUMS).write_text("\n".join(checksum_lines) + "\n", encoding="utf-8") + except OSError as exc: + logger.warning( + "demo.workspace_export_failed", + workspace_id=row.workspace_id, + error=str(exc), + error_type=type(exc).__name__, + ) + raise ForecastLabError( + message=f"Export bundle write failed: {exc}", status_code=500 + ) from exc + + validated = _validate_checksums(bundle_dir) + files = [ + ExportFileEntry( + path=path.relative_to(bundle_dir).as_posix(), + sha256=_compute_sha256(path), + size_bytes=path.stat().st_size, + ) + for path in sorted(bundle_dir.rglob("*")) + if path.is_file() + ] + + logger.info( + "demo.workspace_exported", + workspace_id=row.workspace_id, + files=len(files), + unresolved=len(unresolved), + validated=validated, + ) + return WorkspaceExportResult( + workspace_id=row.workspace_id, + bundle_path=f"{_root_relative(root)}/{row.workspace_id}", + bundle_format_version=BUNDLE_FORMAT_VERSION, + exported_at=exported_at, + files=files, + scenario_plans_exported=len(plan_entries), + model_runs_referenced=len(model_runs), + unresolved_references=unresolved, + validated=validated, + ) diff --git a/app/features/demo/routes.py b/app/features/demo/routes.py index dc9d6b89..0f27e458 100644 --- a/app/features/demo/routes.py +++ b/app/features/demo/routes.py @@ -14,6 +14,11 @@ update (rename / notes / tags / archive / pin); ``status`` is not patchable. - ``DELETE /demo/workspaces/{workspace_id}`` -- delete the workspace METADATA row only; the run's created objects are soft references and stay untouched. 
+- ``POST /demo/workspaces/{workspace_id}/export`` -- E6 (#412): write a + checksum-validated bundle (manifest + scenario-plan snapshots + checksums) + under ``artifacts/showcase//``; soft references resolve + in-process, model artifacts are referenced (never copied), dangling refs are + reported, not fatal. The run/stream handlers obtain the live FastAPI app from ``request.app`` / ``websocket.app`` and pass it into the pipeline -- the slice never imports @@ -40,7 +45,8 @@ from app.core.database import get_db from app.core.exceptions import ConflictError, NotFoundError from app.core.logging import get_logger -from app.features.demo import hitl, link_health, service, workspace +from app.features.demo import export, hitl, link_health, service, workspace +from app.features.demo.models import WORKSPACE_STATUS_RUNNING from app.features.demo.schemas import ( ApprovalEventItem, ApprovalEventsResponse, @@ -49,6 +55,7 @@ HitlDecisionRequest, StepEvent, WorkspaceDetailResponse, + WorkspaceExportResult, WorkspaceHealthResponse, WorkspaceListItem, WorkspaceListResponse, @@ -348,6 +355,52 @@ async def delete_showcase_workspace( raise NotFoundError(message=f"Workspace not found: {workspace_id}") +@router.post( + "/workspaces/{workspace_id}/export", + response_model=WorkspaceExportResult, + summary="Export a saved showcase workspace as a checksum-validated bundle", + description=( + "Write artifacts/showcase// -- a versioned manifest.json, " + "one JSON per resolvable scenario plan, and a sha256sum-compatible " + "checksums.sha256 -- then re-verify every checksum before returning. " + "Model artifacts are referenced (uri + registry hash + live verify), " + "never copied. Dangling soft references are reported in " + "`unresolved_references` (the export still returns 200). 404 when the " + "workspace is missing; 409 while its run is still in progress; " + "re-export overwrites the bundle." 
+ ), +) +async def export_showcase_workspace( + workspace_id: str, + request: Request, + db: AsyncSession = Depends(get_db), +) -> WorkspaceExportResult: + """Export a saved showcase workspace to a checksum-validated bundle (E6, #412). + + Args: + workspace_id: External identifier of the workspace. + request: The incoming request (used to obtain the live FastAPI app for + the in-process soft-reference resolution GETs). + db: Async database session from dependency. + + Returns: + The export result -- bundle path, file inventory with hashes, counts, + unresolved references, and the checksum-validation flag. + + Raises: + NotFoundError: When no workspace matches ``workspace_id`` (404). + ConflictError: When the workspace run is still in progress (409). + """ + row = await workspace.get_workspace(db, workspace_id) + if row is None: + raise NotFoundError(message=f"Workspace not found: {workspace_id}") + if row.status == WORKSPACE_STATUS_RUNNING: + raise ConflictError( + "Cannot export while the run is still in progress; retry after the run settles." + ) + return await export.export_workspace(db, request.app, workspace_id) + + @router.websocket("/stream") async def stream_demo_pipeline(websocket: WebSocket) -> None: """Stream one StepEvent per pipeline step over a WebSocket. diff --git a/app/features/demo/schemas.py b/app/features/demo/schemas.py index 70b1e8c3..353a7eac 100644 --- a/app/features/demo/schemas.py +++ b/app/features/demo/schemas.py @@ -567,3 +567,60 @@ class ApprovalEventsResponse(BaseModel): ..., description="Flattened approval events, newest workspace first; empty when none." 
) total: int = Field(..., ge=0, description="Number of flattened entries returned (capped).") + + +# ============================================================================= +# E6 (#412) -- workspace export bundle (POST /demo/workspaces/{id}/export) +# ============================================================================= + +# Bumped on any manifest-shape change so bundle consumers can branch on it. +BUNDLE_FORMAT_VERSION = 1 + + +class ExportFileEntry(BaseModel): + """One file inside an exported workspace bundle (E6, issue #412). + + Response model -- plain ``BaseModel``, NOT ``ConfigDict(strict=True)``: + strict mode is a request-body policy and this endpoint has no body. + """ + + path: str = Field(..., description="Bundle-relative POSIX path.") + sha256: str = Field(..., description="Hex SHA-256 of the file contents.") + size_bytes: int = Field(..., ge=0, description="File size in bytes.") + + +class UnresolvedReference(BaseModel): + """A soft reference that could not be resolved during export (E6, #412).""" + + key: str = Field(..., description="created_objects key (e.g. 'scenario_plan_ids').") + ref_id: str = Field(..., description="The id that failed to resolve.") + reason: str = Field(..., description="Short cause, e.g. 'HTTP 404'.") + + +class WorkspaceExportResult(BaseModel): + """Result of ``POST /demo/workspaces/{workspace_id}/export`` (E6, #412).""" + + workspace_id: str = Field(..., description="The exported workspace's id.") + bundle_path: str = Field( + ..., description="Repo-root-relative bundle dir, e.g. 'artifacts/showcase/'." + ) + bundle_format_version: int = Field(..., description="Manifest schema version.") + exported_at: datetime = Field(..., description="When the export ran (UTC).") + # The COMPLETE on-disk inventory, INCLUDING checksums.sha256 itself (with + # its own computed hash) -- it just never lists itself inside the checksum + # file; the response is where that hash lives. 
+ files: list[ExportFileEntry] = Field( + ..., description="Every file in the bundle with its hash and size." + ) + scenario_plans_exported: int = Field( + ..., ge=0, description="Scenario plans written to scenario_plans/." + ) + model_runs_referenced: int = Field( + ..., ge=0, description="Model runs referenced in the manifest (not copied)." + ) + unresolved_references: list[UnresolvedReference] = Field( + ..., description="Soft references that could not be resolved (export still succeeded)." + ) + validated: bool = Field( + ..., description="True when checksums.sha256 re-read + recomputed clean." + ) diff --git a/app/features/demo/tests/test_export.py b/app/features/demo/tests/test_export.py new file mode 100644 index 00000000..2b1c950f --- /dev/null +++ b/app/features/demo/tests/test_export.py @@ -0,0 +1,362 @@ +"""Tests for the workspace export-bundle writer (E6, issue #412). + +Unit tests (no DB, no app) cover the disk primitives -- chunked sha256, the +traversal guard (must raise BEFORE any I/O), deterministic JSON -- and the +manifest assembly via a mocked in-process client. Integration tests run the +real endpoint against docker-compose Postgres with a ``tmp_path`` export root. +""" + +from __future__ import annotations + +import datetime as _dt +import hashlib +import json +from pathlib import Path +from types import SimpleNamespace +from typing import TYPE_CHECKING, Any, cast + +import httpx +import pytest + +from app.features.demo import export, workspace +from app.features.demo.models import ShowcaseWorkspace +from app.features.demo.schemas import WorkspaceExportResult + +if TYPE_CHECKING: + from fastapi import FastAPI + from sqlalchemy.ext.asyncio import AsyncSession + +# The direct-call unit tests monkeypatch get_workspace + _open_client, so the +# real session / app are never touched -- typed None sentinels keep the strict +# signature satisfied without a DB or app instance. 
+_NO_DB = cast("AsyncSession", None) +_NO_APP = cast("FastAPI", None) + +# ============================================================================= +# Unit -- disk primitives (no DB, no app) +# ============================================================================= + + +def test_compute_sha256_matches_whole_file(tmp_path: Path) -> None: + """The chunked digest equals a whole-file hashlib hash.""" + target = tmp_path / "blob" + target.write_bytes(b"y" * 25_000) # > one 8192-byte chunk + assert export._compute_sha256(target) == hashlib.sha256(target.read_bytes()).hexdigest() + + +@pytest.mark.parametrize("evil", ["../escape", "../../etc/passwd", "/etc/passwd"]) +def test_resolve_bundle_dir_rejects_traversal_before_io(tmp_path: Path, evil: str) -> None: + """A traversal-shaped id raises ValueError and writes nothing.""" + root = tmp_path.resolve() + with pytest.raises(ValueError): + export._resolve_bundle_dir(root, evil) + # The guard does pure path math -- no directory is created. 
+ assert list(root.iterdir()) == [] + + +def test_resolve_bundle_dir_accepts_uuid_hex(tmp_path: Path) -> None: + """A normal uuid-hex id resolves directly under the root.""" + root = tmp_path.resolve() + workspace_id = "a" * 32 + resolved = export._resolve_bundle_dir(root, workspace_id) + assert resolved == root / workspace_id + + +def test_write_json_is_deterministic(tmp_path: Path) -> None: + """Two dumps of key-shuffled payloads produce identical bytes.""" + a = tmp_path / "a.json" + b = tmp_path / "b.json" + size_a = export._write_json(a, {"z": 1, "a": 2, "m": {"y": 1, "x": 2}}) + size_b = export._write_json(b, {"a": 2, "m": {"x": 2, "y": 1}, "z": 1}) + assert a.read_bytes() == b.read_bytes() + assert size_a == size_b == len(a.read_bytes()) + assert a.read_text().endswith("\n") + + +def test_validate_checksums_round_trip(tmp_path: Path) -> None: + """A hand-built bundle validates; a tampered file flips validated False.""" + bundle = tmp_path / "wsid" + bundle.mkdir() + payload = bundle / "manifest.json" + payload.write_text("hello\n", encoding="utf-8") + digest = export._compute_sha256(payload) + (bundle / "checksums.sha256").write_text(f"{digest} manifest.json\n", encoding="utf-8") + assert export._validate_checksums(bundle) is True + + payload.write_text("tampered\n", encoding="utf-8") + assert export._validate_checksums(bundle) is False + + +# ============================================================================= +# Unit -- manifest assembly via a mocked in-process client +# ============================================================================= + + +def _row(**overrides: object) -> SimpleNamespace: + """An ORM-shaped ShowcaseWorkspace stand-in (mirrors test_routes._orm_like_row).""" + base: dict[str, object] = { + "workspace_id": "a" * 32, + "name": "e6-export", + "status": "completed", + "seed": 42, + "scenario": "showcase_rich", + "reset": False, + "skip_seed": True, + "store_id": 3, + "product_id": 7, + "date_start": _dt.date(2026, 1, 1), + 
"date_end": _dt.date(2026, 3, 31), + "created_objects": {}, + "result_summary": {"winner_model_type": "naive"}, + "created_at": _dt.datetime(2026, 6, 1, 12, 0, tzinfo=_dt.UTC), + } + base.update(overrides) + return SimpleNamespace(**base) + + +def _mock_client() -> httpx.AsyncClient: + """In-process client returning canned registry / scenario bodies + one 404.""" + + def handler(request: httpx.Request) -> httpx.Response: + path = request.url.path + if path == "/registry/runs/run-win": + return httpx.Response( + 200, + json={ + "run_id": "run-win", + "model_type": "naive", + "status": "success", + "artifact_uri": "demo/naive-model_abc.joblib", + "artifact_hash": "deadbeef", + "metrics": {"wape": 0.12}, + }, + ) + if path == "/registry/runs/run-win/verify": + return httpx.Response(200, json={"verified": True}) + if path == "/registry/runs/run-gone": + return httpx.Response(404, json={"detail": "run not found"}) + if path == "/scenarios/plan-1": + return httpx.Response( + 200, + json={ + "scenario_id": "plan-1", + "name": "Price cut 15%", + "run_id": "model_xyz", + "assumptions": {"price_change_pct": -0.15}, + "comparison": {}, + "tags": ["showcase"], + }, + ) + if path == "/scenarios/dangling": + return httpx.Response(404, json={"detail": "scenario not found"}) + return httpx.Response(404, json={"detail": f"unmatched {path}"}) + + return httpx.AsyncClient( + transport=httpx.MockTransport(handler), base_url="http://demo.internal" + ) + + +async def test_export_assembles_manifest_and_reports_dangles( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """A mixed run resolves one run + one plan and reports two dangles.""" + row = _row( + created_objects={ + "winning_run_id": "run-win", + "v2_run_id": "run-gone", # 404 -> unresolved + "scenario_plan_ids": ["plan-1", "dangling"], + } + ) + + async def fake_get(_db: object, _workspace_id: str) -> SimpleNamespace: + return row + + monkeypatch.setattr(workspace, "get_workspace", fake_get) + 
monkeypatch.setattr(export, "_open_client", lambda _app: _mock_client()) + + result: WorkspaceExportResult = await export.export_workspace( + db=_NO_DB, app=_NO_APP, workspace_id="a" * 32, export_root=tmp_path + ) + + assert result.validated is True + assert result.model_runs_referenced == 1 + assert result.scenario_plans_exported == 1 + # Two dangles: the v2 run (404) and the dangling scenario plan (404). + keys = sorted((ref.key, ref.ref_id) for ref in result.unresolved_references) + assert keys == [("scenario_plan_ids", "dangling"), ("v2_run_id", "run-gone")] + + bundle = tmp_path / ("a" * 32) + manifest = json.loads((bundle / "manifest.json").read_text()) + assert manifest["bundle_format_version"] == 1 + assert manifest["workspace"]["workspace_id"] == "a" * 32 + assert manifest["model_runs"][0]["run_id"] == "run-win" + assert manifest["model_runs"][0]["artifact_verified"] is True + assert manifest["scenario_plans"][0]["scenario_id"] == "plan-1" + # The plan body is stored verbatim under scenario_plans/. + plan = json.loads((bundle / "scenario_plans" / "plan-1.json").read_text()) + assert plan["name"] == "Price cut 15%" + # checksums.sha256 covers every file except itself, two-space separator. + lines = (bundle / "checksums.sha256").read_text().splitlines() + covered = {line.split(" ", 1)[1] for line in lines} + assert "manifest.json" in covered + assert "scenario_plans/plan-1.json" in covered + assert "checksums.sha256" not in covered + # The response inventory DOES include the checksum file itself. 
+ assert any(entry.path == "checksums.sha256" for entry in result.files) + + +async def test_export_overwrites_stale_bundle( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """A pre-existing stale file in the bundle dir is gone after re-export.""" + row = _row(created_objects={"winning_run_id": "run-win"}) + + async def fake_get(_db: object, _workspace_id: str) -> SimpleNamespace: + return row + + monkeypatch.setattr(workspace, "get_workspace", fake_get) + monkeypatch.setattr(export, "_open_client", lambda _app: _mock_client()) + + bundle = tmp_path / ("a" * 32) + (bundle / "scenario_plans").mkdir(parents=True) + stale = bundle / "scenario_plans" / "stale.json" + stale.write_text("{}", encoding="utf-8") + + await export.export_workspace( + db=_NO_DB, app=_NO_APP, workspace_id="a" * 32, export_root=tmp_path + ) + + assert not stale.exists() + assert (bundle / "manifest.json").exists() + + +async def test_export_empty_created_objects_minimal_bundle( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """An empty-references run still exports a valid manifest + checksums.""" + row = _row(created_objects={}) + + async def fake_get(_db: object, _workspace_id: str) -> SimpleNamespace: + return row + + monkeypatch.setattr(workspace, "get_workspace", fake_get) + monkeypatch.setattr(export, "_open_client", lambda _app: _mock_client()) + + result = await export.export_workspace( + db=_NO_DB, app=_NO_APP, workspace_id="a" * 32, export_root=tmp_path + ) + assert result.validated is True + assert result.model_runs_referenced == 0 + assert result.scenario_plans_exported == 0 + assert result.unresolved_references == [] + paths = {entry.path for entry in result.files} + assert paths == {"manifest.json", "checksums.sha256"} + + +async def test_export_404_on_missing_workspace(monkeypatch: pytest.MonkeyPatch) -> None: + """A missing row raises NotFoundError before any disk work.""" + from app.core.exceptions import NotFoundError + + async def fake_get(_db: 
object, _workspace_id: str) -> None: + return None + + monkeypatch.setattr(workspace, "get_workspace", fake_get) + with pytest.raises(NotFoundError): + await export.export_workspace(db=_NO_DB, app=_NO_APP, workspace_id="z" * 32) + + +async def test_export_409_on_running_workspace(monkeypatch: pytest.MonkeyPatch) -> None: + """A running row raises ConflictError (references not yet settled).""" + from app.core.exceptions import ConflictError + + async def fake_get(_db: object, _workspace_id: str) -> SimpleNamespace: + return _row(status="running") + + monkeypatch.setattr(workspace, "get_workspace", fake_get) + with pytest.raises(ConflictError): + await export.export_workspace(db=_NO_DB, app=_NO_APP, workspace_id="a" * 32) + + +# ============================================================================= +# Integration -- real endpoint, real Postgres, tmp_path export root +# ============================================================================= + + +@pytest.mark.integration +async def test_export_endpoint_round_trip( + client: httpx.AsyncClient, + db_session: Any, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """A completed row exports; checksums verify; a dangling plan is reported.""" + from app.core.config import get_settings + + workspace_id = "e6" + "0" * 30 + db_session.add( + ShowcaseWorkspace( + workspace_id=workspace_id, + name="e6-integration", + seed=42, + scenario="showcase_rich", + reset=False, + skip_seed=True, + status="completed", + created_objects={"scenario_plan_ids": ["dangling-plan-1"]}, + ) + ) + await db_session.commit() + + # Point the export root at tmp_path without disturbing the cached settings. 
+ patched = get_settings().model_copy(update={"showcase_export_root": str(tmp_path)}) + monkeypatch.setattr(export, "get_settings", lambda: patched) + + resp = await client.post(f"/demo/workspaces/{workspace_id}/export") + assert resp.status_code == 200 + body = resp.json() + assert body["validated"] is True + assert body["bundle_path"].endswith(workspace_id) + # The dangling scenario plan is reported, not fatal. + assert any(ref["ref_id"] == "dangling-plan-1" for ref in body["unresolved_references"]) + + bundle = tmp_path / workspace_id + assert (bundle / "manifest.json").exists() + # Independently re-verify every checksum line (don't trust validated alone). + for line in (bundle / "checksums.sha256").read_text().splitlines(): + if not line.strip(): + continue + expected, _, rel = line.partition(" ") + actual = hashlib.sha256((bundle / rel).read_bytes()).hexdigest() + assert actual == expected, rel + + # Re-export overwrites: plant a stale file, re-export, assert it's gone. + stale = bundle / "scenario_plans" / "stale.json" + stale.write_text("{}", encoding="utf-8") + resp2 = await client.post(f"/demo/workspaces/{workspace_id}/export") + assert resp2.status_code == 200 + assert not stale.exists() + + +@pytest.mark.integration +async def test_export_endpoint_409_on_running( + client: httpx.AsyncClient, + db_session: Any, +) -> None: + """The endpoint rejects a still-running workspace with 409 problem+json.""" + workspace_id = "e6run" + "0" * 27 + db_session.add( + ShowcaseWorkspace( + workspace_id=workspace_id, + name="e6-running", + seed=1, + scenario="demo_minimal", + reset=False, + skip_seed=True, + status="running", + ) + ) + await db_session.commit() + + resp = await client.post(f"/demo/workspaces/{workspace_id}/export") + assert resp.status_code == 409 + assert resp.headers["content-type"].startswith("application/problem+json") diff --git a/app/features/demo/tests/test_routes.py b/app/features/demo/tests/test_routes.py index 7b8858ba..1b00e6a9 100644 --- 
a/app/features/demo/tests/test_routes.py +++ b/app/features/demo/tests/test_routes.py @@ -14,8 +14,14 @@ from fastapi.testclient import TestClient from sqlalchemy.ext.asyncio import AsyncSession -from app.features.demo import service, workspace -from app.features.demo.schemas import DemoRunRequest, DemoRunResult, StepEvent +from app.features.demo import export, service, workspace +from app.features.demo.schemas import ( + DemoRunRequest, + DemoRunResult, + ExportFileEntry, + StepEvent, + WorkspaceExportResult, +) from app.main import app @@ -481,6 +487,74 @@ async def fake_delete(_db, _workspace_id: str) -> bool: assert "Workspace not found" in resp.json()["detail"] +# ============================================================================= +# E6 (#412) -- POST /demo/workspaces/{workspace_id}/export (unit) +# ============================================================================= + + +async def test_export_workspace_404(client, monkeypatch): + """An unknown workspace_id is a 404 problem+json (export never runs).""" + + async def fake_get(_db, _workspace_id: str) -> None: + return None + + monkeypatch.setattr(workspace, "get_workspace", fake_get) + + resp = await client.post("/demo/workspaces/" + "0" * 32 + "/export") + assert resp.status_code == 404 + assert resp.headers["content-type"].startswith("application/problem+json") + assert "Workspace not found" in resp.json()["detail"] + + +async def test_export_workspace_409_when_running(client, monkeypatch): + """A still-running workspace is a 409 problem+json (refs not settled).""" + + async def fake_get(_db, workspace_id: str) -> SimpleNamespace: + return _orm_like_row(workspace_id=workspace_id, status="running") + + monkeypatch.setattr(workspace, "get_workspace", fake_get) + + resp = await client.post("/demo/workspaces/" + "a" * 32 + "/export") + assert resp.status_code == 409 + assert resp.headers["content-type"].startswith("application/problem+json") + + +async def 
test_export_workspace_200_happy_path(client, monkeypatch): + """A completed workspace returns the export result the writer produced.""" + + async def fake_get(_db, workspace_id: str) -> SimpleNamespace: + return _orm_like_row(workspace_id=workspace_id, status="completed") + + canned = WorkspaceExportResult( + workspace_id="a" * 32, + bundle_path="artifacts/showcase/" + "a" * 32, + bundle_format_version=1, + exported_at=_dt.datetime(2026, 6, 12, 14, 0, tzinfo=_dt.UTC), + files=[ + ExportFileEntry(path="manifest.json", sha256="0" * 64, size_bytes=128), + ExportFileEntry(path="checksums.sha256", sha256="1" * 64, size_bytes=80), + ], + scenario_plans_exported=0, + model_runs_referenced=1, + unresolved_references=[], + validated=True, + ) + + async def fake_export(_db, _app, workspace_id: str) -> WorkspaceExportResult: + return canned + + monkeypatch.setattr(workspace, "get_workspace", fake_get) + monkeypatch.setattr(export, "export_workspace", fake_export) + + resp = await client.post("/demo/workspaces/" + "a" * 32 + "/export") + assert resp.status_code == 200 + body = resp.json() + assert body["validated"] is True + assert body["bundle_format_version"] == 1 + assert body["model_runs_referenced"] == 1 + assert len(body["files"]) == 2 + + # ============================================================================= # E1 (#407) -- PATCH /demo/workspaces/{workspace_id} (unit) # ============================================================================= From 93282edcc59d9e78ccad248d12f19740ac19fd26 Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Sat, 13 Jun 2026 09:44:37 +0200 Subject: [PATCH 2/3] feat(ui): add export button to saved-workspaces panel (#412) Per-row Export button (between Replay and the actions menu) calls POST /demo/workspaces/{id}/export via a new useExportWorkspace mutation. 
Non-destructive, so no confirmation dialog; success toast shows the bundle path, file count, checksum state, and any unresolved-reference count; failure surfaces the problem-details message. Self-contained block to survive an E2 row restyle. Adds WorkspaceExportResult / ExportFileEntry / UnresolvedReference types. --- .../components/demo/WorkspacePanel.test.tsx | 87 ++++++++++++++++++- .../src/components/demo/WorkspacePanel.tsx | 38 +++++++- frontend/src/hooks/use-workspaces.ts | 13 +++ frontend/src/types/api.ts | 29 +++++++ 4 files changed, 165 insertions(+), 2 deletions(-) diff --git a/frontend/src/components/demo/WorkspacePanel.test.tsx b/frontend/src/components/demo/WorkspacePanel.test.tsx index f1fa2d25..dffb3ce0 100644 --- a/frontend/src/components/demo/WorkspacePanel.test.tsx +++ b/frontend/src/components/demo/WorkspacePanel.test.tsx @@ -71,6 +71,11 @@ let mockPatchResult: { mutate: ReturnType; isPending: boolean } = isPending: false, } +let mockExportResult: { mutate: ReturnType; isPending: boolean } = { + mutate: vi.fn(), + isPending: false, +} + const mockNavigate = vi.fn() vi.mock('@/hooks/use-workspaces', () => ({ @@ -82,6 +87,7 @@ vi.mock('@/hooks/use-workspaces', () => ({ useWorkspace: () => ({ data: undefined, isSuccess: false, isError: false }), useDeleteWorkspace: () => mockDeleteResult, usePatchWorkspace: () => mockPatchResult, + useExportWorkspace: () => mockExportResult, })) vi.mock('react-router-dom', async (importOriginal) => { @@ -97,6 +103,7 @@ beforeEach(() => { lastListParams = undefined mockDeleteResult = { mutate: vi.fn(), mutateAsync: vi.fn(), isPending: false } mockPatchResult = { mutate: vi.fn(), isPending: false } + mockExportResult = { mutate: vi.fn(), isPending: false } }) function renderPanel(props: Partial[0]> = {}) { @@ -197,7 +204,7 @@ describe('WorkspacePanel', () => { it('disables row actions while a run is in flight', () => { mockResponse = { data: { workspaces: [baseItem], total: 1 }, isLoading: false } renderPanel({ 
isRunning: true }) - const labels = ['Load', 'Replay'] + const labels = ['Load', 'Replay', 'Export'] for (const label of labels) { const button = screen .getAllByRole('button') @@ -207,6 +214,84 @@ describe('WorkspacePanel', () => { }) }) +describe('WorkspacePanel — E6 export', () => { + function findExportButton(container: HTMLElement) { + return Array.from(container.querySelectorAll('button')).find((b) => + (b.textContent ?? '').includes('Export') + )! + } + + it('renders an Export button per row', () => { + mockResponse = { data: { workspaces: [baseItem], total: 1 }, isLoading: false } + const { container } = renderPanel() + expect(findExportButton(container)).toBeTruthy() + }) + + it('fires the export mutation with the row id and toasts the bundle path', () => { + mockResponse = { data: { workspaces: [baseItem], total: 1 }, isLoading: false } + const { container } = renderPanel() + fireEvent.click(findExportButton(container)) + + expect(mockExportResult.mutate).toHaveBeenCalledTimes(1) + const [workspaceId, options] = mockExportResult.mutate.mock.calls[0] as [ + string, + { onSuccess: (r: unknown) => void; onError: (error: unknown) => void }, + ] + expect(workspaceId).toBe(baseItem.workspace_id) + + options.onSuccess({ + workspace_id: baseItem.workspace_id, + bundle_path: `artifacts/showcase/${baseItem.workspace_id}`, + bundle_format_version: 1, + exported_at: '2026-06-12T14:00:00Z', + files: [ + { path: 'manifest.json', sha256: 'a', size_bytes: 1 }, + { path: 'checksums.sha256', sha256: 'b', size_bytes: 1 }, + ], + scenario_plans_exported: 0, + model_runs_referenced: 0, + unresolved_references: [], + validated: true, + }) + expect(toast.success).toHaveBeenCalledWith(expect.stringContaining('Bundle written to')) + expect(toast.success).toHaveBeenCalledWith(expect.stringContaining('checksums verified')) + }) + + it('notes dangling references in the success toast', () => { + mockResponse = { data: { workspaces: [baseItem], total: 1 }, isLoading: false } + const { 
container } = renderPanel() + fireEvent.click(findExportButton(container)) + const [, options] = mockExportResult.mutate.mock.calls[0] as [ + string, + { onSuccess: (r: unknown) => void; onError: (error: unknown) => void }, + ] + options.onSuccess({ + workspace_id: baseItem.workspace_id, + bundle_path: `artifacts/showcase/${baseItem.workspace_id}`, + bundle_format_version: 1, + exported_at: '2026-06-12T14:00:00Z', + files: [{ path: 'manifest.json', sha256: 'a', size_bytes: 1 }], + scenario_plans_exported: 0, + model_runs_referenced: 0, + unresolved_references: [{ key: 'scenario_plan_ids', ref_id: 'gone', reason: 'HTTP 404' }], + validated: true, + }) + expect(toast.success).toHaveBeenCalledWith(expect.stringContaining('1 unresolved reference')) + }) + + it('surfaces an export failure via the error toast', () => { + mockResponse = { data: { workspaces: [baseItem], total: 1 }, isLoading: false } + const { container } = renderPanel() + fireEvent.click(findExportButton(container)) + const [, options] = mockExportResult.mutate.mock.calls[0] as [ + string, + { onSuccess: (r: unknown) => void; onError: (error: unknown) => void }, + ] + options.onError(new ApiError('Export bundle write failed: disk full', 500)) + expect(toast.error).toHaveBeenCalledWith(expect.stringContaining('Export failed')) + }) +}) + describe('WorkspacePanel — E2 lifecycle badges + toolbar params', () => { it('renders pinned / archived / replay badges', () => { mockResponse = { diff --git a/frontend/src/components/demo/WorkspacePanel.tsx b/frontend/src/components/demo/WorkspacePanel.tsx index fe931421..b3cdcd89 100644 --- a/frontend/src/components/demo/WorkspacePanel.tsx +++ b/frontend/src/components/demo/WorkspacePanel.tsx @@ -24,6 +24,7 @@ import { useQueryClient } from '@tanstack/react-query' import { Archive, ArchiveRestore, + FileDown, FolderOpen, MoreHorizontal, Pencil, @@ -63,7 +64,12 @@ import { SelectTrigger, SelectValue, } from '@/components/ui/select' -import { useDeleteWorkspace, 
usePatchWorkspace, useWorkspaces } from '@/hooks/use-workspaces' +import { + useDeleteWorkspace, + useExportWorkspace, + usePatchWorkspace, + useWorkspaces, +} from '@/hooks/use-workspaces' import { ApiError, getErrorMessage } from '@/lib/api' import { ROUTES } from '@/lib/constants' import { cn } from '@/lib/utils' @@ -159,6 +165,7 @@ export function WorkspacePanel({ const queryClient = useQueryClient() const deleteWorkspace = useDeleteWorkspace() const patchWorkspace = usePatchWorkspace() + const exportWorkspace = useExportWorkspace() // ── dialogs + selection state ──────────────────────────────────────────── const [pendingDelete, setPendingDelete] = useState(null) @@ -231,6 +238,24 @@ export function WorkspacePanel({ ) } + // E6 (#412) — non-destructive export; no confirmation dialog. Success toast + // surfaces the bundle path + file count + checksum state + any dangling refs. + const handleExport = (ws: WorkspaceListItem) => { + exportWorkspace.mutate(ws.workspace_id, { + onSuccess: (result) => { + const fileCount = `${result.files.length} file${result.files.length === 1 ? '' : 's'}` + const checksums = result.validated ? 'verified' : 'FAILED' + const unresolved = result.unresolved_references.length + ? ` ${result.unresolved_references.length} unresolved reference(s).` + : '' + toast.success( + `Bundle written to ${result.bundle_path} — ${fileCount}, checksums ${checksums}.${unresolved}` + ) + }, + onError: (error) => toast.error(`Export failed: ${getErrorMessage(error)}`), + }) + } + const toggleSelected = (workspaceId: string) => { setSelected((prev) => { const next = new Set(prev) @@ -410,6 +435,17 @@ export function WorkspacePanel({ Replay + {/* E6 (#412) — export a checksum-validated bundle. Self- + contained block (survives an E2 row restyle / rebase). */} +