diff --git a/.env.example b/.env.example index 38ef75b4..62da51b9 100644 --- a/.env.example +++ b/.env.example @@ -29,6 +29,10 @@ FORECAST_ENABLE_LIGHTGBM=false # FORECAST_ENABLE_XGBOOST defaults to false (opt-in; install ml-xgboost extra) # FORECAST_ENABLE_RANDOM_FOREST=false # PRP-36 optional model — pure sklearn, no extra needed +# Demo / Showcase settings +# E6 (#412) — root for saved-workspace export bundles (manifest + checksums). +SHOWCASE_EXPORT_ROOT=./artifacts/showcase + # RAG Configuration # Embedding Provider: "openai" or "ollama" RAG_EMBEDDING_PROVIDER=openai diff --git a/app/core/config.py b/app/core/config.py index 033c77d9..6cedfd37 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -129,6 +129,10 @@ class Settings(BaseSettings): registry_artifact_root: str = "./artifacts/registry" registry_duplicate_policy: Literal["allow", "deny", "detect"] = "detect" + # Demo / Showcase + # E6 (#412) — root for workspace export bundles (manifest + checksums). + showcase_export_root: str = "./artifacts/showcase" + # Analytics analytics_max_rows: int = 10000 analytics_max_date_range_days: int = 730 diff --git a/app/features/demo/export.py b/app/features/demo/export.py new file mode 100644 index 00000000..fc5090f9 --- /dev/null +++ b/app/features/demo/export.py @@ -0,0 +1,387 @@ +"""Workspace export-bundle writer (E6, issue #412). + +Write a self-describing, checksum-validated bundle for a saved showcase +workspace under ``<showcase_export_root>/<workspace_id>/``:: + + manifest.json versioned snapshot + references + scenario_plans/<scenario_id>.json one per resolvable scenario plan + checksums.sha256 sha256sum-compatible; covers every other file + +Frozen decisions (see ``PRPs/PRP-showcase-completion-E6-export-bundle.md``): + +1. One directory per ``workspace_id`` (unique uuid4 hex), keyed off the DB row. +2. Re-export is a deterministic overwrite -- the existing guarded bundle + directory is removed and rewritten; ``exported_at`` records the moment. +3. 
Soft references resolve over the public HTTP surface IN-PROCESS + (``httpx.ASGITransport``) -- the demo slice may not import the registry / + scenarios slices (vertical-slice rule). Any non-2xx -> an + ``unresolved_references`` entry (or ``artifact_verified=None``), never a + failed export. +4. Model artifacts are REFERENCED (uri + registry hash + live verify result), + never copied. +5. Stateless -- export writes NOTHING to the database (no row, no story slot). +6. ``failed`` workspaces are exportable; ``running`` ones are a 409. +7. ``checksums.sha256`` excludes itself (a self-referencing checksum file is a + bootstrap hole) and uses the two-space ``sha256sum`` separator. + +The traversal guard (:func:`_resolve_bundle_dir`) and chunked SHA-256 +(:func:`_compute_sha256`) MIRROR ``app/features/registry/storage.py`` +(``LocalFSProvider._resolve_path`` / ``AbstractStorageProvider.compute_hash``) +-- the vertical-slice rule forbids importing that module, so the ~10-line +pattern is reimplemented here. Reference resolution uses the same in-process +``httpx`` client ``app/features/demo/link_health.py`` uses. 
+""" + +from __future__ import annotations + +import hashlib +import json +import shutil +from datetime import UTC, datetime +from pathlib import Path +from typing import TYPE_CHECKING, Any + +import httpx +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.config import get_settings +from app.core.exceptions import ConflictError, ForecastLabError, NotFoundError +from app.core.logging import get_logger +from app.features.demo import workspace +from app.features.demo.models import WORKSPACE_STATUS_RUNNING +from app.features.demo.schemas import ( + BUNDLE_FORMAT_VERSION, + ExportFileEntry, + UnresolvedReference, + WorkspaceDetailResponse, + WorkspaceExportResult, +) + +if TYPE_CHECKING: + from fastapi import FastAPI + +logger = get_logger(__name__) + +_MANIFEST = "manifest.json" +_CHECKSUMS = "checksums.sha256" +_PLANS_DIR = "scenario_plans" +# created_objects run-id keys whose registry runs the manifest references. +_RUN_KEYS = ("winning_run_id", "v2_run_id", "stale_alias_run_id") +# Generous in-process budget (no real network); a hung driven endpoint surfaces +# as a response under raise_app_exceptions=False, not a hang. +_EXPORT_TIMEOUT = httpx.Timeout(30.0, connect=5.0) + + +def _compute_sha256(path: Path) -> str: + """Chunked SHA-256 of a file (mirror ``registry/storage.py:compute_hash``).""" + digest = hashlib.sha256() + with path.open("rb") as handle: + for chunk in iter(lambda: handle.read(8192), b""): + digest.update(chunk) + return digest.hexdigest() + + +def _resolve_bundle_dir(root: Path, workspace_id: str) -> Path: + """Resolve ``<root>/<workspace_id>``, guarding against path traversal. + + Mirrors ``registry/storage.py:LocalFSProvider._resolve_path`` -- ``resolve()`` + then ``relative_to(root)``. A ``workspace_id`` that escapes the root raises + ``ValueError`` BEFORE any disk I/O. ``root`` must already be resolved. The + id always comes from the DB row (uuid4 hex), never raw from the URL path, so + this is defense in depth. 
+ """ + bundle_dir = (root / workspace_id).resolve() + try: + bundle_dir.relative_to(root) + except ValueError: + logger.warning( + "demo.export_path_traversal_attempt", + workspace_id=workspace_id, + root=str(root), + ) + raise + return bundle_dir + + +def _write_json(path: Path, payload: dict[str, Any]) -> int: + """Write deterministic JSON (sorted keys, 2-space indent, trailing newline). + + ``sort_keys`` makes the bytes order-independent so unchanged state + re-exports to identical bytes (stable checksums). Returns the byte size. + """ + data = (json.dumps(payload, indent=2, sort_keys=True) + "\n").encode("utf-8") + path.write_bytes(data) + return len(data) + + +def _root_relative(root: Path) -> str: + """Repo-root-relative POSIX string for display (no absolute-path leak).""" + try: + return root.relative_to(Path.cwd()).as_posix() + except ValueError: + return root.as_posix() + + +def _open_client(app: FastAPI) -> httpx.AsyncClient: + """In-process client over ``ASGITransport`` (pattern: ``link_health.py``). + + ``raise_app_exceptions=False`` is load-bearing: a driven endpoint's failure + becomes a 5xx *response* (-> ``unresolved_references`` / ``artifact_verified + =None``), never a re-raised exception inside the export. ``base_url`` is + cosmetic but required by httpx. + """ + return httpx.AsyncClient( + transport=httpx.ASGITransport(app=app, raise_app_exceptions=False), + base_url="http://demo.internal", + timeout=_EXPORT_TIMEOUT, + ) + + +async def _resolve_model_runs( + client: httpx.AsyncClient, + created: dict[str, Any], +) -> tuple[list[dict[str, Any]], list[UnresolvedReference]]: + """Resolve the run-id soft references to manifest model-run references. + + A run that resolves (2xx) is referenced (uri + registry hash + a live + ``artifact_verified`` from the verify endpoint when both uri and hash are + present). A non-2xx run is an ``unresolved_references`` entry. 
A failed + artifact *verify* on a resolved run is NOT unresolved -- the run resolved; + only its artifact check did not (``artifact_verified=None``). + """ + model_runs: list[dict[str, Any]] = [] + unresolved: list[UnresolvedReference] = [] + for key in _RUN_KEYS: + run_id = created.get(key) + if not isinstance(run_id, str) or not run_id: + continue + resp = await client.get(f"/registry/runs/{run_id}") + if resp.status_code != 200: + reason = f"HTTP {resp.status_code}" + unresolved.append(UnresolvedReference(key=key, ref_id=run_id, reason=reason)) + logger.warning( + "demo.export_unresolved_reference", key=key, ref_id=run_id, reason=reason + ) + continue + body = resp.json() + artifact_uri = body.get("artifact_uri") + artifact_hash = body.get("artifact_hash") + verified: bool | None = None + if artifact_uri and artifact_hash: + vresp = await client.get(f"/registry/runs/{run_id}/verify") + if vresp.status_code == 200: + raw = vresp.json().get("verified") + verified = raw if isinstance(raw, bool) else None + model_runs.append( + { + "key": key, + "run_id": run_id, + "model_type": body.get("model_type"), + "status": body.get("status"), + "artifact_uri": artifact_uri, + "artifact_hash": artifact_hash, + "artifact_verified": verified, + "metrics": body.get("metrics"), + } + ) + return model_runs, unresolved + + +async def _resolve_scenario_plans( + client: httpx.AsyncClient, + created: dict[str, Any], + plans_dir: Path, +) -> tuple[list[dict[str, Any]], list[tuple[str, int]], list[UnresolvedReference]]: + """Write a JSON snapshot per resolvable scenario plan; report dangles. + + Returns ``(manifest plan entries, written (relpath, size) pairs, + unresolved)``. The plan body is stored verbatim -- its ``run_id`` is the + forecast ARTIFACT key, not a registry ``model_run.run_id`` (different id + spaces; memory anchor ``scenario-run-id-vs-registry-run-id``), so it is + never joined against the registry. 
+ """ + plan_entries: list[dict[str, Any]] = [] + file_entries: list[tuple[str, int]] = [] + unresolved: list[UnresolvedReference] = [] + # JSONB types this list[str], but nothing enforces it at runtime -- treat + # entries as untrusted (mirrors link_health's created_objects guards). + raw_plan_ids = created.get("scenario_plan_ids") + plan_ids: list[Any] = raw_plan_ids if isinstance(raw_plan_ids, list) else [] + for scenario_id in plan_ids: + if not isinstance(scenario_id, str) or not scenario_id: + continue + resp = await client.get(f"/scenarios/{scenario_id}") + if resp.status_code != 200: + reason = f"HTTP {resp.status_code}" + unresolved.append( + UnresolvedReference(key="scenario_plan_ids", ref_id=scenario_id, reason=reason) + ) + logger.warning( + "demo.export_unresolved_reference", + key="scenario_plan_ids", + ref_id=scenario_id, + reason=reason, + ) + continue + body = resp.json() + rel = f"{_PLANS_DIR}/{scenario_id}.json" + size = _write_json(plans_dir / f"{scenario_id}.json", body) + plan_entries.append( + { + "scenario_id": scenario_id, + "file": rel, + "name": body.get("name") if isinstance(body, dict) else None, + } + ) + file_entries.append((rel, size)) + return plan_entries, file_entries, unresolved + + +def _validate_checksums(bundle_dir: Path) -> bool: + """Re-read ``checksums.sha256``, recompute every listed hash, compare. + + Returns ``False`` (the caller logs it) rather than raising on any mismatch + or parse issue -- a failed validation is reported honestly in the response. + """ + checksums_path = bundle_dir / _CHECKSUMS + try: + content = checksums_path.read_text(encoding="utf-8") + except OSError: + return False + for line in content.splitlines(): + if not line.strip(): + continue + # sha256sum format: "<sha256-hex>  <bundle-relative-path>" (two-space separator). 
+ expected, _, rel = line.partition(" ") + if not rel: + return False + target = bundle_dir / rel + try: + actual = _compute_sha256(target) + except OSError: + return False + if actual != expected: + return False + return True + + +async def export_workspace( + db: AsyncSession, + app: FastAPI, + workspace_id: str, + *, + export_root: str | Path | None = None, +) -> WorkspaceExportResult: + """Export a saved workspace to a checksum-validated bundle on disk. + + Re-queries the row via :func:`workspace.get_workspace` so the function is + independently callable/testable; the route's 404/409 pre-guard fires before + any export work begins. + + Args: + db: Caller-owned async session (used only to load the row). + app: The live FastAPI app for in-process soft-reference resolution. + workspace_id: External id of the workspace to export. + export_root: Override the configured ``showcase_export_root`` (tests). + + Returns: + The export result (bundle path, file inventory, counts, unresolved + references, checksum-validation flag). + + Raises: + NotFoundError: When no workspace matches ``workspace_id`` (404). + ConflictError: When the workspace run is still ``running`` (409). + ForecastLabError: When the bundle cannot be written to disk (500). + """ + row = await workspace.get_workspace(db, workspace_id) + if row is None: + raise NotFoundError(message=f"Workspace not found: {workspace_id}") + if row.status == WORKSPACE_STATUS_RUNNING: + raise ConflictError( + "Cannot export while the run is still in progress; retry after the run settles." + ) + + snapshot = WorkspaceDetailResponse.model_validate(row).model_dump(mode="json") + created = row.created_objects or {} + + root = Path(export_root or get_settings().showcase_export_root).resolve() + root.mkdir(parents=True, exist_ok=True) + # GUARD before any rmtree / mkdir / write -- the rmtree target is the + # guarded resolution only, never a raw request value. 
+ bundle_dir = _resolve_bundle_dir(root, row.workspace_id) + + exported_at = datetime.now(UTC) + try: + if bundle_dir.exists(): + shutil.rmtree(bundle_dir) # Decision 2 -- deterministic overwrite. + plans_dir = bundle_dir / _PLANS_DIR + plans_dir.mkdir(parents=True) + + async with _open_client(app) as client: + model_runs, run_unresolved = await _resolve_model_runs(client, created) + plan_entries, plan_files, plan_unresolved = await _resolve_scenario_plans( + client, created, plans_dir + ) + unresolved = [*run_unresolved, *plan_unresolved] + + manifest = { + "bundle_format_version": BUNDLE_FORMAT_VERSION, + "exported_at": exported_at.isoformat(), + "workspace": snapshot, + "model_runs": model_runs, + "scenario_plans": plan_entries, + "unresolved_references": [ref.model_dump() for ref in unresolved], + # Paths + sizes so a consumer can sanity-check without parsing the + # hash file; hashes live ONLY in checksums.sha256 (Decision 7). + "files": [{"path": rel, "size_bytes": size} for rel, size in plan_files], + } + _write_json(bundle_dir / _MANIFEST, manifest) + + # checksums.sha256 -- every bundle file except itself, sorted, two-space + # sha256sum format, bundle-relative POSIX paths. 
+ checksum_lines = [ + f"{_compute_sha256(path)} {path.relative_to(bundle_dir).as_posix()}" + for path in sorted(bundle_dir.rglob("*")) + if path.is_file() and path.name != _CHECKSUMS + ] + (bundle_dir / _CHECKSUMS).write_text("\n".join(checksum_lines) + "\n", encoding="utf-8") + except OSError as exc: + logger.warning( + "demo.workspace_export_failed", + workspace_id=row.workspace_id, + error=str(exc), + error_type=type(exc).__name__, + ) + raise ForecastLabError( + message=f"Export bundle write failed: {exc}", status_code=500 + ) from exc + + validated = _validate_checksums(bundle_dir) + files = [ + ExportFileEntry( + path=path.relative_to(bundle_dir).as_posix(), + sha256=_compute_sha256(path), + size_bytes=path.stat().st_size, + ) + for path in sorted(bundle_dir.rglob("*")) + if path.is_file() + ] + + logger.info( + "demo.workspace_exported", + workspace_id=row.workspace_id, + files=len(files), + unresolved=len(unresolved), + validated=validated, + ) + return WorkspaceExportResult( + workspace_id=row.workspace_id, + bundle_path=f"{_root_relative(root)}/{row.workspace_id}", + bundle_format_version=BUNDLE_FORMAT_VERSION, + exported_at=exported_at, + files=files, + scenario_plans_exported=len(plan_entries), + model_runs_referenced=len(model_runs), + unresolved_references=unresolved, + validated=validated, + ) diff --git a/app/features/demo/routes.py b/app/features/demo/routes.py index dc9d6b89..0f27e458 100644 --- a/app/features/demo/routes.py +++ b/app/features/demo/routes.py @@ -14,6 +14,11 @@ update (rename / notes / tags / archive / pin); ``status`` is not patchable. - ``DELETE /demo/workspaces/{workspace_id}`` -- delete the workspace METADATA row only; the run's created objects are soft references and stay untouched. 
+- ``POST /demo/workspaces/{workspace_id}/export`` -- E6 (#412): write a + checksum-validated bundle (manifest + scenario-plan snapshots + checksums) + under ``artifacts/showcase/<workspace_id>/``; soft references resolve + in-process, model artifacts are referenced (never copied), dangling refs are + reported, not fatal. The run/stream handlers obtain the live FastAPI app from ``request.app`` / ``websocket.app`` and pass it into the pipeline -- the slice never imports @@ -40,7 +45,8 @@ from app.core.database import get_db from app.core.exceptions import ConflictError, NotFoundError from app.core.logging import get_logger -from app.features.demo import hitl, link_health, service, workspace +from app.features.demo import export, hitl, link_health, service, workspace +from app.features.demo.models import WORKSPACE_STATUS_RUNNING from app.features.demo.schemas import ( ApprovalEventItem, ApprovalEventsResponse, @@ -49,6 +55,7 @@ HitlDecisionRequest, StepEvent, WorkspaceDetailResponse, + WorkspaceExportResult, WorkspaceHealthResponse, WorkspaceListItem, WorkspaceListResponse, @@ -348,6 +355,52 @@ async def delete_showcase_workspace( raise NotFoundError(message=f"Workspace not found: {workspace_id}") +@router.post( + "/workspaces/{workspace_id}/export", + response_model=WorkspaceExportResult, + summary="Export a saved showcase workspace as a checksum-validated bundle", + description=( + "Write artifacts/showcase// -- a versioned manifest.json, " + "one JSON per resolvable scenario plan, and a sha256sum-compatible " + "checksums.sha256 -- then re-verify every checksum before returning. " + "Model artifacts are referenced (uri + registry hash + live verify), " + "never copied. Dangling soft references are reported in " + "`unresolved_references` (the export still returns 200). 404 when the " + "workspace is missing; 409 while its run is still in progress; " + "re-export overwrites the bundle." 
+ ), +) +async def export_showcase_workspace( + workspace_id: str, + request: Request, + db: AsyncSession = Depends(get_db), +) -> WorkspaceExportResult: + """Export a saved showcase workspace to a checksum-validated bundle (E6, #412). + + Args: + workspace_id: External identifier of the workspace. + request: The incoming request (used to obtain the live FastAPI app for + the in-process soft-reference resolution GETs). + db: Async database session from dependency. + + Returns: + The export result -- bundle path, file inventory with hashes, counts, + unresolved references, and the checksum-validation flag. + + Raises: + NotFoundError: When no workspace matches ``workspace_id`` (404). + ConflictError: When the workspace run is still in progress (409). + """ + row = await workspace.get_workspace(db, workspace_id) + if row is None: + raise NotFoundError(message=f"Workspace not found: {workspace_id}") + if row.status == WORKSPACE_STATUS_RUNNING: + raise ConflictError( + "Cannot export while the run is still in progress; retry after the run settles." + ) + return await export.export_workspace(db, request.app, workspace_id) + + @router.websocket("/stream") async def stream_demo_pipeline(websocket: WebSocket) -> None: """Stream one StepEvent per pipeline step over a WebSocket. diff --git a/app/features/demo/schemas.py b/app/features/demo/schemas.py index 70b1e8c3..353a7eac 100644 --- a/app/features/demo/schemas.py +++ b/app/features/demo/schemas.py @@ -567,3 +567,60 @@ class ApprovalEventsResponse(BaseModel): ..., description="Flattened approval events, newest workspace first; empty when none." 
) total: int = Field(..., ge=0, description="Number of flattened entries returned (capped).") + + +# ============================================================================= +# E6 (#412) -- workspace export bundle (POST /demo/workspaces/{id}/export) +# ============================================================================= + +# Bumped on any manifest-shape change so bundle consumers can branch on it. +BUNDLE_FORMAT_VERSION = 1 + + +class ExportFileEntry(BaseModel): + """One file inside an exported workspace bundle (E6, issue #412). + + Response model -- plain ``BaseModel``, NOT ``ConfigDict(strict=True)``: + strict mode is a request-body policy and this endpoint has no body. + """ + + path: str = Field(..., description="Bundle-relative POSIX path.") + sha256: str = Field(..., description="Hex SHA-256 of the file contents.") + size_bytes: int = Field(..., ge=0, description="File size in bytes.") + + +class UnresolvedReference(BaseModel): + """A soft reference that could not be resolved during export (E6, #412).""" + + key: str = Field(..., description="created_objects key (e.g. 'scenario_plan_ids').") + ref_id: str = Field(..., description="The id that failed to resolve.") + reason: str = Field(..., description="Short cause, e.g. 'HTTP 404'.") + + +class WorkspaceExportResult(BaseModel): + """Result of ``POST /demo/workspaces/{workspace_id}/export`` (E6, #412).""" + + workspace_id: str = Field(..., description="The exported workspace's id.") + bundle_path: str = Field( + ..., description="Repo-root-relative bundle dir, e.g. 'artifacts/showcase/'." + ) + bundle_format_version: int = Field(..., description="Manifest schema version.") + exported_at: datetime = Field(..., description="When the export ran (UTC).") + # The COMPLETE on-disk inventory, INCLUDING checksums.sha256 itself (with + # its own computed hash) -- it just never lists itself inside the checksum + # file; the response is where that hash lives. 
+ files: list[ExportFileEntry] = Field( + ..., description="Every file in the bundle with its hash and size." + ) + scenario_plans_exported: int = Field( + ..., ge=0, description="Scenario plans written to scenario_plans/." + ) + model_runs_referenced: int = Field( + ..., ge=0, description="Model runs referenced in the manifest (not copied)." + ) + unresolved_references: list[UnresolvedReference] = Field( + ..., description="Soft references that could not be resolved (export still succeeded)." + ) + validated: bool = Field( + ..., description="True when checksums.sha256 re-read + recomputed clean." + ) diff --git a/app/features/demo/tests/test_export.py b/app/features/demo/tests/test_export.py new file mode 100644 index 00000000..2b1c950f --- /dev/null +++ b/app/features/demo/tests/test_export.py @@ -0,0 +1,362 @@ +"""Tests for the workspace export-bundle writer (E6, issue #412). + +Unit tests (no DB, no app) cover the disk primitives -- chunked sha256, the +traversal guard (must raise BEFORE any I/O), deterministic JSON -- and the +manifest assembly via a mocked in-process client. Integration tests run the +real endpoint against docker-compose Postgres with a ``tmp_path`` export root. +""" + +from __future__ import annotations + +import datetime as _dt +import hashlib +import json +from pathlib import Path +from types import SimpleNamespace +from typing import TYPE_CHECKING, Any, cast + +import httpx +import pytest + +from app.features.demo import export, workspace +from app.features.demo.models import ShowcaseWorkspace +from app.features.demo.schemas import WorkspaceExportResult + +if TYPE_CHECKING: + from fastapi import FastAPI + from sqlalchemy.ext.asyncio import AsyncSession + +# The direct-call unit tests monkeypatch get_workspace + _open_client, so the +# real session / app are never touched -- typed None sentinels keep the strict +# signature satisfied without a DB or app instance. 
+_NO_DB = cast("AsyncSession", None) +_NO_APP = cast("FastAPI", None) + +# ============================================================================= +# Unit -- disk primitives (no DB, no app) +# ============================================================================= + + +def test_compute_sha256_matches_whole_file(tmp_path: Path) -> None: + """The chunked digest equals a whole-file hashlib hash.""" + target = tmp_path / "blob" + target.write_bytes(b"y" * 25_000) # > one 8192-byte chunk + assert export._compute_sha256(target) == hashlib.sha256(target.read_bytes()).hexdigest() + + +@pytest.mark.parametrize("evil", ["../escape", "../../etc/passwd", "/etc/passwd"]) +def test_resolve_bundle_dir_rejects_traversal_before_io(tmp_path: Path, evil: str) -> None: + """A traversal-shaped id raises ValueError and writes nothing.""" + root = tmp_path.resolve() + with pytest.raises(ValueError): + export._resolve_bundle_dir(root, evil) + # The guard does pure path math -- no directory is created. 
+ assert list(root.iterdir()) == [] + + +def test_resolve_bundle_dir_accepts_uuid_hex(tmp_path: Path) -> None: + """A normal uuid-hex id resolves directly under the root.""" + root = tmp_path.resolve() + workspace_id = "a" * 32 + resolved = export._resolve_bundle_dir(root, workspace_id) + assert resolved == root / workspace_id + + +def test_write_json_is_deterministic(tmp_path: Path) -> None: + """Two dumps of key-shuffled payloads produce identical bytes.""" + a = tmp_path / "a.json" + b = tmp_path / "b.json" + size_a = export._write_json(a, {"z": 1, "a": 2, "m": {"y": 1, "x": 2}}) + size_b = export._write_json(b, {"a": 2, "m": {"x": 2, "y": 1}, "z": 1}) + assert a.read_bytes() == b.read_bytes() + assert size_a == size_b == len(a.read_bytes()) + assert a.read_text().endswith("\n") + + +def test_validate_checksums_round_trip(tmp_path: Path) -> None: + """A hand-built bundle validates; a tampered file flips validated False.""" + bundle = tmp_path / "wsid" + bundle.mkdir() + payload = bundle / "manifest.json" + payload.write_text("hello\n", encoding="utf-8") + digest = export._compute_sha256(payload) + (bundle / "checksums.sha256").write_text(f"{digest} manifest.json\n", encoding="utf-8") + assert export._validate_checksums(bundle) is True + + payload.write_text("tampered\n", encoding="utf-8") + assert export._validate_checksums(bundle) is False + + +# ============================================================================= +# Unit -- manifest assembly via a mocked in-process client +# ============================================================================= + + +def _row(**overrides: object) -> SimpleNamespace: + """An ORM-shaped ShowcaseWorkspace stand-in (mirrors test_routes._orm_like_row).""" + base: dict[str, object] = { + "workspace_id": "a" * 32, + "name": "e6-export", + "status": "completed", + "seed": 42, + "scenario": "showcase_rich", + "reset": False, + "skip_seed": True, + "store_id": 3, + "product_id": 7, + "date_start": _dt.date(2026, 1, 1), + 
"date_end": _dt.date(2026, 3, 31), + "created_objects": {}, + "result_summary": {"winner_model_type": "naive"}, + "created_at": _dt.datetime(2026, 6, 1, 12, 0, tzinfo=_dt.UTC), + } + base.update(overrides) + return SimpleNamespace(**base) + + +def _mock_client() -> httpx.AsyncClient: + """In-process client returning canned registry / scenario bodies + one 404.""" + + def handler(request: httpx.Request) -> httpx.Response: + path = request.url.path + if path == "/registry/runs/run-win": + return httpx.Response( + 200, + json={ + "run_id": "run-win", + "model_type": "naive", + "status": "success", + "artifact_uri": "demo/naive-model_abc.joblib", + "artifact_hash": "deadbeef", + "metrics": {"wape": 0.12}, + }, + ) + if path == "/registry/runs/run-win/verify": + return httpx.Response(200, json={"verified": True}) + if path == "/registry/runs/run-gone": + return httpx.Response(404, json={"detail": "run not found"}) + if path == "/scenarios/plan-1": + return httpx.Response( + 200, + json={ + "scenario_id": "plan-1", + "name": "Price cut 15%", + "run_id": "model_xyz", + "assumptions": {"price_change_pct": -0.15}, + "comparison": {}, + "tags": ["showcase"], + }, + ) + if path == "/scenarios/dangling": + return httpx.Response(404, json={"detail": "scenario not found"}) + return httpx.Response(404, json={"detail": f"unmatched {path}"}) + + return httpx.AsyncClient( + transport=httpx.MockTransport(handler), base_url="http://demo.internal" + ) + + +async def test_export_assembles_manifest_and_reports_dangles( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """A mixed run resolves one run + one plan and reports two dangles.""" + row = _row( + created_objects={ + "winning_run_id": "run-win", + "v2_run_id": "run-gone", # 404 -> unresolved + "scenario_plan_ids": ["plan-1", "dangling"], + } + ) + + async def fake_get(_db: object, _workspace_id: str) -> SimpleNamespace: + return row + + monkeypatch.setattr(workspace, "get_workspace", fake_get) + 
monkeypatch.setattr(export, "_open_client", lambda _app: _mock_client()) + + result: WorkspaceExportResult = await export.export_workspace( + db=_NO_DB, app=_NO_APP, workspace_id="a" * 32, export_root=tmp_path + ) + + assert result.validated is True + assert result.model_runs_referenced == 1 + assert result.scenario_plans_exported == 1 + # Two dangles: the v2 run (404) and the dangling scenario plan (404). + keys = sorted((ref.key, ref.ref_id) for ref in result.unresolved_references) + assert keys == [("scenario_plan_ids", "dangling"), ("v2_run_id", "run-gone")] + + bundle = tmp_path / ("a" * 32) + manifest = json.loads((bundle / "manifest.json").read_text()) + assert manifest["bundle_format_version"] == 1 + assert manifest["workspace"]["workspace_id"] == "a" * 32 + assert manifest["model_runs"][0]["run_id"] == "run-win" + assert manifest["model_runs"][0]["artifact_verified"] is True + assert manifest["scenario_plans"][0]["scenario_id"] == "plan-1" + # The plan body is stored verbatim under scenario_plans/. + plan = json.loads((bundle / "scenario_plans" / "plan-1.json").read_text()) + assert plan["name"] == "Price cut 15%" + # checksums.sha256 covers every file except itself, two-space separator. + lines = (bundle / "checksums.sha256").read_text().splitlines() + covered = {line.split(" ", 1)[1] for line in lines} + assert "manifest.json" in covered + assert "scenario_plans/plan-1.json" in covered + assert "checksums.sha256" not in covered + # The response inventory DOES include the checksum file itself. 
+ assert any(entry.path == "checksums.sha256" for entry in result.files) + + +async def test_export_overwrites_stale_bundle( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """A pre-existing stale file in the bundle dir is gone after re-export.""" + row = _row(created_objects={"winning_run_id": "run-win"}) + + async def fake_get(_db: object, _workspace_id: str) -> SimpleNamespace: + return row + + monkeypatch.setattr(workspace, "get_workspace", fake_get) + monkeypatch.setattr(export, "_open_client", lambda _app: _mock_client()) + + bundle = tmp_path / ("a" * 32) + (bundle / "scenario_plans").mkdir(parents=True) + stale = bundle / "scenario_plans" / "stale.json" + stale.write_text("{}", encoding="utf-8") + + await export.export_workspace( + db=_NO_DB, app=_NO_APP, workspace_id="a" * 32, export_root=tmp_path + ) + + assert not stale.exists() + assert (bundle / "manifest.json").exists() + + +async def test_export_empty_created_objects_minimal_bundle( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """An empty-references run still exports a valid manifest + checksums.""" + row = _row(created_objects={}) + + async def fake_get(_db: object, _workspace_id: str) -> SimpleNamespace: + return row + + monkeypatch.setattr(workspace, "get_workspace", fake_get) + monkeypatch.setattr(export, "_open_client", lambda _app: _mock_client()) + + result = await export.export_workspace( + db=_NO_DB, app=_NO_APP, workspace_id="a" * 32, export_root=tmp_path + ) + assert result.validated is True + assert result.model_runs_referenced == 0 + assert result.scenario_plans_exported == 0 + assert result.unresolved_references == [] + paths = {entry.path for entry in result.files} + assert paths == {"manifest.json", "checksums.sha256"} + + +async def test_export_404_on_missing_workspace(monkeypatch: pytest.MonkeyPatch) -> None: + """A missing row raises NotFoundError before any disk work.""" + from app.core.exceptions import NotFoundError + + async def fake_get(_db: 
object, _workspace_id: str) -> None: + return None + + monkeypatch.setattr(workspace, "get_workspace", fake_get) + with pytest.raises(NotFoundError): + await export.export_workspace(db=_NO_DB, app=_NO_APP, workspace_id="z" * 32) + + +async def test_export_409_on_running_workspace(monkeypatch: pytest.MonkeyPatch) -> None: + """A running row raises ConflictError (references not yet settled).""" + from app.core.exceptions import ConflictError + + async def fake_get(_db: object, _workspace_id: str) -> SimpleNamespace: + return _row(status="running") + + monkeypatch.setattr(workspace, "get_workspace", fake_get) + with pytest.raises(ConflictError): + await export.export_workspace(db=_NO_DB, app=_NO_APP, workspace_id="a" * 32) + + +# ============================================================================= +# Integration -- real endpoint, real Postgres, tmp_path export root +# ============================================================================= + + +@pytest.mark.integration +async def test_export_endpoint_round_trip( + client: httpx.AsyncClient, + db_session: Any, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """A completed row exports; checksums verify; a dangling plan is reported.""" + from app.core.config import get_settings + + workspace_id = "e6" + "0" * 30 + db_session.add( + ShowcaseWorkspace( + workspace_id=workspace_id, + name="e6-integration", + seed=42, + scenario="showcase_rich", + reset=False, + skip_seed=True, + status="completed", + created_objects={"scenario_plan_ids": ["dangling-plan-1"]}, + ) + ) + await db_session.commit() + + # Point the export root at tmp_path without disturbing the cached settings. 
+ patched = get_settings().model_copy(update={"showcase_export_root": str(tmp_path)}) + monkeypatch.setattr(export, "get_settings", lambda: patched) + + resp = await client.post(f"/demo/workspaces/{workspace_id}/export") + assert resp.status_code == 200 + body = resp.json() + assert body["validated"] is True + assert body["bundle_path"].endswith(workspace_id) + # The dangling scenario plan is reported, not fatal. + assert any(ref["ref_id"] == "dangling-plan-1" for ref in body["unresolved_references"]) + + bundle = tmp_path / workspace_id + assert (bundle / "manifest.json").exists() + # Independently re-verify every checksum line (don't trust validated alone). + for line in (bundle / "checksums.sha256").read_text().splitlines(): + if not line.strip(): + continue + expected, _, rel = line.partition(" ") + actual = hashlib.sha256((bundle / rel).read_bytes()).hexdigest() + assert actual == expected, rel + + # Re-export overwrites: plant a stale file, re-export, assert it's gone. + stale = bundle / "scenario_plans" / "stale.json" + stale.write_text("{}", encoding="utf-8") + resp2 = await client.post(f"/demo/workspaces/{workspace_id}/export") + assert resp2.status_code == 200 + assert not stale.exists() + + +@pytest.mark.integration +async def test_export_endpoint_409_on_running( + client: httpx.AsyncClient, + db_session: Any, +) -> None: + """The endpoint rejects a still-running workspace with 409 problem+json.""" + workspace_id = "e6run" + "0" * 27 + db_session.add( + ShowcaseWorkspace( + workspace_id=workspace_id, + name="e6-running", + seed=1, + scenario="demo_minimal", + reset=False, + skip_seed=True, + status="running", + ) + ) + await db_session.commit() + + resp = await client.post(f"/demo/workspaces/{workspace_id}/export") + assert resp.status_code == 409 + assert resp.headers["content-type"].startswith("application/problem+json") diff --git a/app/features/demo/tests/test_routes.py b/app/features/demo/tests/test_routes.py index 7b8858ba..1b00e6a9 100644 --- 
a/app/features/demo/tests/test_routes.py +++ b/app/features/demo/tests/test_routes.py @@ -14,8 +14,14 @@ from fastapi.testclient import TestClient from sqlalchemy.ext.asyncio import AsyncSession -from app.features.demo import service, workspace -from app.features.demo.schemas import DemoRunRequest, DemoRunResult, StepEvent +from app.features.demo import export, service, workspace +from app.features.demo.schemas import ( + DemoRunRequest, + DemoRunResult, + ExportFileEntry, + StepEvent, + WorkspaceExportResult, +) from app.main import app @@ -481,6 +487,74 @@ async def fake_delete(_db, _workspace_id: str) -> bool: assert "Workspace not found" in resp.json()["detail"] +# ============================================================================= +# E6 (#412) -- POST /demo/workspaces/{workspace_id}/export (unit) +# ============================================================================= + + +async def test_export_workspace_404(client, monkeypatch): + """An unknown workspace_id is a 404 problem+json (export never runs).""" + + async def fake_get(_db, _workspace_id: str) -> None: + return None + + monkeypatch.setattr(workspace, "get_workspace", fake_get) + + resp = await client.post("/demo/workspaces/" + "0" * 32 + "/export") + assert resp.status_code == 404 + assert resp.headers["content-type"].startswith("application/problem+json") + assert "Workspace not found" in resp.json()["detail"] + + +async def test_export_workspace_409_when_running(client, monkeypatch): + """A still-running workspace is a 409 problem+json (refs not settled).""" + + async def fake_get(_db, workspace_id: str) -> SimpleNamespace: + return _orm_like_row(workspace_id=workspace_id, status="running") + + monkeypatch.setattr(workspace, "get_workspace", fake_get) + + resp = await client.post("/demo/workspaces/" + "a" * 32 + "/export") + assert resp.status_code == 409 + assert resp.headers["content-type"].startswith("application/problem+json") + + +async def 
test_export_workspace_200_happy_path(client, monkeypatch): + """A completed workspace returns the export result the writer produced.""" + + async def fake_get(_db, workspace_id: str) -> SimpleNamespace: + return _orm_like_row(workspace_id=workspace_id, status="completed") + + canned = WorkspaceExportResult( + workspace_id="a" * 32, + bundle_path="artifacts/showcase/" + "a" * 32, + bundle_format_version=1, + exported_at=_dt.datetime(2026, 6, 12, 14, 0, tzinfo=_dt.UTC), + files=[ + ExportFileEntry(path="manifest.json", sha256="0" * 64, size_bytes=128), + ExportFileEntry(path="checksums.sha256", sha256="1" * 64, size_bytes=80), + ], + scenario_plans_exported=0, + model_runs_referenced=1, + unresolved_references=[], + validated=True, + ) + + async def fake_export(_db, _app, workspace_id: str) -> WorkspaceExportResult: + return canned + + monkeypatch.setattr(workspace, "get_workspace", fake_get) + monkeypatch.setattr(export, "export_workspace", fake_export) + + resp = await client.post("/demo/workspaces/" + "a" * 32 + "/export") + assert resp.status_code == 200 + body = resp.json() + assert body["validated"] is True + assert body["bundle_format_version"] == 1 + assert body["model_runs_referenced"] == 1 + assert len(body["files"]) == 2 + + # ============================================================================= # E1 (#407) -- PATCH /demo/workspaces/{workspace_id} (unit) # ============================================================================= diff --git a/docs/_base/API_CONTRACTS.md b/docs/_base/API_CONTRACTS.md index b2fb5c64..8ca1c2df 100644 --- a/docs/_base/API_CONTRACTS.md +++ b/docs/_base/API_CONTRACTS.md @@ -65,6 +65,7 @@ All endpoints serve JSON; error responses use `application/problem+json` (RFC 78 | demo | GET | `/demo/workspaces/{workspace_id}/health` | **E2 (#408)** — probe the workspace's soft references in-process (model runs, scenario plans, alias, batch, agent session, `job_ids` slot) via `httpx.ASGITransport`; per-reference `status` ∈ 
`alive` (2xx) / `dead` (404 — deleted after the run) / `unknown` (anything else — never a 500), plus `alive`/`dead`/`unknown` counts and `partial_run` (true when the row's status ≠ `completed`); non-probeable keys (`v2_model_path`, `scenario_artifact_key`, `train_model_types`) are skipped; `404 application/problem+json` when the workspace is missing | | demo | PATCH | `/demo/workspaces/{workspace_id}` | **E1 (#407)** — partial lifecycle update (`name` / `notes` / `tags` / `archived` / `pinned`; `exclude_unset` semantics — only provided fields change; explicit `null` clears `name`/`notes`; explicit `null` on `archived`/`pinned`/`tags` → `422` (send `[]` to clear tags); `status` NOT patchable — the pipeline owns it); returns the updated `WorkspaceDetailResponse`; empty body = `200` no-op; `404 application/problem+json` when missing; `422` on unknown keys / bad name pattern / >20 tags | | demo | DELETE | `/demo/workspaces/{workspace_id}` | Delete one saved workspace METADATA row; `204` on success, `404 application/problem+json` when missing. The run's created objects (model runs, scenario plans, aliases, jobs, artifacts) are soft references and are NOT deleted | +| demo | POST | `/demo/workspaces/{workspace_id}/export` | **E6 (#412)** — write a checksum-validated bundle under `artifacts/showcase/<workspace_id>/`: a versioned `manifest.json` (full `WorkspaceDetailResponse` snapshot + `bundle_format_version: 1` + `exported_at` + model-run references), one `scenario_plans/<plan_id>.json` per resolvable plan, and a `sha256sum`-compatible `checksums.sha256` covering every other file. Re-reads + recomputes every checksum before returning (`validated: bool`). Soft references resolve over the in-process HTTP surface (`GET /registry/runs/{id}` + `/verify`, `GET /scenarios/{id}`); **model artifacts are REFERENCED (uri + registry hash + live `artifact_verified`), never copied**. Dangling soft references (deleted run / plan) become `unresolved_references` entries and the export still returns `200`. 
Returns `WorkspaceExportResult` (`bundle_path`, full `files` inventory with hashes/sizes, counts, `unresolved_references`, `validated`). `404 application/problem+json` when missing; `409` while `status="running"` (references not yet settled); `500` on a disk write failure. Re-export overwrites the bundle deterministically (`exported_at` records the moment). No migration, no DB writes — stateless and re-runnable; `artifacts/` is gitignored so bundles never enter version control. `failed` and archived workspaces export normally | | demo | POST | `/demo/hitl-decision` | **E5 (#411)** — relay the Showcase HITL step card's Approve/Reject to the in-flight pipeline. Body `{action_id: str, decision: 'approved' \| 'rejected', reason?: str ≤500}` (`ConfigDict(strict=True, extra='forbid')`). `204` on success; `404 application/problem+json` when no matching action is pending; `409` when the action was already decided; `422` on a malformed body. The in-memory single-slot relay is safe because the pipeline runs one-at-a-time under the module `_pipeline_lock`; the pipeline forwards the real decision to `/agents/sessions/{id}/approve` (`approved=true\|false` + reason) — `agent_require_approval` is untouched. A reject keeps the pipeline GREEN (D5); the gated `save_scenario` never executes | | demo | GET | `/demo/approval-events` | **E5 (#411)** — recent HITL approval events flattened across the newest saved workspaces carrying the `approval_events` slot, newest-workspace-first (`limit` 1-200 default 50); `200` + empty list when none. Each item carries `workspace_id` / `workspace_name` plus the entry's base + additive keys (`decision`, `tool_name`, `auto_approved`, `reason`, `execution_status`, `transcript_summary`, …). Audit-glance surface — no pagination/offset (D6). 
Backs the `/ops` page's Approval History table (frontend-only — the ops slice does not import demo code) | | config | GET | `/config/ai` | Effective AI-model config (agent LLM + RAG embeddings); API keys masked, never raw | diff --git a/docs/_base/RUNBOOKS.md b/docs/_base/RUNBOOKS.md index 6cdb9fd7..2dd0d6dc 100644 --- a/docs/_base/RUNBOOKS.md +++ b/docs/_base/RUNBOOKS.md @@ -161,10 +161,11 @@ uv run python scripts/run_demo.py --seed 42 --quiet 2>&1 | tee demo.log 3. **Rows accumulate unless deleted.** `DELETE /demo/workspaces/{workspace_id}` (and the panel's per-row **Delete** button, behind a confirmation dialog) removes a saved row; a missing id is an RFC 7807 404. Undeleted rows are harmless audit records. E2 (#408) — the panel adds search / tag filter / sort / show-archived (archived rows are hidden from the list by default) and a multi-select **Delete selected** action — N sequential single DELETEs behind one confirmation; deliberately NO bulk endpoint. 4. **Deleting a workspace deletes METADATA ONLY.** The delete removes just the `showcase_workspace` row — the model runs, scenario plans, aliases, jobs, agent sessions, and on-disk artifacts the run created are NOT touched (and the seeded data is not reverted). `created_objects` ids are SOFT references (deliberately no FKs), so deletion in either direction never cascades: an operator-issued `DELETE /registry/runs/{id}` or scenario-plan delete leaves dangling deep links on a loaded workspace's artifact cards — expected; the workspace row records what WAS created, not what still exists. E2 (#408) — that staleness now SURFACES instead of dangling silently: loading a workspace probes its references via `GET /demo/workspaces/{id}/health`, dead references get a warning marker on the artifact cards, and a summary chip shows alive/dead counts plus a partial-run warning for never-completed rows. 5. 
**`holiday_rush` workspaces replay the pinned 2024 window.** The preset seeds a fixed Oct–Dec 2024 window (incident 28 above); a Replay with `reset=false` ADDS those rows to a today-anchored dataset, so `/seeder/status` reports the union range afterwards. For a clean pinned window, save the workspace from a run with **Reset database** ticked — its (destructive) Replay then reproduces the pinned window exactly. +6. **Export writes a checksum-validated bundle, overwriting on re-export (E6 #412).** The panel's per-row **Export** button (and `POST /demo/workspaces/{workspace_id}/export`) writes `artifacts/showcase/<workspace_id>/` — `manifest.json` (full workspace snapshot + model-run references + `bundle_format_version`/`exported_at`), one `scenario_plans/<plan_id>.json` per resolvable plan, and a `sha256sum`-compatible `checksums.sha256`. The endpoint re-reads and recomputes every checksum before returning (`validated: true`); verify by hand with `cd artifacts/showcase/<workspace_id> && sha256sum -c checksums.sha256`. Export is **non-destructive and stateless** — no DB write, no story slot, and `artifacts/` is gitignored so bundles never reach version control. **Dangling soft references are expected, not errors:** a scenario plan or model run deleted since the run becomes an `unresolved_references` entry and the export still succeeds (the success toast names the count). Model artifacts are REFERENCED (uri + registry hash + a live `artifact_verified`), never copied — the registry already owns and hash-verifies them. **Re-export overwrites:** the previous bundle directory is removed wholesale and rewritten (the `shutil.rmtree` target is always the traversal-guarded `<export_root>/<workspace_id>`, never a raw request value), so a stale `scenario_plans/<plan_id>.json` from a prior export disappears on the next one. A `running` workspace returns `409` (its references are not yet settled); a `failed` or archived workspace exports normally. 
The export root is configurable via `SHOWCASE_EXPORT_ROOT` (default `./artifacts/showcase`, resolved against the backend CWD — repo root for local uvicorn, `/app` in the container). **Notes:** keep-runs are recorded by warn-and-continue hooks — a DB hiccup during `create_workspace` yields a green pipeline with `workspace_id: null` and no row (check uvicorn logs for `demo.workspace_create_failed`). Ephemeral runs write no workspace rows and stay in the localStorage Run-history strip; kept runs appear ONLY in the server-backed panel. On `showcase_rich` keep-runs, the planning-phase scenario plans carry the `workspace:` tag (E3 #392) — retrieve them via `GET /scenarios?tags=workspace: