From 9cad7220af0a7a3be5f60bd4680c7135cddd585d Mon Sep 17 00:00:00 2001 From: RomirJ Date: Thu, 11 Jun 2026 01:21:01 -0700 Subject: [PATCH] fix(eval): route Modal runs to the requested export The Modal LIBERO wrapper accepted export_dir but never passed it to the wrapped script, so tether eval --runtime modal could evaluate the default smolvla_libero_monolithic reference export while reporting results for a user's path. Map the requested export directory to the Modal ONNX volume subdir and pass --onnx-subdir on every suite invocation. Local paths map by basename; /onnx_out paths preserve their relative subdir. If the volume subdir is missing, Modal now fails loudly instead of silently using the baked-in reference export. Tests: PYTHONPATH=src /Users/romirjain/Desktop/building\ projects/fastcrest/tether/.venv/bin/python -m pytest tests/test_eval_modal_runner.py tests/test_eval_cli.py -p no:cacheprovider Lint: PYTHONPATH=src /Users/romirjain/Desktop/building\ projects/fastcrest/tether/.venv/bin/ruff check src/tether/eval/modal_runner.py tests/test_eval_modal_runner.py Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/eval.md | 2 ++ src/tether/eval/modal_runner.py | 48 +++++++++++++++++++++++++++------ tests/test_eval_modal_runner.py | 23 +++++++++++++--- 3 files changed, 62 insertions(+), 11 deletions(-) diff --git a/docs/eval.md b/docs/eval.md index a77e22b..df937aa 100644 --- a/docs/eval.md +++ b/docs/eval.md @@ -4,6 +4,8 @@ Per ADR `2026-04-25-eval-as-a-service-architecture`. Phase 1 ships LIBERO only on Modal (with Linux x86_64 local fallback); Phase 2 adds SimplerEnv + `customer` suite + HF Hub video upload. +Modal Phase 1 does not upload local export files automatically. For `--runtime modal`, `./my-export/` maps to `/onnx_out/my-export/` in the `pi0-onnx-outputs` Modal volume; if that subdirectory is not prepared, the run fails loudly instead of evaluating a reference export. + ## Quick start ```bash diff --git a/src/tether/eval/modal_runner.py b/src/tether/eval/modal_runner.py index 3c4c53b..6d88fcf 100644 --- a/src/tether/eval/modal_runner.py +++ b/src/tether/eval/modal_runner.py @@ -20,14 +20,14 @@ - The repo cloned (so scripts/modal_libero_monolithic_onnx.py is reachable). Phase 2 will package this as a deployable Modal app. -Phase 1 Modal runner is bounded to the smolvla_libero_monolithic -reference export (hardcoded in the script). Arbitrary --export_dir -support is Phase 2 (per docs/eval.md "What's deliberately NOT -shipped Phase 1"). +Phase 1 Modal runner evaluates an export already present in the +pi0-onnx-outputs Modal volume. The local export_dir maps to an +ONNX subdirectory under /onnx_out; if that subdirectory has not been +uploaded/prepared, the Modal script fails loudly instead of silently +evaluating a reference export. """ from __future__ import annotations -import json import logging import re import shutil @@ -54,6 +54,7 @@ # Path to the wrapped script, relative to repo root. DEFAULT_MODAL_SCRIPT = "scripts/modal_libero_monolithic_onnx.py" +MODAL_ONNX_OUTPUT_PATH = "/onnx_out" class ModalNotInstalledError(RuntimeError): @@ -106,9 +107,10 @@ def run_libero_on_modal( Args: config: LiberoSuiteConfig (tasks = suite names like "libero_spatial"). - export_dir: customer's export directory. Phase 1: ignored - (the wrapped script targets the pre-uploaded reference - export). Phase 2 wires customer-arbitrary export upload. + export_dir: customer's export directory. The basename (or path + relative to /onnx_out) is forwarded as the Modal volume subdir + so `./my-export` evaluates `/onnx_out/my-export`, not the + baked-in reference export. repo_root: where to find scripts/. None = parent of cwd. modal_invoker: subprocess wrapper. None = real `modal` CLI. modal_binary: name of `modal` CLI. Used for PATH check. @@ -150,6 +152,8 @@ def run_libero_on_modal( ) return [] + onnx_subdir = _modal_onnx_subdir_for_export(export_dir) + # Per-suite invocation -- existing script handles per-task fan-out # within one Modal call (cheaper cold-start than per-task fan-out # at the Tether layer). @@ -160,6 +164,7 @@ def run_libero_on_modal( modal_binary=modal_binary, script_path=str(abs_script), suite=suite, + onnx_subdir=onnx_subdir, num_episodes=config.num_episodes, seed=config.seed, timeout_s=suite_timeout_s, @@ -170,12 +175,38 @@ def run_libero_on_modal( return all_episodes +def _modal_onnx_subdir_for_export(export_dir: Path) -> str: + """Map the user-facing export_dir to the Modal volume subdir. + + The Modal app mounts the shared ONNX volume at /onnx_out. A user may pass + either a path already rooted there (`/onnx_out/foo`) or the local export dir + they used for `tether export` (`./foo`). In both cases the wrapper must + forward a specific subdir; otherwise the script falls back to its legacy + smolvla_libero_monolithic reference export. + """ + export_path = Path(export_dir).expanduser() + modal_root = Path(MODAL_ONNX_OUTPUT_PATH) + try: + rel = export_path.resolve(strict=False).relative_to(modal_root) + except ValueError: + rel = Path(export_path.name) + + onnx_subdir = rel.as_posix() + if not onnx_subdir or onnx_subdir == ".": + raise ValueError( + "export_dir must identify a concrete Modal ONNX subdirectory " + f"under {MODAL_ONNX_OUTPUT_PATH}." + ) + return onnx_subdir + + def _invoke_one_suite( *, modal_invoker: ModalInvoker, modal_binary: str, script_path: str, suite: str, + onnx_subdir: str, num_episodes: int, seed: int, timeout_s: float, @@ -188,6 +219,7 @@ def _invoke_one_suite( "--suite", suite, "--num-episodes", str(num_episodes), "--tasks", "all", + "--onnx-subdir", onnx_subdir, ] t0 = time.perf_counter() completed = modal_invoker(cmd, timeout_s) diff --git a/tests/test_eval_modal_runner.py b/tests/test_eval_modal_runner.py index 43c9cf4..9bd6e62 100644 --- a/tests/test_eval_modal_runner.py +++ b/tests/test_eval_modal_runner.py @@ -6,12 +6,12 @@ import pytest -from tether.eval.libero import EpisodeResult, LiberoSuiteConfig +from tether.eval.libero import LiberoSuiteConfig from tether.eval.modal_runner import ( - DEFAULT_MODAL_SCRIPT, TASK_SUITE_MAX_STEPS, ModalInvocationResult, ModalNotInstalledError, + _modal_onnx_subdir_for_export, _parse_invocation_to_episodes, _parse_modal_stdout, run_libero_on_modal, @@ -267,6 +267,8 @@ def test_run_libero_passes_correct_cli_args(tmp_path): scripts_dir.mkdir() fake_script = scripts_dir / "modal_libero_monolithic_onnx.py" fake_script.write_text("# stub") + export_dir = tmp_path / "customer-smolvla-export" + export_dir.mkdir() captured = [] @@ -280,7 +282,7 @@ def _spy_invoker(cmd, timeout_s): num_episodes=5, tasks=("libero_object",), seed=42, ) run_libero_on_modal( - config=config, export_dir=tmp_path, + config=config, export_dir=export_dir, repo_root=tmp_path, modal_invoker=_spy_invoker, ) assert len(captured) == 1 @@ -293,6 +295,8 @@ def _spy_invoker(cmd, timeout_s): assert "5" in cmd assert "--tasks" in cmd assert "all" in cmd + assert "--onnx-subdir" in cmd + assert cmd[cmd.index("--onnx-subdir") + 1] == "customer-smolvla-export" def test_run_libero_invokes_per_suite_for_multiple_tasks(tmp_path): @@ -320,3 +324,16 @@ def _counting_invoker(cmd, timeout_s): repo_root=tmp_path, modal_invoker=_counting_invoker, ) assert invocations == ["libero_spatial", "libero_object", "libero_goal"] + + +def test_modal_onnx_subdir_for_local_export_uses_basename(tmp_path): + export_dir = tmp_path / "my-export" + export_dir.mkdir() + + assert _modal_onnx_subdir_for_export(export_dir) == "my-export" + + +def test_modal_onnx_subdir_for_modal_volume_path_preserves_relative_subdir(): + export_dir = Path("/onnx_out/runs/customer-a/export-42") + + assert _modal_onnx_subdir_for_export(export_dir) == "runs/customer-a/export-42"