Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/eval.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

Per ADR `2026-04-25-eval-as-a-service-architecture`. Phase 1 ships LIBERO only on Modal (with Linux x86_64 local fallback); Phase 2 adds SimplerEnv + `customer` suite + HF Hub video upload.

Modal Phase 1 does not upload local export files automatically. For `--runtime modal`, `./my-export/` maps to `/onnx_out/my-export/` in the `pi0-onnx-outputs` Modal volume; if that subdirectory is not prepared, the run fails loudly instead of evaluating a reference export.

## Quick start

```bash
Expand Down
48 changes: 40 additions & 8 deletions src/tether/eval/modal_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@
- The repo cloned (so scripts/modal_libero_monolithic_onnx.py is
reachable). Phase 2 will package this as a deployable Modal app.

Phase 1 Modal runner is bounded to the smolvla_libero_monolithic
reference export (hardcoded in the script). Arbitrary --export_dir
support is Phase 2 (per docs/eval.md "What's deliberately NOT
shipped Phase 1").
Phase 1 Modal runner evaluates an export already present in the
pi0-onnx-outputs Modal volume. The local export_dir maps to an
ONNX subdirectory under /onnx_out; if that subdirectory has not been
uploaded/prepared, the Modal script fails loudly instead of silently
evaluating a reference export.
"""
from __future__ import annotations

import json
import logging
import re
import shutil
Expand All @@ -54,6 +54,7 @@

# Path to the wrapped script, relative to repo root.
DEFAULT_MODAL_SCRIPT = "scripts/modal_libero_monolithic_onnx.py"
MODAL_ONNX_OUTPUT_PATH = "/onnx_out"


class ModalNotInstalledError(RuntimeError):
Expand Down Expand Up @@ -106,9 +107,10 @@ def run_libero_on_modal(
Args:
config: LiberoSuiteConfig (tasks = suite names like
"libero_spatial").
export_dir: customer's export directory. Phase 1: ignored
(the wrapped script targets the pre-uploaded reference
export). Phase 2 wires customer-arbitrary export upload.
export_dir: customer's export directory. The basename (or path
relative to /onnx_out) is forwarded as the Modal volume subdir
so `./my-export` evaluates `/onnx_out/my-export`, not the
baked-in reference export.
repo_root: where to find scripts/. None = parent of cwd.
modal_invoker: subprocess wrapper. None = real `modal` CLI.
modal_binary: name of `modal` CLI. Used for PATH check.
Expand Down Expand Up @@ -150,6 +152,8 @@ def run_libero_on_modal(
)
return []

onnx_subdir = _modal_onnx_subdir_for_export(export_dir)

# Per-suite invocation -- existing script handles per-task fan-out
# within one Modal call (cheaper cold-start than per-task fan-out
# at the Tether layer).
Expand All @@ -160,6 +164,7 @@ def run_libero_on_modal(
modal_binary=modal_binary,
script_path=str(abs_script),
suite=suite,
onnx_subdir=onnx_subdir,
num_episodes=config.num_episodes,
seed=config.seed,
timeout_s=suite_timeout_s,
Expand All @@ -170,12 +175,38 @@ def run_libero_on_modal(
return all_episodes


def _modal_onnx_subdir_for_export(export_dir: Path) -> str:
    """Map the user-facing export_dir to the Modal volume subdir.

    The Modal app mounts the shared ONNX volume at /onnx_out. A user may pass
    either a path already rooted there (`/onnx_out/foo`) or the local export dir
    they used for `tether export` (`./foo`). In both cases the wrapper must
    forward a specific subdir; otherwise the script falls back to its legacy
    smolvla_libero_monolithic reference export.

    Args:
        export_dir: Local export directory, or a path under
            MODAL_ONNX_OUTPUT_PATH. `~` is expanded; the directory does not
            have to exist locally.

    Returns:
        The subdirectory, in POSIX form, relative to MODAL_ONNX_OUTPUT_PATH:
        the relative path for inputs already under the volume root, or the
        directory's basename for any other path.

    Raises:
        ValueError: if the path does not name a concrete subdirectory
            (e.g. the volume root itself, or the filesystem root).
    """
    # Local import: keeps this block self-contained without touching the
    # module-level import list.
    import os

    export_path = Path(export_dir).expanduser()
    modal_root = Path(MODAL_ONNX_OUTPUT_PATH)
    try:
        rel = export_path.resolve(strict=False).relative_to(modal_root)
    except ValueError:
        # Not under the volume root: forward the directory's basename.
        # Normalize lexically first so "." maps to the current directory's
        # name and "foo/.." can never be forwarded as "..", which would
        # point the Modal script outside the intended subdirectory.
        # abspath (unlike resolve) does not follow symlinks, so a symlinked
        # export dir still maps to the name the user typed.
        rel = Path(Path(os.path.abspath(export_path)).name)

    onnx_subdir = rel.as_posix()
    if not onnx_subdir or onnx_subdir == ".":
        raise ValueError(
            "export_dir must identify a concrete Modal ONNX subdirectory "
            f"under {MODAL_ONNX_OUTPUT_PATH}."
        )
    return onnx_subdir


def _invoke_one_suite(
*,
modal_invoker: ModalInvoker,
modal_binary: str,
script_path: str,
suite: str,
onnx_subdir: str,
num_episodes: int,
seed: int,
timeout_s: float,
Expand All @@ -188,6 +219,7 @@ def _invoke_one_suite(
"--suite", suite,
"--num-episodes", str(num_episodes),
"--tasks", "all",
"--onnx-subdir", onnx_subdir,
]
t0 = time.perf_counter()
completed = modal_invoker(cmd, timeout_s)
Expand Down
23 changes: 20 additions & 3 deletions tests/test_eval_modal_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@

import pytest

from tether.eval.libero import EpisodeResult, LiberoSuiteConfig
from tether.eval.libero import LiberoSuiteConfig
from tether.eval.modal_runner import (
DEFAULT_MODAL_SCRIPT,
TASK_SUITE_MAX_STEPS,
ModalInvocationResult,
ModalNotInstalledError,
_modal_onnx_subdir_for_export,
_parse_invocation_to_episodes,
_parse_modal_stdout,
run_libero_on_modal,
Expand Down Expand Up @@ -267,6 +267,8 @@ def test_run_libero_passes_correct_cli_args(tmp_path):
scripts_dir.mkdir()
fake_script = scripts_dir / "modal_libero_monolithic_onnx.py"
fake_script.write_text("# stub")
export_dir = tmp_path / "customer-smolvla-export"
export_dir.mkdir()

captured = []

Expand All @@ -280,7 +282,7 @@ def _spy_invoker(cmd, timeout_s):
num_episodes=5, tasks=("libero_object",), seed=42,
)
run_libero_on_modal(
config=config, export_dir=tmp_path,
config=config, export_dir=export_dir,
repo_root=tmp_path, modal_invoker=_spy_invoker,
)
assert len(captured) == 1
Expand All @@ -293,6 +295,8 @@ def _spy_invoker(cmd, timeout_s):
assert "5" in cmd
assert "--tasks" in cmd
assert "all" in cmd
assert "--onnx-subdir" in cmd
assert cmd[cmd.index("--onnx-subdir") + 1] == "customer-smolvla-export"


def test_run_libero_invokes_per_suite_for_multiple_tasks(tmp_path):
Expand Down Expand Up @@ -320,3 +324,16 @@ def _counting_invoker(cmd, timeout_s):
repo_root=tmp_path, modal_invoker=_counting_invoker,
)
assert invocations == ["libero_spatial", "libero_object", "libero_goal"]


def test_modal_onnx_subdir_for_local_export_uses_basename(tmp_path):
    """A local export directory is forwarded as its basename."""
    local_export = tmp_path / "my-export"
    local_export.mkdir()

    forwarded = _modal_onnx_subdir_for_export(local_export)

    assert forwarded == "my-export"


def test_modal_onnx_subdir_for_modal_volume_path_preserves_relative_subdir():
    """A path already rooted at /onnx_out keeps its full relative subpath."""
    volume_path = Path("/onnx_out/runs/customer-a/export-42")

    forwarded = _modal_onnx_subdir_for_export(volume_path)

    assert forwarded == "runs/customer-a/export-42"
Loading