Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions scripts/modal_libero_monolithic_onnx.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,11 @@ def _compat_load(*args, **kwargs):
kwargs.setdefault("weights_only", False)
return _orig_torch_load(*args, **kwargs)
torch.load = _compat_load
seed = int(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
torch.cuda.manual_seed_all(seed)

# ─── Load SmolVLAPolicy (for preprocessor/postprocessor + prepare_* helpers) ──
# We use the policy to build ONNX inputs but bypass its forward. This is
Expand Down Expand Up @@ -228,10 +233,10 @@ def _compat_load(*args, **kwargs):
_input_shapes = {inp.name: inp.shape for inp in sess.get_inputs()}
if "img_cam1" in _input_names:
_cam_keys = ("img_cam1", "img_cam2", "img_cam3", "mask_cam1", "mask_cam2", "mask_cam3")
print(f"[onnx] cam naming: SmolVLA-style (cam1/cam2/cam3)")
print("[onnx] cam naming: SmolVLA-style (cam1/cam2/cam3)")
elif "img_base" in _input_names:
_cam_keys = ("img_base", "img_wrist_l", "img_wrist_r", "mask_base", "mask_wrist_l", "mask_wrist_r")
print(f"[onnx] cam naming: pi05-style (base/wrist_l/wrist_r)")
print("[onnx] cam naming: pi05-style (base/wrist_l/wrist_r)")
else:
raise RuntimeError(
f"Unknown camera-naming convention in ONNX inputs: {sorted(_input_names)}. "
Expand Down Expand Up @@ -265,7 +270,6 @@ def _compat_load(*args, **kwargs):
print(f"[onnx] lang_seq (detected): {expected_lang_seq}")

# ─── LIBERO setup ────────────────────────────────────────────────
np.random.seed(seed)
from libero.libero import benchmark
from libero.libero import get_libero_path
from libero.libero.envs import OffScreenRenderEnv
Expand All @@ -278,8 +282,10 @@ def _compat_load(*args, **kwargs):
f"max_steps={max_steps}")

def _quat2axisangle(quat):
if quat[3] > 1.0: quat[3] = 1.0
elif quat[3] < -1.0: quat[3] = -1.0
if quat[3] > 1.0:
quat[3] = 1.0
elif quat[3] < -1.0:
quat[3] = -1.0
den = np.sqrt(1.0 - quat[3] * quat[3])
if math.isclose(den, 0.0):
return np.zeros(3)
Expand Down Expand Up @@ -594,13 +600,15 @@ def main(
tasks: str = "0",
suite: str = "libero_10",
onnx_subdir: str = "smolvla_libero_monolithic",
seed: int = 7,
):
"""
--num-episodes N: episodes per task (native used 5)
--tasks "0" single task
--tasks "0,1,2,3,4" N=25 matching native run
--tasks "all" all 10 tasks
--onnx-subdir subfolder under /onnx_out/ (default smolvla_libero_monolithic)
--seed RNG seed for LIBERO envs, NumPy, and Torch noise
"""
if tasks == "all":
task_list = None
Expand All @@ -613,13 +621,14 @@ def main(
task_suite_name=suite,
task_indices=task_list,
onnx_subdir=onnx_subdir,
seed=seed,
)
print("\n=== RESULT ===")
# Early-return failure path (e.g., ONNX missing on volume) — surface
# the status + reason so operators don't see opaque '?' counts.
# Caught by 2026-04-25 eval-as-a-service Modal smoke validation.
if r.get("status") == "fail":
print(f" status: FAIL")
print(" status: FAIL")
print(f" reason: {r.get('reason', '(no reason)')}")
return
print(f" success_rate: {r.get('success_rate_pct', '?')}%")
Expand Down
2 changes: 1 addition & 1 deletion src/tether/eval/modal_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
"""
from __future__ import annotations

import json
import logging
import re
import shutil
Expand Down Expand Up @@ -187,6 +186,7 @@ def _invoke_one_suite(
modal_binary, "run", script_path,
"--suite", suite,
"--num-episodes", str(num_episodes),
"--seed", str(seed),
"--tasks", "all",
]
t0 = time.perf_counter()
Expand Down
17 changes: 13 additions & 4 deletions src/tether/finetune/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@
import subprocess
import time
from pathlib import Path
from typing import Any

from tether.finetune.config import FinetuneConfig, FinetuneResult
from tether.seeding import seed_everything, seeded_subprocess_env

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -155,7 +155,7 @@ def _build_lerobot_command(cfg: FinetuneConfig) -> list[str]:
"lerobot-train",
f"--policy.type={policy_type}",
f"--policy.repo_id={repo_id}",
f"--policy.push_to_hub=false",
"--policy.push_to_hub=false",
f"--dataset.repo_id={cfg.dataset}",
f"--output_dir={lerobot_output}",
f"--steps={cfg.num_steps}",
Expand All @@ -175,7 +175,7 @@ def _build_lerobot_command(cfg: FinetuneConfig) -> list[str]:
cmd.append(f"--policy.n_action_steps={cfg.chunk_size}")
if cfg.mode == "lora":
cmd.extend([
f"--peft.method_type=lora",
"--peft.method_type=lora",
f"--peft.r={cfg.lora_rank}",
])
for k, v in cfg.extra_lerobot_args.items():
Expand All @@ -193,10 +193,11 @@ def _run_lerobot_training(
and the root logger. Returns the subprocess exit code.
"""
cmd = _build_lerobot_command(cfg)
env = seeded_subprocess_env(cfg.seed, env)
logger.info("[finetune] exec: %s", " ".join(cmd))
log_path.parent.mkdir(parents=True, exist_ok=True)
with log_path.open("w") as log:
log.write(f"# tether finetune — lerobot-train invocation\n")
log.write("# tether finetune — lerobot-train invocation\n")
log.write(f"# cmd: {' '.join(cmd)}\n\n")
log.flush()
proc = subprocess.Popen(
Expand Down Expand Up @@ -376,6 +377,14 @@ def run_finetune(cfg: FinetuneConfig, *, hooks=None) -> FinetuneResult:
error="config validation failed:\n " + "\n ".join(errs),
)

seed_report = seed_everything(cfg.seed)
logger.info(
"[finetune] seeded process: seed=%d torch=%s cuda=%s",
cfg.seed,
seed_report["torch"],
seed_report["cuda"],
)

# Pre-flight validation (v0.5) — catches top customer pains before
# any GPU time. Dry-run + skip flags supported.
if not cfg.skip_preflight:
Expand Down
66 changes: 66 additions & 0 deletions src/tether/seeding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""Shared process seeding helpers for training and evaluation paths."""
from __future__ import annotations

import os
import random
from collections.abc import Mapping
from typing import Any

import numpy as np


def seed_everything(seed: int, *, deterministic_torch: bool = False) -> dict[str, Any]:
    """Seed Python, NumPy, and (when importable) Torch from one integer.

    ``PYTHONHASHSEED`` is also written to ``os.environ``. That variable is
    only read at interpreter start-up, so it does not change hashing in the
    current process; it is exported so that Python processes spawned later
    inherit the requested run seed.

    Args:
        seed: Run seed. Coerced with ``int()`` and required to lie in
            ``[0, 2**32 - 1]``, the range accepted by both
            ``np.random.seed`` and ``PYTHONHASHSEED``.
        deterministic_torch: When true and Torch is importable, also request
            deterministic algorithms and pin the cuDNN flags.

    Returns:
        A report dict with keys ``seed``, ``python``, ``numpy``, ``torch``,
        ``cuda`` and ``deterministic_torch``. ``torch``/``cuda`` are true only
        if the corresponding generator was actually seeded;
        ``deterministic_torch`` echoes the requested flag.

    Raises:
        ValueError: If ``seed`` is outside ``[0, 2**32 - 1]``. The check runs
            before any state is touched, so a rejected seed leaves the
            environment and every RNG unchanged.
    """
    seed_int = int(seed)
    # Validate before mutating anything: np.random.seed would reject the value
    # anyway, but only after PYTHONHASHSEED and random had already been changed.
    if not 0 <= seed_int <= 0xFFFFFFFF:
        raise ValueError(
            f"seed must be in [0, 4294967295] (2**32 - 1), got {seed_int}"
        )

    os.environ["PYTHONHASHSEED"] = str(seed_int)
    random.seed(seed_int)
    np.random.seed(seed_int)

    report: dict[str, Any] = {
        "seed": seed_int,
        "python": True,
        "numpy": True,
        "torch": False,
        "cuda": False,
        "deterministic_torch": deterministic_torch,
    }

    # Torch is optional; without it the report keeps torch/cuda as False.
    try:
        import torch
    except ImportError:
        return report

    torch.manual_seed(seed_int)
    report["torch"] = True
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed_int)
        report["cuda"] = True

    if deterministic_torch:
        try:
            torch.use_deterministic_algorithms(True, warn_only=True)
        except TypeError:
            # Presumably a Torch build whose signature lacks ``warn_only``;
            # fall back to the strict form.
            torch.use_deterministic_algorithms(True)
        if hasattr(torch.backends, "cudnn"):
            torch.backends.cudnn.benchmark = False
            torch.backends.cudnn.deterministic = True

    return report


def seeded_subprocess_env(
seed: int,
base_env: Mapping[str, str] | None = None,
) -> dict[str, str]:
"""Return an environment with ``PYTHONHASHSEED`` pinned to ``seed``."""
env = dict(os.environ if base_env is None else base_env)
env["PYTHONHASHSEED"] = str(int(seed))
return env


__all__ = ["seed_everything", "seeded_subprocess_env"]
6 changes: 3 additions & 3 deletions tests/test_eval_modal_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,11 @@
from __future__ import annotations

import subprocess
from pathlib import Path

import pytest

from tether.eval.libero import EpisodeResult, LiberoSuiteConfig
from tether.eval.libero import LiberoSuiteConfig
from tether.eval.modal_runner import (
DEFAULT_MODAL_SCRIPT,
TASK_SUITE_MAX_STEPS,
ModalInvocationResult,
ModalNotInstalledError,
Expand Down Expand Up @@ -291,6 +289,8 @@ def _spy_invoker(cmd, timeout_s):
assert "libero_object" in cmd
assert "--num-episodes" in cmd
assert "5" in cmd
assert "--seed" in cmd
assert "42" in cmd
assert "--tasks" in cmd
assert "all" in cmd

Expand Down
48 changes: 46 additions & 2 deletions tests/test_finetune.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@

import pytest

from tether.finetune import FinetuneConfig, FinetuneResult, run_finetune
from tether.finetune import FinetuneConfig, run_finetune
from tether.finetune.run import (
_build_lerobot_command,
_locate_checkpoint,
_run_lerobot_training,
_validate_config,
)

Expand Down Expand Up @@ -132,6 +133,7 @@ def test_basic_command_shape(self, tmp_path):
assert "--steps=5000" in joined
assert "--batch_size=16" in joined
assert "--optimizer.lr=0.0002" in joined
assert "--seed=42" in joined
assert "--peft.method_type=lora" in joined
assert "--peft.r=32" in joined
# precision is NOT a top-level lerobot 0.5.1 flag — should not appear
Expand Down Expand Up @@ -225,6 +227,48 @@ def test_config_failure_aborts(self, tmp_path):
mock_train.assert_not_called()
assert "base is required" in (result.error or "")

def test_run_finetune_seeds_before_training(self, tmp_path):
    """The patched seeding hook is recorded before the patched training call."""
    cfg = self._cfg(tmp_path, skip_export=True, seed=123)
    call_log = []

    def _record_seed(seed):
        # Stand-in for seed_everything: returns only the keys run_finetune logs.
        call_log.append(("seed", seed))
        return {"torch": True, "cuda": False}

    def _record_train(cfg, log_path, **kwargs):
        # Stand-in for the training step: note the call, fake a checkpoint.
        call_log.append(("train", cfg.seed))
        self._setup_fake_checkpoint(cfg.output)
        return 0

    seed_patch = patch(
        "tether.finetune.run.seed_everything", side_effect=_record_seed
    )
    train_patch = patch(
        "tether.finetune.run._run_lerobot_training", side_effect=_record_train
    )
    with seed_patch, train_patch:
        result = run_finetune(cfg)

    assert result.status == "ok"
    assert call_log[:2] == [("seed", 123), ("train", 123)]

def test_lerobot_subprocess_sets_pythonhashseed(self, tmp_path):
    """The env handed to Popen carries the config seed and the caller's entries."""
    cfg = self._cfg(tmp_path, seed=987)

    class _FakeProc:
        # Minimal Popen stand-in: one line of output, exit code 0.
        stdout = iter(["training\n"])
        returncode = 0

        def wait(self):
            return None

    caller_env = {"EXISTING": "1"}
    log_file = tmp_path / "training_log.jsonl"

    with patch("subprocess.Popen", return_value=_FakeProc()) as popen_mock:
        exit_code = _run_lerobot_training(cfg, log_file, env=caller_env)

    assert exit_code == 0
    spawned_env = popen_mock.call_args.kwargs["env"]
    assert spawned_env["PYTHONHASHSEED"] == "987"
    assert spawned_env["EXISTING"] == "1"

def test_training_failure_surfaces_rc(self, tmp_path):
cfg = self._cfg(tmp_path)
with patch("tether.finetune.run._run_lerobot_training", return_value=42):
Expand All @@ -239,7 +283,7 @@ def test_successful_training_plus_export(self, tmp_path):

def _fake_train(cfg, log_path, **kwargs):
# Simulate a successful training run that wrote a checkpoint.
ckpt = self._setup_fake_checkpoint(cfg.output, step=1000)
self._setup_fake_checkpoint(cfg.output, step=1000)
return 0

with patch("tether.finetune.run._run_lerobot_training", side_effect=_fake_train), \
Expand Down
48 changes: 48 additions & 0 deletions tests/test_seeding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from __future__ import annotations

import random

import numpy as np
import pytest

from tether.seeding import seed_everything, seeded_subprocess_env


def test_seed_everything_repeats_python_numpy_and_torch():
    """Re-seeding with the same value replays the Python, NumPy and Torch draws."""
    torch = pytest.importorskip("torch")

    def _draw_once():
        # One sample per generator, always in the same order.
        py_sample = random.random()
        np_sample = float(np.random.random())
        torch_sample = float(torch.rand(1).item())
        return (py_sample, np_sample, torch_sample)

    report = seed_everything(123)
    first = _draw_once()

    report_again = seed_everything(123)
    second = _draw_once()

    assert first == second
    assert report["seed"] == 123
    assert report["python"]
    assert report["numpy"]
    assert report["torch"]
    assert report_again["torch"]


def test_seeded_subprocess_env_sets_pythonhashseed(monkeypatch):
    """With no base env given, the process environment is kept and the seed is pinned."""
    monkeypatch.setenv("KEEP_ME", "yes")

    child_env = seeded_subprocess_env(456)

    hash_seed = child_env["PYTHONHASHSEED"]
    inherited = child_env["KEEP_ME"]
    assert hash_seed == "456"
    assert inherited == "yes"


def test_seeded_subprocess_env_preserves_explicit_base_env():
    """An explicit base env is used as-is except for the overwritten hash seed."""
    base = {"EXISTING": "1", "PYTHONHASHSEED": "old"}
    expected = {"EXISTING": "1", "PYTHONHASHSEED": "789"}

    env = seeded_subprocess_env(789, base)

    assert env == expected
Loading