From 6e906adad18ed14c46918c8457f44f0eff184bca Mon Sep 17 00:00:00 2001 From: RomirJ Date: Thu, 11 Jun 2026 01:49:55 -0700 Subject: [PATCH] fix(seed): seed train and eval randomness Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/modal_libero_monolithic_onnx.py | 21 +++++--- src/tether/eval/modal_runner.py | 2 +- src/tether/finetune/run.py | 17 +++++-- src/tether/seeding.py | 66 +++++++++++++++++++++++++ tests/test_eval_modal_runner.py | 6 +-- tests/test_finetune.py | 48 +++++++++++++++++- tests/test_seeding.py | 48 ++++++++++++++++++ 7 files changed, 192 insertions(+), 16 deletions(-) create mode 100644 src/tether/seeding.py create mode 100644 tests/test_seeding.py diff --git a/scripts/modal_libero_monolithic_onnx.py b/scripts/modal_libero_monolithic_onnx.py index 1784d6c..8e9c24a 100644 --- a/scripts/modal_libero_monolithic_onnx.py +++ b/scripts/modal_libero_monolithic_onnx.py @@ -165,6 +165,11 @@ def _compat_load(*args, **kwargs): kwargs.setdefault("weights_only", False) return _orig_torch_load(*args, **kwargs) torch.load = _compat_load + seed = int(seed) + np.random.seed(seed) + torch.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(seed) # ─── Load SmolVLAPolicy (for preprocessor/postprocessor + prepare_* helpers) ── # We use the policy to build ONNX inputs but bypass its forward. This is @@ -228,10 +233,10 @@ def _compat_load(*args, **kwargs): _input_shapes = {inp.name: inp.shape for inp in sess.get_inputs()} if "img_cam1" in _input_names: _cam_keys = ("img_cam1", "img_cam2", "img_cam3", "mask_cam1", "mask_cam2", "mask_cam3") - print(f"[onnx] cam naming: SmolVLA-style (cam1/cam2/cam3)") + print("[onnx] cam naming: SmolVLA-style (cam1/cam2/cam3)") elif "img_base" in _input_names: _cam_keys = ("img_base", "img_wrist_l", "img_wrist_r", "mask_base", "mask_wrist_l", "mask_wrist_r") - print(f"[onnx] cam naming: pi05-style (base/wrist_l/wrist_r)") + print("[onnx] cam naming: pi05-style (base/wrist_l/wrist_r)") else: raise RuntimeError( f"Unknown camera-naming convention in ONNX inputs: {sorted(_input_names)}. " @@ -265,7 +270,6 @@ def _compat_load(*args, **kwargs): print(f"[onnx] lang_seq (detected): {expected_lang_seq}") # ─── LIBERO setup ──────────────────────────────────────────────── - np.random.seed(seed) from libero.libero import benchmark from libero.libero import get_libero_path from libero.libero.envs import OffScreenRenderEnv @@ -278,8 +282,10 @@ def _compat_load(*args, **kwargs): f"max_steps={max_steps}") def _quat2axisangle(quat): - if quat[3] > 1.0: quat[3] = 1.0 - elif quat[3] < -1.0: quat[3] = -1.0 + if quat[3] > 1.0: + quat[3] = 1.0 + elif quat[3] < -1.0: + quat[3] = -1.0 den = np.sqrt(1.0 - quat[3] * quat[3]) if math.isclose(den, 0.0): return np.zeros(3) @@ -594,6 +600,7 @@ def main( tasks: str = "0", suite: str = "libero_10", onnx_subdir: str = "smolvla_libero_monolithic", + seed: int = 7, ): """ --num-episodes N: episodes per task (native used 5) @@ -601,6 +608,7 @@ def main( --tasks "0,1,2,3,4" N=25 matching native run --tasks "all" all 10 tasks --onnx-subdir subfolder under /onnx_out/ (default smolvla_libero_monolithic) + --seed RNG seed for LIBERO envs, NumPy, and Torch noise """ if tasks == "all": task_list = None @@ -613,13 +621,14 @@ def main( task_suite_name=suite, task_indices=task_list, onnx_subdir=onnx_subdir, + seed=seed, ) print("\n=== RESULT ===") # Early-return failure path (e.g., ONNX missing on volume) — surface # the status + reason so operators don't see opaque '?' counts. # Caught by 2026-04-25 eval-as-a-service Modal smoke validation. if r.get("status") == "fail": - print(f" status: FAIL") + print(" status: FAIL") print(f" reason: {r.get('reason', '(no reason)')}") return print(f" success_rate: {r.get('success_rate_pct', '?')}%") diff --git a/src/tether/eval/modal_runner.py b/src/tether/eval/modal_runner.py index 3c4c53b..511126a 100644 --- a/src/tether/eval/modal_runner.py +++ b/src/tether/eval/modal_runner.py @@ -27,7 +27,6 @@ """ from __future__ import annotations -import json import logging import re import shutil @@ -187,6 +186,7 @@ def _invoke_one_suite( modal_binary, "run", script_path, "--suite", suite, "--num-episodes", str(num_episodes), + "--seed", str(seed), "--tasks", "all", ] t0 = time.perf_counter() diff --git a/src/tether/finetune/run.py b/src/tether/finetune/run.py index 55d9da8..5f6d2f1 100644 --- a/src/tether/finetune/run.py +++ b/src/tether/finetune/run.py @@ -17,9 +17,9 @@ import subprocess import time from pathlib import Path -from typing import Any from tether.finetune.config import FinetuneConfig, FinetuneResult +from tether.seeding import seed_everything, seeded_subprocess_env logger = logging.getLogger(__name__) @@ -155,7 +155,7 @@ def _build_lerobot_command(cfg: FinetuneConfig) -> list[str]: "lerobot-train", f"--policy.type={policy_type}", f"--policy.repo_id={repo_id}", - f"--policy.push_to_hub=false", + "--policy.push_to_hub=false", f"--dataset.repo_id={cfg.dataset}", f"--output_dir={lerobot_output}", f"--steps={cfg.num_steps}", @@ -175,7 +175,7 @@ def _build_lerobot_command(cfg: FinetuneConfig) -> list[str]: cmd.append(f"--policy.n_action_steps={cfg.chunk_size}") if cfg.mode == "lora": cmd.extend([ - f"--peft.method_type=lora", + "--peft.method_type=lora", f"--peft.r={cfg.lora_rank}", ]) for k, v in cfg.extra_lerobot_args.items(): @@ -193,10 +193,11 @@ def _run_lerobot_training( and the root logger. Returns the subprocess exit code. """ cmd = _build_lerobot_command(cfg) + env = seeded_subprocess_env(cfg.seed, env) logger.info("[finetune] exec: %s", " ".join(cmd)) log_path.parent.mkdir(parents=True, exist_ok=True) with log_path.open("w") as log: - log.write(f"# tether finetune — lerobot-train invocation\n") + log.write("# tether finetune — lerobot-train invocation\n") log.write(f"# cmd: {' '.join(cmd)}\n\n") log.flush() proc = subprocess.Popen( @@ -376,6 +377,14 @@ def run_finetune(cfg: FinetuneConfig, *, hooks=None) -> FinetuneResult: error="config validation failed:\n " + "\n ".join(errs), ) + seed_report = seed_everything(cfg.seed) + logger.info( + "[finetune] seeded process: seed=%d torch=%s cuda=%s", + cfg.seed, + seed_report["torch"], + seed_report["cuda"], + ) + # Pre-flight validation (v0.5) — catches top customer pains before # any GPU time. Dry-run + skip flags supported. if not cfg.skip_preflight: diff --git a/src/tether/seeding.py b/src/tether/seeding.py new file mode 100644 index 0000000..651b790 --- /dev/null +++ b/src/tether/seeding.py @@ -0,0 +1,66 @@ +"""Shared process seeding helpers for training and evaluation paths.""" +from __future__ import annotations + +import os +import random +from collections.abc import Mapping +from typing import Any + +import numpy as np + + +def seed_everything(seed: int, *, deterministic_torch: bool = False) -> dict[str, Any]: + """Seed Python, NumPy, and Torch when available. + + ``PYTHONHASHSEED`` only affects newly spawned Python interpreters after + process start, but setting it here keeps child training processes aligned + with the requested run seed. + """ + seed_int = int(seed) + os.environ["PYTHONHASHSEED"] = str(seed_int) + random.seed(seed_int) + np.random.seed(seed_int) + + report: dict[str, Any] = { + "seed": seed_int, + "python": True, + "numpy": True, + "torch": False, + "cuda": False, + "deterministic_torch": deterministic_torch, + } + + try: + import torch + except ImportError: + return report + + torch.manual_seed(seed_int) + report["torch"] = True + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(seed_int) + report["cuda"] = True + + if deterministic_torch: + try: + torch.use_deterministic_algorithms(True, warn_only=True) + except TypeError: + torch.use_deterministic_algorithms(True) + if hasattr(torch.backends, "cudnn"): + torch.backends.cudnn.benchmark = False + torch.backends.cudnn.deterministic = True + + return report + + +def seeded_subprocess_env( + seed: int, + base_env: Mapping[str, str] | None = None, +) -> dict[str, str]: + """Return an environment with ``PYTHONHASHSEED`` pinned to ``seed``.""" + env = dict(os.environ if base_env is None else base_env) + env["PYTHONHASHSEED"] = str(int(seed)) + return env + + +__all__ = ["seed_everything", "seeded_subprocess_env"] diff --git a/tests/test_eval_modal_runner.py b/tests/test_eval_modal_runner.py index 43c9cf4..03c63b2 100644 --- a/tests/test_eval_modal_runner.py +++ b/tests/test_eval_modal_runner.py @@ -2,13 +2,11 @@ from __future__ import annotations import subprocess -from pathlib import Path import pytest -from tether.eval.libero import EpisodeResult, LiberoSuiteConfig +from tether.eval.libero import LiberoSuiteConfig from tether.eval.modal_runner import ( - DEFAULT_MODAL_SCRIPT, TASK_SUITE_MAX_STEPS, ModalInvocationResult, ModalNotInstalledError, @@ -291,6 +289,8 @@ def _spy_invoker(cmd, timeout_s): assert "libero_object" in cmd assert "--num-episodes" in cmd assert "5" in cmd + assert "--seed" in cmd + assert "42" in cmd assert "--tasks" in cmd assert "all" in cmd diff --git a/tests/test_finetune.py b/tests/test_finetune.py index 255ba1c..3cac056 100644 --- a/tests/test_finetune.py +++ b/tests/test_finetune.py @@ -13,10 +13,11 @@ import pytest -from tether.finetune import FinetuneConfig, FinetuneResult, run_finetune +from tether.finetune import FinetuneConfig, run_finetune from tether.finetune.run import ( _build_lerobot_command, _locate_checkpoint, + _run_lerobot_training, _validate_config, ) @@ -132,6 +133,7 @@ def test_basic_command_shape(self, tmp_path): assert "--steps=5000" in joined assert "--batch_size=16" in joined assert "--optimizer.lr=0.0002" in joined + assert "--seed=42" in joined assert "--peft.method_type=lora" in joined assert "--peft.r=32" in joined # precision is NOT a top-level lerobot 0.5.1 flag — should not appear @@ -225,6 +227,48 @@ def test_config_failure_aborts(self, tmp_path): mock_train.assert_not_called() assert "base is required" in (result.error or "") + def test_run_finetune_seeds_before_training(self, tmp_path): + cfg = self._cfg(tmp_path, skip_export=True, seed=123) + events = [] + + def _fake_seed(seed): + events.append(("seed", seed)) + return {"torch": True, "cuda": False} + + def _fake_train(cfg, log_path, **kwargs): + events.append(("train", cfg.seed)) + self._setup_fake_checkpoint(cfg.output) + return 0 + + with patch("tether.finetune.run.seed_everything", side_effect=_fake_seed), \ + patch("tether.finetune.run._run_lerobot_training", side_effect=_fake_train): + result = run_finetune(cfg) + + assert result.status == "ok" + assert events[:2] == [("seed", 123), ("train", 123)] + + def test_lerobot_subprocess_sets_pythonhashseed(self, tmp_path): + cfg = self._cfg(tmp_path, seed=987) + + class _Proc: + stdout = iter(["training\n"]) + returncode = 0 + + def wait(self): + return None + + with patch("subprocess.Popen", return_value=_Proc()) as popen: + rc = _run_lerobot_training( + cfg, + tmp_path / "training_log.jsonl", + env={"EXISTING": "1"}, + ) + + assert rc == 0 + proc_env = popen.call_args.kwargs["env"] + assert proc_env["PYTHONHASHSEED"] == "987" + assert proc_env["EXISTING"] == "1" + def test_training_failure_surfaces_rc(self, tmp_path): cfg = self._cfg(tmp_path) with patch("tether.finetune.run._run_lerobot_training", return_value=42): @@ -239,7 +283,7 @@ def test_successful_training_plus_export(self, tmp_path): def _fake_train(cfg, log_path, **kwargs): # Simulate a successful training run that wrote a checkpoint. - ckpt = self._setup_fake_checkpoint(cfg.output, step=1000) + self._setup_fake_checkpoint(cfg.output, step=1000) return 0 with patch("tether.finetune.run._run_lerobot_training", side_effect=_fake_train), \ diff --git a/tests/test_seeding.py b/tests/test_seeding.py new file mode 100644 index 0000000..6de89aa --- /dev/null +++ b/tests/test_seeding.py @@ -0,0 +1,48 @@ +from __future__ import annotations + +import random + +import numpy as np +import pytest + +from tether.seeding import seed_everything, seeded_subprocess_env + + +def test_seed_everything_repeats_python_numpy_and_torch(): + torch = pytest.importorskip("torch") + + report = seed_everything(123) + first = ( + random.random(), + float(np.random.random()), + float(torch.rand(1).item()), + ) + + report_again = seed_everything(123) + second = ( + random.random(), + float(np.random.random()), + float(torch.rand(1).item()), + ) + + assert first == second + assert report["seed"] == 123 + assert report["python"] + assert report["numpy"] + assert report["torch"] + assert report_again["torch"] + + +def test_seeded_subprocess_env_sets_pythonhashseed(monkeypatch): + monkeypatch.setenv("KEEP_ME", "yes") + + env = seeded_subprocess_env(456) + + assert env["PYTHONHASHSEED"] == "456" + assert env["KEEP_ME"] == "yes" + + +def test_seeded_subprocess_env_preserves_explicit_base_env(): + env = seeded_subprocess_env(789, {"EXISTING": "1", "PYTHONHASHSEED": "old"}) + + assert env == {"EXISTING": "1", "PYTHONHASHSEED": "789"}