From 6e906adad18ed14c46918c8457f44f0eff184bca Mon Sep 17 00:00:00 2001
From: RomirJ <playindus@gmail.com>
Date: Thu, 11 Jun 2026 01:49:55 -0700
Subject: [PATCH] fix(seed): seed train and eval randomness

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 scripts/modal_libero_monolithic_onnx.py | 21 +++++---
 src/tether/eval/modal_runner.py         |  2 +-
 src/tether/finetune/run.py              | 17 +++++--
 src/tether/seeding.py                   | 66 +++++++++++++++++++++++++
 tests/test_eval_modal_runner.py         |  6 +--
 tests/test_finetune.py                  | 48 +++++++++++++++++-
 tests/test_seeding.py                   | 48 ++++++++++++++++++
 7 files changed, 192 insertions(+), 16 deletions(-)
 create mode 100644 src/tether/seeding.py
 create mode 100644 tests/test_seeding.py

diff --git a/scripts/modal_libero_monolithic_onnx.py b/scripts/modal_libero_monolithic_onnx.py
index 1784d6c..8e9c24a 100644
--- a/scripts/modal_libero_monolithic_onnx.py
+++ b/scripts/modal_libero_monolithic_onnx.py
@@ -165,6 +165,11 @@ def _compat_load(*args, **kwargs):
         kwargs.setdefault("weights_only", False)
         return _orig_torch_load(*args, **kwargs)
     torch.load = _compat_load
+    seed = int(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(seed)
 
     # ─── Load SmolVLAPolicy (for preprocessor/postprocessor + prepare_* helpers) ──
     # We use the policy to build ONNX inputs but bypass its forward. This is
@@ -228,10 +233,10 @@ def _compat_load(*args, **kwargs):
     _input_shapes = {inp.name: inp.shape for inp in sess.get_inputs()}
     if "img_cam1" in _input_names:
         _cam_keys = ("img_cam1", "img_cam2", "img_cam3", "mask_cam1", "mask_cam2", "mask_cam3")
-        print(f"[onnx] cam naming: SmolVLA-style (cam1/cam2/cam3)")
+        print("[onnx] cam naming: SmolVLA-style (cam1/cam2/cam3)")
     elif "img_base" in _input_names:
         _cam_keys = ("img_base", "img_wrist_l", "img_wrist_r", "mask_base", "mask_wrist_l", "mask_wrist_r")
-        print(f"[onnx] cam naming: pi05-style (base/wrist_l/wrist_r)")
+        print("[onnx] cam naming: pi05-style (base/wrist_l/wrist_r)")
     else:
         raise RuntimeError(
             f"Unknown camera-naming convention in ONNX inputs: {sorted(_input_names)}. "
@@ -265,7 +270,6 @@ def _compat_load(*args, **kwargs):
     print(f"[onnx]   lang_seq (detected): {expected_lang_seq}")
 
     # ─── LIBERO setup ────────────────────────────────────────────────
-    np.random.seed(seed)
     from libero.libero import benchmark
     from libero.libero import get_libero_path
     from libero.libero.envs import OffScreenRenderEnv
@@ -278,8 +282,10 @@ def _compat_load(*args, **kwargs):
           f"max_steps={max_steps}")
 
     def _quat2axisangle(quat):
-        if quat[3] > 1.0: quat[3] = 1.0
-        elif quat[3] < -1.0: quat[3] = -1.0
+        if quat[3] > 1.0:
+            quat[3] = 1.0
+        elif quat[3] < -1.0:
+            quat[3] = -1.0
         den = np.sqrt(1.0 - quat[3] * quat[3])
         if math.isclose(den, 0.0):
             return np.zeros(3)
@@ -594,6 +600,7 @@ def main(
     tasks: str = "0",
     suite: str = "libero_10",
     onnx_subdir: str = "smolvla_libero_monolithic",
+    seed: int = 7,
 ):
     """
     --num-episodes N: episodes per task (native used 5)
@@ -601,6 +608,7 @@ def main(
     --tasks "0,1,2,3,4"   N=25 matching native run
     --tasks "all"     all 10 tasks
     --onnx-subdir     subfolder under /onnx_out/ (default smolvla_libero_monolithic)
+    --seed            RNG seed for LIBERO envs, NumPy, and Torch noise
     """
     if tasks == "all":
         task_list = None
@@ -613,13 +621,14 @@ def main(
         task_suite_name=suite,
         task_indices=task_list,
         onnx_subdir=onnx_subdir,
+        seed=seed,
     )
     print("\n=== RESULT ===")
     # Early-return failure path (e.g., ONNX missing on volume) — surface
     # the status + reason so operators don't see opaque '?' counts.
     # Caught by 2026-04-25 eval-as-a-service Modal smoke validation.
     if r.get("status") == "fail":
-        print(f"  status: FAIL")
+        print("  status: FAIL")
         print(f"  reason: {r.get('reason', '(no reason)')}")
         return
     print(f"  success_rate: {r.get('success_rate_pct', '?')}%")
diff --git a/src/tether/eval/modal_runner.py b/src/tether/eval/modal_runner.py
index 3c4c53b..511126a 100644
--- a/src/tether/eval/modal_runner.py
+++ b/src/tether/eval/modal_runner.py
@@ -27,7 +27,6 @@
 """
 from __future__ import annotations
 
-import json
 import logging
 import re
 import shutil
@@ -187,6 +186,7 @@ def _invoke_one_suite(
         modal_binary, "run", script_path,
         "--suite", suite,
         "--num-episodes", str(num_episodes),
+        "--seed", str(seed),
         "--tasks", "all",
     ]
     t0 = time.perf_counter()
diff --git a/src/tether/finetune/run.py b/src/tether/finetune/run.py
index 55d9da8..5f6d2f1 100644
--- a/src/tether/finetune/run.py
+++ b/src/tether/finetune/run.py
@@ -17,9 +17,9 @@
 import subprocess
 import time
 from pathlib import Path
-from typing import Any
 
 from tether.finetune.config import FinetuneConfig, FinetuneResult
+from tether.seeding import seed_everything, seeded_subprocess_env
 
 logger = logging.getLogger(__name__)
 
@@ -155,7 +155,7 @@ def _build_lerobot_command(cfg: FinetuneConfig) -> list[str]:
         "lerobot-train",
         f"--policy.type={policy_type}",
         f"--policy.repo_id={repo_id}",
-        f"--policy.push_to_hub=false",
+        "--policy.push_to_hub=false",
         f"--dataset.repo_id={cfg.dataset}",
         f"--output_dir={lerobot_output}",
         f"--steps={cfg.num_steps}",
@@ -175,7 +175,7 @@ def _build_lerobot_command(cfg: FinetuneConfig) -> list[str]:
         cmd.append(f"--policy.n_action_steps={cfg.chunk_size}")
     if cfg.mode == "lora":
         cmd.extend([
-            f"--peft.method_type=lora",
+            "--peft.method_type=lora",
             f"--peft.r={cfg.lora_rank}",
         ])
     for k, v in cfg.extra_lerobot_args.items():
@@ -193,10 +193,11 @@ def _run_lerobot_training(
     and the root logger. Returns the subprocess exit code.
     """
     cmd = _build_lerobot_command(cfg)
+    env = seeded_subprocess_env(cfg.seed, env)
     logger.info("[finetune] exec: %s", " ".join(cmd))
     log_path.parent.mkdir(parents=True, exist_ok=True)
     with log_path.open("w") as log:
-        log.write(f"# tether finetune — lerobot-train invocation\n")
+        log.write("# tether finetune — lerobot-train invocation\n")
         log.write(f"# cmd: {' '.join(cmd)}\n\n")
         log.flush()
         proc = subprocess.Popen(
@@ -376,6 +377,14 @@ def run_finetune(cfg: FinetuneConfig, *, hooks=None) -> FinetuneResult:
             error="config validation failed:\n  " + "\n  ".join(errs),
         )
 
+    seed_report = seed_everything(cfg.seed)
+    logger.info(
+        "[finetune] seeded process: seed=%d torch=%s cuda=%s",
+        cfg.seed,
+        seed_report["torch"],
+        seed_report["cuda"],
+    )
+
     # Pre-flight validation (v0.5) — catches top customer pains before
     # any GPU time. Dry-run + skip flags supported.
     if not cfg.skip_preflight:
diff --git a/src/tether/seeding.py b/src/tether/seeding.py
new file mode 100644
index 0000000..651b790
--- /dev/null
+++ b/src/tether/seeding.py
@@ -0,0 +1,66 @@
+"""Shared process seeding helpers for training and evaluation paths."""
+from __future__ import annotations
+
+import os
+import random
+from collections.abc import Mapping
+from typing import Any
+
+import numpy as np
+
+
+def seed_everything(seed: int, *, deterministic_torch: bool = False) -> dict[str, Any]:
+    """Seed Python ``random``, NumPy and (if importable) Torch; return a report dict.
+
+    Raises ``ValueError`` (from NumPy) before any other state is touched when ``seed``
+    is outside ``[0, 2**32 - 1]``. ``PYTHONHASHSEED`` cannot re-seed this running
+    interpreter; it is exported so child Python processes inherit the run seed.
+    """
+    seed_int = int(seed)
+    np.random.seed(seed_int)  # first: rejects out-of-range seeds before side effects
+    os.environ["PYTHONHASHSEED"] = str(seed_int)
+    random.seed(seed_int)
+
+    report: dict[str, Any] = {
+        "seed": seed_int,
+        "python": True,
+        "numpy": True,
+        "torch": False,
+        "cuda": False,
+        "deterministic_torch": deterministic_torch,
+    }
+
+    try:
+        import torch
+    except ImportError:
+        return report  # Torch is optional: "torch"/"cuda" stay False in the report
+
+    torch.manual_seed(seed_int)
+    report["torch"] = True
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(seed_int)
+        report["cuda"] = True
+
+    if deterministic_torch:
+        try:
+            torch.use_deterministic_algorithms(True, warn_only=True)
+        except TypeError:  # Torch builds whose signature lacks ``warn_only``
+            torch.use_deterministic_algorithms(True)
+        if hasattr(torch.backends, "cudnn"):
+            torch.backends.cudnn.benchmark = False  # autotuner may pick varying kernels
+            torch.backends.cudnn.deterministic = True
+
+    return report
+
+
+def seeded_subprocess_env(
+    seed: int,
+    base_env: Mapping[str, str] | None = None,
+) -> dict[str, str]:
+    """Return a copy of the env with ``PYTHONHASHSEED`` set to ``seed`` mod 2**32."""
+    env = dict(os.environ if base_env is None else base_env)
+    env["PYTHONHASHSEED"] = str(int(seed) % 2**32)  # CPython accepts 0..2**32-1 only
+    return env
+
+
+__all__ = ["seed_everything", "seeded_subprocess_env"]
diff --git a/tests/test_eval_modal_runner.py b/tests/test_eval_modal_runner.py
index 43c9cf4..03c63b2 100644
--- a/tests/test_eval_modal_runner.py
+++ b/tests/test_eval_modal_runner.py
@@ -2,13 +2,11 @@
 from __future__ import annotations
 
 import subprocess
-from pathlib import Path
 
 import pytest
 
-from tether.eval.libero import EpisodeResult, LiberoSuiteConfig
+from tether.eval.libero import LiberoSuiteConfig
 from tether.eval.modal_runner import (
-    DEFAULT_MODAL_SCRIPT,
     TASK_SUITE_MAX_STEPS,
     ModalInvocationResult,
     ModalNotInstalledError,
@@ -291,6 +289,8 @@ def _spy_invoker(cmd, timeout_s):
     assert "libero_object" in cmd
     assert "--num-episodes" in cmd
     assert "5" in cmd
+    assert "--seed" in cmd
+    assert "42" in cmd
     assert "--tasks" in cmd
     assert "all" in cmd
 
diff --git a/tests/test_finetune.py b/tests/test_finetune.py
index 255ba1c..3cac056 100644
--- a/tests/test_finetune.py
+++ b/tests/test_finetune.py
@@ -13,10 +13,11 @@
 
 import pytest
 
-from tether.finetune import FinetuneConfig, FinetuneResult, run_finetune
+from tether.finetune import FinetuneConfig, run_finetune
 from tether.finetune.run import (
     _build_lerobot_command,
     _locate_checkpoint,
+    _run_lerobot_training,
     _validate_config,
 )
 
@@ -132,6 +133,7 @@ def test_basic_command_shape(self, tmp_path):
         assert "--steps=5000" in joined
         assert "--batch_size=16" in joined
         assert "--optimizer.lr=0.0002" in joined
+        assert "--seed=42" in joined
         assert "--peft.method_type=lora" in joined
         assert "--peft.r=32" in joined
         # precision is NOT a top-level lerobot 0.5.1 flag — should not appear
@@ -225,6 +227,48 @@ def test_config_failure_aborts(self, tmp_path):
         mock_train.assert_not_called()
         assert "base is required" in (result.error or "")
 
+    def test_run_finetune_seeds_before_training(self, tmp_path):
+        cfg = self._cfg(tmp_path, skip_export=True, seed=123)
+        events = []  # ordered (kind, seed) records appended by the two fakes below
+
+        def _fake_seed(seed):
+            events.append(("seed", seed))
+            return {"torch": True, "cuda": False}  # keys the seeding log line reads
+
+        def _fake_train(cfg, log_path, **kwargs):
+            events.append(("train", cfg.seed))
+            self._setup_fake_checkpoint(cfg.output)
+            return 0  # stands in for the subprocess exit code
+
+        with patch("tether.finetune.run.seed_everything", side_effect=_fake_seed), \
+             patch("tether.finetune.run._run_lerobot_training", side_effect=_fake_train):
+            result = run_finetune(cfg)
+
+        assert result.status == "ok"
+        assert events[:2] == [("seed", 123), ("train", 123)]  # seeding came first
+
+    def test_lerobot_subprocess_sets_pythonhashseed(self, tmp_path):
+        cfg = self._cfg(tmp_path, seed=987)
+
+        class _Proc:  # stand-in for the object subprocess.Popen would return
+            stdout = iter(["training\n"])  # one line of fake training output
+            returncode = 0
+
+            def wait(self):
+                return None
+
+        with patch("subprocess.Popen", return_value=_Proc()) as popen:
+            rc = _run_lerobot_training(
+                cfg,
+                tmp_path / "training_log.jsonl",
+                env={"EXISTING": "1"},
+            )
+
+        assert rc == 0
+        proc_env = popen.call_args.kwargs["env"]  # env= keyword handed to Popen
+        assert proc_env["PYTHONHASHSEED"] == "987"
+        assert proc_env["EXISTING"] == "1"  # caller-supplied entries survive
+
     def test_training_failure_surfaces_rc(self, tmp_path):
         cfg = self._cfg(tmp_path)
         with patch("tether.finetune.run._run_lerobot_training", return_value=42):
@@ -239,7 +283,7 @@ def test_successful_training_plus_export(self, tmp_path):
 
         def _fake_train(cfg, log_path, **kwargs):
             # Simulate a successful training run that wrote a checkpoint.
-            ckpt = self._setup_fake_checkpoint(cfg.output, step=1000)
+            self._setup_fake_checkpoint(cfg.output, step=1000)
             return 0
 
         with patch("tether.finetune.run._run_lerobot_training", side_effect=_fake_train), \
diff --git a/tests/test_seeding.py b/tests/test_seeding.py
new file mode 100644
index 0000000..6de89aa
--- /dev/null
+++ b/tests/test_seeding.py
@@ -0,0 +1,48 @@
+from __future__ import annotations
+
+import random
+
+import numpy as np
+import pytest
+
+from tether.seeding import seed_everything, seeded_subprocess_env
+
+
+def test_seed_everything_repeats_python_numpy_and_torch():
+    """Re-seeding with the same value must replay the same draw from every RNG."""
+    torch = pytest.importorskip("torch")
+
+    reports, draws = [], []
+    for _ in range(2):  # two passes, both seeded with 123
+        reports.append(seed_everything(123))
+        draws.append(
+            (
+                random.random(),
+                float(np.random.random()),
+                float(torch.rand(1).item()),
+            )
+        )
+    first_report, second_report = reports
+
+    # The draws must match, and the reports must flag each RNG as seeded.
+    assert draws[0] == draws[1]
+    assert first_report["seed"] == 123
+    assert first_report["python"]
+    assert first_report["numpy"]
+    assert first_report["torch"]
+    assert second_report["torch"]
+
+
+def test_seeded_subprocess_env_sets_pythonhashseed(monkeypatch):
+    """Without a base env, the process environment is kept and the seed added."""
+    monkeypatch.setenv("KEEP_ME", "yes")
+
+    child_env = seeded_subprocess_env(456)
+    assert child_env["KEEP_ME"] == "yes"
+    assert child_env["PYTHONHASHSEED"] == "456"
+
+
+def test_seeded_subprocess_env_preserves_explicit_base_env():
+    """An explicit base env is copied as-is except for the overwritten hash seed."""
+    base = {"EXISTING": "1", "PYTHONHASHSEED": "old"}
+    assert seeded_subprocess_env(789, base) == {**base, "PYTHONHASHSEED": "789"}