From 00b2c13dcbe5c07dfdbf9421461edc6c48b074a6 Mon Sep 17 00:00:00 2001
From: Chenhan Yu <chenhany@nvidia.com>
Date: Wed, 17 Jun 2026 13:27:55 -0700
Subject: [PATCH 1/2] launcher: package as modelopt_launcher; mcp: call console
 script directly
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

tools/launcher/__init__.py gains PACKAGE_DIR so the directory is an
importable package named modelopt_launcher (via package-dir mapping in
pyproject.toml). common/ and examples/ stay in place — no file moves.

pyproject.toml adds:
  - packages = ["modelopt_launcher"] + package-dir = {"modelopt_launcher": "."}
  - package-data for common/**/* and examples/**/*.{yaml,jinja}
  - console_scripts: modelopt-launcher = "modelopt_launcher.launch:main"

launch.py:
  - imports from modelopt_launcher.{core,slurm_config} (works in both
    uv run and installed console-script modes via editable install)
  - adds _has_modelopt_src flag: skips packaging modelopt source when
    running as an installed console script (cluster container already
    has modelopt pre-installed)
  - adds main() entry point

tools/mcp/modelopt_mcp/bridge.py:
  - deletes _find_launcher_dir() (75-line filesystem walker) and
    _launcher_dir_not_found_response(); 7 corresponding tests removed
  - _find_launcher_examples_dir() simplified to 2 strategies:
    env override → import modelopt_launcher
  - submit_job_impl and _submit_job_dry_run switch from
    ["uv", "run", "launch.py"] with cwd=launcher_dir to
    ["modelopt-launcher"] with no cwd required
  - _resolve_experiment_dir drops the _find_launcher_dir() fallback
  - read_cluster_artifact_impl drops cwd= from nemo subprocess

tools/mcp/pyproject.toml declares modelopt-launcher as a proper
dependency (dev: editable path ../launcher; published: PyPI).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Signed-off-by: Chenhan Yu <chenhany@nvidia.com>
---
 tools/launcher/.gitignore        |   3 +
 tools/launcher/__init__.py       |   6 +-
 tools/launcher/launch.py         |  62 ++++++++++----
 tools/launcher/pyproject.toml    |  15 +++-
 tools/mcp/modelopt_mcp/bridge.py | 138 +++----------------------------
 tools/mcp/pyproject.toml         |  24 +-----
 tools/mcp/tests/test_bridge.py   |  99 ----------------------
 7 files changed, 80 insertions(+), 267 deletions(-)

diff --git a/tools/launcher/.gitignore b/tools/launcher/.gitignore
index 3eb4a49079c..3c0eb2aee0d 100644
--- a/tools/launcher/.gitignore
+++ b/tools/launcher/.gitignore
@@ -13,6 +13,9 @@ local_experiments/
 # uv lock (generated, not portable)
 uv.lock
 
+# Auto-created symlink by launch.py in dev mode
+modules/
+
 # Python cache
 __pycache__/
 
diff --git a/tools/launcher/__init__.py b/tools/launcher/__init__.py
index 11b92d8b771..88e2af4b450 100644
--- a/tools/launcher/__init__.py
+++ b/tools/launcher/__init__.py
@@ -13,4 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""ModelOpt Launcher — submit quantization, training, and evaluation jobs to Slurm clusters."""
+"""modelopt_launcher — installable package exposing launcher scripts and examples."""
+
+import os as _os
+
+PACKAGE_DIR: str = _os.path.dirname(_os.path.abspath(__file__))
diff --git a/tools/launcher/launch.py b/tools/launcher/launch.py
index fdb867f08aa..d95a84eabbf 100644
--- a/tools/launcher/launch.py
+++ b/tools/launcher/launch.py
@@ -33,9 +33,16 @@
 import subprocess  # nosec B404
 import warnings
 
+import modelopt_launcher as _pkg
 import nemo_run as run
-from core import SandboxPipeline, get_default_env, register_factory, run_jobs, set_slurm_config_type
-from slurm_config import SlurmConfig, slurm_factory
+from modelopt_launcher.core import (
+    SandboxPipeline,
+    get_default_env,
+    register_factory,
+    run_jobs,
+    set_slurm_config_type,
+)
+from modelopt_launcher.slurm_config import SlurmConfig, slurm_factory
 
 set_slurm_config_type(SlurmConfig)
 register_factory("slurm_factory", slurm_factory)
@@ -44,33 +51,49 @@
 # Launcher-specific configuration
 # ---------------------------------------------------------------------------
 
-LAUNCHER_DIR = os.path.dirname(os.path.abspath(__file__))
-MODELOPT_ROOT = os.path.dirname(os.path.dirname(LAUNCHER_DIR))
+LAUNCHER_DIR = _pkg.PACKAGE_DIR  # tools/launcher/ (dev or installed)
 
-# Ensure modules/Model-Optimizer symlink exists (points to parent Model-Optimizer root)
-_mo_symlink = os.path.join(LAUNCHER_DIR, "modules", "Model-Optimizer")
-if not os.path.exists(_mo_symlink):
-    os.makedirs(os.path.join(LAUNCHER_DIR, "modules"), exist_ok=True)
-    os.symlink(os.path.relpath(MODELOPT_ROOT, os.path.join(LAUNCHER_DIR, "modules")), _mo_symlink)
+# MODELOPT_ROOT is only available in a dev checkout (tools/launcher/../..):
+# when running as an installed console script the cluster container already
+# has modelopt pre-installed, so we skip packaging it from source.
+MODELOPT_ROOT = os.path.dirname(os.path.dirname(LAUNCHER_DIR))
+_modelopt_src = os.path.join(LAUNCHER_DIR, "modules", "Model-Optimizer", "modelopt")
+_has_modelopt_src = os.path.isdir(_modelopt_src)
+
+# Ensure modules/Model-Optimizer symlink exists so the packager can resolve
+# modules/Model-Optimizer/* patterns in dev mode.
+if _has_modelopt_src:
+    _mo_symlink = os.path.join(LAUNCHER_DIR, "modules", "Model-Optimizer")
+    if not os.path.exists(_mo_symlink):
+        os.makedirs(os.path.join(LAUNCHER_DIR, "modules"), exist_ok=True)
+        os.symlink(
+            os.path.relpath(MODELOPT_ROOT, os.path.join(LAUNCHER_DIR, "modules")), _mo_symlink
+        )
 
 EXPERIMENT_TITLE = "cicd"
 DEFAULT_SLURM_ENV, DEFAULT_LOCAL_ENV = get_default_env(EXPERIMENT_TITLE)
 
-packager = run.PatternPackager(
-    include_pattern=[
+_include_pattern = ["examples/*", "common/*"]
+_relative_path = [LAUNCHER_DIR, LAUNCHER_DIR]
+
+if _has_modelopt_src:
+    _include_pattern = [
         "modules/Megatron-LM/megatron/*",
         "modules/Megatron-LM/examples/*",
         "modules/Megatron-LM/*.py",
         "modules/Model-Optimizer/modelopt/*",
         "modules/Model-Optimizer/modelopt_recipes/*",
         "modules/Model-Optimizer/examples/*",
-        "examples/*",
-        "common/*",
-    ],
-    relative_path=[LAUNCHER_DIR] * 8,
+        *_include_pattern,
+    ]
+    _relative_path = [LAUNCHER_DIR] * 6 + _relative_path
+
+packager = run.PatternPackager(
+    include_pattern=_include_pattern,
+    relative_path=_relative_path,
 )
 
-MODELOPT_SRC_PATH = os.path.join(LAUNCHER_DIR, "modules/Model-Optimizer/modelopt")
+MODELOPT_SRC_PATH = _modelopt_src if _has_modelopt_src else None
 
 
 # ---------------------------------------------------------------------------
@@ -125,5 +148,10 @@ def launch(
     )
 
 
-if __name__ == "__main__":
+def main() -> None:
+    """Console script entry point for the ``modelopt-launcher`` command."""
     run.cli.main(launch)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/launcher/pyproject.toml b/tools/launcher/pyproject.toml
index 94577098d93..8ba825776d4 100644
--- a/tools/launcher/pyproject.toml
+++ b/tools/launcher/pyproject.toml
@@ -8,8 +8,21 @@ dependencies = [
     "pyyaml",
 ]
 
+[project.scripts]
+modelopt-launcher = "modelopt_launcher.launch:main"
+
 [tool.setuptools]
-py-modules = []
+packages = ["modelopt_launcher"]
+
+[tool.setuptools.package-dir]
+modelopt_launcher = "."
+
+[tool.setuptools.package-data]
+modelopt_launcher = [
+    "common/**/*",
+    "examples/**/*.yaml",
+    "examples/**/*.jinja",
+]
 
 [tool.pytest.ini_options]
 testpaths = ["tests"]
diff --git a/tools/mcp/modelopt_mcp/bridge.py b/tools/mcp/modelopt_mcp/bridge.py
index 45d8661f290..cadb2a4a088 100644
--- a/tools/mcp/modelopt_mcp/bridge.py
+++ b/tools/mcp/modelopt_mcp/bridge.py
@@ -54,97 +54,15 @@
 )
 
 
-def _find_launcher_dir() -> Path | None:
-    """Resolve the modelopt launcher's directory.
-
-    Tries, in order:
-
-    1. ``$MODELOPT_LAUNCHER_DIR`` env override — the deterministic
-       path agents/operators can set when the package layout doesn't
-       match the in-repo expectation.
-    2. ``_THIS_DIR.parent.parent / "launcher"`` — the in-repo layout
-       (``tools/mcp/modelopt_mcp/bridge.py`` → ``tools/launcher/``).
-       Works in dev installs (``pip install -e tools/mcp``) and in
-       direct ``Model-Optimizer`` clones.
-    3. Walk up from ``os.getcwd()`` looking for
-       ``modules/Model-Optimizer/tools/launcher/`` (intern-agent
-       workspace layout) or ``tools/launcher/`` (direct
-       Model-Optimizer checkout) at each ancestor. Stops at the
-       filesystem root.
-
-    Returns ``None`` if no candidate resolves to an existing dir.
-    Callers surface that as a structured ``launcher_dir_not_found``
-    failure with the searched paths in the diagnostic.
-
-    Empirically: when modelopt-mcp is installed via ``uv tool install``
-    (intern-agent's CI install pattern, MR !226), ``_THIS_DIR`` lives
-    inside ``~/.local/share/uv/tools/modelopt-mcp/lib/.../site-packages/``
-    and step 2's parent-walk doesn't find the launcher. Step 3 (cwd
-    walk-up) handles that case — the agent's CWD is always inside
-    its cloned nmm-sandbox workspace where ``modules/Model-Optimizer/
-    tools/launcher/`` does exist.
-    """
-    env = os.environ.get("MODELOPT_LAUNCHER_DIR")
-    if env:
-        p = Path(env)
-        if p.exists():
-            return p
-
-    # In-repo layout (dev install / direct clone)
-    candidate = _THIS_DIR.parent.parent / "launcher"
-    if candidate.exists():
-        return candidate
-
-    # cwd walk-up (uv-tool-install + agent workspace layout)
-    cwd = Path.cwd().resolve()
-    for ancestor in (cwd, *cwd.parents):
-        for rel in ("modules/Model-Optimizer/tools/launcher", "tools/launcher"):
-            candidate = ancestor / rel
-            if candidate.exists():
-                return candidate
-
-    return None
-
-
-def _launcher_dir_not_found_response(*, dry_run: bool = False) -> dict:
-    """Structured failure when ``_find_launcher_dir()`` returns None.
-
-    Centralized so the five callsites that need the launcher dir
-    return a consistent diagnostic listing the searched paths.
-    """
-    env_path = os.environ.get("MODELOPT_LAUNCHER_DIR") or "(unset)"
-    in_repo = _THIS_DIR.parent.parent / "launcher"
-    resp: dict = {
-        "ok": False,
-        "reason": "launcher_dir_not_found",
-        "diagnostic": (
-            "Could not locate tools/launcher/. Searched:\n"
-            f"  1. $MODELOPT_LAUNCHER_DIR={env_path}\n"
-            f"  2. in-repo layout: {in_repo} (exists={in_repo.exists()})\n"
-            f"  3. cwd walk-up from {Path.cwd().resolve()} looking for "
-            "modules/Model-Optimizer/tools/launcher or tools/launcher\n"
-            "Fix: set $MODELOPT_LAUNCHER_DIR to the absolute path of your "
-            "Model-Optimizer checkout's tools/launcher/, or run modelopt-mcp "
-            "from inside such a checkout."
-        ),
-    }
-    if dry_run:
-        resp["dry_run"] = True
-    return resp
-
-
 def _find_launcher_examples_dir() -> Path | None:
     """Resolve the launcher examples directory.
 
     Strategy (in order):
     1. ``MODELOPT_LAUNCHER_EXAMPLES_DIR`` env override — for tests + ad-hoc
        relocations.
-    2. ``../../launcher/examples/`` from this file — the in-repo layout
-       when running from a Model-Optimizer clone (this is the dev mode
-       AND the uvx-from-git mode, since uvx checks out the whole repo).
-    3. Site-packages install: walk back through the modelopt_launcher
-       package to find its examples/ — fallback for the case where the
-       launcher was pip-installed standalone.
+    2. ``import modelopt_launcher`` — works whether the launcher is
+       installed via pip/uvx or in editable dev mode; ``PACKAGE_DIR``
+       points at ``tools/launcher/``, which contains ``examples/``.
 
     Returns None if no candidate exists; callers surface that as a
     structured failure rather than blowing up.
@@ -154,19 +72,10 @@ def _find_launcher_examples_dir() -> Path | None:
         p = Path(env)
         return p if p.exists() else None
 
-    # In-repo: this file is at tools/mcp/modelopt_mcp/bridge.py;
-    # examples are at tools/launcher/examples/.
-    candidate = _THIS_DIR.parent.parent / "launcher" / "examples"
-    if candidate.exists():
-        return candidate
-
-    # Site-packages fallback: the modelopt-launcher package may carry
-    # its examples next to its core.py.
     try:
         import modelopt_launcher
 
-        pkg_dir = Path(modelopt_launcher.__file__).resolve().parent
-        candidate = pkg_dir / "examples"
+        candidate = Path(modelopt_launcher.PACKAGE_DIR) / "examples"
         if candidate.exists():
             return candidate
     except ImportError:
@@ -604,15 +513,14 @@ def submit_job_impl(
     # list never goes through a shell, so quoting bakes literal quote chars
     # into the values that nemo-run's CLI parser sees. Verbatim values
     # carry spaces / special chars safely.
-    argv = ["uv", "run", "launch.py", "--yaml", str(abs_yaml), "--yes"]
+    argv = ["modelopt-launcher", "--yaml", str(abs_yaml), "--yes"]
     if hf_local:
         argv.append(f"hf_local={hf_local}")
     else:
-        # Slurm mode — `launch.py`'s entrypoint does not accept a
-        # `cluster_host` arg (see tools/launcher/launch.py:82). The host
-        # is sourced via the SLURM_HOST env var, consumed by
-        # `slurm_factory(host=os.environ.get("SLURM_HOST", ""))` in
-        # tools/launcher/slurm_config.py. Propagate via env, not argv.
+        # Slurm mode — the launcher entrypoint does not accept a
+        # `cluster_host` arg. The host is sourced via the SLURM_HOST env
+        # var, consumed by slurm_factory in slurm_config.py.
+        # Propagate via env, not argv.
         if cluster_user:
             argv.append(f"user={cluster_user}")
         if identity:
@@ -625,11 +533,6 @@ def submit_job_impl(
     for k, v in (extra_overrides or {}).items():
         argv.append(f"{k}={v}")
 
-    # Run from the launcher dir so it picks up its own ./core.py etc.
-    launcher_dir = _find_launcher_dir()
-    if launcher_dir is None:
-        return _launcher_dir_not_found_response()
-
     # Propagate env so submit-side and status-side agree on NEMORUN_HOME.
     # Without this, `launch.py` defaults NEMORUN_HOME to its own cwd
     # (tools/launcher/), but `_resolve_experiment_dir` later checks the
@@ -655,7 +558,6 @@ def submit_job_impl(
         # B603 false positive — argv is a controlled list built above.
         proc = subprocess.Popen(  # nosec B603
             argv,
-            cwd=str(launcher_dir),
             env=child_env,
             stdout=subprocess.DEVNULL,
             stderr=subprocess.DEVNULL,
@@ -682,7 +584,6 @@ def submit_job_impl(
     try:
         proc = subprocess.run(  # nosec B603
             argv,
-            cwd=str(launcher_dir),
             env=child_env,
             capture_output=True,
             text=True,
@@ -825,7 +726,7 @@ def _submit_job_dry_run(
     # blocks on its confirmation prompt — and since we're capturing
     # stdout (no TTY), the prompt would hang until the 60-second
     # timeout fires.
-    argv = ["uv", "run", "launch.py", "--yaml", str(abs_yaml), "--dryrun", "--yes"]
+    argv = ["modelopt-launcher", "--yaml", str(abs_yaml), "--dryrun", "--yes"]
     if hf_local:
         argv.append(f"hf_local={hf_local}")
     if cluster_user:
@@ -839,10 +740,6 @@ def _submit_job_dry_run(
     for k, v in (extra_overrides or {}).items():
         argv.append(f"{k}={v}")
 
-    launcher_dir = _find_launcher_dir()
-    if launcher_dir is None:
-        return _launcher_dir_not_found_response(dry_run=True)
-
     # Propagate env so the launcher's factory resolution matches what
     # the live submit would see (mainly: SLURM_HOST for slurm-factory
     # default when cluster_host is set).
@@ -864,7 +761,6 @@ def _submit_job_dry_run(
     try:
         proc = subprocess.run(  # nosec B603
             argv,
-            cwd=str(launcher_dir),
             env=child_env,
             capture_output=True,
             text=True,
@@ -948,17 +844,6 @@ def _resolve_experiment_dir(experiment_id: str) -> Path | None:
         candidates.append(Path(nemorun_home) / "experiments" / experiment_id)
     candidates.append(Path.cwd() / "experiments" / experiment_id)
     candidates.append(Path.cwd() / "local_experiments" / experiment_id)
-    # The launcher's own experiments dir — submit_job_impl uses
-    # cwd=str(launcher_dir) for the subprocess, so when NEMORUN_HOME is
-    # unset, launch.py defaults to launcher_dir/experiments/. If the
-    # launcher dir can't be resolved (uv-tool-install without an
-    # override + the agent's cwd doesn't see a launcher checkout),
-    # we skip this fallback rather than crashing — the env-vs-cwd
-    # candidates above still cover the common cases.
-    launcher_dir = _find_launcher_dir()
-    if launcher_dir is not None:
-        candidates.append(launcher_dir / "experiments" / experiment_id)
-        candidates.append(launcher_dir / "local_experiments" / experiment_id)
     for c in candidates:
         if c.exists():
             return c
@@ -1281,12 +1166,9 @@ def read_cluster_artifact_impl(
             experiment_id,
             str(job_idx),
         ]
-        launcher_dir = _find_launcher_dir()
-        cwd = str(launcher_dir) if launcher_dir is not None else None
         try:
             proc = subprocess.run(  # nosec B603 B607
                 argv,
-                cwd=cwd,
                 capture_output=True,
                 text=True,
                 timeout=60,
diff --git a/tools/mcp/pyproject.toml b/tools/mcp/pyproject.toml
index 4df07bb0eef..3e30411baa1 100644
--- a/tools/mcp/pyproject.toml
+++ b/tools/mcp/pyproject.toml
@@ -5,20 +5,7 @@ description = "MCP server exposing ModelOpt launcher operations (submit, status,
 requires-python = ">=3.10"
 dependencies = [
     "mcp>=1.0",
-    # NOTE on modelopt-launcher: `tools/launcher/pyproject.toml` declares
-    # the package name as `modelopt-launcher` but configures
-    # `py-modules = []` — there is NO importable `modelopt_launcher`
-    # Python package on disk. bridge.py invokes the launcher via
-    # `uv run launch.py` (subprocess) from `<repo>/tools/launcher/` as
-    # a sibling directory; it does NOT `import modelopt_launcher`.
-    # Declaring the bare name here would add an unsatisfiable PyPI
-    # dependency for end users installing via
-    # `uvx --from "git+...#subdirectory=tools/mcp" modelopt-mcp`. So we
-    # do NOT declare it. The install relationship is documented in
-    # README.md as a sibling-checkout layout requirement instead. The
-    # uvx-from-git path satisfies this naturally because uvx clones
-    # the whole repo, putting tools/launcher and tools/mcp next to
-    # each other on disk.
+    "modelopt-launcher",
     "pyyaml",
     "pydantic>=2.0",
 ]
@@ -36,13 +23,8 @@ build-backend = "setuptools.build_meta"
 where = ["."]
 include = ["modelopt_mcp*"]
 
-# No [tool.uv.sources] for the launcher — bridge.py uses it via
-# `subprocess.run(["uv", "run", "launch.py", ...], cwd=<repo>/tools/launcher/)`,
-# so the launcher is a file-layout dependency, not a Python import
-# dependency. The uvx-from-git path clones the whole repo so the
-# sibling tools/launcher/ ends up on disk automatically. For dev:
-#   uv pip install -e .
-#   # then run from a clone where ../launcher exists.
+[tool.uv.sources]
+modelopt-launcher = { path = "../launcher", editable = true }
 
 [tool.pytest.ini_options]
 testpaths = ["tests"]
diff --git a/tools/mcp/tests/test_bridge.py b/tools/mcp/tests/test_bridge.py
index a605eabe822..4993c57bfc5 100644
--- a/tools/mcp/tests/test_bridge.py
+++ b/tools/mcp/tests/test_bridge.py
@@ -861,102 +861,3 @@ def fake_run(argv, **kwargs):
     assert result["ok"] is False
     assert result["reason"] == "gh_pr_create_failed"
     assert result["branch_pushed"] is True
-
-
-# ---------------------------------------------------------------------------
-# _find_launcher_dir — env override + walk-up search
-# ---------------------------------------------------------------------------
-
-
-def test_find_launcher_dir_env_override(monkeypatch, tmp_path):
-    """`$MODELOPT_LAUNCHER_DIR` wins over in-repo / cwd-walk."""
-    launcher = tmp_path / "custom-launcher"
-    launcher.mkdir()
-    monkeypatch.setenv("MODELOPT_LAUNCHER_DIR", str(launcher))
-    monkeypatch.chdir(tmp_path)  # ensure no walk-up match interferes
-    assert bridge._find_launcher_dir() == launcher
-
-
-def test_find_launcher_dir_env_override_missing_dir_fallthrough(monkeypatch, tmp_path):
-    """`$MODELOPT_LAUNCHER_DIR` pointing at a nonexistent path → fall through."""
-    monkeypatch.setenv("MODELOPT_LAUNCHER_DIR", str(tmp_path / "ghost"))
-    monkeypatch.chdir(tmp_path)  # no walk-up candidate
-    # In-repo candidate may or may not exist depending on test env; in
-    # the uv-tool-install case + this cwd it won't, so we get None.
-    in_repo = bridge._THIS_DIR.parent.parent / "launcher"
-    result = bridge._find_launcher_dir()
-    if in_repo.exists():
-        assert result == in_repo
-    else:
-        assert result is None
-
-
-def test_find_launcher_dir_walk_up_modules_layout(monkeypatch, tmp_path):
-    """Walk-up finds `modules/Model-Optimizer/tools/launcher/` from a deep cwd."""
-    workspace = tmp_path / "nmm-sandbox"
-    launcher = workspace / "modules" / "Model-Optimizer" / "tools" / "launcher"
-    launcher.mkdir(parents=True)
-    # Agent cwds deep inside the workspace
-    deep_cwd = workspace / "experiments" / "cicd" / "cicd_42"
-    deep_cwd.mkdir(parents=True)
-    monkeypatch.chdir(deep_cwd)
-    monkeypatch.delenv("MODELOPT_LAUNCHER_DIR", raising=False)
-
-    found = bridge._find_launcher_dir()
-    # In-repo `_THIS_DIR.parent.parent / "launcher"` may also exist in dev mode;
-    # accept either, but if it doesn't exist we MUST have walked up to find the
-    # workspace launcher.
-    in_repo = bridge._THIS_DIR.parent.parent / "launcher"
-    if in_repo.exists():
-        assert found == in_repo
-    else:
-        assert found == launcher
-
-
-def test_find_launcher_dir_walk_up_tools_layout(monkeypatch, tmp_path):
-    """Walk-up finds plain `tools/launcher/` (direct Model-Optimizer checkout)."""
-    checkout = tmp_path / "Model-Optimizer-clone"
-    launcher = checkout / "tools" / "launcher"
-    launcher.mkdir(parents=True)
-    deep_cwd = checkout / "examples" / "speculative_decoding"
-    deep_cwd.mkdir(parents=True)
-    monkeypatch.chdir(deep_cwd)
-    monkeypatch.delenv("MODELOPT_LAUNCHER_DIR", raising=False)
-
-    found = bridge._find_launcher_dir()
-    in_repo = bridge._THIS_DIR.parent.parent / "launcher"
-    if in_repo.exists():
-        assert found == in_repo
-    else:
-        assert found == launcher
-
-
-def test_find_launcher_dir_returns_none_when_nothing_found(monkeypatch, tmp_path):
-    """No env, no in-repo, no walk-up candidate → None."""
-    monkeypatch.delenv("MODELOPT_LAUNCHER_DIR", raising=False)
-    isolated = tmp_path / "iso"
-    isolated.mkdir()
-    monkeypatch.chdir(isolated)
-    found = bridge._find_launcher_dir()
-    # In a dev-install test env, the in-repo path may resolve. Accept
-    # either None or that specific path — but NEVER something unrelated.
-    in_repo = bridge._THIS_DIR.parent.parent / "launcher"
-    assert found is None or found == in_repo
-
-
-def test_launcher_dir_not_found_response_shape():
-    """Helper returns the canonical structured-failure dict."""
-    resp = bridge._launcher_dir_not_found_response()
-    assert resp["ok"] is False
-    assert resp["reason"] == "launcher_dir_not_found"
-    assert "Searched" in resp["diagnostic"]
-    assert "MODELOPT_LAUNCHER_DIR" in resp["diagnostic"]
-    assert "dry_run" not in resp
-
-
-def test_launcher_dir_not_found_response_dry_run_flag():
-    """`dry_run=True` adds `dry_run: True` to the response."""
-    resp = bridge._launcher_dir_not_found_response(dry_run=True)
-    assert resp["ok"] is False
-    assert resp["dry_run"] is True
-    assert resp["reason"] == "launcher_dir_not_found"

From 3aafbc83b10472ae8ad04ad7229e16c6459c055c Mon Sep 17 00:00:00 2001
From: Chenhan Yu <chenhany@nvidia.com>
Date: Wed, 17 Jun 2026 16:53:04 -0700
Subject: [PATCH 2/2] address review: fix _has_modelopt_src check, _mo_symlink
 guard, __all__, launcher not found

- launch.py: detect dev checkout via MODELOPT_ROOT/modelopt (actual dir)
  instead of the symlink path which doesn't exist in a clean checkout
- launch.py: initialize _mo_symlink = None before the conditional; guard
  --clean to raise ValueError instead of NameError in installed mode
- __init__.py: add __all__ = ["PACKAGE_DIR"] per coding guidelines
- bridge.py: add _launcher_not_installed() helper; catch FileNotFoundError
  on all three modelopt-launcher subprocess call sites (Docker Popen,
  Slurm run, dry-run run) and return structured ok=false instead of crash

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Signed-off-by: Chenhan Yu <chenhany@nvidia.com>
---
 tools/launcher/__init__.py       |  2 ++
 tools/launcher/launch.py         | 21 ++++++++++++-------
 tools/mcp/modelopt_mcp/bridge.py | 35 +++++++++++++++++++++++++-------
 3 files changed, 44 insertions(+), 14 deletions(-)

diff --git a/tools/launcher/__init__.py b/tools/launcher/__init__.py
index 88e2af4b450..baec2f6944a 100644
--- a/tools/launcher/__init__.py
+++ b/tools/launcher/__init__.py
@@ -17,4 +17,6 @@
 
 import os as _os
 
+__all__ = ["PACKAGE_DIR"]
+
 PACKAGE_DIR: str = _os.path.dirname(_os.path.abspath(__file__))
diff --git a/tools/launcher/launch.py b/tools/launcher/launch.py
index d95a84eabbf..99bd868749b 100644
--- a/tools/launcher/launch.py
+++ b/tools/launcher/launch.py
@@ -53,15 +53,18 @@
 
 LAUNCHER_DIR = _pkg.PACKAGE_DIR  # tools/launcher/ (dev or installed)
 
-# MODELOPT_ROOT is only available in a dev checkout (tools/launcher/../..):
-# when running as an installed console script the cluster container already
-# has modelopt pre-installed, so we skip packaging it from source.
+# Detect dev checkout by probing the actual MODELOPT_ROOT, not the symlink
+# path (which doesn't exist yet in a clean checkout). When running as an
+# installed console script the cluster container already has modelopt
+# pre-installed, so we skip packaging it from source.
 MODELOPT_ROOT = os.path.dirname(os.path.dirname(LAUNCHER_DIR))
-_modelopt_src = os.path.join(LAUNCHER_DIR, "modules", "Model-Optimizer", "modelopt")
-_has_modelopt_src = os.path.isdir(_modelopt_src)
+_has_modelopt_src = os.path.isdir(os.path.join(MODELOPT_ROOT, "modelopt"))
+
+# Symlink path used by the PatternPackager to resolve modules/Model-Optimizer/*
+# patterns; only valid in dev mode. Initialized to None so --clean in installed
+# mode gets a clear error instead of a NameError.
+_mo_symlink: str | None = None
 
-# Ensure modules/Model-Optimizer symlink exists so the packager can resolve
-# modules/Model-Optimizer/* patterns in dev mode.
 if _has_modelopt_src:
     _mo_symlink = os.path.join(LAUNCHER_DIR, "modules", "Model-Optimizer")
     if not os.path.exists(_mo_symlink):
@@ -70,6 +73,8 @@
             os.path.relpath(MODELOPT_ROOT, os.path.join(LAUNCHER_DIR, "modules")), _mo_symlink
         )
 
+_modelopt_src = os.path.join(LAUNCHER_DIR, "modules", "Model-Optimizer", "modelopt")
+
 EXPERIMENT_TITLE = "cicd"
 DEFAULT_SLURM_ENV, DEFAULT_LOCAL_ENV = get_default_env(EXPERIMENT_TITLE)
 
@@ -114,6 +119,8 @@ def launch(
 ) -> None:
     """Launch ModelOpt jobs on Slurm or locally with Docker."""
     if clean:
+        if _mo_symlink is None:
+            raise ValueError("--clean requires a dev checkout; modelopt source not found.")
         examples_dir = os.path.join(_mo_symlink, "examples")
         print(f"Cleaning {examples_dir} with git clean -xdf ...")
         subprocess.run(["git", "clean", "-xdf", "."], cwd=examples_dir, check=True)  # nosec B603 B607
diff --git a/tools/mcp/modelopt_mcp/bridge.py b/tools/mcp/modelopt_mcp/bridge.py
index cadb2a4a088..32290b654c6 100644
--- a/tools/mcp/modelopt_mcp/bridge.py
+++ b/tools/mcp/modelopt_mcp/bridge.py
@@ -83,6 +83,20 @@ def _find_launcher_examples_dir() -> Path | None:
     return None
 
 
+def _launcher_not_installed(argv: list[str]) -> dict:
+    """Structured failure when the ``modelopt-launcher`` binary is not on PATH."""
+    return {
+        "ok": False,
+        "reason": "launcher_not_installed",
+        "diagnostic": (
+            "`modelopt-launcher` was not found on PATH. "
+            "Install it with `pip install modelopt-launcher` or "
+            "`uv tool install modelopt-launcher` and retry."
+        ),
+        "argv": argv,
+    }
+
+
 # ---------------------------------------------------------------------------
 # list_examples
 # ---------------------------------------------------------------------------
@@ -556,13 +570,16 @@ def submit_job_impl(
         # group so an MCP server restart / SIGINT doesn't SIGHUP the
         # in-flight launcher.
         # B603 false positive — argv is a controlled list built above.
-        proc = subprocess.Popen(  # nosec B603
-            argv,
-            env=child_env,
-            stdout=subprocess.DEVNULL,
-            stderr=subprocess.DEVNULL,
-            start_new_session=True,
-        )
+        try:
+            proc = subprocess.Popen(  # nosec B603
+                argv,
+                env=child_env,
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.DEVNULL,
+                start_new_session=True,
+            )
+        except FileNotFoundError:
+            return _launcher_not_installed(argv)
         return {
             "ok": True,
             "executor": "docker",
@@ -590,6 +607,8 @@ def submit_job_impl(
             timeout=300,
             check=False,
         )
+    except FileNotFoundError:
+        return _launcher_not_installed(argv)
     except subprocess.TimeoutExpired as e:
         return {
             "ok": False,
@@ -767,6 +786,8 @@ def _submit_job_dry_run(
             timeout=60,
             check=False,
         )
+    except FileNotFoundError:
+        return {**_launcher_not_installed(argv), "dry_run": True}
     except subprocess.TimeoutExpired as e:
         return {
             "ok": False,