-
Notifications
You must be signed in to change notification settings - Fork 451
feat(launcher): add Megatron-Bridge quantize/generate/export wrappers #1767
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,105 @@ | ||
| #!/bin/bash | ||
|
|
||
| # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| # Megatron-Bridge export: convert a quantized MCore checkpoint to HuggingFace format. | ||
| # Wraps /opt/Megatron-Bridge/examples/quantization/export.py. | ||
| # Assumes nvcr.io/nvidia/nemo:26.04+ container (megatron-bridge preinstalled at /opt/Megatron-Bridge). | ||
| # | ||
| # Required env: | ||
| # HF_MODEL_ID HF model id used for architecture template + tokenizer. | ||
| # MEGATRON_LOAD_PATH Quantized MCore ckpt dir produced by quantize.sh. | ||
| # Optional env: | ||
| # OUTPUT_DIR Parent dir for export (default: cwd). | ||
| # EXPORT_DIR HF output dir | ||
| # (default: ${OUTPUT_DIR}/<basename(HF_MODEL_ID)>_hf_export). | ||
| # TP, PP, EP, ETP Parallelism degrees (defaults: 1, 1, 1, 1). | ||
| # NOTE: HF exporter does not gather TP-sharded weights — | ||
| # use PP > 1 to shard large models across GPUs. | ||
| # NPROC_PER_NODE GPUs per node for torchrun (default: nvidia-smi GPU count). | ||
| # DTYPE Export dtype (default: bfloat16). One of bfloat16, float16, float32. | ||
| # EXPORT_EXTRA_MODULES "true" to include Medusa / EAGLE / MTP heads. | ||
| # TRUST_REMOTE_CODE "true" to pass --trust-remote-code. | ||
| # | ||
| # Extra positional args ("$@") are forwarded to export.py. | ||
|
|
||
| set -e | ||
|
|
||
| if [[ -z "${HF_MODEL_ID}" ]]; then | ||
| echo "[ERROR] HF_MODEL_ID is required" >&2 | ||
| exit 1 | ||
| fi | ||
| if [[ -z "${MEGATRON_LOAD_PATH}" ]]; then | ||
| echo "[ERROR] MEGATRON_LOAD_PATH is required" >&2 | ||
| exit 1 | ||
| fi | ||
|
|
||
| OUTPUT_DIR="${OUTPUT_DIR:-$(pwd)}" | ||
| MODEL_NAME="$(basename "${HF_MODEL_ID}")" | ||
| EXPORT_DIR="${EXPORT_DIR:-${OUTPUT_DIR}/${MODEL_NAME}_hf_export}" | ||
|
|
||
| TP="${TP:-1}" | ||
| PP="${PP:-1}" | ||
| EP="${EP:-1}" | ||
| ETP="${ETP:-1}" | ||
| DTYPE="${DTYPE:-bfloat16}" | ||
|
|
||
# Resolve the number of torchrun workers per node.
# Precedence: an explicit NPROC_PER_NODE (passed through untouched, so torchrun
# keywords still work), then the Slurm allocation (SLURM_GPUS_ON_NODE), then the
# device count reported by nvidia-smi.
if [[ -z "${NPROC_PER_NODE}" ]]; then
  NPROC_PER_NODE="${SLURM_GPUS_ON_NODE:-$(nvidia-smi --query-gpu=count --format=csv,noheader,nounits | head -n 1)}"
  # The pipeline's exit status is head's, so a missing or failing nvidia-smi
  # would otherwise leave an empty value that only surfaces inside torchrun.
  if ! [[ "${NPROC_PER_NODE}" =~ ^[1-9][0-9]*$ ]]; then
    echo "[ERROR] Could not auto-detect a GPU count (got '${NPROC_PER_NODE}'); set NPROC_PER_NODE explicitly" >&2
    exit 1
  fi
fi
|
|
||
# Multi-node torchrun: derive rendezvous from Slurm env. Falls back to standalone.
#
# build_rdzv_args sets the globals NNODES, NODE_RANK and the RDZV_ARGS array.
#   Reads:  SLURM_NNODES / NNODES, SLURM_NODEID / NODE_RANK, MASTER_ADDR,
#           MASTER_PORT (default 29500), SLURM_NODELIST.
#   Exits:  1 when NNODES > 1 and no master address is set or resolvable,
#           instead of handing torchrun an empty --master-addr.
build_rdzv_args() {
  NNODES="${SLURM_NNODES:-${NNODES:-1}}"
  NODE_RANK="${SLURM_NODEID:-${NODE_RANK:-0}}"
  if [[ "${NNODES}" -gt 1 ]]; then
    if [[ -z "${MASTER_ADDR}" && -n "${SLURM_NODELIST}" ]]; then
      # First hostname of the allocation acts as the rendezvous host.
      MASTER_ADDR=$(scontrol show hostname "${SLURM_NODELIST}" 2>/dev/null | head -n 1)
    fi
    if [[ -z "${MASTER_ADDR}" ]]; then
      echo "[ERROR] MASTER_ADDR is required when NNODES>1 (set MASTER_ADDR or provide a resolvable SLURM_NODELIST)" >&2
      exit 1
    fi
    MASTER_PORT="${MASTER_PORT:-29500}"
    RDZV_ARGS=("--nnodes=${NNODES}" "--node-rank=${NODE_RANK}" \
      "--master-addr=${MASTER_ADDR}" "--master-port=${MASTER_PORT}")
  else
    RDZV_ARGS=("--standalone" "--nnodes=1")
  fi
}
build_rdzv_args
|
|
||
| mkdir -p "${OUTPUT_DIR}" | ||
|
|
||
| EXTRA_FLAGS=() | ||
| [[ "${EXPORT_EXTRA_MODULES:-false}" == "true" ]] && EXTRA_FLAGS+=("--export-extra-modules") | ||
| [[ "${TRUST_REMOTE_CODE:-false}" == "true" ]] && EXTRA_FLAGS+=("--trust-remote-code") | ||
|
|
||
| cd /opt/Megatron-Bridge/examples/quantization | ||
|
|
||
| echo "=== Exporting ${HF_MODEL_ID} (TP=${TP} PP=${PP} EP=${EP} ETP=${ETP}, ${NPROC_PER_NODE} GPUs, dtype=${DTYPE}) ===" | ||
| echo " load <- ${MEGATRON_LOAD_PATH}" | ||
| echo " save -> ${EXPORT_DIR}" | ||
|
|
||
| python -m torch.distributed.run --nproc_per_node "${NPROC_PER_NODE}" "${RDZV_ARGS[@]}" export.py \ | ||
| --hf-model-id "${HF_MODEL_ID}" \ | ||
| --megatron-load-path "${MEGATRON_LOAD_PATH}" \ | ||
| --export-dir "${EXPORT_DIR}" \ | ||
| --tp "${TP}" \ | ||
| --pp "${PP}" \ | ||
| --ep "${EP}" \ | ||
| --etp "${ETP}" \ | ||
| --dtype "${DTYPE}" \ | ||
| "${EXTRA_FLAGS[@]}" \ | ||
| "$@" | ||
|
|
||
| ls "${EXPORT_DIR}" | ||
| if [[ -f "${EXPORT_DIR}/hf_quant_config.json" ]]; then | ||
| cat "${EXPORT_DIR}/hf_quant_config.json" | ||
| fi | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,88 @@ | ||
| #!/bin/bash | ||
|
|
||
| # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| # Megatron-Bridge PTQ generation: load a quantized MCore checkpoint and run text generation. | ||
| # Wraps /opt/Megatron-Bridge/examples/quantization/ptq_generate.py. | ||
| # Assumes nvcr.io/nvidia/nemo:26.04+ container (megatron-bridge preinstalled at /opt/Megatron-Bridge). | ||
| # | ||
| # Required env: | ||
| # HF_MODEL_ID HF model id used for tokenizer and architecture template. | ||
| # MEGATRON_LOAD_PATH Quantized MCore ckpt dir produced by quantize.sh. | ||
| # Optional env: | ||
| # TP, PP, EP, ETP Parallelism degrees (defaults: 1, 1, 1, 1). | ||
| # NPROC_PER_NODE GPUs per node for torchrun (default: nvidia-smi GPU count). | ||
| # PROMPTS |-separated input prompts. | ||
| # OSL Output sequence length (default: 32). | ||
| # TRUST_REMOTE_CODE "true" to pass --trust-remote-code. | ||
| # | ||
| # Extra positional args ("$@") are forwarded to ptq_generate.py. | ||
|
|
||
| set -e | ||
|
|
||
| if [[ -z "${HF_MODEL_ID}" ]]; then | ||
| echo "[ERROR] HF_MODEL_ID is required" >&2 | ||
| exit 1 | ||
| fi | ||
| if [[ -z "${MEGATRON_LOAD_PATH}" ]]; then | ||
| echo "[ERROR] MEGATRON_LOAD_PATH is required" >&2 | ||
| exit 1 | ||
| fi | ||
|
|
||
| TP="${TP:-1}" | ||
| PP="${PP:-1}" | ||
| EP="${EP:-1}" | ||
| ETP="${ETP:-1}" | ||
| OSL="${OSL:-32}" | ||
|
|
||
# Resolve the number of torchrun workers per node.
# Precedence: an explicit NPROC_PER_NODE (passed through untouched, so torchrun
# keywords still work), then the Slurm allocation (SLURM_GPUS_ON_NODE), then the
# device count reported by nvidia-smi.
if [[ -z "${NPROC_PER_NODE}" ]]; then
  NPROC_PER_NODE="${SLURM_GPUS_ON_NODE:-$(nvidia-smi --query-gpu=count --format=csv,noheader,nounits | head -n 1)}"
  # The pipeline's exit status is head's, so a missing or failing nvidia-smi
  # would otherwise leave an empty value that only surfaces inside torchrun.
  if ! [[ "${NPROC_PER_NODE}" =~ ^[1-9][0-9]*$ ]]; then
    echo "[ERROR] Could not auto-detect a GPU count (got '${NPROC_PER_NODE}'); set NPROC_PER_NODE explicitly" >&2
    exit 1
  fi
fi
|
|
||
| # Multi-node torchrun: derive rendezvous from Slurm env. Falls back to standalone. | ||
| NNODES="${SLURM_NNODES:-${NNODES:-1}}" | ||
| NODE_RANK="${SLURM_NODEID:-${NODE_RANK:-0}}" | ||
| if [[ "${NNODES}" -gt 1 ]]; then | ||
| if [[ -z "${MASTER_ADDR}" && -n "${SLURM_NODELIST}" ]]; then | ||
| MASTER_ADDR=$(scontrol show hostname "${SLURM_NODELIST}" 2>/dev/null | head -n 1) | ||
| fi | ||
| MASTER_PORT="${MASTER_PORT:-29500}" | ||
| RDZV_ARGS=("--nnodes=${NNODES}" "--node-rank=${NODE_RANK}" \ | ||
| "--master-addr=${MASTER_ADDR}" "--master-port=${MASTER_PORT}") | ||
| else | ||
|
Comment on lines
+58
to
+65
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Add a hard check for `MASTER_ADDR` when `NNODES>1`. When `SLURM_NODELIST` is unset or cannot be resolved, `MASTER_ADDR` stays empty and torchrun is launched with an empty `--master-addr`.
Suggested patch:
 if [[ "${NNODES}" -gt 1 ]]; then
   if [[ -z "${MASTER_ADDR}" && -n "${SLURM_NODELIST}" ]]; then
     MASTER_ADDR=$(scontrol show hostname "${SLURM_NODELIST}" 2>/dev/null | head -n 1)
   fi
+  if [[ -z "${MASTER_ADDR}" ]]; then
+    echo "[ERROR] MASTER_ADDR is required when NNODES>1 (or provide SLURM_NODELIST)" >&2
+    exit 1
+  fi
   MASTER_PORT="${MASTER_PORT:-29500}"
   RDZV_ARGS=("--nnodes=${NNODES}" "--node-rank=${NODE_RANK}" \
     "--master-addr=${MASTER_ADDR}" "--master-port=${MASTER_PORT}")
🤖 Prompt for AI Agents |
||
| RDZV_ARGS=("--standalone" "--nnodes=1") | ||
| fi | ||
|
|
||
| EXTRA_FLAGS=() | ||
| [[ "${TRUST_REMOTE_CODE:-false}" == "true" ]] && EXTRA_FLAGS+=("--trust-remote-code") | ||
| [[ -n "${PROMPTS}" ]] && EXTRA_FLAGS+=("--prompts" "${PROMPTS}") | ||
|
|
||
| # ptq_generate.py imports `quantize` as a sibling module — run from its directory. | ||
| cd /opt/Megatron-Bridge/examples/quantization | ||
|
|
||
| echo "=== Generating with ${HF_MODEL_ID} (TP=${TP} PP=${PP} EP=${EP} ETP=${ETP}, ${NPROC_PER_NODE} GPUs) ===" | ||
| echo " load <- ${MEGATRON_LOAD_PATH}" | ||
|
|
||
| exec python -m torch.distributed.run --nproc_per_node "${NPROC_PER_NODE}" "${RDZV_ARGS[@]}" ptq_generate.py \ | ||
| --hf-model-id "${HF_MODEL_ID}" \ | ||
| --megatron-load-path "${MEGATRON_LOAD_PATH}" \ | ||
| --tp "${TP}" \ | ||
| --pp "${PP}" \ | ||
| --ep "${EP}" \ | ||
| --etp "${ETP}" \ | ||
| --osl "${OSL}" \ | ||
| "${EXTRA_FLAGS[@]}" \ | ||
| "$@" | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,114 @@ | ||
| #!/bin/bash | ||
|
|
||
| # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| # Megatron-Bridge PTQ quantization: HuggingFace -> quantized MCore checkpoint. | ||
| # Wraps /opt/Megatron-Bridge/examples/quantization/quantize.py. | ||
| # Assumes nvcr.io/nvidia/nemo:26.04+ container (megatron-bridge preinstalled at /opt/Megatron-Bridge). | ||
| # | ||
| # Required env: HF_MODEL_ID (e.g. meta-llama/Llama-3.2-1B) | ||
| # Optional env: | ||
| # OUTPUT_DIR Parent dir for outputs (default: cwd). | ||
| # EXPORT_QUANT_CFG ModelOpt quant config (default: fp8). Supported: | ||
| # int8_sq, fp8, fp8_blockwise, int4_awq, w4a8_awq, | ||
| # nvfp4, mamba_moe_fp8_aggressive, mamba_moe_fp8_conservative, | ||
| # mamba_moe_nvfp4_aggressive, mamba_moe_nvfp4_conservative. | ||
| # MEGATRON_SAVE_PATH Output MCore ckpt dir | ||
| # (default: ${OUTPUT_DIR}/<basename(HF_MODEL_ID)>_quantized_${EXPORT_QUANT_CFG}). | ||
| # TP, PP, EP, ETP Parallelism degrees (defaults: 1, 1, 1, 1). | ||
| # NPROC_PER_NODE GPUs per node for torchrun (default: nvidia-smi GPU count). | ||
| # CALIB_SIZE Calibration sample count (default: 512). | ||
| # COMPRESS "true" to apply mtq.compress() for real low-bit weights. | ||
| # WEIGHT_ONLY "true" to disable input quantization. | ||
| # EXPORT_KV_CACHE_QUANT "true" to enable FP8 KV-cache quantization. | ||
| # TRUST_REMOTE_CODE "true" to pass --trust-remote-code. | ||
| # PROMPTS |-separated test prompts. | ||
| # DISABLE_HF_DATASETS_FILE_LOCK "true" for read-only HF cache dirs. | ||
| # | ||
| # Extra positional args ("$@") are forwarded to quantize.py. | ||
|
|
||
| set -e | ||
|
|
||
| if [[ -z "${HF_MODEL_ID}" ]]; then | ||
| echo "[ERROR] HF_MODEL_ID is required" >&2 | ||
| exit 1 | ||
| fi | ||
|
|
||
| OUTPUT_DIR="${OUTPUT_DIR:-$(pwd)}" | ||
| EXPORT_QUANT_CFG="${EXPORT_QUANT_CFG:-fp8}" | ||
| MODEL_NAME="$(basename "${HF_MODEL_ID}")" | ||
| MEGATRON_SAVE_PATH="${MEGATRON_SAVE_PATH:-${OUTPUT_DIR}/${MODEL_NAME}_quantized_${EXPORT_QUANT_CFG}}" | ||
|
|
||
| TP="${TP:-1}" | ||
| PP="${PP:-1}" | ||
| EP="${EP:-1}" | ||
| ETP="${ETP:-1}" | ||
| CALIB_SIZE="${CALIB_SIZE:-512}" | ||
|
|
||
| if [[ -z "${NPROC_PER_NODE}" ]]; then | ||
| NPROC_PER_NODE=$(nvidia-smi --query-gpu=count --format=csv,noheader,nounits | head -n 1) | ||
|
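There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
[SUGGESTION] (applies to all three scripts — `export.sh`, `generate.sh`, `quantize.sh`)
On most Slurm clusters cgroup isolation means `nvidia-smi` only reports the GPUs allocated to the job, so the auto-detected count matches the allocation. Without that isolation the count can be larger than what the job was granted. Since the YAMLs already pin `gpus_per_node`, prefer `SLURM_GPUS_ON_NODE` when it is set:
if [[ -z "${NPROC_PER_NODE}" ]]; then
  NPROC_PER_NODE="${SLURM_GPUS_ON_NODE:-$(nvidia-smi --query-gpu=count --format=csv,noheader,nounits | head -n 1)}"
fi
Non-blocking — the current path is correct on cgroup-isolated clusters, which is the common case. |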
||
| fi | ||
|
Comment on lines
+61
to
+63
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Validate `NPROC_PER_NODE` before launching. If auto-detection yields empty/non-numeric output, the script defers failure to torchrun.
Minimal fail-fast guard:
 if [[ -z "${NPROC_PER_NODE}" ]]; then
   NPROC_PER_NODE=$(nvidia-smi --query-gpu=count --format=csv,noheader,nounits | head -n 1)
 fi
+if ! [[ "${NPROC_PER_NODE}" =~ ^[1-9][0-9]*$ ]]; then
+  echo "[ERROR] NPROC_PER_NODE must be a positive integer (set NPROC_PER_NODE explicitly if auto-detect fails)." >&2
+  exit 1
+fi
🤖 Prompt for AI Agents |
||
|
|
||
| # Multi-node torchrun: derive rendezvous from Slurm env. Falls back to standalone. | ||
| NNODES="${SLURM_NNODES:-${NNODES:-1}}" | ||
| NODE_RANK="${SLURM_NODEID:-${NODE_RANK:-0}}" | ||
| if [[ "${NNODES}" -gt 1 ]]; then | ||
| if [[ -z "${MASTER_ADDR}" && -n "${SLURM_NODELIST}" ]]; then | ||
| MASTER_ADDR=$(scontrol show hostname "${SLURM_NODELIST}" 2>/dev/null | head -n 1) | ||
| fi | ||
| MASTER_PORT="${MASTER_PORT:-29500}" | ||
| RDZV_ARGS=("--nnodes=${NNODES}" "--node-rank=${NODE_RANK}" \ | ||
| "--master-addr=${MASTER_ADDR}" "--master-port=${MASTER_PORT}") | ||
| else | ||
|
Comment on lines
+68
to
+75
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Fail fast when multi-node rendezvous address is unresolved. When `NNODES>1` and `MASTER_ADDR` is neither set nor resolvable from `SLURM_NODELIST`, torchrun is launched with an empty `--master-addr`.
Suggested fix:
 if [[ "${NNODES}" -gt 1 ]]; then
   if [[ -z "${MASTER_ADDR}" && -n "${SLURM_NODELIST}" ]]; then
     MASTER_ADDR=$(scontrol show hostname "${SLURM_NODELIST}" 2>/dev/null | head -n 1)
   fi
+  if [[ -z "${MASTER_ADDR}" ]]; then
+    echo "[ERROR] MASTER_ADDR is required for multi-node runs (set MASTER_ADDR or SLURM_NODELIST)." >&2
+    exit 1
+  fi
   MASTER_PORT="${MASTER_PORT:-29500}"
   RDZV_ARGS=("--nnodes=${NNODES}" "--node-rank=${NODE_RANK}" \
     "--master-addr=${MASTER_ADDR}" "--master-port=${MASTER_PORT}")
 else
🤖 Prompt for AI Agents |
||
| RDZV_ARGS=("--standalone" "--nnodes=1") | ||
| fi | ||
|
|
||
| mkdir -p "${OUTPUT_DIR}" | ||
|
|
||
| EXTRA_FLAGS=() | ||
| [[ "${COMPRESS:-false}" == "true" ]] && EXTRA_FLAGS+=("--compress") | ||
| [[ "${WEIGHT_ONLY:-false}" == "true" ]] && EXTRA_FLAGS+=("--weight-only") | ||
| [[ "${EXPORT_KV_CACHE_QUANT:-false}" == "true" ]] && EXTRA_FLAGS+=("--export-kv-cache-quant") | ||
| [[ "${TRUST_REMOTE_CODE:-false}" == "true" ]] && EXTRA_FLAGS+=("--trust-remote-code") | ||
| [[ "${DISABLE_HF_DATASETS_FILE_LOCK:-false}" == "true" ]] && EXTRA_FLAGS+=("--disable-hf-datasets-file-lock") | ||
| [[ -n "${PROMPTS}" ]] && EXTRA_FLAGS+=("--prompts" "${PROMPTS}") | ||
|
|
||
# Workaround for upstream Megatron-Bridge using the deprecated dataset id
# `cnn_dailymail` (no namespace). Newer huggingface_hub requires `namespace/name`
# and rejects the bare form with HfUriError. Rewrite to `abisee/cnn_dailymail`,
# which is the canonical id and is cached under /hf-local/abisee/cnn_dailymail.
_UPSTREAM_QUANT=/opt/Megatron-Bridge/examples/quantization/quantize.py

# patch_cnn_dailymail_id FILE
#   Rewrites load_dataset("cnn_dailymail" to load_dataset("abisee/cnn_dailymail"
#   in FILE. A missing FILE, or one that no longer contains the bare id, is a
#   no-op, so repeated calls are safe. When FILE needs the rewrite but is not
#   writable, a warning is printed rather than skipping silently, so a later
#   HfUriError can be traced back to the unpatched file. Always returns 0
#   unless sed itself fails.
patch_cnn_dailymail_id() {
  local target="$1"
  [[ -f "${target}" ]] || return 0
  grep -q 'load_dataset("cnn_dailymail"' "${target}" || return 0
  if [[ ! -w "${target}" ]]; then
    echo "[WARN] ${target} uses the bare dataset id 'cnn_dailymail' but is not writable; calibration may fail with HfUriError" >&2
    return 0
  fi
  sed -i 's|load_dataset("cnn_dailymail"|load_dataset("abisee/cnn_dailymail"|g' "${target}"
}
patch_cnn_dailymail_id "${_UPSTREAM_QUANT}"
|
|
||
| # quantize.py imports `quantize_utils` as a sibling module — run from its directory. | ||
| cd /opt/Megatron-Bridge/examples/quantization | ||
|
|
||
| echo "=== Quantizing ${HF_MODEL_ID} with ${EXPORT_QUANT_CFG} (TP=${TP} PP=${PP} EP=${EP} ETP=${ETP}, ${NPROC_PER_NODE} GPUs) ===" | ||
| echo " save -> ${MEGATRON_SAVE_PATH}" | ||
|
|
||
| exec python -m torch.distributed.run --nproc_per_node "${NPROC_PER_NODE}" "${RDZV_ARGS[@]}" quantize.py \ | ||
| --hf-model-id "${HF_MODEL_ID}" \ | ||
| --export-quant-cfg "${EXPORT_QUANT_CFG}" \ | ||
| --megatron-save-path "${MEGATRON_SAVE_PATH}" \ | ||
| --tp "${TP}" \ | ||
| --pp "${PP}" \ | ||
| --ep "${EP}" \ | ||
| --etp "${ETP}" \ | ||
| --calib-size "${CALIB_SIZE}" \ | ||
| "${EXTRA_FLAGS[@]}" \ | ||
|
Comment on lines
+104
to
+113
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: #!/bin/bash
set -euo pipefail
# Verify launcher flags vs parser-defined flags in repo sources.
LAUNCHER="tools/launcher/common/megatron_bridge/quantize/quantize.sh"
PARSER="examples/megatron_bridge/quantize.py"
echo "== Launcher forwarded flags =="
rg -n --no-heading --pcre2 '\s--[a-z0-9][a-z0-9-]*' "$LAUNCHER" \
| sed -E 's/.*(--[a-z0-9][a-z0-9-]*).*/\1/' \
| sort -u
echo
echo "== Parser add_argument options =="
rg -n --no-heading --pcre2 'add_argument\(\s*"--[A-Za-z0-9_]+"' "$PARSER" \
| sed -E 's/.*"--([^"]+)".*/--\1/' \
| sort -u
echo
echo "== Potential launcher-only flags (likely mismatch) =="
comm -23 \
<(rg -o --no-line-number --pcre2 '(?<=\s)--[a-z0-9][a-z0-9-]*' "$LAUNCHER" | sort -u) \
<(rg -o --no-line-number --pcre2 '--[A-Za-z0-9_]+' "$PARSER" | sort -u)Repository: NVIDIA/Model-Optimizer Length of output: 898 🏁 Script executed: #!/bin/bash
set -euo pipefail
# Check the actual parser file for all add_argument calls
echo "=== Full parser argument definitions ==="
rg -A 2 'add_argument' examples/megatron_bridge/quantize.py | head -100
echo ""
echo "=== Check for any conditional or dynamic argument handling ==="
rg -n 'EXPORT_QUANT_CFG|megatron_save_path|etp|NPROC' examples/megatron_bridge/quantize.py
echo ""
echo "=== Launcher script context around args ==="
sed -n '100,115p' tools/launcher/common/megatron_bridge/quantize/quantize.shRepository: NVIDIA/Model-Optimizer Length of output: 1818 🏁 Script executed: #!/bin/bash
set -euo pipefail
# Verify the launcher is meant to call quantize.py
echo "=== Check launcher command line ==="
rg -A 5 'exec python' tools/launcher/common/megatron_bridge/quantize/quantize.sh
echo ""
echo "=== Check if quantize.py exists and its location ==="
find . -name "quantize.py" -type f | grep -E "(megatron|bridge)" | head -5
echo ""
echo "=== Verify argument mismatch by checking specific mappings ==="
echo "Launcher argument mapping needed:"
echo " --hf-model-id → --hf_model_name_or_path"
echo " --export-quant-cfg → --quant_cfg"
echo " --megatron-save-path → --export_megatron_path"
echo " --tp → --tp_size"
echo " --pp → --pp_size"
echo " --ep → --ep_size"
echo " --etp → (NO PARSER ARGUMENT)"
echo " --calib-size → --calib_num_samples"
Repository: NVIDIA/Model-Optimizer
Length of output: 822

Launcher CLI arguments are incompatible with the upstream `quantize.py` parser.
The launcher forwards kebab-case arguments (lines 105–113) that do not match the parser's snake_case argument names. For example:
- `--hf-model-id` → `--hf_model_name_or_path`
- `--export-quant-cfg` → `--quant_cfg`
- `--megatron-save-path` → `--export_megatron_path`
- `--tp` / `--pp` / `--ep` → `--tp_size` / `--pp_size` / `--ep_size`
- `--calib-size` → `--calib_num_samples`
- `--etp` → no parser argument
This will cause argparse to fail at runtime with unrecognized argument errors.
Fix: align launcher arguments to parser contract
 exec python -m torch.distributed.run --nproc_per_node "${NPROC_PER_NODE}" "${RDZV_ARGS[@]}" quantize.py \
- --hf-model-id "${HF_MODEL_ID}" \
- --export-quant-cfg "${EXPORT_QUANT_CFG}" \
- --megatron-save-path "${MEGATRON_SAVE_PATH}" \
- --tp "${TP}" \
- --pp "${PP}" \
- --ep "${EP}" \
- --etp "${ETP}" \
- --calib-size "${CALIB_SIZE}" \
+ --hf_model_name_or_path "${HF_MODEL_ID}" \
+ --quant_cfg "${EXPORT_QUANT_CFG}" \
+ --export_megatron_path "${MEGATRON_SAVE_PATH}" \
+ --tp_size "${TP}" \
+ --pp_size "${PP}" \
+ --ep_size "${EP}" \
+ --calib_num_samples "${CALIB_SIZE}" \
 "${EXTRA_FLAGS[@]}" \
Remove `--etp`, which has no matching parser argument.
🤖 Prompt for AI Agents |
||
| "$@" | ||
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,43 @@ | ||||||||
| # Megatron-Bridge HF export for nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16 (NVFP4). | ||||||||
| # | ||||||||
| # Converts the quantized MCore ckpt (from megatron_bridge_ptq.yaml) to a deployable | ||||||||
| # HuggingFace checkpoint. The HF exporter does not gather TP-sharded weights — TP=1 | ||||||||
| # is required, PP is used to shard the model across GPUs. | ||||||||
| # | ||||||||
| # Cluster shape: 2 nodes x 4 GPUs = 8 ranks (PP=8, EP=1, TP=1). | ||||||||
| # | ||||||||
| # Usage: | ||||||||
| # source .env-slurm | ||||||||
| # cd tools/launcher | ||||||||
| # uv run launch.py --yaml examples/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/megatron_bridge_export.yaml --yes | ||||||||
|
|
||||||||
| job_name: Nemotron-3-Nano_megatron_bridge_export | ||||||||
| pipeline: | ||||||||
| skip: false | ||||||||
| allow_to_fail: false | ||||||||
| note: "Megatron-Bridge HF export for Nano-30B-A3B NVFP4: 2 nodes x 4 GPUs, TP=1 PP=8" | ||||||||
|
|
||||||||
| global_vars: | ||||||||
| output_dir: /cicd/megatron-bridge | ||||||||
|
|
||||||||
| task_0: | ||||||||
| script: common/megatron_bridge/export/export.sh | ||||||||
| environment: | ||||||||
| - HF_MODEL_ID: /hf-local/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16 | ||||||||
| - MEGATRON_LOAD_PATH: <<global_vars.output_dir>>/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4-MLM | ||||||||
| - EXPORT_DIR: /cicd/export/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4-HF | ||||||||
| - TP: "1" | ||||||||
| - PP: "8" | ||||||||
| - EP: "1" | ||||||||
| - ETP: "1" | ||||||||
| - DTYPE: bfloat16 | ||||||||
| - TRUST_REMOTE_CODE: "true" | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: # Find and examine the target YAML file
find . -type f -name "megatron_bridge_export.yaml" | head -20Repository: NVIDIA/Model-Optimizer Length of output: 363 🏁 Script executed: # Also search for similar YAML configs to understand patterns
find tools/launcher/examples -type f -name "*.yaml" | head -10Repository: NVIDIA/Model-Optimizer Length of output: 776 🏁 Script executed: # Search for how TRUST_REMOTE_CODE is used in the codebase
rg "TRUST_REMOTE_CODE" -A 2 -B 2Repository: NVIDIA/Model-Optimizer Length of output: 14710 🏁 Script executed: # Search for MLM_MODEL_CFG and QUANT_CFG usage patterns in YAML files
rg "MLM_MODEL_CFG|QUANT_CFG" tools/launcher/examples -A 1 -B 1Repository: NVIDIA/Model-Optimizer Length of output: 5902 🏁 Script executed: # View the specific file mentioned in the review
cat -n tools/launcher/examples/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/megatron_bridge_export.yamlRepository: NVIDIA/Model-Optimizer Length of output: 1879 🏁 Script executed: # Check if there's any documentation or comments about TRUST_REMOTE_CODE in launcher configs
rg "TRUST_REMOTE_CODE" tools/launcher -B 5 -A 2 | head -100Repository: NVIDIA/Model-Optimizer Length of output: 11043 🏁 Script executed: # Look for any README or documentation in the launcher examples directory
find tools/launcher/examples -type f -name "README*" -o -name "*.md"
Repository: NVIDIA/Model-Optimizer
Length of output: 48

Remove the hardcoded `TRUST_REMOTE_CODE: "true"`.
This setting forces remote code execution for all runs, removing user control over a critical security boundary. Per security guidelines, do not hardcode `trust_remote_code=True`. The shell scripts (export.sh, generate.sh, quantize.sh) already support this as an optional flag with a safe default (`false`).
Suggested patch:
   - DTYPE: bfloat16
-  - TRUST_REMOTE_CODE: "true"
 slurm_config:
📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents
Source: Coding guidelines |
||||||||
| slurm_config: | ||||||||
| _factory_: "slurm_factory" | ||||||||
| container: nvcr.io/nvidia/nemo:26.04 | ||||||||
| modelopt_install_path: /opt/venv/lib/python3.12/site-packages/modelopt | ||||||||
| partition: batch | ||||||||
| nodes: 2 | ||||||||
| ntasks_per_node: 1 | ||||||||
| gpus_per_node: 4 | ||||||||
| time: "02:00:00" | ||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,44 @@ | ||
| # Megatron-Bridge generation on quantized nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16. | ||
| # | ||
| # Mirrors the user-provided reference command: | ||
| # torchrun --nproc_per_node 8 examples/quantization/ptq_generate.py \ | ||
| # --megatron-load-path /models/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4-MLM \ | ||
| # --hf-model-id /models/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16 \ | ||
| # --trust-remote-code --tp 8 --ep 8 | ||
| # | ||
| # Cluster shape: 2 nodes x 4 GPUs = 8 ranks. | ||
| # Loads the MCore ckpt written by megatron_bridge_ptq.yaml (same OUTPUT_DIR). | ||
| # | ||
| # Usage: | ||
| # source .env-slurm | ||
| # cd tools/launcher | ||
| # uv run launch.py --yaml examples/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/megatron_bridge_generate.yaml --yes | ||
|
|
||
| job_name: Nemotron-3-Nano_megatron_bridge_generate | ||
| pipeline: | ||
| skip: false | ||
| allow_to_fail: false | ||
| note: "Megatron-Bridge PTQ generate on Nano-30B-A3B (nvfp4): 2 nodes x 4 GPUs, TP=8 EP=8" | ||
|
|
||
| global_vars: | ||
| output_dir: /cicd/megatron-bridge | ||
|
|
||
| task_0: | ||
| script: common/megatron_bridge/generate/generate.sh | ||
| environment: | ||
| - HF_MODEL_ID: /hf-local/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16 | ||
| - MEGATRON_LOAD_PATH: <<global_vars.output_dir>>/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4-MLM | ||
| - TP: "8" | ||
| - PP: "1" | ||
| - EP: "8" | ||
| - ETP: "1" | ||
| - TRUST_REMOTE_CODE: "true" | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: # First, check the actual file content and context
cd tools/launcher/examples/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16 2>/dev/null && pwd && ls -la || echo "Directory may not exist, searching for the file..."Repository: NVIDIA/Model-Optimizer Length of output: 672 🏁 Script executed: # Find the file and examine its content
fd -type f -name "megatron_bridge_generate.yaml" | head -5 | xargs -I {} sh -c 'echo "File: {}" && cat -n {}'Repository: NVIDIA/Model-Optimizer Length of output: 237 🏁 Script executed: # Search for TRUST_REMOTE_CODE usage in Python code to understand how it's consumed
rg "TRUST_REMOTE_CODE" -A 3 -B 3 --type pyRepository: NVIDIA/Model-Optimizer Length of output: 1536 🏁 Script executed: # Check for similar environment variable patterns in other launcher configs
fd -type f -name "*.yaml" -path "*/tools/launcher/examples/*" | xargs grep -l "TRUST_REMOTE_CODE" 2>/dev/nullRepository: NVIDIA/Model-Optimizer Length of output: 239 🏁 Script executed: # Look for how environment variables are used in launcher code
fd -type f -name "*.py" -path "*/tools/launcher/*" | head -10 | xargs ls -lhRepository: NVIDIA/Model-Optimizer Length of output: 1442 🏁 Script executed: # Examine the actual megatron_bridge_generate.yaml file
cat -n tools/launcher/examples/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/megatron_bridge_generate.yamlRepository: NVIDIA/Model-Optimizer Length of output: 1939 🏁 Script executed: # Also check the export version to see if it has the same pattern
cat -n tools/launcher/examples/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/megatron_bridge_export.yamlRepository: NVIDIA/Model-Optimizer Length of output: 1879 🏁 Script executed: # Search for documentation on how launcher configs use environment variables
find tools/launcher -name "*.md" -o -name "README*" | xargs cat 2>/dev/null | head -100Repository: NVIDIA/Model-Optimizer Length of output: 3272 🏁 Script executed: # Check how fakequant_worker.py uses the TRUST_REMOTE_CODE env var
cat -n examples/vllm_serve/fakequant_worker.py | head -100Repository: NVIDIA/Model-Optimizer Length of output: 5192 🏁 Script executed: # Search for other launcher YAML configs with TRUST_REMOTE_CODE or similar patterns
grep -r "TRUST_REMOTE_CODE" tools/launcher --include="*.yaml"Repository: NVIDIA/Model-Optimizer Length of output: 1348 🏁 Script executed: # Check if there's any documentation about environment variable handling in launcher configs
find tools/launcher -type f \( -name "*.py" -o -name "*.md" \) | xargs grep -l "environment\|TRUST_REMOTE_CODE" 2>/dev/null | head -5Repository: NVIDIA/Model-Optimizer Length of output: 243 🏁 Script executed: # Check the launcher scripts to understand how environment variables flow from YAML to actual execution
find tools/launcher/common -name "*.sh" | head -3 | xargs head -30Repository: NVIDIA/Model-Optimizer Length of output: 3937 🏁 Script executed: # Check if there are any configuration override mechanisms in the launcher
cat tools/launcher/core.py | grep -A 10 -B 5 "environment\|override" | head -50Repository: NVIDIA/Model-Optimizer Length of output: 1613 🏁 Script executed: # Look for comments or documentation about setting TRUST_REMOTE_CODE in launcher configs
grep -r "trust.remote\|TRUST_REMOTE" tools/launcher --include="*.md" --include="*.py" -B 3 -A 3Repository: NVIDIA/Model-Optimizer Length of output: 48 🏁 Script executed: # Check the launcher's query.py to see if environment variables can be overridden
find tools/launcher/common -name "query.py" -exec head -50 {} \;Repository: NVIDIA/Model-Optimizer Length of output: 1848 🏁 Script executed: # Check if there's any documentation or comments explaining why TRUST_REMOTE_CODE is hardcoded
grep -B 10 "TRUST_REMOTE_CODE" tools/launcher/examples/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/megatron_bridge_generate.yamlRepository: NVIDIA/Model-Optimizer Length of output: 419 🏁 Script executed: # Check if launcher configs support environment variable overrides at runtime
cat tools/launcher/launch.py | head -100Repository: NVIDIA/Model-Optimizer Length of output: 4138 🏁 Script executed: # Look for documentation on how to override environment variables when running launcher
grep -r "override\|environment" tools/launcher/docs/ tools/launcher/*.md 2>/dev/null | head -20Repository: NVIDIA/Model-Optimizer Length of output: 1889 🏁 Script executed: # Check the shell script that actually runs - see if it respects or overrides TRUST_REMOTE_CODE
cat tools/launcher/common/megatron_bridge/generate/generate.sh 2>/dev/null | head -50Repository: NVIDIA/Model-Optimizer Length of output: 1911 🏁 Script executed: # Look for any comments or inline documentation about the TRUST_REMOTE_CODE setting in similar files
cat tools/launcher/examples/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/megatron_bridge_ptq.yaml | head -40Repository: NVIDIA/Model-Optimizer Length of output: 1567 🏁 Script executed: # Check if these launcher YAML files are used in actual CICD or if they're just examples
grep -r "megatron_bridge_generate" . --include="*.yml" --include="*.yaml" --include="*.py" | grep -v "examples" | head -10Repository: NVIDIA/Model-Optimizer Length of output: 48 🏁 Script executed: # Look at how the environment variables are actually consumed in the shell script
cat -n tools/launcher/common/megatron_bridge/generate/generate.sh | grep -A 30 "TRUST_REMOTE_CODE"Repository: NVIDIA/Model-Optimizer Length of output: 2096 🏁 Script executed: # Check if there are any warnings or comments in the generated shell scripts about trust_remote_code
find tools/launcher/common -name "*.sh" -exec grep -l "trust" {} \;Repository: NVIDIA/Model-Optimizer Length of output: 612 🏁 Script executed: # Look at the launcher configuration documentation to see if there's guidance on setting sensitive env vars
cat tools/launcher/docs/configuration.md 2>/dev/null || echo "File not found"Repository: NVIDIA/Model-Optimizer Length of output: 4251 🏁 Script executed: # Check the CONTRIBUTING.md for any guidance on launcher configs and security
grep -A 10 "launcher\|environment\|TRUST" tools/launcher/docs/contributing.md 2>/dev/null || grep -A 10 "launcher\|environment" CONTRIBUTING.md 2>/dev/null | head -40Repository: NVIDIA/Model-Optimizer Length of output: 277 🏁 Script executed: # Check if these are actual production configs or just examples
find tools/launcher/examples -type f -name "*.yaml" | wc -lRepository: NVIDIA/Model-Optimizer Length of output: 69 🏁 Script executed: # Verify the broader scope - how many YAML configs have this issue
grep -r "TRUST_REMOTE_CODE.*true" tools/launcher --include="*.yaml" | wc -lRepository: NVIDIA/Model-Optimizer Length of output: 68 🏁 Script executed: # Check if the shell scripts document that these env vars can be overridden
cat -n tools/launcher/common/megatron_bridge/quantize/quantize.sh | grep -A 10 -B 5 "TRUST_REMOTE_CODE"Repository: NVIDIA/Model-Optimizer Length of output: 1944 🏁 Script executed: # Look for any warnings or notes about security in the example configs
grep -r "security\|trusted\|remote.code" tools/launcher/examples --include="*.yaml" --include="*.md"Repository: NVIDIA/Model-Optimizer Length of output: 6233 Remove hardcoded These Megatron-Bridge launcher configs in This pattern appears in all 9 NVIDIA Nemotron launcher configs (Nano, Super, Ultra—across export, generate, and ptq tasks). Either remove the environment variable entirely (shell scripts default to 🤖 Prompt for AI AgentsSource: Coding guidelines |
||
| slurm_config: | ||
| _factory_: "slurm_factory" | ||
| container: nvcr.io/nvidia/nemo:26.04 | ||
| modelopt_install_path: /opt/venv/lib/python3.12/site-packages/modelopt | ||
| partition: batch | ||
| nodes: 2 | ||
| ntasks_per_node: 1 | ||
| gpus_per_node: 4 | ||
| time: "01:00:00" | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fail fast when `MASTER_ADDR` is unresolved for multi-node runs.
For `NNODES>1`, Line 73 can end up passing an empty `--master-addr` if `SLURM_NODELIST` resolution fails. Add an explicit guard and exit early with a clear error.
Suggested patch:
 if [[ "${NNODES}" -gt 1 ]]; then
   if [[ -z "${MASTER_ADDR}" && -n "${SLURM_NODELIST}" ]]; then
     MASTER_ADDR=$(scontrol show hostname "${SLURM_NODELIST}" 2>/dev/null | head -n 1)
   fi
+  if [[ -z "${MASTER_ADDR}" ]]; then
+    echo "[ERROR] MASTER_ADDR is required when NNODES>1 (or provide SLURM_NODELIST)" >&2
+    exit 1
+  fi
   MASTER_PORT="${MASTER_PORT:-29500}"
   RDZV_ARGS=("--nnodes=${NNODES}" "--node-rank=${NODE_RANK}" \
     "--master-addr=${MASTER_ADDR}" "--master-port=${MASTER_PORT}")
🤖 Prompt for AI Agents