Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions fastdeploy/worker/gpu_model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -1426,6 +1426,52 @@ def load_model(self) -> None:
# 4. Init proposer for speculative method
self._init_speculative_proposer()

# Optional debugging/export hook: dump the loaded weights as .pdparams.
# Usage: FD_SAVE_PDPARAMS=1 FD_SAVE_DIR=/path/to/output python -m fastdeploy...
if os.getenv("FD_SAVE_PDPARAMS", "0") == "1":
    import glob as glob_mod
    import shutil

    # Map the logical local rank (FLAGS_selected_gpus) back to the physical
    # device id listed in CUDA_VISIBLE_DEVICES. Guard against out-of-range
    # indices and empty/blank entries: CUDA_VISIBLE_DEVICES="" makes
    # split(",") yield [""], and a local rank >= len(visible_devices) would
    # raise IndexError. Fall back to the local rank (identity mapping).
    visible_devices = os.getenv("CUDA_VISIBLE_DEVICES", "0").split(",")
    local_rank = int(os.getenv("FLAGS_selected_gpus", "0"))
    if local_rank < len(visible_devices) and visible_devices[local_rank].strip():
        meta_src_id = int(visible_devices[local_rank].strip())
    else:
        meta_src_id = local_rank  # fallback: assume identity mapping
    rank = paddle.distributed.get_rank()

    # Determine save directory: FD_SAVE_DIR or default to model directory
    save_dir = os.getenv("FD_SAVE_DIR", self.fd_config.model_config.model)
    os.makedirs(save_dir, exist_ok=True)

    # Copy config and tokenizer files (only rank 0, so concurrent workers
    # do not race on the same destination files)
    if rank == 0:
        src_dir = self.fd_config.model_config.model
        copy_patterns = [
            "config.json",
            "generation_config.json",
            "tokenizer*",
            "added_tokens.json",
            "special_tokens_map.json",
            "chat_template*",
        ]
        for pattern in copy_patterns:
            for src_file in glob_mod.glob(os.path.join(src_dir, pattern)):
                dst = os.path.join(save_dir, os.path.basename(src_file))
                if not os.path.exists(dst):
                    shutil.copy2(src_file, dst)

    # Save model weights (main model + proposer/MTP model if it exists)
    model_state_dict = self.model.state_dict()
    if hasattr(self, "proposer") and self.proposer is not None and hasattr(self.proposer, "model"):
        proposer_state_dict = self.proposer.model.state_dict()
        model_state_dict.update(proposer_state_dict)
        logger.info(f"Including proposer model weights ({len(proposer_state_dict)} params)")

    # paddle.save can serialize GPU tensors directly; avoid the previous
    # .contiguous().numpy() + paddle.to_tensor() round trip, which made two
    # full GPU->CPU->GPU copies per tensor and risked OOM on large models.
    # Only contiguity needs to be ensured before saving.
    clean_state_dict = {k: v.contiguous() for k, v in model_state_dict.items()}
    model_path = os.path.join(
        save_dir,
        f"model_state.tp{rank}.{meta_src_id}.pdparams",
    )
    paddle.save(clean_state_dict, model_path, safetensors=True)
    del clean_state_dict  # drop references promptly to release memory
    logger.info(f"Saved model state dict to {model_path}")
Comment on lines +1429 to +1473

# Load RL dynamic model
if self.fd_config.load_config.dynamic_load_weight:
from fastdeploy.rl.dynamic_weight_manager import DynamicWeightManager
Expand Down
Loading