From 9827db84a178a0ca221c7dc4a7d9b62cdf494136 Mon Sep 17 00:00:00 2001
From: DanielDale
Date: Mon, 11 May 2026 20:33:11 +0800
Subject: [PATCH] fix(npu): synchronize stream after backward for dp replicate training

---
 mova/engine/trainer/accelerate/accelerate_trainer.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/mova/engine/trainer/accelerate/accelerate_trainer.py b/mova/engine/trainer/accelerate/accelerate_trainer.py
index 82ec87a..f996012 100644
--- a/mova/engine/trainer/accelerate/accelerate_trainer.py
+++ b/mova/engine/trainer/accelerate/accelerate_trainer.py
@@ -9,6 +9,7 @@
 
 import os, re
 import torch
+import torch_npu
 from tqdm import tqdm
 
 try:
@@ -411,6 +412,8 @@ def train(self):
 
                 loss = loss_dict["loss"]
                 self.accelerator.backward(loss)
+                # Synchronize NPU stream to avoid async ordering issues with DP replicate training.
+                torch_npu.npu.current_stream().synchronize()
 
                 if self.gradient_clip_norm > 0:
                     if self.accelerator.sync_gradients: