From 0e6976a0ae1e21062f697e294042d4edab8c581f Mon Sep 17 00:00:00 2001 From: Mr-Neutr0n <64578610+Mr-Neutr0n@users.noreply.github.com> Date: Wed, 11 Feb 2026 19:51:25 +0530 Subject: [PATCH 1/2] fix: prevent division by zero in trajectory imitation loss at last step --- diffsynth/diffusion/loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/diffsynth/diffusion/loss.py b/diffsynth/diffusion/loss.py index 14fdfd3be..4da195f53 100644 --- a/diffsynth/diffusion/loss.py +++ b/diffsynth/diffusion/loss.py @@ -91,7 +91,7 @@ def align_trajectory(self, pipe: BasePipeline, timesteps_teacher, trajectory_tea progress_id_teacher = torch.argmin((timesteps_teacher - pipe.scheduler.timesteps[progress_id + 1]).abs()) latents_ = trajectory_teacher[progress_id_teacher] - target = (latents_ - inputs_shared["latents"]) / (sigma_ - sigma) + target = (latents_ - inputs_shared["latents"]) / (sigma_ - sigma).clamp(min=1e-6) loss = loss + torch.nn.functional.mse_loss(noise_pred.float(), target.float()) * pipe.scheduler.training_weight(timestep) return loss From b68663426ff59a245a56e7cbf2b6b6bf6e7f879d Mon Sep 17 00:00:00 2001 From: Mr-Neutr0n <64578610+Mr-Neutr0n@users.noreply.github.com> Date: Wed, 11 Feb 2026 21:04:55 +0530 Subject: [PATCH 2/2] fix: preserve sign of denominator in clamp to avoid inverting gradient direction The previous .clamp(min=1e-6) on (sigma_ - sigma) flips the sign when the denominator is negative (which is the typical case since sigmas decrease monotonically). This would invert the target and cause training divergence. Use torch.where(denom >= 0, denom.clamp(min=1e-6), denom.clamp(max=-1e-6)) instead, which keeps the denominator's magnitude at least 1e-6 while preserving its sign. Note that torch.sign(denom) * torch.clamp(denom.abs(), min=1e-6) would NOT work here: torch.sign returns 0 at denom == 0, so the product collapses to 0 and reintroduces the division by zero in exactly the last-step case the first patch set out to fix; torch.where maps denom == 0 to +1e-6 instead.
--- diffsynth/diffusion/loss.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/diffsynth/diffusion/loss.py b/diffsynth/diffusion/loss.py index 4da195f53..065e58909 100644 --- a/diffsynth/diffusion/loss.py +++ b/diffsynth/diffusion/loss.py @@ -91,7 +91,9 @@ def align_trajectory(self, pipe: BasePipeline, timesteps_teacher, trajectory_tea progress_id_teacher = torch.argmin((timesteps_teacher - pipe.scheduler.timesteps[progress_id + 1]).abs()) latents_ = trajectory_teacher[progress_id_teacher] - target = (latents_ - inputs_shared["latents"]) / (sigma_ - sigma).clamp(min=1e-6) + denom = sigma_ - sigma + denom = torch.where(denom >= 0, denom.clamp(min=1e-6), denom.clamp(max=-1e-6)) + target = (latents_ - inputs_shared["latents"]) / denom loss = loss + torch.nn.functional.mse_loss(noise_pred.float(), target.float()) * pipe.scheduler.training_weight(timestep) return loss