From 0e6976a0ae1e21062f697e294042d4edab8c581f Mon Sep 17 00:00:00 2001 From: Mr-Neutr0n <64578610+Mr-Neutr0n@users.noreply.github.com> Date: Wed, 11 Feb 2026 19:51:25 +0530 Subject: [PATCH 1/2] fix: prevent division by zero in trajectory imitation loss at last step --- diffsynth/diffusion/loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/diffsynth/diffusion/loss.py b/diffsynth/diffusion/loss.py index 14fdfd3be..4da195f53 100644 --- a/diffsynth/diffusion/loss.py +++ b/diffsynth/diffusion/loss.py @@ -91,7 +91,7 @@ def align_trajectory(self, pipe: BasePipeline, timesteps_teacher, trajectory_tea progress_id_teacher = torch.argmin((timesteps_teacher - pipe.scheduler.timesteps[progress_id + 1]).abs()) latents_ = trajectory_teacher[progress_id_teacher] - target = (latents_ - inputs_shared["latents"]) / (sigma_ - sigma) + target = (latents_ - inputs_shared["latents"]) / (sigma_ - sigma).clamp(min=1e-6) loss = loss + torch.nn.functional.mse_loss(noise_pred.float(), target.float()) * pipe.scheduler.training_weight(timestep) return loss From b68663426ff59a245a56e7cbf2b6b6bf6e7f879d Mon Sep 17 00:00:00 2001 From: Mr-Neutr0n <64578610+Mr-Neutr0n@users.noreply.github.com> Date: Wed, 11 Feb 2026 21:04:55 +0530 Subject: [PATCH 2/2] fix: preserve sign of denominator in clamp to avoid inverting gradient direction The previous .clamp(min=1e-6) on (sigma_ - sigma) flips the sign when the denominator is negative (which is the typical case since sigmas decrease monotonically). This would invert the target and cause training divergence. Use torch.where(denom >= 0, denom.clamp(min=1e-6), denom.clamp(max=-1e-6)) instead, which keeps the denominator's magnitude at least 1e-6 while preserving its sign. Note that torch.sign(denom) * torch.clamp(denom.abs(), min=1e-6) would NOT work here: torch.sign returns 0 at denom == 0, so the product collapses to 0 and reintroduces the division by zero in exactly the last-step case the first patch set out to fix; torch.where maps denom == 0 to +1e-6 instead.
--- diffsynth/diffusion/loss.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/diffsynth/diffusion/loss.py b/diffsynth/diffusion/loss.py index 4da195f53..065e58909 100644 --- a/diffsynth/diffusion/loss.py +++ b/diffsynth/diffusion/loss.py @@ -91,7 +91,9 @@ def align_trajectory(self, pipe: BasePipeline, timesteps_teacher, trajectory_tea progress_id_teacher = torch.argmin((timesteps_teacher - pipe.scheduler.timesteps[progress_id + 1]).abs()) latents_ = trajectory_teacher[progress_id_teacher] - target = (latents_ - inputs_shared["latents"]) / (sigma_ - sigma).clamp(min=1e-6) + denom = sigma_ - sigma + denom = torch.where(denom >= 0, denom.clamp(min=1e-6), denom.clamp(max=-1e-6)) + target = (latents_ - inputs_shared["latents"]) / denom loss = loss + torch.nn.functional.mse_loss(noise_pred.float(), target.float()) * pipe.scheduler.training_weight(timestep) return loss