From 9595c5ee0b9a2d094786de8ab2f241c0fb80214b Mon Sep 17 00:00:00 2001
From: Tsvika Shapira <tsvika@moonmath.ai>
Date: Thu, 18 Jun 2026 19:31:28 +0300
Subject: [PATCH 1/5] feat(ltx2): map LTX-2.3 gate_logits and prompt_adaln LoRA
 keys

Extend the LTX-2 NNX->Diffusers LoRA translate map so adapters that touch the
per-head attention gate (`*.to_gate_logits`, all six attention types) and the
prompt-conditioned AdaLN (`prompt_adaln` / `audio_prompt_adaln`) resolve to
their model layers.

Audited all 13 official Lightricks LTX-2.3-22b IC-LoRAs and the distilled-384
LoRA against the map: the 13 IC-LoRAs (attn + ff only) were already fully
covered, and `ltx-2.3-22b-distilled-lora-384` was the ONLY one with unmatched
keys - 294/1660, exactly the gate_logits + prompt_adaln families added here.
With these entries it fuses 1660/1660; the IC-LoRAs are unaffected.
---
 src/maxdiffusion/loaders/lora_conversion_utils.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/src/maxdiffusion/loaders/lora_conversion_utils.py b/src/maxdiffusion/loaders/lora_conversion_utils.py
index ca0371b76..44b5afdaa 100644
--- a/src/maxdiffusion/loaders/lora_conversion_utils.py
+++ b/src/maxdiffusion/loaders/lora_conversion_utils.py
@@ -716,31 +716,37 @@ def translate_ltx2_nnx_path_to_diffusers_lora(nnx_path_str, scan_layers=False):
       "attn1.to_k": "attn1.to_k",
       "attn1.to_v": "attn1.to_v",
       "attn1.to_out": "attn1.to_out.0",
+      "attn1.to_gate_logits": "attn1.to_gate_logits",  # per-head gating Linear
       # Audio Self Attention (audio_attn1)
       "audio_attn1.to_q": "audio_attn1.to_q",
       "audio_attn1.to_k": "audio_attn1.to_k",
       "audio_attn1.to_v": "audio_attn1.to_v",
       "audio_attn1.to_out": "audio_attn1.to_out.0",
+      "audio_attn1.to_gate_logits": "audio_attn1.to_gate_logits",
       # Audio Cross Attention (audio_attn2)
       "audio_attn2.to_q": "audio_attn2.to_q",
       "audio_attn2.to_k": "audio_attn2.to_k",
       "audio_attn2.to_v": "audio_attn2.to_v",
       "audio_attn2.to_out": "audio_attn2.to_out.0",
+      "audio_attn2.to_gate_logits": "audio_attn2.to_gate_logits",
       # Cross Attention (attn2)
       "attn2.to_q": "attn2.to_q",
       "attn2.to_k": "attn2.to_k",
       "attn2.to_v": "attn2.to_v",
       "attn2.to_out": "attn2.to_out.0",
+      "attn2.to_gate_logits": "attn2.to_gate_logits",
       # Audio to Video Cross Attention
       "audio_to_video_attn.to_q": "audio_to_video_attn.to_q",
       "audio_to_video_attn.to_k": "audio_to_video_attn.to_k",
       "audio_to_video_attn.to_v": "audio_to_video_attn.to_v",
       "audio_to_video_attn.to_out": "audio_to_video_attn.to_out.0",
+      "audio_to_video_attn.to_gate_logits": "audio_to_video_attn.to_gate_logits",
       # Video to Audio Cross Attention
       "video_to_audio_attn.to_q": "video_to_audio_attn.to_q",
       "video_to_audio_attn.to_k": "video_to_audio_attn.to_k",
       "video_to_audio_attn.to_v": "video_to_audio_attn.to_v",
       "video_to_audio_attn.to_out": "video_to_audio_attn.to_out.0",
+      "video_to_audio_attn.to_gate_logits": "video_to_audio_attn.to_gate_logits",
       # Feed Forward
       "ff.net_0": "ff.net.0.proj",
       "ff.net_2": "ff.net.2",
@@ -778,6 +784,13 @@ def translate_ltx2_nnx_path_to_diffusers_lora(nnx_path_str, scan_layers=False):
       "caption_projection.linear_2": "diffusion_model.caption_projection.linear_2",
       "audio_caption_projection.linear_1": "diffusion_model.audio_caption_projection.linear_1",
       "audio_caption_projection.linear_2": "diffusion_model.audio_caption_projection.linear_2",
+      # Prompt-conditioned AdaLN
+      "prompt_adaln.linear": "diffusion_model.prompt_adaln_single.linear",
+      "prompt_adaln.emb.timestep_embedder.linear_1": "diffusion_model.prompt_adaln_single.emb.timestep_embedder.linear_1",
+      "prompt_adaln.emb.timestep_embedder.linear_2": "diffusion_model.prompt_adaln_single.emb.timestep_embedder.linear_2",
+      "audio_prompt_adaln.linear": "diffusion_model.audio_prompt_adaln_single.linear",
+      "audio_prompt_adaln.emb.timestep_embedder.linear_1": "diffusion_model.audio_prompt_adaln_single.emb.timestep_embedder.linear_1",
+      "audio_prompt_adaln.emb.timestep_embedder.linear_2": "diffusion_model.audio_prompt_adaln_single.emb.timestep_embedder.linear_2",
       # Connectors
       "feature_extractor.linear": "text_embedding_projection.aggregate_embed",
   }

From 283c5f3cc89a66b7dc5376f08657a859aeaa9841 Mon Sep 17 00:00:00 2001
From: Tsvika Shapira <tsvika@moonmath.ai>
Date: Thu, 18 Jun 2026 19:31:39 +0300
Subject: [PATCH 2/5] chore(ltx2): clearer LoRA-loader logging; warn on unused
 keys

Logging/diagnostics only - what gets merged is unchanged:
- a missing LoRA weight name returns early with one clear message
  (was two unclear logs);
- a missing `transformer` is logged on its own (was folded into the
  weight-name message);
- a missing `connectors` is now logged (previously had no message);
- warn when a LoRA carries keys outside the two prefixes routed to the merges
  (`diffusion_model.` / `text_embedding_projection.`) - those keys were
  previously dropped without any notice.
---
 .../loaders/ltx2_lora_nnx_loader.py           | 32 ++++++++++++-------
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/src/maxdiffusion/loaders/ltx2_lora_nnx_loader.py b/src/maxdiffusion/loaders/ltx2_lora_nnx_loader.py
index 247b3ba2e..a74c0a157 100644
--- a/src/maxdiffusion/loaders/ltx2_lora_nnx_loader.py
+++ b/src/maxdiffusion/loaders/ltx2_lora_nnx_loader.py
@@ -50,26 +50,34 @@ def load_lora_weights(
     def translate_fn(nnx_path_str):
       return lora_conversion_utils.translate_ltx2_nnx_path_to_diffusers_lora(nnx_path_str, scan_layers=scan_layers)
 
-    h_state_dict = None
-    if hasattr(pipeline, "transformer") and transformer_weight_name:
+    if not transformer_weight_name:
+      max_logging.log("No LoRA weight name provided; skipping LoRA load.")
+      return pipeline
+
+    h_state_dict, _ = lora_loader.lora_state_dict(lora_model_path, weight_name=transformer_weight_name, **kwargs)
+    transformer_state_dict = {}
+    connector_state_dict = {}
+    if hasattr(pipeline, "transformer"):
       max_logging.log(f"Merging LoRA into transformer with rank={rank}")
-      h_state_dict, _ = lora_loader.lora_state_dict(lora_model_path, weight_name=transformer_weight_name, **kwargs)
       # Filter state dict for transformer keys to avoid confusing warnings
       transformer_state_dict = {k: v for k, v in h_state_dict.items() if k.startswith("diffusion_model")}
       merge_fn(pipeline.transformer, transformer_state_dict, rank, scale, translate_fn, dtype=dtype)
     else:
-      max_logging.log("transformer not found or no weight name provided for LoRA.")
+      max_logging.log("transformer not found.")
 
     if hasattr(pipeline, "connectors"):
       max_logging.log(f"Merging LoRA into connectors with rank={rank}")
-      if h_state_dict is None and transformer_weight_name:
-        h_state_dict, _ = lora_loader.lora_state_dict(lora_model_path, weight_name=transformer_weight_name, **kwargs)
+      connector_state_dict = {k: v for k, v in h_state_dict.items() if k.startswith("text_embedding_projection")}
+      merge_fn(pipeline.connectors, connector_state_dict, rank, scale, translate_fn, dtype=dtype)
+    else:
+      max_logging.log("connectors not found.")
 
-      if h_state_dict is not None:
-        # Filter state dict for connector keys to avoid confusing warnings
-        connector_state_dict = {k: v for k, v in h_state_dict.items() if k.startswith("text_embedding_projection")}
-        merge_fn(pipeline.connectors, connector_state_dict, rank, scale, translate_fn, dtype=dtype)
-      else:
-        max_logging.log("Could not load LoRA state dict for connectors.")
+    # Warn about keys that were routed to no merge target.
+    # merge_fn already warns about unmatched keys within each dict, so only the leftovers are reported here.
+    unmatched_keys = set(h_state_dict) - set(transformer_state_dict) - set(connector_state_dict)
+    if unmatched_keys:
+      max_logging.log(
+          f"{len(unmatched_keys)} key(s) in LoRA dictionary routed to no merge target: {unmatched_keys}"
+      )
 
     return pipeline

From 7aa30fa242fe94a31b7d69139c52c2b554496331 Mon Sep 17 00:00:00 2001
From: Tsvika Shapira <tsvika@moonmath.ai>
Date: Thu, 18 Jun 2026 22:30:46 +0300
Subject: [PATCH 3/5] fix(ltx2): add trailing '.' to LoRA key prefix filters

---
 src/maxdiffusion/loaders/ltx2_lora_nnx_loader.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/maxdiffusion/loaders/ltx2_lora_nnx_loader.py b/src/maxdiffusion/loaders/ltx2_lora_nnx_loader.py
index a74c0a157..1fad541c6 100644
--- a/src/maxdiffusion/loaders/ltx2_lora_nnx_loader.py
+++ b/src/maxdiffusion/loaders/ltx2_lora_nnx_loader.py
@@ -60,14 +60,14 @@ def translate_fn(nnx_path_str):
     if hasattr(pipeline, "transformer"):
       max_logging.log(f"Merging LoRA into transformer with rank={rank}")
       # Filter state dict for transformer keys to avoid confusing warnings
-      transformer_state_dict = {k: v for k, v in h_state_dict.items() if k.startswith("diffusion_model")}
+      transformer_state_dict = {k: v for k, v in h_state_dict.items() if k.startswith("diffusion_model.")}
       merge_fn(pipeline.transformer, transformer_state_dict, rank, scale, translate_fn, dtype=dtype)
     else:
       max_logging.log("transformer not found.")
 
     if hasattr(pipeline, "connectors"):
       max_logging.log(f"Merging LoRA into connectors with rank={rank}")
-      connector_state_dict = {k: v for k, v in h_state_dict.items() if k.startswith("text_embedding_projection")}
+      connector_state_dict = {k: v for k, v in h_state_dict.items() if k.startswith("text_embedding_projection.")}
       merge_fn(pipeline.connectors, connector_state_dict, rank, scale, translate_fn, dtype=dtype)
     else:
       max_logging.log("connectors not found.")

From 3d39cb9329cc4e6e3ed2b76969fdcb349aad93da Mon Sep 17 00:00:00 2001
From: Tsvika Shapira <tsvika@moonmath.ai>
Date: Sun, 21 Jun 2026 16:58:27 +0300
Subject: [PATCH 4/5] chore(ltx2): switch example LoRA configs to LTX-2.3

---
 src/maxdiffusion/configs/ltx2_3_video.yml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/maxdiffusion/configs/ltx2_3_video.yml b/src/maxdiffusion/configs/ltx2_3_video.yml
index a57106231..572d1ce94 100644
--- a/src/maxdiffusion/configs/ltx2_3_video.yml
+++ b/src/maxdiffusion/configs/ltx2_3_video.yml
@@ -142,18 +142,18 @@ enable_lora: False
 
 # Distilled LoRA
 # lora_config: {
-#   lora_model_name_or_path: ["Lightricks/LTX-2"],
-#   weight_name: ["ltx-2-19b-distilled-lora-384.safetensors"],
+#   lora_model_name_or_path: ["Lightricks/LTX-2.3"],
+#   weight_name: ["ltx-2.3-22b-distilled-lora-384.safetensors"],
 #   adapter_name: ["distilled-lora-384"],
 #   rank: [384]
 # }
 
 # Standard LoRA
 lora_config: {
-  lora_model_name_or_path: ["Lightricks/LTX-2-19b-LoRA-Camera-Control-Dolly-In"],
-  weight_name: ["ltx-2-19b-lora-camera-control-dolly-in.safetensors"],
-  adapter_name: ["camera-control-dolly-in"],
-  rank: [32]
+  lora_model_name_or_path: ["Lightricks/LTX-2.3-22b-IC-LoRA-Colorization"],
+  weight_name: ["ltx-2.3-22b-ic-lora-colorization-0.9.safetensors"],
+  adapter_name: ["colorization"],
+  rank: [128]
 }
 
 

From 02b7d54431e4068867c0035cc99e10fd8d3f6ea5 Mon Sep 17 00:00:00 2001
From: Tsvika Shapira <tsvika@moonmath.ai>
Date: Sun, 21 Jun 2026 19:12:07 +0300
Subject: [PATCH 5/5] docs(ltx2): explain placeholder rank; drop stray comment

---
 src/maxdiffusion/configs/ltx2_3_video.yml         | 2 ++
 src/maxdiffusion/loaders/lora_conversion_utils.py | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/maxdiffusion/configs/ltx2_3_video.yml b/src/maxdiffusion/configs/ltx2_3_video.yml
index 572d1ce94..a6275cfae 100644
--- a/src/maxdiffusion/configs/ltx2_3_video.yml
+++ b/src/maxdiffusion/configs/ltx2_3_video.yml
@@ -145,6 +145,8 @@ enable_lora: False
 #   lora_model_name_or_path: ["Lightricks/LTX-2.3"],
 #   weight_name: ["ltx-2.3-22b-distilled-lora-384.safetensors"],
 #   adapter_name: ["distilled-lora-384"],
+#   # placeholder - this LoRA mixes per-layer ranks (32/128/256/384);
+#   # the loader reads each layer's actual rank from the LoRA tensor shapes
 #   rank: [384]
 # }
 
diff --git a/src/maxdiffusion/loaders/lora_conversion_utils.py b/src/maxdiffusion/loaders/lora_conversion_utils.py
index 44b5afdaa..97fc63dcd 100644
--- a/src/maxdiffusion/loaders/lora_conversion_utils.py
+++ b/src/maxdiffusion/loaders/lora_conversion_utils.py
@@ -716,7 +716,7 @@ def translate_ltx2_nnx_path_to_diffusers_lora(nnx_path_str, scan_layers=False):
       "attn1.to_k": "attn1.to_k",
       "attn1.to_v": "attn1.to_v",
       "attn1.to_out": "attn1.to_out.0",
-      "attn1.to_gate_logits": "attn1.to_gate_logits",  # per-head gating Linear
+      "attn1.to_gate_logits": "attn1.to_gate_logits",
       # Audio Self Attention (audio_attn1)
       "audio_attn1.to_q": "audio_attn1.to_q",
       "audio_attn1.to_k": "audio_attn1.to_k",