From 9595c5ee0b9a2d094786de8ab2f241c0fb80214b Mon Sep 17 00:00:00 2001 From: Tsvika Shapira Date: Thu, 18 Jun 2026 19:31:28 +0300 Subject: [PATCH 1/5] feat(ltx2): map LTX-2.3 gate_logits and prompt_adaln LoRA keys Extend the LTX-2 NNX->Diffusers LoRA translate map so adapters that touch the per-head attention gate (`*.to_gate_logits`, all six attention types) and the prompt-conditioned AdaLN (`prompt_adaln` / `audio_prompt_adaln`) resolve to their model layers. Audited all 13 official Lightricks LTX-2.3-22b IC-LoRAs and the distilled-384 LoRA against the map: the 13 IC-LoRAs (attn + ff only) were already fully covered, and `ltx-2.3-22b-distilled-lora-384` was the ONLY one with unmatched keys - 294/1660, exactly the gate_logits + prompt_adaln families added here. With these entries it fuses 1660/1660; the IC-LoRAs are unaffected. --- src/maxdiffusion/loaders/lora_conversion_utils.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/maxdiffusion/loaders/lora_conversion_utils.py b/src/maxdiffusion/loaders/lora_conversion_utils.py index ca0371b76..44b5afdaa 100644 --- a/src/maxdiffusion/loaders/lora_conversion_utils.py +++ b/src/maxdiffusion/loaders/lora_conversion_utils.py @@ -716,31 +716,37 @@ def translate_ltx2_nnx_path_to_diffusers_lora(nnx_path_str, scan_layers=False): "attn1.to_k": "attn1.to_k", "attn1.to_v": "attn1.to_v", "attn1.to_out": "attn1.to_out.0", + "attn1.to_gate_logits": "attn1.to_gate_logits", # per-head gating Linear # Audio Self Attention (audio_attn1) "audio_attn1.to_q": "audio_attn1.to_q", "audio_attn1.to_k": "audio_attn1.to_k", "audio_attn1.to_v": "audio_attn1.to_v", "audio_attn1.to_out": "audio_attn1.to_out.0", + "audio_attn1.to_gate_logits": "audio_attn1.to_gate_logits", # Audio Cross Attention (audio_attn2) "audio_attn2.to_q": "audio_attn2.to_q", "audio_attn2.to_k": "audio_attn2.to_k", "audio_attn2.to_v": "audio_attn2.to_v", "audio_attn2.to_out": "audio_attn2.to_out.0", + "audio_attn2.to_gate_logits": 
"audio_attn2.to_gate_logits", # Cross Attention (attn2) "attn2.to_q": "attn2.to_q", "attn2.to_k": "attn2.to_k", "attn2.to_v": "attn2.to_v", "attn2.to_out": "attn2.to_out.0", + "attn2.to_gate_logits": "attn2.to_gate_logits", # Audio to Video Cross Attention "audio_to_video_attn.to_q": "audio_to_video_attn.to_q", "audio_to_video_attn.to_k": "audio_to_video_attn.to_k", "audio_to_video_attn.to_v": "audio_to_video_attn.to_v", "audio_to_video_attn.to_out": "audio_to_video_attn.to_out.0", + "audio_to_video_attn.to_gate_logits": "audio_to_video_attn.to_gate_logits", # Video to Audio Cross Attention "video_to_audio_attn.to_q": "video_to_audio_attn.to_q", "video_to_audio_attn.to_k": "video_to_audio_attn.to_k", "video_to_audio_attn.to_v": "video_to_audio_attn.to_v", "video_to_audio_attn.to_out": "video_to_audio_attn.to_out.0", + "video_to_audio_attn.to_gate_logits": "video_to_audio_attn.to_gate_logits", # Feed Forward "ff.net_0": "ff.net.0.proj", "ff.net_2": "ff.net.2", @@ -778,6 +784,13 @@ def translate_ltx2_nnx_path_to_diffusers_lora(nnx_path_str, scan_layers=False): "caption_projection.linear_2": "diffusion_model.caption_projection.linear_2", "audio_caption_projection.linear_1": "diffusion_model.audio_caption_projection.linear_1", "audio_caption_projection.linear_2": "diffusion_model.audio_caption_projection.linear_2", + # Prompt-conditioned AdaLN + "prompt_adaln.linear": "diffusion_model.prompt_adaln_single.linear", + "prompt_adaln.emb.timestep_embedder.linear_1": "diffusion_model.prompt_adaln_single.emb.timestep_embedder.linear_1", + "prompt_adaln.emb.timestep_embedder.linear_2": "diffusion_model.prompt_adaln_single.emb.timestep_embedder.linear_2", + "audio_prompt_adaln.linear": "diffusion_model.audio_prompt_adaln_single.linear", + "audio_prompt_adaln.emb.timestep_embedder.linear_1": "diffusion_model.audio_prompt_adaln_single.emb.timestep_embedder.linear_1", + "audio_prompt_adaln.emb.timestep_embedder.linear_2": 
"diffusion_model.audio_prompt_adaln_single.emb.timestep_embedder.linear_2", # Connectors "feature_extractor.linear": "text_embedding_projection.aggregate_embed", } From 283c5f3cc89a66b7dc5376f08657a859aeaa9841 Mon Sep 17 00:00:00 2001 From: Tsvika Shapira Date: Thu, 18 Jun 2026 19:31:39 +0300 Subject: [PATCH 2/5] chore(ltx2): clearer LoRA-loader logging; warn on unused keys Logging/diagnostics only - what gets merged is unchanged: - a missing LoRA weight name returns early with one clear message (was two unclear logs); - a missing `transformer` is logged on its own (was folded into the weight-name message); - a missing `connectors` is now logged (previously had no message); - warn when a LoRA carries keys outside the two prefixes routed to the merges (`diffusion_model.` / `text_embedding_projection.`) - those keys were previously dropped without any notice. --- .../loaders/ltx2_lora_nnx_loader.py | 32 ++++++++++++------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/src/maxdiffusion/loaders/ltx2_lora_nnx_loader.py b/src/maxdiffusion/loaders/ltx2_lora_nnx_loader.py index 247b3ba2e..a74c0a157 100644 --- a/src/maxdiffusion/loaders/ltx2_lora_nnx_loader.py +++ b/src/maxdiffusion/loaders/ltx2_lora_nnx_loader.py @@ -50,26 +50,34 @@ def load_lora_weights( def translate_fn(nnx_path_str): return lora_conversion_utils.translate_ltx2_nnx_path_to_diffusers_lora(nnx_path_str, scan_layers=scan_layers) - h_state_dict = None - if hasattr(pipeline, "transformer") and transformer_weight_name: + if not transformer_weight_name: + max_logging.log("No LoRA weight name provided; skipping LoRA load.") + return pipeline + + h_state_dict, _ = lora_loader.lora_state_dict(lora_model_path, weight_name=transformer_weight_name, **kwargs) + transformer_state_dict = {} + connector_state_dict = {} + if hasattr(pipeline, "transformer"): max_logging.log(f"Merging LoRA into transformer with rank={rank}") - h_state_dict, _ = lora_loader.lora_state_dict(lora_model_path, 
weight_name=transformer_weight_name, **kwargs) # Filter state dict for transformer keys to avoid confusing warnings transformer_state_dict = {k: v for k, v in h_state_dict.items() if k.startswith("diffusion_model")} merge_fn(pipeline.transformer, transformer_state_dict, rank, scale, translate_fn, dtype=dtype) else: - max_logging.log("transformer not found or no weight name provided for LoRA.") + max_logging.log("transformer not found.") if hasattr(pipeline, "connectors"): max_logging.log(f"Merging LoRA into connectors with rank={rank}") - if h_state_dict is None and transformer_weight_name: - h_state_dict, _ = lora_loader.lora_state_dict(lora_model_path, weight_name=transformer_weight_name, **kwargs) + connector_state_dict = {k: v for k, v in h_state_dict.items() if k.startswith("text_embedding_projection")} + merge_fn(pipeline.connectors, connector_state_dict, rank, scale, translate_fn, dtype=dtype) + else: + max_logging.log("connectors not found.") - if h_state_dict is not None: - # Filter state dict for connector keys to avoid confusing warnings - connector_state_dict = {k: v for k, v in h_state_dict.items() if k.startswith("text_embedding_projection")} - merge_fn(pipeline.connectors, connector_state_dict, rank, scale, translate_fn, dtype=dtype) - else: - max_logging.log("Could not load LoRA state dict for connectors.") + # Warn if there are keys routed to no target. + # the merge_fn warns about unmatched keys in each dict, so we only warn about any leftovers + unmatched_keys = set(h_state_dict) - set(transformer_state_dict) - set(connector_state_dict) + if unmatched_keys: + max_logging.log( + f"{len(unmatched_keys)} key(s) in LoRA dictionary routed to no merge target: {unmatched_keys}" + ) return pipeline From 7aa30fa242fe94a31b7d69139c52c2b554496331 Mon Sep 17 00:00:00 2001 From: Tsvika Shapira Date: Thu, 18 Jun 2026 22:30:46 +0300 Subject: [PATCH 3/5] translation add . 
after startswith --- src/maxdiffusion/loaders/ltx2_lora_nnx_loader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/maxdiffusion/loaders/ltx2_lora_nnx_loader.py b/src/maxdiffusion/loaders/ltx2_lora_nnx_loader.py index a74c0a157..1fad541c6 100644 --- a/src/maxdiffusion/loaders/ltx2_lora_nnx_loader.py +++ b/src/maxdiffusion/loaders/ltx2_lora_nnx_loader.py @@ -60,14 +60,14 @@ def translate_fn(nnx_path_str): if hasattr(pipeline, "transformer"): max_logging.log(f"Merging LoRA into transformer with rank={rank}") # Filter state dict for transformer keys to avoid confusing warnings - transformer_state_dict = {k: v for k, v in h_state_dict.items() if k.startswith("diffusion_model")} + transformer_state_dict = {k: v for k, v in h_state_dict.items() if k.startswith("diffusion_model.")} merge_fn(pipeline.transformer, transformer_state_dict, rank, scale, translate_fn, dtype=dtype) else: max_logging.log("transformer not found.") if hasattr(pipeline, "connectors"): max_logging.log(f"Merging LoRA into connectors with rank={rank}") - connector_state_dict = {k: v for k, v in h_state_dict.items() if k.startswith("text_embedding_projection")} + connector_state_dict = {k: v for k, v in h_state_dict.items() if k.startswith("text_embedding_projection.")} merge_fn(pipeline.connectors, connector_state_dict, rank, scale, translate_fn, dtype=dtype) else: max_logging.log("connectors not found.") From 3d39cb9329cc4e6e3ed2b76969fdcb349aad93da Mon Sep 17 00:00:00 2001 From: Tsvika Shapira Date: Sun, 21 Jun 2026 16:58:27 +0300 Subject: [PATCH 4/5] change the example lora files --- src/maxdiffusion/configs/ltx2_3_video.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/maxdiffusion/configs/ltx2_3_video.yml b/src/maxdiffusion/configs/ltx2_3_video.yml index a57106231..572d1ce94 100644 --- a/src/maxdiffusion/configs/ltx2_3_video.yml +++ b/src/maxdiffusion/configs/ltx2_3_video.yml @@ -142,18 +142,18 @@ enable_lora: False # Distilled LoRA 
# lora_config: { -# lora_model_name_or_path: ["Lightricks/LTX-2"], -# weight_name: ["ltx-2-19b-distilled-lora-384.safetensors"], +# lora_model_name_or_path: ["Lightricks/LTX-2.3"], +# weight_name: ["ltx-2.3-22b-distilled-lora-384.safetensors"], # adapter_name: ["distilled-lora-384"], # rank: [384] # } # Standard LoRA lora_config: { - lora_model_name_or_path: ["Lightricks/LTX-2-19b-LoRA-Camera-Control-Dolly-In"], - weight_name: ["ltx-2-19b-lora-camera-control-dolly-in.safetensors"], - adapter_name: ["camera-control-dolly-in"], - rank: [32] + lora_model_name_or_path: ["Lightricks/LTX-2.3-22b-IC-LoRA-Colorization"], + weight_name: ["ltx-2.3-22b-ic-lora-colorization-0.9.safetensors"], + adapter_name: ["colorization"], + rank: [128] } From 02b7d54431e4068867c0035cc99e10fd8d3f6ea5 Mon Sep 17 00:00:00 2001 From: Tsvika Shapira Date: Sun, 21 Jun 2026 19:12:07 +0300 Subject: [PATCH 5/5] comments --- src/maxdiffusion/configs/ltx2_3_video.yml | 2 ++ src/maxdiffusion/loaders/lora_conversion_utils.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/maxdiffusion/configs/ltx2_3_video.yml b/src/maxdiffusion/configs/ltx2_3_video.yml index 572d1ce94..a6275cfae 100644 --- a/src/maxdiffusion/configs/ltx2_3_video.yml +++ b/src/maxdiffusion/configs/ltx2_3_video.yml @@ -145,6 +145,8 @@ enable_lora: False # lora_model_name_or_path: ["Lightricks/LTX-2.3"], # weight_name: ["ltx-2.3-22b-distilled-lora-384.safetensors"], # adapter_name: ["distilled-lora-384"], +# # placeholder - the real value is mixed per-layer ranks: 32/128/256/384 +# # and the loader reads each layer's REAL rank from the LoRA tensor shapes # rank: [384] # } diff --git a/src/maxdiffusion/loaders/lora_conversion_utils.py b/src/maxdiffusion/loaders/lora_conversion_utils.py index 44b5afdaa..97fc63dcd 100644 --- a/src/maxdiffusion/loaders/lora_conversion_utils.py +++ b/src/maxdiffusion/loaders/lora_conversion_utils.py @@ -716,7 +716,7 @@ def translate_ltx2_nnx_path_to_diffusers_lora(nnx_path_str, 
scan_layers=False): "attn1.to_k": "attn1.to_k", "attn1.to_v": "attn1.to_v", "attn1.to_out": "attn1.to_out.0", - "attn1.to_gate_logits": "attn1.to_gate_logits", # per-head gating Linear + "attn1.to_gate_logits": "attn1.to_gate_logits", # Audio Self Attention (audio_attn1) "audio_attn1.to_q": "audio_attn1.to_q", "audio_attn1.to_k": "audio_attn1.to_k",