From 60860c0941188f5a8ed12ccd94cc8ce3c04889ad Mon Sep 17 00:00:00 2001 From: Austen Dicken Date: Wed, 11 Feb 2026 00:07:07 -0600 Subject: [PATCH 1/3] add experimental support for sd_embed-style prompt embedding Signed-off-by: Austen Dicken --- backend/python/diffusers/backend.py | 47 +++++++++++++++++++ backend/python/diffusers/requirements-cpu.txt | 1 + .../diffusers/requirements-cublas12.txt | 1 + .../diffusers/requirements-cublas13.txt | 1 + .../python/diffusers/requirements-intel.txt | 1 + .../python/diffusers/requirements-l4t12.txt | 1 + .../python/diffusers/requirements-l4t13.txt | 1 + backend/python/diffusers/requirements-mps.txt | 1 + 8 files changed, 54 insertions(+) diff --git a/backend/python/diffusers/backend.py b/backend/python/diffusers/backend.py index 032af60c4164..bdab438d7557 100755 --- a/backend/python/diffusers/backend.py +++ b/backend/python/diffusers/backend.py @@ -40,6 +40,7 @@ from optimum.quanto import freeze, qfloat8, quantize from transformers import T5EncoderModel from safetensors.torch import load_file +from sd_embed.embedding_funcs import get_weighted_text_embeddings_sd15, get_weighted_text_embeddings_sdxl, get_weighted_text_embeddings_sd3, get_weighted_text_embeddings_flux1 # Import LTX-2 specific utilities from diffusers.pipelines.ltx2.export_utils import encode_video as ltx2_encode_video @@ -47,6 +48,7 @@ _ONE_DAY_IN_SECONDS = 60 * 60 * 24 COMPEL = os.environ.get("COMPEL", "0") == "1" +SD_EMBED = os.environ.get("SD_EMBED", "0") == "1" XPU = os.environ.get("XPU", "0") == "1" CLIPSKIP = os.environ.get("CLIPSKIP", "1") == "1" SAFETENSORS = os.environ.get("SAFETENSORS", "1") == "1" @@ -737,6 +739,51 @@ def GenerateImage(self, request, context): kwargs["prompt_embeds"] = conditioning kwargs["pooled_prompt_embeds"] = pooled # pass the kwargs dictionary to the self.pipe method + image = self.pipe( + guidance_scale=self.cfg_scale, + **kwargs + ).images[0] + elif SD_EMBED: + if self.PipelineType == "StableDiffusionPipeline": + ( + kwargs["prompt_embeds"], + kwargs["negative_prompt_embeds"], + ) = get_weighted_text_embeddings_sd15( + pipe = self.pipe, + prompt = prompt, + neg_prompt = request.negative_prompt if hasattr(request, 'negative_prompt') else None, + ) + if self.PipelineType == "StableDiffusionXLPipeline": + ( + kwargs["prompt_embeds"], + kwargs["negative_prompt_embeds"], + kwargs["pooled_prompt_embeds"], + kwargs["negative_pooled_prompt_embeds"], + ) = get_weighted_text_embeddings_sdxl( + pipe = self.pipe, + prompt = prompt, + neg_prompt = request.negative_prompt if hasattr(request, 'negative_prompt') else None + ) + if self.PipelineType == "StableDiffusion3Pipeline": + ( + kwargs["prompt_embeds"], + kwargs["negative_prompt_embeds"], + kwargs["pooled_prompt_embeds"], + kwargs["negative_pooled_prompt_embeds"], + ) = get_weighted_text_embeddings_sd3( + pipe = self.pipe, + prompt = prompt, + neg_prompt = request.negative_prompt if hasattr(request, 'negative_prompt') else None + ) + if self.PipelineType == "FluxTransformer2DModel": + ( + kwargs["prompt_embeds"], + kwargs["pooled_prompt_embeds"], + ) = get_weighted_text_embeddings_sd3( + pipe = self.pipe, + prompt = prompt, + ) + image = self.pipe( guidance_scale=self.cfg_scale, **kwargs diff --git a/backend/python/diffusers/requirements-cpu.txt b/backend/python/diffusers/requirements-cpu.txt index fceda06d2f03..2b76224d9695 100644 --- a/backend/python/diffusers/requirements-cpu.txt +++ b/backend/python/diffusers/requirements-cpu.txt @@ -5,6 +5,7 @@ transformers torchvision==0.22.1 accelerate compel +git+https://github.com/xhinker/sd_embed peft sentencepiece torch==2.7.1 diff --git a/backend/python/diffusers/requirements-cublas12.txt b/backend/python/diffusers/requirements-cublas12.txt index 632e9421f99c..5a1e947f26e7 100644 --- a/backend/python/diffusers/requirements-cublas12.txt +++ b/backend/python/diffusers/requirements-cublas12.txt @@ -5,6 +5,7 @@ transformers torchvision accelerate compel +git+https://github.com/xhinker/sd_embed peft sentencepiece torch diff --git a/backend/python/diffusers/requirements-cublas13.txt b/backend/python/diffusers/requirements-cublas13.txt index 4867a85cd405..354c6df070a2 100644 --- a/backend/python/diffusers/requirements-cublas13.txt +++ b/backend/python/diffusers/requirements-cublas13.txt @@ -5,6 +5,7 @@ transformers torchvision accelerate compel +git+https://github.com/xhinker/sd_embed peft sentencepiece torch diff --git a/backend/python/diffusers/requirements-intel.txt b/backend/python/diffusers/requirements-intel.txt index e0fa69fb048c..3fd3cde74466 100644 --- a/backend/python/diffusers/requirements-intel.txt +++ b/backend/python/diffusers/requirements-intel.txt @@ -8,6 +8,7 @@ opencv-python transformers accelerate compel +git+https://github.com/xhinker/sd_embed peft sentencepiece optimum-quanto diff --git a/backend/python/diffusers/requirements-l4t12.txt b/backend/python/diffusers/requirements-l4t12.txt index 9f77a9d09014..814a22dff5c8 100644 --- a/backend/python/diffusers/requirements-l4t12.txt +++ b/backend/python/diffusers/requirements-l4t12.txt @@ -4,6 +4,7 @@ git+https://github.com/huggingface/diffusers transformers accelerate compel +git+https://github.com/xhinker/sd_embed peft optimum-quanto numpy<2 diff --git a/backend/python/diffusers/requirements-l4t13.txt b/backend/python/diffusers/requirements-l4t13.txt index 560858e354f4..3eb79ecd25b3 100644 --- a/backend/python/diffusers/requirements-l4t13.txt +++ b/backend/python/diffusers/requirements-l4t13.txt @@ -4,6 +4,7 @@ git+https://github.com/huggingface/diffusers transformers accelerate compel +git+https://github.com/xhinker/sd_embed peft optimum-quanto numpy<2 diff --git a/backend/python/diffusers/requirements-mps.txt b/backend/python/diffusers/requirements-mps.txt index 8b7c2413bffa..984cb4dc7d11 100644 --- a/backend/python/diffusers/requirements-mps.txt +++ b/backend/python/diffusers/requirements-mps.txt @@ -5,6 +5,7 @@ opencv-python transformers accelerate compel +git+https://github.com/xhinker/sd_embed peft sentencepiece optimum-quanto From 60f301368d6c460a7f7c8345895793d40c78af07 Mon Sep 17 00:00:00 2001 From: Austen Dicken Date: Wed, 11 Feb 2026 00:22:12 -0600 Subject: [PATCH 2/3] add doc equivalent to compel Signed-off-by: Austen Dicken --- .env | 3 +++ backend/python/diffusers/README.md | 1 + 2 files changed, 4 insertions(+) diff --git a/.env b/.env index 852d3dac63bd..ca2d149fe946 100644 --- a/.env +++ b/.env @@ -26,6 +26,9 @@ ## Disables COMPEL (Diffusers) # COMPEL=0 +## Disables SD_EMBED (Diffusers) +# SD_EMBED=0 + ## Enable/Disable single backend (useful if only one GPU is available) # LOCALAI_SINGLE_ACTIVE_BACKEND=true diff --git a/backend/python/diffusers/README.md b/backend/python/diffusers/README.md index 91fff3127694..70fddb80ffcd 100644 --- a/backend/python/diffusers/README.md +++ b/backend/python/diffusers/README.md @@ -115,6 +115,7 @@ Available pipelines: AnimateDiffPipeline, AnimateDiffVideoToVideoPipeline, ... | Variable | Default | Description | |----------|---------|-------------| | `COMPEL` | `0` | Enable Compel for prompt weighting | +| `SD_EMBED` | `0` | Enable sd_embed for prompt weighting | | `XPU` | `0` | Enable Intel XPU support | | `CLIPSKIP` | `1` | Enable CLIP skip support | | `SAFETENSORS` | `1` | Use safetensors format | From 80ca06dde50a141e5bad46a6443b19fdd0f7f6c9 Mon Sep 17 00:00:00 2001 From: Austen Dicken Date: Wed, 11 Feb 2026 10:32:50 -0600 Subject: [PATCH 3/3] need to use flux1 embedding function for flux model Signed-off-by: Austen Dicken --- backend/python/diffusers/backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/diffusers/backend.py b/backend/python/diffusers/backend.py index bdab438d7557..29a86b23e53d 100755 --- a/backend/python/diffusers/backend.py +++ b/backend/python/diffusers/backend.py @@ -779,7 +779,7 @@ def GenerateImage(self, request, context): ( kwargs["prompt_embeds"], kwargs["pooled_prompt_embeds"], - ) = get_weighted_text_embeddings_sd3( + ) = get_weighted_text_embeddings_flux1( pipe = self.pipe, prompt = prompt, )