Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion invokeai/app/invocations/anima_latents_to_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from invokeai.app.invocations.primitives import ImageOutput
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.flux.modules.autoencoder import AutoEncoder as FluxAutoEncoder
from invokeai.backend.model_manager.load.model_cache.utils import get_effective_device
from invokeai.backend.util.devices import TorchDevice
from invokeai.backend.util.vae_working_memory import estimate_vae_working_memory_flux

Expand Down Expand Up @@ -72,7 +73,8 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
raise TypeError(f"Expected AutoencoderKLWan or FluxAutoEncoder, got {type(vae).__name__}.")

vae_dtype = next(iter(vae.parameters())).dtype
latents = latents.to(device=TorchDevice.choose_torch_device(), dtype=vae_dtype)
# Use the VAE's actual device (may be CPU if the model is configured cpu_only).
latents = latents.to(device=get_effective_device(vae), dtype=vae_dtype)

TorchDevice.empty_cache()

Expand Down
4 changes: 3 additions & 1 deletion invokeai/app/invocations/cogview4_latents_to_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from invokeai.app.invocations.model import VAEField
from invokeai.app.invocations.primitives import ImageOutput
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.model_manager.load.model_cache.utils import get_effective_device
from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt
from invokeai.backend.util.devices import TorchDevice
from invokeai.backend.util.vae_working_memory import estimate_vae_working_memory_cogview4
Expand Down Expand Up @@ -54,7 +55,8 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
):
context.util.signal_progress("Running VAE")
assert isinstance(vae, (AutoencoderKL))
latents = latents.to(TorchDevice.choose_torch_device())
# Use the VAE's actual device (may be CPU if the model is configured cpu_only).
latents = latents.to(get_effective_device(vae))

vae.disable_tiling()

Expand Down
4 changes: 3 additions & 1 deletion invokeai/app/invocations/flux2_vae_decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from invokeai.app.invocations.primitives import ImageOutput
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.model_manager.load.load_base import LoadedModel
from invokeai.backend.model_manager.load.model_cache.utils import get_effective_device
from invokeai.backend.util.devices import TorchDevice


Expand Down Expand Up @@ -51,7 +52,8 @@ def _vae_decode(self, vae_info: LoadedModel, latents: torch.Tensor) -> Image.Ima
"""
with vae_info.model_on_device() as (_, vae):
vae_dtype = next(iter(vae.parameters())).dtype
device = TorchDevice.choose_torch_device()
# Use the VAE's actual device (may be CPU if the model is configured cpu_only).
device = get_effective_device(vae)
latents = latents.to(device=device, dtype=vae_dtype)

# Decode using diffusers API
Expand Down
4 changes: 3 additions & 1 deletion invokeai/app/invocations/flux_vae_decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.flux.modules.autoencoder import AutoEncoder
from invokeai.backend.model_manager.load.load_base import LoadedModel
from invokeai.backend.model_manager.load.model_cache.utils import get_effective_device
from invokeai.backend.util.devices import TorchDevice
from invokeai.backend.util.vae_working_memory import estimate_vae_working_memory_flux

Expand Down Expand Up @@ -47,7 +48,8 @@ def _vae_decode(self, vae_info: LoadedModel, latents: torch.Tensor) -> Image.Ima
with vae_info.model_on_device(working_mem_bytes=estimated_working_memory) as (_, vae):
assert isinstance(vae, AutoEncoder)
vae_dtype = next(iter(vae.parameters())).dtype
latents = latents.to(device=TorchDevice.choose_torch_device(), dtype=vae_dtype)
# Use the VAE's actual device (may be CPU if the model is configured cpu_only).
latents = latents.to(device=get_effective_device(vae), dtype=vae_dtype)
img = vae.decode(latents)

img = img.clamp(-1, 1)
Expand Down
9 changes: 7 additions & 2 deletions invokeai/app/invocations/latents_to_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from invokeai.app.invocations.model import VAEField
from invokeai.app.invocations.primitives import ImageOutput
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.model_manager.load.model_cache.utils import get_effective_device
from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt
from invokeai.backend.stable_diffusion.vae_tiling import patch_vae_tiling_params
from invokeai.backend.util.devices import TorchDevice
Expand Down Expand Up @@ -69,8 +70,12 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
):
context.util.signal_progress("Running VAE decoder")
assert isinstance(vae, (AutoencoderKL, AutoencoderTiny))
latents = latents.to(TorchDevice.choose_torch_device())
if self.fp32:
# Use the VAE's actual device (may be CPU if the model is configured cpu_only).
device = get_effective_device(vae)
latents = latents.to(device)
# fp16 VAE ops are not supported on CPU, so force fp32 when running on CPU
# (e.g. when the VAE is configured cpu_only).
if self.fp32 or device.type == "cpu":
# FP32 path (requested via self.fp32, or forced because the VAE is running on CPU): convert everything to float32
vae.to(dtype=torch.float32)
latents = latents.float()
Expand Down
4 changes: 3 additions & 1 deletion invokeai/app/invocations/qwen_image_latents_to_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from invokeai.app.invocations.model import VAEField
from invokeai.app.invocations.primitives import ImageOutput
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.model_manager.load.model_cache.utils import get_effective_device
from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt
from invokeai.backend.util.devices import TorchDevice

Expand Down Expand Up @@ -47,7 +48,8 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
):
context.util.signal_progress("Running VAE")
assert isinstance(vae, AutoencoderKLQwenImage)
latents = latents.to(device=TorchDevice.choose_torch_device(), dtype=vae.dtype)
# Use the VAE's actual device (may be CPU if the model is configured cpu_only).
latents = latents.to(device=get_effective_device(vae), dtype=vae.dtype)

vae.disable_tiling()

Expand Down
4 changes: 3 additions & 1 deletion invokeai/app/invocations/sd3_latents_to_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from invokeai.app.invocations.model import VAEField
from invokeai.app.invocations.primitives import ImageOutput
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.model_manager.load.model_cache.utils import get_effective_device
from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt
from invokeai.backend.util.devices import TorchDevice
from invokeai.backend.util.vae_working_memory import estimate_vae_working_memory_sd3
Expand Down Expand Up @@ -56,7 +57,8 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
):
context.util.signal_progress("Running VAE")
assert isinstance(vae, (AutoencoderKL))
latents = latents.to(TorchDevice.choose_torch_device())
# Use the VAE's actual device (may be CPU if the model is configured cpu_only).
latents = latents.to(get_effective_device(vae))

vae.disable_tiling()

Expand Down
4 changes: 3 additions & 1 deletion invokeai/app/invocations/z_image_latents_to_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from invokeai.app.invocations.primitives import ImageOutput
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.flux.modules.autoencoder import AutoEncoder as FluxAutoEncoder
from invokeai.backend.model_manager.load.model_cache.utils import get_effective_device
from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt
from invokeai.backend.util.devices import TorchDevice
from invokeai.backend.util.vae_working_memory import estimate_vae_working_memory_flux
Expand Down Expand Up @@ -75,7 +76,8 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
)

vae_dtype = next(iter(vae.parameters())).dtype
latents = latents.to(device=TorchDevice.choose_torch_device(), dtype=vae_dtype)
# Use the VAE's actual device (may be CPU if the model is configured cpu_only).
latents = latents.to(device=get_effective_device(vae), dtype=vae_dtype)

# Disable tiling for AutoencoderKL
if isinstance(vae, AutoencoderKL):
Expand Down
6 changes: 6 additions & 0 deletions invokeai/backend/model_manager/configs/vae.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ class VAE_Checkpoint_Config_Base(Checkpoint_Config_Base):

type: Literal[ModelType.VAE] = Field(default=ModelType.VAE)
format: Literal[ModelFormat.Checkpoint] = Field(default=ModelFormat.Checkpoint)
cpu_only: bool | None = Field(default=None, description="Whether this model should run on CPU only")

@classmethod
def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
Expand Down Expand Up @@ -166,6 +167,7 @@ class VAE_Checkpoint_Flux2_Config(Checkpoint_Config_Base, Config_Base):
type: Literal[ModelType.VAE] = Field(default=ModelType.VAE)
format: Literal[ModelFormat.Checkpoint] = Field(default=ModelFormat.Checkpoint)
base: Literal[BaseModelType.Flux2] = Field(default=BaseModelType.Flux2)
cpu_only: bool | None = Field(default=None, description="Whether this model should run on CPU only")

@classmethod
def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
Expand Down Expand Up @@ -204,6 +206,7 @@ class VAE_Checkpoint_QwenImage_Config(Checkpoint_Config_Base, Config_Base):
type: Literal[ModelType.VAE] = Field(default=ModelType.VAE)
format: Literal[ModelFormat.Checkpoint] = Field(default=ModelFormat.Checkpoint)
base: Literal[BaseModelType.QwenImage] = Field(default=BaseModelType.QwenImage)
cpu_only: bool | None = Field(default=None, description="Whether this model should run on CPU only")

@classmethod
def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
Expand Down Expand Up @@ -241,6 +244,7 @@ class VAE_Checkpoint_Anima_Config(Checkpoint_Config_Base, Config_Base):
type: Literal[ModelType.VAE] = Field(default=ModelType.VAE)
format: Literal[ModelFormat.Checkpoint] = Field(default=ModelFormat.Checkpoint)
base: Literal[BaseModelType.Anima] = Field(default=BaseModelType.Anima)
cpu_only: bool | None = Field(default=None, description="Whether this model should run on CPU only")

@classmethod
def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
Expand All @@ -260,6 +264,7 @@ class VAE_Diffusers_Config_Base(Diffusers_Config_Base):

type: Literal[ModelType.VAE] = Field(default=ModelType.VAE)
format: Literal[ModelFormat.Diffusers] = Field(default=ModelFormat.Diffusers)
cpu_only: bool | None = Field(default=None, description="Whether this model should run on CPU only")

@classmethod
def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
Expand Down Expand Up @@ -328,6 +333,7 @@ class VAE_Diffusers_Flux2_Config(Diffusers_Config_Base, Config_Base):
type: Literal[ModelType.VAE] = Field(default=ModelType.VAE)
format: Literal[ModelFormat.Diffusers] = Field(default=ModelFormat.Diffusers)
base: Literal[BaseModelType.Flux2] = Field(default=BaseModelType.Flux2)
cpu_only: bool | None = Field(default=None, description="Whether this model should run on CPU only")

@classmethod
def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -441,8 +441,13 @@ def lock(self, cache_entry: CacheRecord, working_mem_bytes: Optional[int]) -> No
# Check if the model's specific compute_device is CPU, not just the cache's default execution_device
model_compute_device = cache_entry.cached_model.compute_device
if model_compute_device.type == "cpu":
# Models configured for CPU execution don't need to be loaded into VRAM
self._logger.debug(f"Model {cache_entry.key} is configured for CPU execution, skipping VRAM load")
# Models configured for CPU execution (cpu_only) don't need to be loaded into VRAM. Log at INFO so it
# mirrors the "Loaded model ... onto <device> device" line emitted for GPU loads below — otherwise there
# is no visible indication that the model is running on CPU at the default log level.
self._logger.info(
f"Loaded model '{cache_entry.key}' ({cache_entry.cached_model.model.__class__.__name__}) onto "
f"cpu device (cpu_only); skipping VRAM load"
)
return

try:
Expand Down
Loading
Loading