From bc212020df35bef9524558b2593db86d118257c0 Mon Sep 17 00:00:00 2001 From: Alexander Eichhorn Date: Wed, 17 Jun 2026 23:40:09 +0200 Subject: [PATCH 1/2] feat(vae): support running VAEs on CPU via cpu_only setting Extends the cpu_only mechanism from #8777 (text encoders) to VAE decode. Adds a cpu_only field to all standalone VAE configs; the loader already forces standalone configs with cpu_only=True onto the CPU. The 7 decode invocations now move latents to the VAE's effective device instead of hard-coding CUDA, and the SD/SDXL path falls back to fp32 on CPU (fp16 conv is unsupported there). Adds a "Run on CPU" toggle to the VAE model settings panel and regenerates the API schema. Decode-only for now; encode and main-model VAE submodels are unchanged. Closes #7276 (VAE part) --- .../app/invocations/anima_latents_to_image.py | 4 +- .../invocations/cogview4_latents_to_image.py | 4 +- invokeai/app/invocations/flux2_vae_decode.py | 4 +- invokeai/app/invocations/flux_vae_decode.py | 4 +- invokeai/app/invocations/latents_to_image.py | 9 +- .../qwen_image_latents_to_image.py | 4 +- .../app/invocations/sd3_latents_to_image.py | 4 +- .../invocations/z_image_latents_to_image.py | 4 +- invokeai/backend/model_manager/configs/vae.py | 6 + .../hooks/useVAEModelSettings.ts | 18 +++ .../subpanels/ModelPanel/ModelView.tsx | 6 + .../VAEModelSettings/VAEModelSettings.tsx | 126 ++++++++++++++++++ .../frontend/web/src/services/api/schema.ts | 50 +++++++ .../load/test_load_default_cpu_only.py | 49 +++++++ 14 files changed, 283 insertions(+), 9 deletions(-) create mode 100644 invokeai/frontend/web/src/features/modelManagerV2/hooks/useVAEModelSettings.ts create mode 100644 invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/VAEModelSettings/VAEModelSettings.tsx create mode 100644 tests/backend/model_manager/load/test_load_default_cpu_only.py diff --git a/invokeai/app/invocations/anima_latents_to_image.py b/invokeai/app/invocations/anima_latents_to_image.py index 080c101fa44..3bf3265bfd7 100644 --- a/invokeai/app/invocations/anima_latents_to_image.py +++ b/invokeai/app/invocations/anima_latents_to_image.py @@ -28,6 +28,7 @@ from invokeai.app.invocations.primitives import ImageOutput from invokeai.app.services.shared.invocation_context import InvocationContext from invokeai.backend.flux.modules.autoencoder import AutoEncoder as FluxAutoEncoder +from invokeai.backend.model_manager.load.model_cache.utils import get_effective_device from invokeai.backend.util.devices import TorchDevice from invokeai.backend.util.vae_working_memory import estimate_vae_working_memory_flux @@ -72,7 +73,8 @@ def invoke(self, context: InvocationContext) -> ImageOutput: raise TypeError(f"Expected AutoencoderKLWan or FluxAutoEncoder, got {type(vae).__name__}.") vae_dtype = next(iter(vae.parameters())).dtype - latents = latents.to(device=TorchDevice.choose_torch_device(), dtype=vae_dtype) + # Use the VAE's actual device (may be CPU if the model is configured cpu_only). + latents = latents.to(device=get_effective_device(vae), dtype=vae_dtype) TorchDevice.empty_cache() diff --git a/invokeai/app/invocations/cogview4_latents_to_image.py b/invokeai/app/invocations/cogview4_latents_to_image.py index 1b77ed8a1f8..bc9d208b669 100644 --- a/invokeai/app/invocations/cogview4_latents_to_image.py +++ b/invokeai/app/invocations/cogview4_latents_to_image.py @@ -17,6 +17,7 @@ from invokeai.app.invocations.model import VAEField from invokeai.app.invocations.primitives import ImageOutput from invokeai.app.services.shared.invocation_context import InvocationContext +from invokeai.backend.model_manager.load.model_cache.utils import get_effective_device from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt from invokeai.backend.util.devices import TorchDevice from invokeai.backend.util.vae_working_memory import estimate_vae_working_memory_cogview4 @@ -54,7 +55,8 @@ def invoke(self, context: InvocationContext) -> ImageOutput: ): context.util.signal_progress("Running VAE") assert isinstance(vae, (AutoencoderKL)) - latents = latents.to(TorchDevice.choose_torch_device()) + # Use the VAE's actual device (may be CPU if the model is configured cpu_only). + latents = latents.to(get_effective_device(vae)) vae.disable_tiling() diff --git a/invokeai/app/invocations/flux2_vae_decode.py b/invokeai/app/invocations/flux2_vae_decode.py index ecbc7d9cb83..25ada406873 100644 --- a/invokeai/app/invocations/flux2_vae_decode.py +++ b/invokeai/app/invocations/flux2_vae_decode.py @@ -20,6 +20,7 @@ from invokeai.app.invocations.primitives import ImageOutput from invokeai.app.services.shared.invocation_context import InvocationContext from invokeai.backend.model_manager.load.load_base import LoadedModel +from invokeai.backend.model_manager.load.model_cache.utils import get_effective_device from invokeai.backend.util.devices import TorchDevice @@ -51,7 +52,8 @@ def _vae_decode(self, vae_info: LoadedModel, latents: torch.Tensor) -> Image.Ima """ with vae_info.model_on_device() as (_, vae): vae_dtype = next(iter(vae.parameters())).dtype - device = TorchDevice.choose_torch_device() + # Use the VAE's actual device (may be CPU if the model is configured cpu_only). + device = get_effective_device(vae) latents = latents.to(device=device, dtype=vae_dtype) # Decode using diffusers API diff --git a/invokeai/app/invocations/flux_vae_decode.py b/invokeai/app/invocations/flux_vae_decode.py index c55dfb539ac..400e36bff45 100644 --- a/invokeai/app/invocations/flux_vae_decode.py +++ b/invokeai/app/invocations/flux_vae_decode.py @@ -16,6 +16,7 @@ from invokeai.app.services.shared.invocation_context import InvocationContext from invokeai.backend.flux.modules.autoencoder import AutoEncoder from invokeai.backend.model_manager.load.load_base import LoadedModel +from invokeai.backend.model_manager.load.model_cache.utils import get_effective_device from invokeai.backend.util.devices import TorchDevice from invokeai.backend.util.vae_working_memory import estimate_vae_working_memory_flux @@ -47,7 +48,8 @@ def _vae_decode(self, vae_info: LoadedModel, latents: torch.Tensor) -> Image.Ima with vae_info.model_on_device(working_mem_bytes=estimated_working_memory) as (_, vae): assert isinstance(vae, AutoEncoder) vae_dtype = next(iter(vae.parameters())).dtype - latents = latents.to(device=TorchDevice.choose_torch_device(), dtype=vae_dtype) + # Use the VAE's actual device (may be CPU if the model is configured cpu_only). + latents = latents.to(device=get_effective_device(vae), dtype=vae_dtype) img = vae.decode(latents) img = img.clamp(-1, 1) diff --git a/invokeai/app/invocations/latents_to_image.py b/invokeai/app/invocations/latents_to_image.py index 608485a078b..f6edf44cc6c 100644 --- a/invokeai/app/invocations/latents_to_image.py +++ b/invokeai/app/invocations/latents_to_image.py @@ -18,6 +18,7 @@ from invokeai.app.invocations.model import VAEField from invokeai.app.invocations.primitives import ImageOutput from invokeai.app.services.shared.invocation_context import InvocationContext +from invokeai.backend.model_manager.load.model_cache.utils import get_effective_device from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt from invokeai.backend.stable_diffusion.vae_tiling import patch_vae_tiling_params from invokeai.backend.util.devices import TorchDevice @@ -69,8 +70,12 @@ def invoke(self, context: InvocationContext) -> ImageOutput: ): context.util.signal_progress("Running VAE decoder") assert isinstance(vae, (AutoencoderKL, AutoencoderTiny)) - latents = latents.to(TorchDevice.choose_torch_device()) - if self.fp32: + # Use the VAE's actual device (may be CPU if the model is configured cpu_only). + device = get_effective_device(vae) + latents = latents.to(device) + # fp16 VAE ops are not supported on CPU, so force fp32 when running on CPU + # (e.g. when the VAE is configured cpu_only). + if self.fp32 or device.type == "cpu": # FP32 mode: convert everything to float32 for maximum precision vae.to(dtype=torch.float32) latents = latents.float() diff --git a/invokeai/app/invocations/qwen_image_latents_to_image.py b/invokeai/app/invocations/qwen_image_latents_to_image.py index b3ea39c4bbf..fc220feff0d 100644 --- a/invokeai/app/invocations/qwen_image_latents_to_image.py +++ b/invokeai/app/invocations/qwen_image_latents_to_image.py @@ -17,6 +17,7 @@ from invokeai.app.invocations.model import VAEField from invokeai.app.invocations.primitives import ImageOutput from invokeai.app.services.shared.invocation_context import InvocationContext +from invokeai.backend.model_manager.load.model_cache.utils import get_effective_device from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt from invokeai.backend.util.devices import TorchDevice @@ -47,7 +48,8 @@ def invoke(self, context: InvocationContext) -> ImageOutput: ): context.util.signal_progress("Running VAE") assert isinstance(vae, AutoencoderKLQwenImage) - latents = latents.to(device=TorchDevice.choose_torch_device(), dtype=vae.dtype) + # Use the VAE's actual device (may be CPU if the model is configured cpu_only). + latents = latents.to(device=get_effective_device(vae), dtype=vae.dtype) vae.disable_tiling() diff --git a/invokeai/app/invocations/sd3_latents_to_image.py b/invokeai/app/invocations/sd3_latents_to_image.py index e6a20d38a9c..38c93305df7 100644 --- a/invokeai/app/invocations/sd3_latents_to_image.py +++ b/invokeai/app/invocations/sd3_latents_to_image.py @@ -17,6 +17,7 @@ from invokeai.app.invocations.model import VAEField from invokeai.app.invocations.primitives import ImageOutput from invokeai.app.services.shared.invocation_context import InvocationContext +from invokeai.backend.model_manager.load.model_cache.utils import get_effective_device from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt from invokeai.backend.util.devices import TorchDevice from invokeai.backend.util.vae_working_memory import estimate_vae_working_memory_sd3 @@ -56,7 +57,8 @@ def invoke(self, context: InvocationContext) -> ImageOutput: ): context.util.signal_progress("Running VAE") assert isinstance(vae, (AutoencoderKL)) - latents = latents.to(TorchDevice.choose_torch_device()) + # Use the VAE's actual device (may be CPU if the model is configured cpu_only). + latents = latents.to(get_effective_device(vae)) vae.disable_tiling() diff --git a/invokeai/app/invocations/z_image_latents_to_image.py b/invokeai/app/invocations/z_image_latents_to_image.py index a2e6fdcc077..6ba34632d44 100644 --- a/invokeai/app/invocations/z_image_latents_to_image.py +++ b/invokeai/app/invocations/z_image_latents_to_image.py @@ -19,6 +19,7 @@ from invokeai.app.invocations.primitives import ImageOutput from invokeai.app.services.shared.invocation_context import InvocationContext from invokeai.backend.flux.modules.autoencoder import AutoEncoder as FluxAutoEncoder +from invokeai.backend.model_manager.load.model_cache.utils import get_effective_device from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt from invokeai.backend.util.devices import TorchDevice from invokeai.backend.util.vae_working_memory import estimate_vae_working_memory_flux @@ -75,7 +76,8 @@ def invoke(self, context: InvocationContext) -> ImageOutput: ) vae_dtype = next(iter(vae.parameters())).dtype - latents = latents.to(device=TorchDevice.choose_torch_device(), dtype=vae_dtype) + # Use the VAE's actual device (may be CPU if the model is configured cpu_only). + latents = latents.to(device=get_effective_device(vae), dtype=vae_dtype) # Disable tiling for AutoencoderKL if isinstance(vae, AutoencoderKL): diff --git a/invokeai/backend/model_manager/configs/vae.py b/invokeai/backend/model_manager/configs/vae.py index 5a88cf12781..30735b443ed 100644 --- a/invokeai/backend/model_manager/configs/vae.py +++ b/invokeai/backend/model_manager/configs/vae.py @@ -76,6 +76,7 @@ class VAE_Checkpoint_Config_Base(Checkpoint_Config_Base): type: Literal[ModelType.VAE] = Field(default=ModelType.VAE) format: Literal[ModelFormat.Checkpoint] = Field(default=ModelFormat.Checkpoint) + cpu_only: bool | None = Field(default=None, description="Whether this model should run on CPU only") @classmethod def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self: @@ -166,6 +167,7 @@ class VAE_Checkpoint_Flux2_Config(Checkpoint_Config_Base, Config_Base): type: Literal[ModelType.VAE] = Field(default=ModelType.VAE) format: Literal[ModelFormat.Checkpoint] = Field(default=ModelFormat.Checkpoint) base: Literal[BaseModelType.Flux2] = Field(default=BaseModelType.Flux2) + cpu_only: bool | None = Field(default=None, description="Whether this model should run on CPU only") @classmethod def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self: @@ -204,6 +206,7 @@ class VAE_Checkpoint_QwenImage_Config(Checkpoint_Config_Base, Config_Base): type: Literal[ModelType.VAE] = Field(default=ModelType.VAE) format: Literal[ModelFormat.Checkpoint] = Field(default=ModelFormat.Checkpoint) base: Literal[BaseModelType.QwenImage] = Field(default=BaseModelType.QwenImage) + cpu_only: bool | None = Field(default=None, description="Whether this model should run on CPU only") @classmethod def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self: @@ -241,6 +244,7 @@ class VAE_Checkpoint_Anima_Config(Checkpoint_Config_Base, Config_Base): type: Literal[ModelType.VAE] = Field(default=ModelType.VAE) format: Literal[ModelFormat.Checkpoint] = Field(default=ModelFormat.Checkpoint) base: Literal[BaseModelType.Anima] = Field(default=BaseModelType.Anima) + cpu_only: bool | None = Field(default=None, description="Whether this model should run on CPU only") @classmethod def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self: @@ -260,6 +264,7 @@ class VAE_Diffusers_Config_Base(Diffusers_Config_Base): type: Literal[ModelType.VAE] = Field(default=ModelType.VAE) format: Literal[ModelFormat.Diffusers] = Field(default=ModelFormat.Diffusers) + cpu_only: bool | None = Field(default=None, description="Whether this model should run on CPU only") @classmethod def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self: @@ -328,6 +333,7 @@ class VAE_Diffusers_Flux2_Config(Diffusers_Config_Base, Config_Base): type: Literal[ModelType.VAE] = Field(default=ModelType.VAE) format: Literal[ModelFormat.Diffusers] = Field(default=ModelFormat.Diffusers) base: Literal[BaseModelType.Flux2] = Field(default=BaseModelType.Flux2) + cpu_only: bool | None = Field(default=None, description="Whether this model should run on CPU only") @classmethod def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self: diff --git a/invokeai/frontend/web/src/features/modelManagerV2/hooks/useVAEModelSettings.ts b/invokeai/frontend/web/src/features/modelManagerV2/hooks/useVAEModelSettings.ts new file mode 100644 index 00000000000..3d75b094c75 --- /dev/null +++ b/invokeai/frontend/web/src/features/modelManagerV2/hooks/useVAEModelSettings.ts @@ -0,0 +1,18 @@ +import type { EncoderModelSettingsFormData } from 'features/modelManagerV2/subpanels/ModelPanel/EncoderModelSettings/EncoderModelSettings'; +import { useMemo } from 'react'; +import type { VAEModelConfig } from 'services/api/types'; + +export const useVAEModelSettings = (modelConfig: VAEModelConfig) => { + const vaeModelSettingsDefaults = useMemo(() => { + const cpuOnly = modelConfig.cpu_only ?? false; + + return { + cpuOnly: { + value: cpuOnly, + isEnabled: cpuOnly, + }, + }; + }, [modelConfig]); + + return vaeModelSettingsDefaults; +}; diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ModelView.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ModelView.tsx index 365f7cff4b8..e666ebfd1b4 100644 --- a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ModelView.tsx +++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ModelView.tsx @@ -9,6 +9,7 @@ import { ModelHeader } from 'features/modelManagerV2/subpanels/ModelPanel/ModelH import { ModelSettingsExportButton } from 'features/modelManagerV2/subpanels/ModelPanel/ModelSettingsExportButton'; import { ModelSettingsImportButton } from 'features/modelManagerV2/subpanels/ModelPanel/ModelSettingsImportButton'; import { TriggerPhrases } from 'features/modelManagerV2/subpanels/ModelPanel/TriggerPhrases'; +import { VAEModelSettings } from 'features/modelManagerV2/subpanels/ModelPanel/VAEModelSettings/VAEModelSettings'; import { filesize } from 'filesize'; import { memo, useMemo } from 'react'; import { useTranslation } from 'react-i18next'; @@ -82,6 +83,10 @@ export const ModelView = memo(({ modelConfig }: Props) => { if (isEncoderModel(modelConfig)) { return true; } + // VAE models (cpu_only toggle) + if (modelConfig.type === 'vae') { + return true; + } return false; }, [modelConfig]); @@ -151,6 +156,7 @@ export const ModelView = memo(({ modelConfig }: Props) => { )} {modelConfig.type === 'main' && } {isEncoderModel(modelConfig) && } + {modelConfig.type === 'vae' && } )} diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/VAEModelSettings/VAEModelSettings.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/VAEModelSettings/VAEModelSettings.tsx new file mode 100644 index 00000000000..210caf48f73 --- /dev/null +++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/VAEModelSettings/VAEModelSettings.tsx @@ -0,0 +1,126 @@ +import { Button, Flex, FormControl, FormLabel, Heading, Switch } from '@invoke-ai/ui-library'; +import { useAppSelector } from 'app/store/storeHooks'; +import { InformationalPopover } from 'common/components/InformationalPopover/InformationalPopover'; +import { useVAEModelSettings } from 'features/modelManagerV2/hooks/useVAEModelSettings'; +import { selectSelectedModelKey } from 'features/modelManagerV2/store/modelManagerV2Slice'; +import type { EncoderModelSettingsFormData } from 'features/modelManagerV2/subpanels/ModelPanel/EncoderModelSettings/EncoderModelSettings'; +import type { FormField } from 'features/modelManagerV2/subpanels/ModelPanel/MainModelDefaultSettings/MainModelDefaultSettings'; +import { toast } from 'features/toast/toast'; +import type { ChangeEvent } from 'react'; +import { memo, useCallback, useEffect, useMemo } from 'react'; +import type { Control, SubmitHandler } from 'react-hook-form'; +import { useController, useForm } from 'react-hook-form'; +import { useTranslation } from 'react-i18next'; +import { PiCheckBold } from 'react-icons/pi'; +import { useUpdateModelMutation } from 'services/api/endpoints/models'; +import type { VAEModelConfig } from 'services/api/types'; + +type Props = { + modelConfig: VAEModelConfig; +}; + +const DefaultCpuOnly = memo((props: { name: 'cpuOnly'; control: Control }) => { + const { field } = useController(props); + const { t } = useTranslation(); + + const onChange = useCallback( + (e: ChangeEvent) => { + const updatedValue = { + ...(field.value as FormField), + value: e.target.checked, + isEnabled: e.target.checked, + }; + field.onChange(updatedValue); + }, + [field] + ); + + const value = useMemo(() => { + return (field.value as FormField).value; + }, [field.value]); + + return ( + + + {t('modelManager.runOnCpu')} + + + + ); +}); + +DefaultCpuOnly.displayName = 'DefaultCpuOnly'; + +export const VAEModelSettings = memo(({ modelConfig }: Props) => { + const selectedModelKey = useAppSelector(selectSelectedModelKey); + const { t } = useTranslation(); + + const settingsDefaults = useVAEModelSettings(modelConfig); + const [updateModel, { isLoading: isLoadingUpdateModel }] = useUpdateModelMutation(); + + const { handleSubmit, control, formState, reset } = useForm({ + defaultValues: settingsDefaults, + }); + + useEffect(() => { + reset(settingsDefaults); + }, [settingsDefaults, reset]); + + const onSubmit = useCallback>( + (data) => { + if (!selectedModelKey) { + return; + } + + const body = { + cpu_only: data.cpuOnly.isEnabled ? data.cpuOnly.value : null, + }; + + updateModel({ + key: selectedModelKey, + body, + }) + .unwrap() + .then((_) => { + toast({ + id: 'VAE_SETTINGS_SAVED', + title: t('modelManager.settingsSaved'), + status: 'success', + }); + reset(data); + }) + .catch((error) => { + if (error) { + toast({ + id: 'VAE_SETTINGS_SAVE_FAILED', + title: `${error.data.detail} `, + status: 'error', + }); + } + }); + }, + [selectedModelKey, reset, updateModel, t] + ); + + return ( + <> + + {t('modelManager.settings')} + + + + + + ); +}); + +VAEModelSettings.displayName = 'VAEModelSettings'; diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts index 5726458dc3a..502e04bc14c 100644 --- a/invokeai/frontend/web/src/services/api/schema.ts +++ b/invokeai/frontend/web/src/services/api/schema.ts @@ -31065,6 +31065,11 @@ export type components = { * @constant */ base: "anima"; + /** + * Cpu Only + * @description Whether this model should run on CPU only + */ + cpu_only: boolean | null; }; /** VAE_Checkpoint_FLUX_Config */ VAE_Checkpoint_FLUX_Config: { @@ -31137,6 +31142,11 @@ export type components = { * @constant */ format: "checkpoint"; + /** + * Cpu Only + * @description Whether this model should run on CPU only + */ + cpu_only: boolean | null; /** * Base * @default flux @@ -31224,6 +31234,11 @@ export type components = { * @constant */ base: "flux2"; + /** + * Cpu Only + * @description Whether this model should run on CPU only + */ + cpu_only: boolean | null; }; /** * VAE_Checkpoint_QwenImage_Config @@ -31305,6 +31320,11 @@ export type components = { * @constant */ base: "qwen-image"; + /** + * Cpu Only + * @description Whether this model should run on CPU only + */ + cpu_only: boolean | null; }; /** VAE_Checkpoint_SD1_Config */ VAE_Checkpoint_SD1_Config: { @@ -31377,6 +31397,11 @@ export type components = { * @constant */ format: "checkpoint"; + /** + * Cpu Only + * @description Whether this model should run on CPU only + */ + cpu_only: boolean | null; /** * Base * @default sd-1 @@ -31455,6 +31480,11 @@ export type components = { * @constant */ format: "checkpoint"; + /** + * Cpu Only + * @description Whether this model should run on CPU only + */ + cpu_only: boolean | null; /** * Base * @default sd-2 @@ -31533,6 +31563,11 @@ export type components = { * @constant */ format: "checkpoint"; + /** + * Cpu Only + * @description Whether this model should run on CPU only + */ + cpu_only: boolean | null; /** * Base * @default sdxl @@ -31617,6 +31652,11 @@ export type components = { * @constant */ base: "flux2"; + /** + * Cpu Only + * @description Whether this model should run on CPU only + */ + cpu_only: boolean | null; }; /** VAE_Diffusers_SD1_Config */ VAE_Diffusers_SD1_Config: { @@ -31686,6 +31726,11 @@ export type components = { * @constant */ type: "vae"; + /** + * Cpu Only + * @description Whether this model should run on CPU only + */ + cpu_only: boolean | null; /** * Base * @default sd-1 @@ -31761,6 +31806,11 @@ export type components = { * @constant */ type: "vae"; + /** + * Cpu Only + * @description Whether this model should run on CPU only + */ + cpu_only: boolean | null; /** * Base * @default sdxl diff --git a/tests/backend/model_manager/load/test_load_default_cpu_only.py b/tests/backend/model_manager/load/test_load_default_cpu_only.py new file mode 100644 index 00000000000..d99eb5466d0 --- /dev/null +++ b/tests/backend/model_manager/load/test_load_default_cpu_only.py @@ -0,0 +1,49 @@ +"""Tests for `ModelLoader._get_execution_device` — the helper that forces a model onto the CPU +when its config requests `cpu_only`. + +A VAE (or text encoder) configured with `cpu_only=True` must load onto the CPU so its weights +never occupy VRAM. The loader signals this by returning `torch.device("cpu")` from +`_get_execution_device`, which is then passed to `ModelCache.put(..., execution_device=...)`. +""" + +from types import SimpleNamespace +from typing import Optional + +import torch + +from invokeai.backend.model_manager.load.load_default import ModelLoader +from invokeai.backend.model_manager.taxonomy import SubModelType + + +def _loader() -> ModelLoader: + # `_get_execution_device` only reads the config, so an uninitialized loader is sufficient. + return ModelLoader.__new__(ModelLoader) + + +def _vae_config(cpu_only: Optional[bool]) -> SimpleNamespace: + # Mirrors the relevant surface of a standalone VAE config: a `cpu_only` field and no + # `default_settings` (VAE configs do not carry default settings). + return SimpleNamespace(cpu_only=cpu_only, default_settings=None) + + +def test_vae_cpu_only_true_returns_cpu(): + assert _loader()._get_execution_device(_vae_config(cpu_only=True), None) == torch.device("cpu") + + +def test_vae_cpu_only_false_or_unset_returns_none(): + # Falsy values must not force CPU execution — the cache falls back to its default device. + assert _loader()._get_execution_device(_vae_config(cpu_only=False), None) is None + assert _loader()._get_execution_device(_vae_config(cpu_only=None), None) is None + + +def test_vae_cpu_only_applies_regardless_of_submodel_type(): + # The VAE is loaded as a standalone model (submodel_type=None), but the standalone branch + # must not depend on the submodel type either way. + loader = _loader() + assert loader._get_execution_device(_vae_config(cpu_only=True), SubModelType.VAE) == torch.device("cpu") + + +def test_config_without_cpu_only_attr_returns_none(): + # A config type that has neither `cpu_only` nor `default_settings` must be left on the + # cache default (return None), not crash. + assert _loader()._get_execution_device(SimpleNamespace(), None) is None From 3a7a5754bff13165acfe41ce230268b8f217000a Mon Sep 17 00:00:00 2001 From: Alexander Eichhorn Date: Fri, 19 Jun 2026 02:47:20 +0200 Subject: [PATCH 2/2] Chore Openapi + Fix logging --- .../load/model_cache/model_cache.py | 9 +- invokeai/frontend/web/openapi.json | 138 +++++++++++++++++- 2 files changed, 141 insertions(+), 6 deletions(-) diff --git a/invokeai/backend/model_manager/load/model_cache/model_cache.py b/invokeai/backend/model_manager/load/model_cache/model_cache.py index e3a0928e52b..bb2d693167f 100644 --- a/invokeai/backend/model_manager/load/model_cache/model_cache.py +++ b/invokeai/backend/model_manager/load/model_cache/model_cache.py @@ -441,8 +441,13 @@ def lock(self, cache_entry: CacheRecord, working_mem_bytes: Optional[int]) -> No # Check if the model's specific compute_device is CPU, not just the cache's default execution_device model_compute_device = cache_entry.cached_model.compute_device if model_compute_device.type == "cpu": - # Models configured for CPU execution don't need to be loaded into VRAM - self._logger.debug(f"Model {cache_entry.key} is configured for CPU execution, skipping VRAM load") + # Models configured for CPU execution (cpu_only) don't need to be loaded into VRAM. Log at INFO so it + # mirrors the "Loaded model ... onto device" line emitted for GPU loads below — otherwise there + # is no visible indication that the model is running on CPU at the default log level. + self._logger.info( + f"Loaded model '{cache_entry.key}' ({cache_entry.cached_model.model.__class__.__name__}) onto " + f"cpu device (cpu_only); skipping VRAM load" + ) return try: diff --git a/invokeai/frontend/web/openapi.json b/invokeai/frontend/web/openapi.json index 2c9526c59a9..5fc5217ca24 100644 --- a/invokeai/frontend/web/openapi.json +++ b/invokeai/frontend/web/openapi.json @@ -70443,6 +70443,18 @@ "const": "anima", "title": "Base", "default": "anima" + }, + "cpu_only": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Cpu Only", + "description": "Whether this model should run on CPU only" } }, "type": "object", @@ -70461,7 +70473,8 @@ "config_path", "type", "format", - "base" + "base", + "cpu_only" ], "title": "VAE_Checkpoint_Anima_Config", "description": "Model config for Anima QwenImage VAE checkpoint models (AutoencoderKLQwenImage)." @@ -70574,6 +70587,18 @@ "title": "Format", "default": "checkpoint" }, + "cpu_only": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Cpu Only", + "description": "Whether this model should run on CPU only" + }, "base": { "type": "string", "const": "flux", @@ -70597,6 +70622,7 @@ "config_path", "type", "format", + "cpu_only", "base" ], "title": "VAE_Checkpoint_FLUX_Config" @@ -70714,6 +70740,18 @@ "const": "flux2", "title": "Base", "default": "flux2" + }, + "cpu_only": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Cpu Only", + "description": "Whether this model should run on CPU only" } }, "type": "object", @@ -70732,7 +70770,8 @@ "config_path", "type", "format", - "base" + "base", + "cpu_only" ], "title": "VAE_Checkpoint_Flux2_Config", "description": "Model config for FLUX.2 VAE checkpoint models (AutoencoderKLFlux2)." @@ -70850,6 +70889,18 @@ "const": "qwen-image", "title": "Base", "default": "qwen-image" + }, + "cpu_only": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Cpu Only", + "description": "Whether this model should run on CPU only" } }, "type": "object", @@ -70868,7 +70919,8 @@ "config_path", "type", "format", - "base" + "base", + "cpu_only" ], "title": "VAE_Checkpoint_QwenImage_Config", "description": "Model config for Qwen Image VAE checkpoint models (AutoencoderKLQwenImage)." @@ -70981,6 +71033,18 @@ "title": "Format", "default": "checkpoint" }, + "cpu_only": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Cpu Only", + "description": "Whether this model should run on CPU only" + }, "base": { "type": "string", "const": "sd-1", @@ -71004,6 +71068,7 @@ "config_path", "type", "format", + "cpu_only", "base" ], "title": "VAE_Checkpoint_SD1_Config" @@ -71116,6 +71181,18 @@ "title": "Format", "default": "checkpoint" }, + "cpu_only": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Cpu Only", + "description": "Whether this model should run on CPU only" + }, "base": { "type": "string", "const": "sd-2", @@ -71139,6 +71216,7 @@ "config_path", "type", "format", + "cpu_only", "base" ], "title": "VAE_Checkpoint_SD2_Config" @@ -71251,6 +71329,18 @@ "title": "Format", "default": "checkpoint" }, + "cpu_only": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Cpu Only", + "description": "Whether this model should run on CPU only" + }, "base": { "type": "string", "const": "sdxl", @@ -71274,6 +71364,7 @@ "config_path", "type", "format", + "cpu_only", "base" ], "title": "VAE_Checkpoint_SDXL_Config" @@ -71383,6 +71474,18 @@ "const": "flux2", "title": "Base", "default": "flux2" + }, + "cpu_only": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Cpu Only", + "description": "Whether this model should run on CPU only" } }, "type": "object", @@ -71401,7 +71504,8 @@ "format", "repo_variant", "type", - "base" + "base", + "cpu_only" ], "title": "VAE_Diffusers_Flux2_Config", "description": "Model config for FLUX.2 VAE models in diffusers format (AutoencoderKLFlux2)." @@ -71506,6 +71610,18 @@ "title": "Type", "default": "vae" }, + "cpu_only": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Cpu Only", + "description": "Whether this model should run on CPU only" + }, "base": { "type": "string", "const": "sd-1", @@ -71529,6 +71645,7 @@ "format", "repo_variant", "type", + "cpu_only", "base" ], "title": "VAE_Diffusers_SD1_Config" @@ -71633,6 +71750,18 @@ "title": "Type", "default": "vae" }, + "cpu_only": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Cpu Only", + "description": "Whether this model should run on CPU only" + }, "base": { "type": "string", "const": "sdxl", @@ -71656,6 +71785,7 @@ "format", "repo_variant", "type", + "cpu_only", "base" ], "title": "VAE_Diffusers_SDXL_Config"