From bc212020df35bef9524558b2593db86d118257c0 Mon Sep 17 00:00:00 2001
From: Alexander Eichhorn <alex@eichhorn.dev>
Date: Wed, 17 Jun 2026 23:40:09 +0200
Subject: [PATCH 1/2] feat(vae): support running VAEs on CPU via cpu_only
 setting

Extends the cpu_only mechanism from #8777 (text encoders) to VAE decode.
Adds a cpu_only field to all standalone VAE configs; the loader already
forces standalone configs with cpu_only=True onto the CPU. The 8 decode
invocations now move latents to the VAE's effective device instead of
hard-coding CUDA, and the SD/SDXL path falls back to fp32 on CPU (fp16
conv is unsupported there). Adds a "Run on CPU" toggle to the VAE model
settings panel and regenerates the API schema.

Decode-only for now; encode and main-model VAE submodels are unchanged.

Closes #7276 (VAE part)
---
 .../app/invocations/anima_latents_to_image.py |   4 +-
 .../invocations/cogview4_latents_to_image.py  |   4 +-
 invokeai/app/invocations/flux2_vae_decode.py  |   4 +-
 invokeai/app/invocations/flux_vae_decode.py   |   4 +-
 invokeai/app/invocations/latents_to_image.py  |   9 +-
 .../qwen_image_latents_to_image.py            |   4 +-
 .../app/invocations/sd3_latents_to_image.py   |   4 +-
 .../invocations/z_image_latents_to_image.py   |   4 +-
 invokeai/backend/model_manager/configs/vae.py |   6 +
 .../hooks/useVAEModelSettings.ts              |  18 +++
 .../subpanels/ModelPanel/ModelView.tsx        |   6 +
 .../VAEModelSettings/VAEModelSettings.tsx     | 126 ++++++++++++++++++
 .../frontend/web/src/services/api/schema.ts   |  50 +++++++
 .../load/test_load_default_cpu_only.py        |  49 +++++++
 14 files changed, 283 insertions(+), 9 deletions(-)
 create mode 100644 invokeai/frontend/web/src/features/modelManagerV2/hooks/useVAEModelSettings.ts
 create mode 100644 invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/VAEModelSettings/VAEModelSettings.tsx
 create mode 100644 tests/backend/model_manager/load/test_load_default_cpu_only.py

diff --git a/invokeai/app/invocations/anima_latents_to_image.py b/invokeai/app/invocations/anima_latents_to_image.py
index 080c101fa44..3bf3265bfd7 100644
--- a/invokeai/app/invocations/anima_latents_to_image.py
+++ b/invokeai/app/invocations/anima_latents_to_image.py
@@ -28,6 +28,7 @@
 from invokeai.app.invocations.primitives import ImageOutput
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.backend.flux.modules.autoencoder import AutoEncoder as FluxAutoEncoder
+from invokeai.backend.model_manager.load.model_cache.utils import get_effective_device
 from invokeai.backend.util.devices import TorchDevice
 from invokeai.backend.util.vae_working_memory import estimate_vae_working_memory_flux
 
@@ -72,7 +73,8 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
                 raise TypeError(f"Expected AutoencoderKLWan or FluxAutoEncoder, got {type(vae).__name__}.")
 
             vae_dtype = next(iter(vae.parameters())).dtype
-            latents = latents.to(device=TorchDevice.choose_torch_device(), dtype=vae_dtype)
+            # Use the VAE's actual device (may be CPU if the model is configured cpu_only).
+            latents = latents.to(device=get_effective_device(vae), dtype=vae_dtype)
 
             TorchDevice.empty_cache()
 
diff --git a/invokeai/app/invocations/cogview4_latents_to_image.py b/invokeai/app/invocations/cogview4_latents_to_image.py
index 1b77ed8a1f8..bc9d208b669 100644
--- a/invokeai/app/invocations/cogview4_latents_to_image.py
+++ b/invokeai/app/invocations/cogview4_latents_to_image.py
@@ -17,6 +17,7 @@
 from invokeai.app.invocations.model import VAEField
 from invokeai.app.invocations.primitives import ImageOutput
 from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.model_manager.load.model_cache.utils import get_effective_device
 from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt
 from invokeai.backend.util.devices import TorchDevice
 from invokeai.backend.util.vae_working_memory import estimate_vae_working_memory_cogview4
@@ -54,7 +55,8 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
         ):
             context.util.signal_progress("Running VAE")
             assert isinstance(vae, (AutoencoderKL))
-            latents = latents.to(TorchDevice.choose_torch_device())
+            # Use the VAE's actual device (may be CPU if the model is configured cpu_only).
+            latents = latents.to(get_effective_device(vae))
 
             vae.disable_tiling()
 
diff --git a/invokeai/app/invocations/flux2_vae_decode.py b/invokeai/app/invocations/flux2_vae_decode.py
index ecbc7d9cb83..25ada406873 100644
--- a/invokeai/app/invocations/flux2_vae_decode.py
+++ b/invokeai/app/invocations/flux2_vae_decode.py
@@ -20,6 +20,7 @@
 from invokeai.app.invocations.primitives import ImageOutput
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.backend.model_manager.load.load_base import LoadedModel
+from invokeai.backend.model_manager.load.model_cache.utils import get_effective_device
 from invokeai.backend.util.devices import TorchDevice
 
 
@@ -51,7 +52,8 @@ def _vae_decode(self, vae_info: LoadedModel, latents: torch.Tensor) -> Image.Ima
         """
         with vae_info.model_on_device() as (_, vae):
             vae_dtype = next(iter(vae.parameters())).dtype
-            device = TorchDevice.choose_torch_device()
+            # Use the VAE's actual device (may be CPU if the model is configured cpu_only).
+            device = get_effective_device(vae)
             latents = latents.to(device=device, dtype=vae_dtype)
 
             # Decode using diffusers API
diff --git a/invokeai/app/invocations/flux_vae_decode.py b/invokeai/app/invocations/flux_vae_decode.py
index c55dfb539ac..400e36bff45 100644
--- a/invokeai/app/invocations/flux_vae_decode.py
+++ b/invokeai/app/invocations/flux_vae_decode.py
@@ -16,6 +16,7 @@
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.backend.flux.modules.autoencoder import AutoEncoder
 from invokeai.backend.model_manager.load.load_base import LoadedModel
+from invokeai.backend.model_manager.load.model_cache.utils import get_effective_device
 from invokeai.backend.util.devices import TorchDevice
 from invokeai.backend.util.vae_working_memory import estimate_vae_working_memory_flux
 
@@ -47,7 +48,8 @@ def _vae_decode(self, vae_info: LoadedModel, latents: torch.Tensor) -> Image.Ima
         with vae_info.model_on_device(working_mem_bytes=estimated_working_memory) as (_, vae):
             assert isinstance(vae, AutoEncoder)
             vae_dtype = next(iter(vae.parameters())).dtype
-            latents = latents.to(device=TorchDevice.choose_torch_device(), dtype=vae_dtype)
+            # Use the VAE's actual device (may be CPU if the model is configured cpu_only).
+            latents = latents.to(device=get_effective_device(vae), dtype=vae_dtype)
             img = vae.decode(latents)
 
         img = img.clamp(-1, 1)
diff --git a/invokeai/app/invocations/latents_to_image.py b/invokeai/app/invocations/latents_to_image.py
index 608485a078b..f6edf44cc6c 100644
--- a/invokeai/app/invocations/latents_to_image.py
+++ b/invokeai/app/invocations/latents_to_image.py
@@ -18,6 +18,7 @@
 from invokeai.app.invocations.model import VAEField
 from invokeai.app.invocations.primitives import ImageOutput
 from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.model_manager.load.model_cache.utils import get_effective_device
 from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt
 from invokeai.backend.stable_diffusion.vae_tiling import patch_vae_tiling_params
 from invokeai.backend.util.devices import TorchDevice
@@ -69,8 +70,12 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
         ):
             context.util.signal_progress("Running VAE decoder")
             assert isinstance(vae, (AutoencoderKL, AutoencoderTiny))
-            latents = latents.to(TorchDevice.choose_torch_device())
-            if self.fp32:
+            # Use the VAE's actual device (may be CPU if the model is configured cpu_only).
+            device = get_effective_device(vae)
+            latents = latents.to(device)
+            # fp16 VAE ops are not supported on CPU, so force fp32 when running on CPU
+            # (e.g. when the VAE is configured cpu_only).
+            if self.fp32 or device.type == "cpu":
                 # FP32 mode: convert everything to float32 for maximum precision
                 vae.to(dtype=torch.float32)
                 latents = latents.float()
diff --git a/invokeai/app/invocations/qwen_image_latents_to_image.py b/invokeai/app/invocations/qwen_image_latents_to_image.py
index b3ea39c4bbf..fc220feff0d 100644
--- a/invokeai/app/invocations/qwen_image_latents_to_image.py
+++ b/invokeai/app/invocations/qwen_image_latents_to_image.py
@@ -17,6 +17,7 @@
 from invokeai.app.invocations.model import VAEField
 from invokeai.app.invocations.primitives import ImageOutput
 from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.model_manager.load.model_cache.utils import get_effective_device
 from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt
 from invokeai.backend.util.devices import TorchDevice
 
@@ -47,7 +48,8 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
         ):
             context.util.signal_progress("Running VAE")
             assert isinstance(vae, AutoencoderKLQwenImage)
-            latents = latents.to(device=TorchDevice.choose_torch_device(), dtype=vae.dtype)
+            # Use the VAE's actual device (may be CPU if the model is configured cpu_only).
+            latents = latents.to(device=get_effective_device(vae), dtype=vae.dtype)
 
             vae.disable_tiling()
 
diff --git a/invokeai/app/invocations/sd3_latents_to_image.py b/invokeai/app/invocations/sd3_latents_to_image.py
index e6a20d38a9c..38c93305df7 100644
--- a/invokeai/app/invocations/sd3_latents_to_image.py
+++ b/invokeai/app/invocations/sd3_latents_to_image.py
@@ -17,6 +17,7 @@
 from invokeai.app.invocations.model import VAEField
 from invokeai.app.invocations.primitives import ImageOutput
 from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.model_manager.load.model_cache.utils import get_effective_device
 from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt
 from invokeai.backend.util.devices import TorchDevice
 from invokeai.backend.util.vae_working_memory import estimate_vae_working_memory_sd3
@@ -56,7 +57,8 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
         ):
             context.util.signal_progress("Running VAE")
             assert isinstance(vae, (AutoencoderKL))
-            latents = latents.to(TorchDevice.choose_torch_device())
+            # Use the VAE's actual device (may be CPU if the model is configured cpu_only).
+            latents = latents.to(get_effective_device(vae))
 
             vae.disable_tiling()
 
diff --git a/invokeai/app/invocations/z_image_latents_to_image.py b/invokeai/app/invocations/z_image_latents_to_image.py
index a2e6fdcc077..6ba34632d44 100644
--- a/invokeai/app/invocations/z_image_latents_to_image.py
+++ b/invokeai/app/invocations/z_image_latents_to_image.py
@@ -19,6 +19,7 @@
 from invokeai.app.invocations.primitives import ImageOutput
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.backend.flux.modules.autoencoder import AutoEncoder as FluxAutoEncoder
+from invokeai.backend.model_manager.load.model_cache.utils import get_effective_device
 from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt
 from invokeai.backend.util.devices import TorchDevice
 from invokeai.backend.util.vae_working_memory import estimate_vae_working_memory_flux
@@ -75,7 +76,8 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
                 )
 
             vae_dtype = next(iter(vae.parameters())).dtype
-            latents = latents.to(device=TorchDevice.choose_torch_device(), dtype=vae_dtype)
+            # Use the VAE's actual device (may be CPU if the model is configured cpu_only).
+            latents = latents.to(device=get_effective_device(vae), dtype=vae_dtype)
 
             # Disable tiling for AutoencoderKL
             if isinstance(vae, AutoencoderKL):
diff --git a/invokeai/backend/model_manager/configs/vae.py b/invokeai/backend/model_manager/configs/vae.py
index 5a88cf12781..30735b443ed 100644
--- a/invokeai/backend/model_manager/configs/vae.py
+++ b/invokeai/backend/model_manager/configs/vae.py
@@ -76,6 +76,7 @@ class VAE_Checkpoint_Config_Base(Checkpoint_Config_Base):
 
     type: Literal[ModelType.VAE] = Field(default=ModelType.VAE)
     format: Literal[ModelFormat.Checkpoint] = Field(default=ModelFormat.Checkpoint)
+    cpu_only: bool | None = Field(default=None, description="Whether this model should run on CPU only")
 
     @classmethod
     def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
@@ -166,6 +167,7 @@ class VAE_Checkpoint_Flux2_Config(Checkpoint_Config_Base, Config_Base):
     type: Literal[ModelType.VAE] = Field(default=ModelType.VAE)
     format: Literal[ModelFormat.Checkpoint] = Field(default=ModelFormat.Checkpoint)
     base: Literal[BaseModelType.Flux2] = Field(default=BaseModelType.Flux2)
+    cpu_only: bool | None = Field(default=None, description="Whether this model should run on CPU only")
 
     @classmethod
     def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
@@ -204,6 +206,7 @@ class VAE_Checkpoint_QwenImage_Config(Checkpoint_Config_Base, Config_Base):
     type: Literal[ModelType.VAE] = Field(default=ModelType.VAE)
     format: Literal[ModelFormat.Checkpoint] = Field(default=ModelFormat.Checkpoint)
     base: Literal[BaseModelType.QwenImage] = Field(default=BaseModelType.QwenImage)
+    cpu_only: bool | None = Field(default=None, description="Whether this model should run on CPU only")
 
     @classmethod
     def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
@@ -241,6 +244,7 @@ class VAE_Checkpoint_Anima_Config(Checkpoint_Config_Base, Config_Base):
     type: Literal[ModelType.VAE] = Field(default=ModelType.VAE)
     format: Literal[ModelFormat.Checkpoint] = Field(default=ModelFormat.Checkpoint)
     base: Literal[BaseModelType.Anima] = Field(default=BaseModelType.Anima)
+    cpu_only: bool | None = Field(default=None, description="Whether this model should run on CPU only")
 
     @classmethod
     def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
@@ -260,6 +264,7 @@ class VAE_Diffusers_Config_Base(Diffusers_Config_Base):
 
     type: Literal[ModelType.VAE] = Field(default=ModelType.VAE)
     format: Literal[ModelFormat.Diffusers] = Field(default=ModelFormat.Diffusers)
+    cpu_only: bool | None = Field(default=None, description="Whether this model should run on CPU only")
 
     @classmethod
     def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
@@ -328,6 +333,7 @@ class VAE_Diffusers_Flux2_Config(Diffusers_Config_Base, Config_Base):
     type: Literal[ModelType.VAE] = Field(default=ModelType.VAE)
     format: Literal[ModelFormat.Diffusers] = Field(default=ModelFormat.Diffusers)
     base: Literal[BaseModelType.Flux2] = Field(default=BaseModelType.Flux2)
+    cpu_only: bool | None = Field(default=None, description="Whether this model should run on CPU only")
 
     @classmethod
     def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
diff --git a/invokeai/frontend/web/src/features/modelManagerV2/hooks/useVAEModelSettings.ts b/invokeai/frontend/web/src/features/modelManagerV2/hooks/useVAEModelSettings.ts
new file mode 100644
index 00000000000..3d75b094c75
--- /dev/null
+++ b/invokeai/frontend/web/src/features/modelManagerV2/hooks/useVAEModelSettings.ts
@@ -0,0 +1,18 @@
+import type { EncoderModelSettingsFormData } from 'features/modelManagerV2/subpanels/ModelPanel/EncoderModelSettings/EncoderModelSettings';
+import { useMemo } from 'react';
+import type { VAEModelConfig } from 'services/api/types';
+
+export const useVAEModelSettings = (modelConfig: VAEModelConfig) => {
+  const vaeModelSettingsDefaults = useMemo<EncoderModelSettingsFormData>(() => {
+    const cpuOnly = modelConfig.cpu_only ?? false;
+
+    return {
+      cpuOnly: {
+        value: cpuOnly,
+        isEnabled: cpuOnly,
+      },
+    };
+  }, [modelConfig]);
+
+  return vaeModelSettingsDefaults;
+};
diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ModelView.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ModelView.tsx
index 365f7cff4b8..e666ebfd1b4 100644
--- a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ModelView.tsx
+++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ModelView.tsx
@@ -9,6 +9,7 @@ import { ModelHeader } from 'features/modelManagerV2/subpanels/ModelPanel/ModelH
 import { ModelSettingsExportButton } from 'features/modelManagerV2/subpanels/ModelPanel/ModelSettingsExportButton';
 import { ModelSettingsImportButton } from 'features/modelManagerV2/subpanels/ModelPanel/ModelSettingsImportButton';
 import { TriggerPhrases } from 'features/modelManagerV2/subpanels/ModelPanel/TriggerPhrases';
+import { VAEModelSettings } from 'features/modelManagerV2/subpanels/ModelPanel/VAEModelSettings/VAEModelSettings';
 import { filesize } from 'filesize';
 import { memo, useMemo } from 'react';
 import { useTranslation } from 'react-i18next';
@@ -82,6 +83,10 @@ export const ModelView = memo(({ modelConfig }: Props) => {
     if (isEncoderModel(modelConfig)) {
       return true;
     }
+    // VAE models (cpu_only toggle)
+    if (modelConfig.type === 'vae') {
+      return true;
+    }
 
     return false;
   }, [modelConfig]);
@@ -151,6 +156,7 @@ export const ModelView = memo(({ modelConfig }: Props) => {
               )}
               {modelConfig.type === 'main' && <TriggerPhrases modelConfig={modelConfig} />}
               {isEncoderModel(modelConfig) && <EncoderModelSettings modelConfig={modelConfig} />}
+              {modelConfig.type === 'vae' && <VAEModelSettings modelConfig={modelConfig} />}
             </Box>
           </>
         )}
diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/VAEModelSettings/VAEModelSettings.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/VAEModelSettings/VAEModelSettings.tsx
new file mode 100644
index 00000000000..210caf48f73
--- /dev/null
+++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/VAEModelSettings/VAEModelSettings.tsx
@@ -0,0 +1,126 @@
+import { Button, Flex, FormControl, FormLabel, Heading, Switch } from '@invoke-ai/ui-library';
+import { useAppSelector } from 'app/store/storeHooks';
+import { InformationalPopover } from 'common/components/InformationalPopover/InformationalPopover';
+import { useVAEModelSettings } from 'features/modelManagerV2/hooks/useVAEModelSettings';
+import { selectSelectedModelKey } from 'features/modelManagerV2/store/modelManagerV2Slice';
+import type { EncoderModelSettingsFormData } from 'features/modelManagerV2/subpanels/ModelPanel/EncoderModelSettings/EncoderModelSettings';
+import type { FormField } from 'features/modelManagerV2/subpanels/ModelPanel/MainModelDefaultSettings/MainModelDefaultSettings';
+import { toast } from 'features/toast/toast';
+import type { ChangeEvent } from 'react';
+import { memo, useCallback, useEffect, useMemo } from 'react';
+import type { Control, SubmitHandler } from 'react-hook-form';
+import { useController, useForm } from 'react-hook-form';
+import { useTranslation } from 'react-i18next';
+import { PiCheckBold } from 'react-icons/pi';
+import { useUpdateModelMutation } from 'services/api/endpoints/models';
+import type { VAEModelConfig } from 'services/api/types';
+
+type Props = {
+  modelConfig: VAEModelConfig;
+};
+
+const DefaultCpuOnly = memo((props: { name: 'cpuOnly'; control: Control<EncoderModelSettingsFormData> }) => {
+  const { field } = useController(props);
+  const { t } = useTranslation();
+
+  const onChange = useCallback(
+    (e: ChangeEvent<HTMLInputElement>) => {
+      const updatedValue = {
+        ...(field.value as FormField<boolean>),
+        value: e.target.checked,
+        isEnabled: e.target.checked,
+      };
+      field.onChange(updatedValue);
+    },
+    [field]
+  );
+
+  const value = useMemo(() => {
+    return (field.value as FormField<boolean>).value;
+  }, [field.value]);
+
+  return (
+    <FormControl>
+      <InformationalPopover feature="cpuOnly">
+        <FormLabel>{t('modelManager.runOnCpu')}</FormLabel>
+      </InformationalPopover>
+      <Switch isChecked={value} onChange={onChange} />
+    </FormControl>
+  );
+});
+
+DefaultCpuOnly.displayName = 'DefaultCpuOnly';
+
+export const VAEModelSettings = memo(({ modelConfig }: Props) => {
+  const selectedModelKey = useAppSelector(selectSelectedModelKey);
+  const { t } = useTranslation();
+
+  const settingsDefaults = useVAEModelSettings(modelConfig);
+  const [updateModel, { isLoading: isLoadingUpdateModel }] = useUpdateModelMutation();
+
+  const { handleSubmit, control, formState, reset } = useForm<EncoderModelSettingsFormData>({
+    defaultValues: settingsDefaults,
+  });
+
+  useEffect(() => {
+    reset(settingsDefaults);
+  }, [settingsDefaults, reset]);
+
+  const onSubmit = useCallback<SubmitHandler<EncoderModelSettingsFormData>>(
+    (data) => {
+      if (!selectedModelKey) {
+        return;
+      }
+
+      const body = {
+        cpu_only: data.cpuOnly.isEnabled ? data.cpuOnly.value : null,
+      };
+
+      updateModel({
+        key: selectedModelKey,
+        body,
+      })
+        .unwrap()
+        .then((_) => {
+          toast({
+            id: 'VAE_SETTINGS_SAVED',
+            title: t('modelManager.settingsSaved'),
+            status: 'success',
+          });
+          reset(data);
+        })
+        .catch((error) => {
+          if (error) {
+            toast({
+              id: 'VAE_SETTINGS_SAVE_FAILED',
+              title: `${error.data.detail} `,
+              status: 'error',
+            });
+          }
+        });
+    },
+    [selectedModelKey, reset, updateModel, t]
+  );
+
+  return (
+    <>
+      <Flex gap="4" justifyContent="space-between" w="full" pb={4}>
+        <Heading fontSize="md">{t('modelManager.settings')}</Heading>
+        <Button
+          size="sm"
+          leftIcon={<PiCheckBold />}
+          colorScheme="invokeYellow"
+          isDisabled={!formState.isDirty}
+          onClick={handleSubmit(onSubmit)}
+          isLoading={isLoadingUpdateModel}
+        >
+          {t('common.save')}
+        </Button>
+      </Flex>
+
+      <DefaultCpuOnly control={control} name="cpuOnly" />
+    </>
+  );
+});
+
+VAEModelSettings.displayName = 'VAEModelSettings';
diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts
index 5726458dc3a..502e04bc14c 100644
--- a/invokeai/frontend/web/src/services/api/schema.ts
+++ b/invokeai/frontend/web/src/services/api/schema.ts
@@ -31065,6 +31065,11 @@ export type components = {
              * @constant
              */
             base: "anima";
+            /**
+             * Cpu Only
+             * @description Whether this model should run on CPU only
+             */
+            cpu_only: boolean | null;
         };
         /** VAE_Checkpoint_FLUX_Config */
         VAE_Checkpoint_FLUX_Config: {
@@ -31137,6 +31142,11 @@ export type components = {
              * @constant
              */
             format: "checkpoint";
+            /**
+             * Cpu Only
+             * @description Whether this model should run on CPU only
+             */
+            cpu_only: boolean | null;
             /**
              * Base
              * @default flux
@@ -31224,6 +31234,11 @@ export type components = {
              * @constant
              */
             base: "flux2";
+            /**
+             * Cpu Only
+             * @description Whether this model should run on CPU only
+             */
+            cpu_only: boolean | null;
         };
         /**
          * VAE_Checkpoint_QwenImage_Config
@@ -31305,6 +31320,11 @@ export type components = {
              * @constant
              */
             base: "qwen-image";
+            /**
+             * Cpu Only
+             * @description Whether this model should run on CPU only
+             */
+            cpu_only: boolean | null;
         };
         /** VAE_Checkpoint_SD1_Config */
         VAE_Checkpoint_SD1_Config: {
@@ -31377,6 +31397,11 @@ export type components = {
              * @constant
              */
             format: "checkpoint";
+            /**
+             * Cpu Only
+             * @description Whether this model should run on CPU only
+             */
+            cpu_only: boolean | null;
             /**
              * Base
              * @default sd-1
@@ -31455,6 +31480,11 @@ export type components = {
              * @constant
              */
             format: "checkpoint";
+            /**
+             * Cpu Only
+             * @description Whether this model should run on CPU only
+             */
+            cpu_only: boolean | null;
             /**
              * Base
              * @default sd-2
@@ -31533,6 +31563,11 @@ export type components = {
              * @constant
              */
             format: "checkpoint";
+            /**
+             * Cpu Only
+             * @description Whether this model should run on CPU only
+             */
+            cpu_only: boolean | null;
             /**
              * Base
              * @default sdxl
@@ -31617,6 +31652,11 @@ export type components = {
              * @constant
              */
             base: "flux2";
+            /**
+             * Cpu Only
+             * @description Whether this model should run on CPU only
+             */
+            cpu_only: boolean | null;
         };
         /** VAE_Diffusers_SD1_Config */
         VAE_Diffusers_SD1_Config: {
@@ -31686,6 +31726,11 @@ export type components = {
              * @constant
              */
             type: "vae";
+            /**
+             * Cpu Only
+             * @description Whether this model should run on CPU only
+             */
+            cpu_only: boolean | null;
             /**
              * Base
              * @default sd-1
@@ -31761,6 +31806,11 @@ export type components = {
              * @constant
              */
             type: "vae";
+            /**
+             * Cpu Only
+             * @description Whether this model should run on CPU only
+             */
+            cpu_only: boolean | null;
             /**
              * Base
              * @default sdxl
diff --git a/tests/backend/model_manager/load/test_load_default_cpu_only.py b/tests/backend/model_manager/load/test_load_default_cpu_only.py
new file mode 100644
index 00000000000..d99eb5466d0
--- /dev/null
+++ b/tests/backend/model_manager/load/test_load_default_cpu_only.py
@@ -0,0 +1,49 @@
+"""Tests for `ModelLoader._get_execution_device` — the helper that forces a model onto the CPU
+when its config requests `cpu_only`.
+
+A VAE (or text encoder) configured with `cpu_only=True` must load onto the CPU so its weights
+never occupy VRAM. The loader signals this by returning `torch.device("cpu")` from
+`_get_execution_device`, which is then passed to `ModelCache.put(..., execution_device=...)`.
+"""
+
+from types import SimpleNamespace
+from typing import Optional
+
+import torch
+
+from invokeai.backend.model_manager.load.load_default import ModelLoader
+from invokeai.backend.model_manager.taxonomy import SubModelType
+
+
+def _loader() -> ModelLoader:
+    # `_get_execution_device` only reads the config, so an uninitialized loader is sufficient.
+    return ModelLoader.__new__(ModelLoader)
+
+
+def _vae_config(cpu_only: Optional[bool]) -> SimpleNamespace:
+    # Mirrors the relevant surface of a standalone VAE config: a `cpu_only` field and no
+    # `default_settings` (VAE configs do not carry default settings).
+    return SimpleNamespace(cpu_only=cpu_only, default_settings=None)
+
+
+def test_vae_cpu_only_true_returns_cpu():
+    assert _loader()._get_execution_device(_vae_config(cpu_only=True), None) == torch.device("cpu")
+
+
+def test_vae_cpu_only_false_or_unset_returns_none():
+    # Falsy values must not force CPU execution — the cache falls back to its default device.
+    assert _loader()._get_execution_device(_vae_config(cpu_only=False), None) is None
+    assert _loader()._get_execution_device(_vae_config(cpu_only=None), None) is None
+
+
+def test_vae_cpu_only_applies_regardless_of_submodel_type():
+    # The VAE is loaded as a standalone model (submodel_type=None), but the standalone branch
+    # must not depend on the submodel type either way.
+    loader = _loader()
+    assert loader._get_execution_device(_vae_config(cpu_only=True), SubModelType.VAE) == torch.device("cpu")
+
+
+def test_config_without_cpu_only_attr_returns_none():
+    # A config type that has neither `cpu_only` nor `default_settings` must be left on the
+    # cache default (return None), not crash.
+    assert _loader()._get_execution_device(SimpleNamespace(), None) is None

From 3a7a5754bff13165acfe41ce230268b8f217000a Mon Sep 17 00:00:00 2001
From: Alexander Eichhorn <alex@eichhorn.dev>
Date: Fri, 19 Jun 2026 02:47:20 +0200
Subject: [PATCH 2/2] chore(vae): regenerate OpenAPI schema; log cpu_only model loads at INFO

---
 .../load/model_cache/model_cache.py           |   9 +-
 invokeai/frontend/web/openapi.json            | 138 +++++++++++++++++-
 2 files changed, 141 insertions(+), 6 deletions(-)

diff --git a/invokeai/backend/model_manager/load/model_cache/model_cache.py b/invokeai/backend/model_manager/load/model_cache/model_cache.py
index e3a0928e52b..bb2d693167f 100644
--- a/invokeai/backend/model_manager/load/model_cache/model_cache.py
+++ b/invokeai/backend/model_manager/load/model_cache/model_cache.py
@@ -441,8 +441,13 @@ def lock(self, cache_entry: CacheRecord, working_mem_bytes: Optional[int]) -> No
         # Check if the model's specific compute_device is CPU, not just the cache's default execution_device
         model_compute_device = cache_entry.cached_model.compute_device
         if model_compute_device.type == "cpu":
-            # Models configured for CPU execution don't need to be loaded into VRAM
-            self._logger.debug(f"Model {cache_entry.key} is configured for CPU execution, skipping VRAM load")
+            # Models configured for CPU execution (cpu_only) don't need to be loaded into VRAM. Log at INFO so it
+            # mirrors the "Loaded model ... onto <device> device" line emitted for GPU loads below — otherwise there
+            # is no visible indication that the model is running on CPU at the default log level.
+            self._logger.info(
+                f"Loaded model '{cache_entry.key}' ({cache_entry.cached_model.model.__class__.__name__}) onto "
+                f"cpu device (cpu_only); skipping VRAM load"
+            )
             return
 
         try:
diff --git a/invokeai/frontend/web/openapi.json b/invokeai/frontend/web/openapi.json
index 2c9526c59a9..5fc5217ca24 100644
--- a/invokeai/frontend/web/openapi.json
+++ b/invokeai/frontend/web/openapi.json
@@ -70443,6 +70443,18 @@
             "const": "anima",
             "title": "Base",
             "default": "anima"
+          },
+          "cpu_only": {
+            "anyOf": [
+              {
+                "type": "boolean"
+              },
+              {
+                "type": "null"
+              }
+            ],
+            "title": "Cpu Only",
+            "description": "Whether this model should run on CPU only"
           }
         },
         "type": "object",
@@ -70461,7 +70473,8 @@
           "config_path",
           "type",
           "format",
-          "base"
+          "base",
+          "cpu_only"
         ],
         "title": "VAE_Checkpoint_Anima_Config",
         "description": "Model config for Anima QwenImage VAE checkpoint models (AutoencoderKLQwenImage)."
@@ -70574,6 +70587,18 @@
             "title": "Format",
             "default": "checkpoint"
           },
+          "cpu_only": {
+            "anyOf": [
+              {
+                "type": "boolean"
+              },
+              {
+                "type": "null"
+              }
+            ],
+            "title": "Cpu Only",
+            "description": "Whether this model should run on CPU only"
+          },
           "base": {
             "type": "string",
             "const": "flux",
@@ -70597,6 +70622,7 @@
           "config_path",
           "type",
           "format",
+          "cpu_only",
           "base"
         ],
         "title": "VAE_Checkpoint_FLUX_Config"
@@ -70714,6 +70740,18 @@
             "const": "flux2",
             "title": "Base",
             "default": "flux2"
+          },
+          "cpu_only": {
+            "anyOf": [
+              {
+                "type": "boolean"
+              },
+              {
+                "type": "null"
+              }
+            ],
+            "title": "Cpu Only",
+            "description": "Whether this model should run on CPU only"
           }
         },
         "type": "object",
@@ -70732,7 +70770,8 @@
           "config_path",
           "type",
           "format",
-          "base"
+          "base",
+          "cpu_only"
         ],
         "title": "VAE_Checkpoint_Flux2_Config",
         "description": "Model config for FLUX.2 VAE checkpoint models (AutoencoderKLFlux2)."
@@ -70850,6 +70889,18 @@
             "const": "qwen-image",
             "title": "Base",
             "default": "qwen-image"
+          },
+          "cpu_only": {
+            "anyOf": [
+              {
+                "type": "boolean"
+              },
+              {
+                "type": "null"
+              }
+            ],
+            "title": "Cpu Only",
+            "description": "Whether this model should run on CPU only"
           }
         },
         "type": "object",
@@ -70868,7 +70919,8 @@
           "config_path",
           "type",
           "format",
-          "base"
+          "base",
+          "cpu_only"
         ],
         "title": "VAE_Checkpoint_QwenImage_Config",
         "description": "Model config for Qwen Image VAE checkpoint models (AutoencoderKLQwenImage)."
@@ -70981,6 +71033,18 @@
             "title": "Format",
             "default": "checkpoint"
           },
+          "cpu_only": {
+            "anyOf": [
+              {
+                "type": "boolean"
+              },
+              {
+                "type": "null"
+              }
+            ],
+            "title": "Cpu Only",
+            "description": "Whether this model should run on CPU only"
+          },
           "base": {
             "type": "string",
             "const": "sd-1",
@@ -71004,6 +71068,7 @@
           "config_path",
           "type",
           "format",
+          "cpu_only",
           "base"
         ],
         "title": "VAE_Checkpoint_SD1_Config"
@@ -71116,6 +71181,18 @@
             "title": "Format",
             "default": "checkpoint"
           },
+          "cpu_only": {
+            "anyOf": [
+              {
+                "type": "boolean"
+              },
+              {
+                "type": "null"
+              }
+            ],
+            "title": "Cpu Only",
+            "description": "Whether this model should run on CPU only"
+          },
           "base": {
             "type": "string",
             "const": "sd-2",
@@ -71139,6 +71216,7 @@
           "config_path",
           "type",
           "format",
+          "cpu_only",
           "base"
         ],
         "title": "VAE_Checkpoint_SD2_Config"
@@ -71251,6 +71329,18 @@
             "title": "Format",
             "default": "checkpoint"
           },
+          "cpu_only": {
+            "anyOf": [
+              {
+                "type": "boolean"
+              },
+              {
+                "type": "null"
+              }
+            ],
+            "title": "Cpu Only",
+            "description": "Whether this model should run on CPU only"
+          },
           "base": {
             "type": "string",
             "const": "sdxl",
@@ -71274,6 +71364,7 @@
           "config_path",
           "type",
           "format",
+          "cpu_only",
           "base"
         ],
         "title": "VAE_Checkpoint_SDXL_Config"
@@ -71383,6 +71474,18 @@
             "const": "flux2",
             "title": "Base",
             "default": "flux2"
+          },
+          "cpu_only": {
+            "anyOf": [
+              {
+                "type": "boolean"
+              },
+              {
+                "type": "null"
+              }
+            ],
+            "title": "Cpu Only",
+            "description": "Whether this model should run on CPU only"
           }
         },
         "type": "object",
@@ -71401,7 +71504,8 @@
           "format",
           "repo_variant",
           "type",
-          "base"
+          "base",
+          "cpu_only"
         ],
         "title": "VAE_Diffusers_Flux2_Config",
         "description": "Model config for FLUX.2 VAE models in diffusers format (AutoencoderKLFlux2)."
@@ -71506,6 +71610,18 @@
             "title": "Type",
             "default": "vae"
           },
+          "cpu_only": {
+            "anyOf": [
+              {
+                "type": "boolean"
+              },
+              {
+                "type": "null"
+              }
+            ],
+            "title": "Cpu Only",
+            "description": "Whether this model should run on CPU only"
+          },
           "base": {
             "type": "string",
             "const": "sd-1",
@@ -71529,6 +71645,7 @@
           "format",
           "repo_variant",
           "type",
+          "cpu_only",
           "base"
         ],
         "title": "VAE_Diffusers_SD1_Config"
@@ -71633,6 +71750,18 @@
             "title": "Type",
             "default": "vae"
           },
+          "cpu_only": {
+            "anyOf": [
+              {
+                "type": "boolean"
+              },
+              {
+                "type": "null"
+              }
+            ],
+            "title": "Cpu Only",
+            "description": "Whether this model should run on CPU only"
+          },
           "base": {
             "type": "string",
             "const": "sdxl",
@@ -71656,6 +71785,7 @@
           "format",
           "repo_variant",
           "type",
+          "cpu_only",
           "base"
         ],
         "title": "VAE_Diffusers_SDXL_Config"