diff --git a/diffsynth/core/vram/layers.py b/diffsynth/core/vram/layers.py
index 751792d0..b0179b91 100644
--- a/diffsynth/core/vram/layers.py
+++ b/diffsynth/core/vram/layers.py
@@ -64,7 +64,11 @@ def cast_to(self, weight, dtype, device):
 
     def check_free_vram(self):
         device = self.computation_device if self.computation_device != "npu" else "npu:0"
-        gpu_mem_state = getattr(torch, self.computation_device_type).mem_get_info(device)
+        device_module = getattr(torch, self.computation_device_type, None)
+        # Only CUDA and NPU have mem_get_info, for MPS/CPU assume enough memory
+        if device_module is None or not hasattr(device_module, "mem_get_info"):
+            return True
+        gpu_mem_state = device_module.mem_get_info(device)
         used_memory = (gpu_mem_state[1] - gpu_mem_state[0]) / (1024**3)
         return used_memory < self.vram_limit
 
diff --git a/diffsynth/diffusion/base_pipeline.py b/diffsynth/diffusion/base_pipeline.py
index fa355a16..4d90ff29 100644
--- a/diffsynth/diffusion/base_pipeline.py
+++ b/diffsynth/diffusion/base_pipeline.py
@@ -155,7 +155,10 @@ def load_models_to_device(self, model_names):
                     for module in model.modules():
                         if hasattr(module, "offload"):
                             module.offload()
-        getattr(torch, self.device_type).empty_cache()
+        # Clear cache if available (only CUDA has empty_cache)
+        device_module = getattr(torch, self.device_type, None)
+        if device_module is not None and hasattr(device_module, "empty_cache"):
+            device_module.empty_cache()
         # onload models
         for name, model in self.named_children():
             if name in model_names:
diff --git a/diffsynth/models/dinov3_image_encoder.py b/diffsynth/models/dinov3_image_encoder.py
index be2ee587..70eec5b2 100644
--- a/diffsynth/models/dinov3_image_encoder.py
+++ b/diffsynth/models/dinov3_image_encoder.py
@@ -70,7 +70,10 @@ def __init__(self):
             }
         )
 
-    def forward(self, image, torch_dtype=torch.bfloat16, device="cuda"):
+    def forward(self, image, torch_dtype=torch.bfloat16, device=None):
+        # Use model's device if not specified
+        if device is None:
+            device = next(self.parameters()).device
         inputs = self.processor(images=image, return_tensors="pt")
         pixel_values = inputs["pixel_values"].to(dtype=torch_dtype, device=device)
         bool_masked_pos = None
diff --git a/diffsynth/models/siglip2_image_encoder.py b/diffsynth/models/siglip2_image_encoder.py
index 10184f85..dbc3a0e1 100644
--- a/diffsynth/models/siglip2_image_encoder.py
+++ b/diffsynth/models/siglip2_image_encoder.py
@@ -47,7 +47,10 @@ def __init__(self):
             }
         )
 
-    def forward(self, image, torch_dtype=torch.bfloat16, device="cuda"):
+    def forward(self, image, torch_dtype=torch.bfloat16, device=None):
+        # Use model's device if not specified
+        if device is None:
+            device = next(self.parameters()).device
         pixel_values = self.processor(images=[image], return_tensors="pt")["pixel_values"]
         pixel_values = pixel_values.to(device=device, dtype=torch_dtype)
         output_attentions = False