diff --git a/tests/pipelines/ip_adapters/test_ip_adapter_stable_diffusion.py b/tests/pipelines/ip_adapters/test_ip_adapter_stable_diffusion.py
index 32590111cdf3..6689526e8b13 100644
--- a/tests/pipelines/ip_adapters/test_ip_adapter_stable_diffusion.py
+++ b/tests/pipelines/ip_adapters/test_ip_adapter_stable_diffusion.py
@@ -15,6 +15,7 @@
 
 import gc
 import unittest
+from unittest import mock
 
 import numpy as np
 import torch
@@ -53,6 +54,15 @@
 class IPAdapterNightlyTestsMixin(unittest.TestCase):
     dtype = torch.float16
 
+    _SD_PIPELINE_RANDN_TENSOR_TARGETS = {
+        StableDiffusionPipeline: "diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.randn_tensor",
+        StableDiffusionImg2ImgPipeline: "diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.randn_tensor",
+        StableDiffusionInpaintPipeline: "diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_inpaint.randn_tensor",
+        StableDiffusionXLPipeline: "diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.randn_tensor",
+        StableDiffusionXLImg2ImgPipeline: "diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl_img2img.randn_tensor",
+        StableDiffusionXLInpaintPipeline: "diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl_inpaint.randn_tensor",
+    }
+
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
@@ -65,6 +75,22 @@ def tearDown(self):
         gc.collect()
         backend_empty_cache(torch_device)
 
+    def get_fixed_noise(self, shape=(1, 4, 64, 64), seed=33):
+        return torch.from_numpy(np.random.RandomState(seed).standard_normal(shape)).to(torch.float32)
+
+    def get_fixed_randn_tensor_patch(self, pipeline, shape=(1, 4, 64, 64), seed=33):
+        fixed_noise = self.get_fixed_noise(shape=shape, seed=seed)
+
+        def fake_randn_tensor(requested_shape, generator=None, device=None, dtype=None, layout=None):
+            self.assertEqual(tuple(requested_shape), tuple(fixed_noise.shape))
+            return fixed_noise.to(device=device, dtype=dtype)
+
+        for pipeline_cls, target in self._SD_PIPELINE_RANDN_TENSOR_TARGETS.items():
+            if isinstance(pipeline, pipeline_cls):
+                return mock.patch(target, side_effect=fake_randn_tensor)
+
+        self.fail(f"No fixed randn_tensor patch target configured for pipeline type {type(pipeline)}")
+
     def get_image_encoder(self, repo_id, subfolder):
         image_encoder = CLIPVisionModelWithProjection.from_pretrained(
             repo_id, subfolder=subfolder, torch_dtype=self.dtype
@@ -182,10 +208,11 @@ def test_text_to_image(self):
         pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.bin")
 
         inputs = self.get_dummy_inputs()
-        images = pipeline(**inputs).images
+        with self.get_fixed_randn_tensor_patch(pipeline):
+            images = pipeline(**inputs).images
         image_slice = images[0, :3, :3, -1].flatten()
 
-        expected_slice = np.array([0.80810547, 0.88183594, 0.9296875, 0.9189453, 0.9848633, 1.0, 0.97021484, 1.0, 1.0])
+        expected_slice = np.array([0.3291, 0.2964, 0.2742, 0.3010, 0.2698, 0.2507, 0.2917, 0.2671, 0.2478])
 
         max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
         assert max_diff < 5e-4
@@ -193,13 +220,19 @@ def test_text_to_image(self):
         pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter-plus_sd15.bin")
 
         inputs = self.get_dummy_inputs()
-        images = pipeline(**inputs).images
+        with self.get_fixed_randn_tensor_patch(pipeline):
+            images = pipeline(**inputs).images
         image_slice = images[0, :3, :3, -1].flatten()
 
-        expected_slice = np.array(
-            [0.30444336, 0.26513672, 0.22436523, 0.2758789, 0.25585938, 0.20751953, 0.25390625, 0.24633789, 0.21923828]
-        )
-
+        expected_slice = Expectations(
+            {
+                ("cuda", None): np.array([0.1238, 0.0579, 0.0312, 0.0493, 0.0010, 0.0, 0.0188, 0.0, 0.0]),
+                ("xpu", None): np.array(
+                    [0.11938477, 0.05249023, 0.02490234, 0.04370117, 0.0, 0.0, 0.01342773, 0.0, 0.0]
+                ),
+                (None, None): np.array([0.1238, 0.0579, 0.0312, 0.0493, 0.0010, 0.0, 0.0188, 0.0, 0.0]),
+            }
+        ).get_expectation()
         max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
         assert max_diff < 5e-4
 
@@ -214,26 +247,30 @@ def test_image_to_image(self):
         pipeline.to(torch_device)
         pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.bin")
 
-        inputs = self.get_dummy_inputs(for_image_to_image=True)
-        images = pipeline(**inputs).images
-        image_slice = images[0, :3, :3, -1].flatten()
+        with self.get_fixed_randn_tensor_patch(pipeline):
+            inputs = self.get_dummy_inputs(for_image_to_image=True)
+            images = pipeline(**inputs).images
+            image_slice = images[0, :3, :3, -1].flatten()
 
-        expected_slice = np.array(
-            [0.22167969, 0.21875, 0.21728516, 0.22607422, 0.21948242, 0.23925781, 0.22387695, 0.25268555, 0.2722168]
-        )
+        expected_slice = np.array([0.1492, 0.1294, 0.1123, 0.1504, 0.1328, 0.0923, 0.1428, 0.1479, 0.1370])
 
         max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
         assert max_diff < 5e-4
 
         pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter-plus_sd15.bin")
 
-        inputs = self.get_dummy_inputs(for_image_to_image=True)
-        images = pipeline(**inputs).images
-        image_slice = images[0, :3, :3, -1].flatten()
+        with self.get_fixed_randn_tensor_patch(pipeline):
+            inputs = self.get_dummy_inputs(for_image_to_image=True)
+            images = pipeline(**inputs).images
+            image_slice = images[0, :3, :3, -1].flatten()
 
-        expected_slice = np.array(
-            [0.35913086, 0.265625, 0.26367188, 0.24658203, 0.19750977, 0.39990234, 0.15258789, 0.20336914, 0.5517578]
-        )
+        expected_slice = Expectations(
+            {
+                ("cuda", None): np.array([0.0493, 0.0059, 0.0, 0.0166, 0.0056, 0.0027, 0.0139, 0.0090, 0.0129]),
+                ("xpu", None): np.array([0.0513, 0.0083, 0.0, 0.0183, 0.0073, 0.0039, 0.0159, 0.0100, 0.0142]),
+                (None, None): np.array([0.0493, 0.0059, 0.0, 0.0166, 0.0056, 0.0027, 0.0139, 0.0090, 0.0129]),
+            }
+        ).get_expectation()
 
         max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
         assert max_diff < 5e-4
@@ -250,12 +287,11 @@ def test_inpainting(self):
         pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.bin")
 
         inputs = self.get_dummy_inputs(for_inpainting=True)
-        images = pipeline(**inputs).images
+        with self.get_fixed_randn_tensor_patch(pipeline):
+            images = pipeline(**inputs).images
         image_slice = images[0, :3, :3, -1].flatten()
 
-        expected_slice = np.array(
-            [0.27148438, 0.24047852, 0.22167969, 0.23217773, 0.21118164, 0.21142578, 0.21875, 0.20751953, 0.20019531]
-        )
+        expected_slice = np.array([0.2766, 0.2437, 0.2246, 0.2354, 0.2126, 0.2119, 0.2207, 0.2075, 0.1992])
 
         max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
         assert max_diff < 5e-4
@@ -263,9 +299,12 @@ def test_inpainting(self):
         pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter-plus_sd15.bin")
 
         inputs = self.get_dummy_inputs(for_inpainting=True)
-        images = pipeline(**inputs).images
+        with self.get_fixed_randn_tensor_patch(pipeline):
+            images = pipeline(**inputs).images
         image_slice = images[0, :3, :3, -1].flatten()
 
+        expected_slice = np.array([0.3042, 0.2739, 0.2532, 0.2666, 0.2434, 0.2351, 0.2507, 0.2358, 0.2217])
+
         max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
         assert max_diff < 5e-4
 
@@ -281,11 +320,13 @@ def test_text_to_image_model_cpu_offload(self):
         pipeline.to(torch_device)
 
         inputs = self.get_dummy_inputs()
-        output_without_offload = pipeline(**inputs).images
+        with self.get_fixed_randn_tensor_patch(pipeline):
+            output_without_offload = pipeline(**inputs).images
 
         pipeline.enable_model_cpu_offload(device=torch_device)
         inputs = self.get_dummy_inputs()
-        output_with_offload = pipeline(**inputs).images
+        with self.get_fixed_randn_tensor_patch(pipeline):
+            output_with_offload = pipeline(**inputs).images
         max_diff = np.abs(output_with_offload - output_without_offload).max()
         self.assertLess(max_diff, 1e-3, "CPU offloading should not affect the inference results")
 
@@ -312,9 +353,10 @@ def test_text_to_image_full_face(self):
         pipeline.set_ip_adapter_scale(0.7)
 
         inputs = self.get_dummy_inputs()
-        images = pipeline(**inputs).images
+        with self.get_fixed_randn_tensor_patch(pipeline):
+            images = pipeline(**inputs).images
         image_slice = images[0, :3, :3, -1].flatten()
-        expected_slice = np.array([0.1704, 0.1296, 0.1272, 0.2212, 0.1514, 0.1479, 0.4172, 0.4263, 0.4360])
+        expected_slice = np.array([0.4033, 0.3989, 0.3992, 0.4006, 0.3879, 0.4355, 0.4192, 0.4333, 0.4753])
 
         max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
         assert max_diff < 5e-4
@@ -358,9 +400,10 @@ def test_multi(self):
         inputs = self.get_dummy_inputs()
         ip_adapter_image = inputs["ip_adapter_image"]
         inputs["ip_adapter_image"] = [ip_adapter_image, [ip_adapter_image] * 2]
-        images = pipeline(**inputs).images
+        with self.get_fixed_randn_tensor_patch(pipeline):
+            images = pipeline(**inputs).images
         image_slice = images[0, :3, :3, -1].flatten()
-        expected_slice = np.array([0.5234, 0.5352, 0.5625, 0.5713, 0.5947, 0.6206, 0.5786, 0.6187, 0.6494])
+        expected_slice = np.array([0.2783, 0.2302, 0.1921, 0.2354, 0.1934, 0.1528, 0.2207, 0.1902, 0.1526])
 
         max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
         assert max_diff < 5e-4
@@ -386,10 +429,10 @@ def test_text_to_image_face_id(self):
         id_embeds = id_embeds.reshape((2, 1, 1, 512))
         inputs["ip_adapter_image_embeds"] = [id_embeds]
         inputs["ip_adapter_image"] = None
-        images = pipeline(**inputs).images
+        with self.get_fixed_randn_tensor_patch(pipeline):
+            images = pipeline(**inputs).images
         image_slice = images[0, :3, :3, -1].flatten()
-
-        expected_slice = np.array([0.3237, 0.3186, 0.3406, 0.3154, 0.2942, 0.3220, 0.3188, 0.3528, 0.3242])
+        expected_slice = np.array([0.4780, 0.5117, 0.5103, 0.5044, 0.4922, 0.4932, 0.5029, 0.4954, 0.4802])
         max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
         assert max_diff < 5e-4
 
@@ -411,23 +454,23 @@ def test_text_to_image_sdxl(self):
         pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
 
         inputs = self.get_dummy_inputs()
-        images = pipeline(**inputs).images
+        with self.get_fixed_randn_tensor_patch(pipeline, shape=(1, 4, 128, 128)):
+            images = pipeline(**inputs).images
         image_slice = images[0, :3, :3, -1].flatten()
 
         expected_slice = np.array(
             [
-                0.09630299,
-                0.09551358,
-                0.08480701,
-                0.09070173,
-                0.09437338,
-                0.09264627,
-                0.08883232,
-                0.09287417,
-                0.09197289,
+                0.15138859,
+                0.15170279,
+                0.14246401,
+                0.15483627,
+                0.15317351,
+                0.15564519,
+                0.14952978,
+                0.15584505,
+                0.14940351,
             ]
         )
-
         max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
         assert max_diff < 5e-4
 
@@ -447,11 +490,23 @@ def test_text_to_image_sdxl(self):
         )
 
         inputs = self.get_dummy_inputs()
-        images = pipeline(**inputs).images
+        with self.get_fixed_randn_tensor_patch(pipeline, shape=(1, 4, 128, 128)):
+            images = pipeline(**inputs).images
         image_slice = images[0, :3, :3, -1].flatten()
 
-        expected_slice = np.array([0.0596, 0.0539, 0.0459, 0.0580, 0.0560, 0.0548, 0.0501, 0.0563, 0.0500])
-
+        expected_slice = np.array(
+            [
+                0.09022659,
+                0.08629113,
+                0.07586601,
+                0.09006533,
+                0.08684656,
+                0.08665657,
+                0.08367643,
+                0.08839294,
+                0.08377907,
+            ]
+        )
         max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
         assert max_diff < 5e-4
 
@@ -469,23 +524,13 @@ def test_image_to_image_sdxl(self):
         pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
 
         inputs = self.get_dummy_inputs(for_image_to_image=True)
-        images = pipeline(**inputs).images
+        with self.get_fixed_randn_tensor_patch(pipeline, shape=(1, 4, 64, 64)):
+            images = pipeline(**inputs).images
         image_slice = images[0, :3, :3, -1].flatten()
 
         expected_slice = np.array(
-            [
-                0.06513795,
-                0.07009393,
-                0.07234055,
-                0.07426041,
-                0.07002589,
-                0.06415862,
-                0.07827643,
-                0.07962808,
-                0.07411247,
-            ]
+            [0.05107406, 0.05074775, 0.00099546, 0.05845362, 0.05587912, 0.0, 0.06056768, 0.05724522, 0.0648115]
         )
-
         assert np.allclose(image_slice, expected_slice, atol=1e-3)
 
         image_encoder = self.get_image_encoder(repo_id="h94/IP-Adapter", subfolder="models/image_encoder")
@@ -505,21 +550,11 @@ def test_image_to_image_sdxl(self):
         )
 
         inputs = self.get_dummy_inputs(for_image_to_image=True)
-        images = pipeline(**inputs).images
+        with self.get_fixed_randn_tensor_patch(pipeline, shape=(1, 4, 64, 64)):
+            images = pipeline(**inputs).images
         image_slice = images[0, :3, :3, -1].flatten()
-
         expected_slice = np.array(
-            [
-                0.07126552,
-                0.07025367,
-                0.07348302,
-                0.07580167,
-                0.07467338,
-                0.06918576,
-                0.07480252,
-                0.08279955,
-                0.08547315,
-            ]
+            [0.05652112, 0.05557555, 0.00392720, 0.06261870, 0.06117940, 0.0, 0.05906063, 0.06035855, 0.06263199]
         )
 
         assert np.allclose(image_slice, expected_slice, atol=1e-3)
@@ -538,12 +573,21 @@ def test_inpainting_sdxl(self):
         pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
 
         inputs = self.get_dummy_inputs(for_inpainting=True)
-        images = pipeline(**inputs).images
+        with self.get_fixed_randn_tensor_patch(pipeline, shape=(1, 4, 128, 128)):
+            images = pipeline(**inputs).images
         image_slice = images[0, :3, :3, -1].flatten()
-        image_slice.tolist()
-
         expected_slice = np.array(
-            [0.14181179, 0.1493012, 0.14283323, 0.14602411, 0.14915377, 0.15015268, 0.14725655, 0.15009224, 0.15164584]
+            [
+                0.14227295,
+                0.14525282,
+                0.14307272,
+                0.15040666,
+                0.14928216,
+                0.14794737,
+                0.14742243,
+                0.15273672,
+                0.15166444,
+            ]
         )
 
         max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
@@ -566,11 +610,22 @@ def test_inpainting_sdxl(self):
         )
 
         inputs = self.get_dummy_inputs(for_inpainting=True)
-        images = pipeline(**inputs).images
+        with self.get_fixed_randn_tensor_patch(pipeline, shape=(1, 4, 128, 128)):
+            images = pipeline(**inputs).images
         image_slice = images[0, :3, :3, -1].flatten()
-        image_slice.tolist()
-
-        expected_slice = np.array([0.1398, 0.1476, 0.1407, 0.1442, 0.1470, 0.1480, 0.1449, 0.1481, 0.1494])
+        expected_slice = np.array(
+            [
+                0.14031684,
+                0.14346808,
+                0.14132470,
+                0.14918229,
+                0.14789128,
+                0.14650577,
+                0.14599693,
+                0.15143514,
+                0.15061957,
+            ]
+        )
 
         max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
         assert max_diff < 5e-4
@@ -594,12 +649,22 @@ def test_ip_adapter_mask(self):
         mask = processor.preprocess(mask)
         inputs["cross_attention_kwargs"]["ip_adapter_masks"] = mask
         inputs["ip_adapter_image"] = inputs["ip_adapter_image"][0]
-        images = pipeline(**inputs).images
+        with self.get_fixed_randn_tensor_patch(pipeline, shape=(1, 4, 128, 128)):
+            images = pipeline(**inputs).images
         image_slice = images[0, :3, :3, -1].flatten()
         expected_slice = np.array(
-            [0.7307304, 0.73450166, 0.73731124, 0.7377061, 0.7318013, 0.73720926, 0.74746597, 0.7409929, 0.74074936]
+            [
+                0.47833657,
+                0.50273246,
+                0.49865803,
+                0.46196738,
+                0.51376355,
+                0.49931064,
+                0.45902768,
+                0.55391037,
+                0.50260746,
+            ]
         )
-
         max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
         assert max_diff < 5e-4
 
@@ -621,10 +686,11 @@ def test_ip_adapter_multiple_masks(self):
         processor = IPAdapterMaskProcessor()
         masks = processor.preprocess(masks)
         inputs["cross_attention_kwargs"]["ip_adapter_masks"] = masks
-        images = pipeline(**inputs).images
+        with self.get_fixed_randn_tensor_patch(pipeline, shape=(1, 4, 128, 128)):
+            images = pipeline(**inputs).images
         image_slice = images[0, :3, :3, -1].flatten()
         expected_slice = np.array(
-            [0.79474676, 0.7977683, 0.8013954, 0.7988008, 0.7970615, 0.8029355, 0.80614823, 0.8050743, 0.80627424]
+            [0.3578991, 0.39458388, 0.43545875, 0.35710996, 0.3885604, 0.43619853, 0.37826842, 0.39264038, 0.45008034]
         )
 
         max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
@@ -663,54 +729,23 @@ def test_instant_style_multiple_masks(self):
         masks2 = processor.preprocess(masks2, height=1024, width=1024)
         masks2 = masks2.reshape(1, masks2.shape[0], masks2.shape[2], masks2.shape[3])
         inputs["cross_attention_kwargs"]["ip_adapter_masks"] = [masks1, masks2]
-        images = pipeline(**inputs).images
+        with self.get_fixed_randn_tensor_patch(pipeline, shape=(1, 4, 128, 128)):
+            images = pipeline(**inputs).images
         image_slice = images[0, :3, :3, -1].flatten()
 
-        expected_slices = Expectations(
-            {
-                ("xpu", 3): np.array(
-                    [
-                        0.2520,
-                        0.1050,
-                        0.1510,
-                        0.0997,
-                        0.0893,
-                        0.0019,
-                        0.0000,
-                        0.0000,
-                        0.0210,
-                    ]
-                ),
-                ("cuda", 7): np.array(
-                    [
-                        0.2323,
-                        0.1026,
-                        0.1338,
-                        0.0638,
-                        0.0662,
-                        0.0000,
-                        0.0000,
-                        0.0000,
-                        0.0199,
-                    ]
-                ),
-                ("cuda", 8): np.array(
-                    [
-                        0.2518,
-                        0.1059,
-                        0.1553,
-                        0.0977,
-                        0.0852,
-                        0.0000,
-                        0.0000,
-                        0.0000,
-                        0.0220,
-                    ]
-                ),
-            }
+        expected_slice = np.array(
+            [
+                0.0271,
+                0.0004,
+                0.0000,
+                0.0011,
+                0.0000,
+                0.0000,
+                0.0000,
+                0.0000,
+                0.0037,
+            ]
         )
-        expected_slice = expected_slices.get_expectation()
-
         max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
         assert max_diff < 5e-4
 
@@ -735,11 +770,11 @@ def test_ip_adapter_multiple_masks_one_adapter(self):
         inputs["cross_attention_kwargs"]["ip_adapter_masks"] = [masks]
         ip_images = inputs["ip_adapter_image"]
         inputs["ip_adapter_image"] = [[image[0] for image in ip_images]]
-        images = pipeline(**inputs).images
+        with self.get_fixed_randn_tensor_patch(pipeline, shape=(1, 4, 128, 128)):
+            images = pipeline(**inputs).images
         image_slice = images[0, :3, :3, -1].flatten()
         expected_slice = np.array(
-            [0.79474676, 0.7977683, 0.8013954, 0.7988008, 0.7970615, 0.8029355, 0.80614823, 0.8050743, 0.80627424]
+            [0.35761628, 0.39357206, 0.43524706, 0.3571607, 0.38741112, 0.43580052, 0.37814528, 0.3915079, 0.44959208]
         )
-
         max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
         assert max_diff < 5e-4