diff --git a/fastdeploy/envs.py b/fastdeploy/envs.py
index c8aa6d0ff34..68695acec32 100644
--- a/fastdeploy/envs.py
+++ b/fastdeploy/envs.py
@@ -225,10 +225,10 @@ def _validate_split_kv_size(value: int) -> int:
     "FD_WORKER_ALIVE_TIMEOUT": lambda: int(os.getenv("FD_WORKER_ALIVE_TIMEOUT", "30")),
     # File path for file storage backend
     "FILE_BACKEND_STORAGE_DIR": lambda: str(os.getenv("FILE_BACKEND_STORAGE_DIR", "/tmp/fastdeploy")),
-    # Custom all-reduce max buffer size in MB (default 64MB).
+    # Custom all-reduce max buffer size in MB (default 8MB).
     # Increase this to avoid NCCL fallback for large tensors in deterministic mode.
     # E.g. FD_CUSTOM_AR_MAX_SIZE_MB=128 for 128MB.
-    "FD_CUSTOM_AR_MAX_SIZE_MB": lambda: int(os.getenv("FD_CUSTOM_AR_MAX_SIZE_MB", "64")),
+    "FD_CUSTOM_AR_MAX_SIZE_MB": lambda: int(os.getenv("FD_CUSTOM_AR_MAX_SIZE_MB", "8")),
     # Enable deterministic inference mode for chunked prefill alignment
     "FD_DETERMINISTIC_MODE": lambda: bool(int(os.getenv("FD_DETERMINISTIC_MODE", "0"))),
     # Split KV block size for deterministic alignment (must be power of 2 and > 0, default 16)
diff --git a/tests/e2e/4cards_cases/test_determinism_long.py b/tests/e2e/4cards_cases/test_determinism_long.py
index 94104636e9a..fa92c41e15b 100644
--- a/tests/e2e/4cards_cases/test_determinism_long.py
+++ b/tests/e2e/4cards_cases/test_determinism_long.py
@@ -143,7 +143,7 @@ def _module_env():
         {
             "CUDA_VISIBLE_DEVICES": os.environ.get("CUDA_VISIBLE_DEVICES", "0,1,2,3"),
             "FD_DETERMINISTIC_MODE": "1",
-            "FD_CUSTOM_AR_MAX_SIZE_MB": os.environ.get("FD_CUSTOM_AR_MAX_SIZE_MB", "57"),
+            "FD_CUSTOM_AR_MAX_SIZE_MB": os.environ.get("FD_CUSTOM_AR_MAX_SIZE_MB", "64"),
             "FLAGS_max_partition_size": _CHUNK_SIZE_FOR_TEST,
         }
     ):