6 changes: 0 additions & 6 deletions .github/workflows/test-runner.yml
@@ -221,12 +221,6 @@ jobs:
           pip install pytest-cov
         shell: bash

-      # Windows: Downgrade NumPy for torch<2.4.1 compatibility
-      # See: https://github.com/pytorch/pytorch/issues/131668
-      - name: Downgrade NumPy
-        if: inputs.platform == 'windows' && startsWith(inputs.torch_version, '2.3.')
-        run: pip install "numpy<2"
-
       - name: Show installed packages
         run: pip list

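For reference, a minimal Python rendering of the guard this step implemented (a sketch, not code from the PR; `needs_numpy1_pin` is a hypothetical helper). With 2.4.1 as the new floor, the 2.3.x condition can never match, so the step is dead code:

```python
import platform

import torch

# Hypothetical helper mirroring the removed workflow guard: Windows runners
# testing torch 2.3.x needed 'pip install "numpy<2"' (pytorch/pytorch#131668).
def needs_numpy1_pin() -> bool:
    # torch.__version__ is a str subclass, so startswith works like the
    # workflow's startsWith(inputs.torch_version, '2.3.') expression.
    return platform.system() == "Windows" and torch.__version__.startswith("2.3.")
```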
8 changes: 4 additions & 4 deletions .github/workflows/tests-nightly.yml
@@ -20,12 +20,12 @@ jobs:
         platform: [linux-x64, linux-aarch64, macos, windows]
         # default runners don't have AVX-512 support, but icelake does
         cpu_type: ["", icelake]
-        torch_version: ["2.3.1", "2.10.0", "2.11.0"]
+        torch_version: ["2.4.1", "2.10.0", "2.11.0"]

         exclude:
           # aarch64 minimum torch version is 2.5.1
           - platform: linux-aarch64
-            torch_version: "2.3.1"
+            torch_version: "2.4.1"
           # icelake only applies to linux-x64
           - platform: linux-aarch64
             cpu_type: icelake
@@ -62,7 +62,7 @@ jobs:
         include:
           # Map CUDA version to torch version and PyPI index
           - cuda_version: "11.8.0"
-            torch_version: "2.3.1"
+            torch_version: "2.4.1"
             pypi_index: "https://download.pytorch.org/whl/cu118"
           - cuda_version: "12.6.3"
             torch_version: "2.8.0"
@@ -82,7 +82,7 @@ jobs:
           - platform: windows
             gpu_type: T4
             cuda_version: "11.8.0"
-            torch_version: "2.3.1"
+            torch_version: "2.4.1"
             pypi_index: "https://download.pytorch.org/whl/cu118"
           - platform: windows
             gpu_type: T4
8 changes: 4 additions & 4 deletions .github/workflows/tests-pr.yml
@@ -31,20 +31,20 @@ jobs:
         platform: [linux-x64, linux-aarch64, macos]
         # default runners don't have AVX-512 support, but icelake does
         cpu_type: ["", icelake]
-        torch_version: ["2.3.1", "2.11.0"]
+        torch_version: ["2.4.1", "2.11.0"]

         exclude:
           # aarch64 minimum torch version is 2.5.1
           - platform: linux-aarch64
-            torch_version: "2.3.1"
+            torch_version: "2.4.1"
           # icelake only applies to linux-x64
           - platform: linux-aarch64
             cpu_type: icelake
           - platform: macos
             cpu_type: icelake

         include:
-          # Add aarch64 with torch 2.5.1 instead of 2.3.1
+          # Add aarch64 with torch 2.5.1 instead of 2.4.1
           - platform: linux-aarch64
             cpu_type: ""
             torch_version: "2.5.1"
@@ -70,7 +70,7 @@ jobs:
         include:
           # Map CUDA version to torch version and PyPI index
           - cuda_version: "11.8.0"
-            torch_version: "2.3.1"
+            torch_version: "2.4.1"
             pypi_index: "https://download.pytorch.org/whl/cu118"
           - cuda_version: "12.8.1"
             torch_version: "2.9.1"
2 changes: 1 addition & 1 deletion README.md
@@ -20,7 +20,7 @@ The library includes quantization primitives for 8-bit & 4-bit operations, throu
 bitsandbytes has the following minimum requirements for all platforms:

 * Python 3.10+
-* [PyTorch](https://pytorch.org/get-started/locally/) 2.3+
+* [PyTorch](https://pytorch.org/get-started/locally/) 2.4+
   * _Note: While we aim to provide wide backwards compatibility, we recommend using the latest version of PyTorch for the best experience._

 #### Accelerator support:
12 changes: 2 additions & 10 deletions bitsandbytes/_ops.py
@@ -4,16 +4,8 @@

 import torch

-_IS_TORCH_GTE_24 = False
-
-if hasattr(torch.library, "register_fake"):
-    _IS_TORCH_GTE_24 = True
-    register_fake = torch.library.register_fake
-    register_kernel = torch.library.register_kernel
-else:
-    # PyTorch <= 2.3
-    register_fake = torch.library.impl_abstract
-    register_kernel = torch.library.impl
+register_fake = torch.library.register_fake
+register_kernel = torch.library.register_kernel

 # Int8 mixed precision matmul + dequant + bias
 torch.library.define(
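With torch 2.4 as the floor, `register_fake` and `register_kernel` always exist, so the `impl_abstract`/`impl` fallback can go. A minimal sketch of how these aliases are typically used (the op `mylib::add_one` is hypothetical, not a bitsandbytes op):

```python
import torch

# Hypothetical custom op, just to illustrate the torch>=2.4 torch.library API.
torch.library.define("mylib::add_one", "(Tensor x) -> Tensor")

# register_fake (successor of impl_abstract) describes output metadata so
# torch.compile / FakeTensor can trace the op without running a real kernel.
@torch.library.register_fake("mylib::add_one")
def _(x):
    return torch.empty_like(x)

# register_kernel (successor of torch.library.impl) binds a concrete backend.
@torch.library.register_kernel("mylib::add_one", "cpu")
def _(x):
    return x + 1
```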
2 changes: 1 addition & 1 deletion docs/source/installation.mdx
@@ -27,7 +27,7 @@ We provide official support for NVIDIA GPUs, CPUs, Intel XPUs, and Intel Gaudi.
 These are the minimum requirements for `bitsandbytes` across all platforms. Please be aware that some compute platforms may impose more strict requirements.

 * Python >= 3.10
-* PyTorch >= 2.3
+* PyTorch >= 2.4

 ## NVIDIA CUDA[[cuda]]

2 changes: 1 addition & 1 deletion docs/source/quickstart.mdx
@@ -8,7 +8,7 @@ Welcome to bitsandbytes! This library enables accessible large language models v
 pip install bitsandbytes
 ```

-**Requirements:** Python 3.10+, PyTorch 2.3+
+**Requirements:** Python 3.10+, PyTorch 2.4+

 For detailed installation instructions, see the [Installation Guide](./installation).

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -43,7 +43,7 @@ classifiers = [
   "Topic :: Scientific/Engineering :: Artificial Intelligence"
 ]
 dependencies = [
-  "torch>=2.3,<3",
+  "torch>=2.4,<3",
   "numpy>=1.17",
   "packaging>=20.9",
 ]
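A quick way to sanity-check an environment against the new floor, sketched with `packaging` (already a declared dependency); splitting off the local version handles builds like `2.4.1+cu118`:

```python
import torch
from packaging.version import Version

# Runtime mirror of the pyproject constraint "torch>=2.4,<3".
installed = Version(torch.__version__.split("+")[0])
assert Version("2.4") <= installed < Version("3"), f"unsupported torch {installed}"
```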
1 change: 0 additions & 1 deletion tests/test_linear4bit.py
@@ -355,7 +355,6 @@ def test_params4bit_real_serialization(device, quant_type, blocksize, compress_s
 @pytest.mark.parametrize("bias", TRUE_FALSE, ids=id_formatter("bias"))
 @pytest.mark.parametrize("fullgraph", TRUE_FALSE, ids=id_formatter("fullgraph"))
 @pytest.mark.parametrize("mode", ["default", "reduce-overhead"], ids=id_formatter("mode"))
-@pytest.mark.skipif(torch.__version__ < (2, 4), reason="Not supported in torch < 2.4")
 @pytest.mark.skipif(
     torch.__version__ < (2, 10) and sys.version_info >= (3, 14), reason="Not supported in Python 3.14 until torch 2.10"
 )
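Worth noting for the comparisons that remain: `torch.__version__` is not a plain string but a `TorchVersion` (a `str` subclass) that implements version-aware ordering against tuples, which is what makes `torch.__version__ < (2, 10)` safe. A small illustration:

```python
import torch

# TorchVersion compares version-aware: "2.10.0" is correctly greater than
# (2, 4), where a naive lexicographic string comparison would get it wrong.
assert isinstance(torch.__version__, str)
print(torch.__version__ < (2, 4))   # False on any supported torch (>= 2.4)
print(torch.__version__ < (2, 10))  # depends on the installed version
```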
4 changes: 3 additions & 1 deletion tests/test_linear8bitlt.py
@@ -253,11 +253,13 @@ def test_linear8bit_load_state_dict_raises_runtime_for_tied_weight():
 @pytest.mark.parametrize("bias", TRUE_FALSE, ids=id_formatter("bias"))
 @pytest.mark.parametrize("fullgraph", TRUE_FALSE, ids=id_formatter("fullgraph"))
 @pytest.mark.parametrize("mode", ["default", "reduce-overhead"], ids=id_formatter("mode"))
-@pytest.mark.skipif(torch.__version__ < (2, 4), reason="Not supported in torch < 2.4")
 @pytest.mark.skipif(
     torch.__version__ < (2, 10) and sys.version_info >= (3, 14), reason="Not supported in Python 3.14 until torch 2.10"
 )
 def test_linear8bitlt_torch_compile(device, threshold, bias, fullgraph, mode):
+    if fullgraph and torch.__version__ < (2, 5):
+        pytest.skip("fullgraph tracing of MatmulLtState requires torch >= 2.5")
+
     if device == "cuda" and platform.system() == "Windows":
         pytest.skip("Triton is not officially supported on Windows")

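Here the decorator-level `skipif` could not simply be deleted: the torch 2.5 requirement applies only when `fullgraph` is true, and a `skipif` mark evaluates once at collection time with no access to per-test parameters, hence the runtime `pytest.skip`. The pattern in isolation (a sketch, assuming a parametrized `fullgraph`):

```python
import pytest
import torch

@pytest.mark.parametrize("fullgraph", [False, True])
def test_compile_variants(fullgraph):
    # Parameter-dependent requirement: only the fullgraph path needs
    # torch >= 2.5, so the check lives in the test body, not in a skipif.
    if fullgraph and torch.__version__ < (2, 5):
        pytest.skip("fullgraph path requires torch >= 2.5")
```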
7 changes: 1 addition & 6 deletions tests/test_ops.py
@@ -6,12 +6,7 @@

 import bitsandbytes
 from tests.helpers import TRUE_FALSE, get_available_devices, id_formatter, is_supported_on_hpu

-# torch.library.opcheck is only available in torch 2.4 and later.
-# When testing with older versions, we will skip it as a no-op.
-if torch.__version__ >= (2, 4):
-    opcheck = torch.library.opcheck
-else:
-    opcheck = lambda *args, **kwargs: None
+opcheck = torch.library.opcheck


 class TestLLMInt8Ops:
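With the no-op fallback gone, `opcheck` always resolves to `torch.library.opcheck`, which validates a custom op's registrations against sample inputs. A sketch of a call, reusing the hypothetical `mylib::add_one` op from the `_ops.py` note above:

```python
import torch

# opcheck exercises the op's schema, fake-tensor, and autograd registrations
# for the given sample inputs and raises if they are inconsistent.
x = torch.randn(8)
torch.library.opcheck(torch.ops.mylib.add_one.default, (x,))
```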