From 3353751aaa35db35baa5c96afa4d7080759f90a9 Mon Sep 17 00:00:00 2001
From: Titus von Koeller <9048635+Titus-von-Koeller@users.noreply.github.com>
Date: Thu, 16 Apr 2026 14:48:59 +0200
Subject: [PATCH 1/3] chore: bump minimum PyTorch version from 2.3 to 2.4
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Update dependency spec in pyproject.toml
- Update docs (README, installation, quickstart)
- Remove _IS_TORCH_GTE_24 compat shim in _ops.py (register_fake/register_kernel
  are always available in torch 2.4+)
- Remove torch < 2.4 skipif guards in tests
- Remove NumPy < 2 downgrade workaround for torch 2.3 on Windows
- Update CI test matrices: 2.3.1 → 2.4.1

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .github/workflows/test-runner.yml   |  6 ------
 .github/workflows/tests-nightly.yml |  8 ++++----
 .github/workflows/tests-pr.yml      |  8 ++++----
 README.md                           |  2 +-
 bitsandbytes/_ops.py                | 12 ++----------
 docs/source/installation.mdx        |  2 +-
 docs/source/quickstart.mdx          |  2 +-
 pyproject.toml                      |  2 +-
 tests/test_linear4bit.py            |  1 -
 tests/test_linear8bitlt.py          |  1 -
 tests/test_ops.py                   |  7 +------
 11 files changed, 15 insertions(+), 36 deletions(-)

diff --git a/.github/workflows/test-runner.yml b/.github/workflows/test-runner.yml
index 69f618eb6..0c04e3f8b 100644
--- a/.github/workflows/test-runner.yml
+++ b/.github/workflows/test-runner.yml
@@ -221,12 +221,6 @@ jobs:
           pip install pytest-cov
         shell: bash
 
-      # Windows: Downgrade NumPy for torch<2.4.1 compatibility
-      # See: https://github.com/pytorch/pytorch/issues/131668
-      - name: Downgrade NumPy
-        if: inputs.platform == 'windows' && startsWith(inputs.torch_version, '2.3.')
-        run: pip install "numpy<2"
-
       - name: Show installed packages
         run: pip list
 
diff --git a/.github/workflows/tests-nightly.yml b/.github/workflows/tests-nightly.yml
index 24eb4e0d4..12d0059af 100644
--- a/.github/workflows/tests-nightly.yml
+++ b/.github/workflows/tests-nightly.yml
@@ -20,12 +20,12 @@ jobs:
         platform: [linux-x64, linux-aarch64, macos, windows]
         # default runners don't have AVX-512 support, but icelake does
         cpu_type: ["", icelake]
-        torch_version: ["2.3.1", "2.10.0", "2.11.0"]
+        torch_version: ["2.4.1", "2.10.0", "2.11.0"]
 
         exclude:
           # aarch64 minimum torch version is 2.5.1
           - platform: linux-aarch64
-            torch_version: "2.3.1"
+            torch_version: "2.4.1"
           # icelake only applies to linux-x64
           - platform: linux-aarch64
             cpu_type: icelake
@@ -62,7 +62,7 @@ jobs:
         include:
           # Map CUDA version to torch version and PyPI index
           - cuda_version: "11.8.0"
-            torch_version: "2.3.1"
+            torch_version: "2.4.1"
             pypi_index: "https://download.pytorch.org/whl/cu118"
           - cuda_version: "12.6.3"
             torch_version: "2.8.0"
@@ -82,7 +82,7 @@ jobs:
           - platform: windows
             gpu_type: T4
             cuda_version: "11.8.0"
-            torch_version: "2.3.1"
+            torch_version: "2.4.1"
             pypi_index: "https://download.pytorch.org/whl/cu118"
           - platform: windows
             gpu_type: T4
diff --git a/.github/workflows/tests-pr.yml b/.github/workflows/tests-pr.yml
index cafa1a9b5..c188f6b49 100644
--- a/.github/workflows/tests-pr.yml
+++ b/.github/workflows/tests-pr.yml
@@ -31,12 +31,12 @@ jobs:
         platform: [linux-x64, linux-aarch64, macos]
         # default runners don't have AVX-512 support, but icelake does
         cpu_type: ["", icelake]
-        torch_version: ["2.3.1", "2.11.0"]
+        torch_version: ["2.4.1", "2.11.0"]
 
         exclude:
           # aarch64 minimum torch version is 2.5.1
           - platform: linux-aarch64
-            torch_version: "2.3.1"
+            torch_version: "2.4.1"
           # icelake only applies to linux-x64
           - platform: linux-aarch64
             cpu_type: icelake
@@ -44,7 +44,7 @@ jobs:
             cpu_type: icelake
 
         include:
-          # Add aarch64 with torch 2.5.1 instead of 2.3.1
+          # Add aarch64 with torch 2.5.1 instead of 2.4.1
           - platform: linux-aarch64
             cpu_type: ""
             torch_version: "2.5.1"
@@ -70,7 +70,7 @@ jobs:
         include:
           # Map CUDA version to torch version and PyPI index
           - cuda_version: "11.8.0"
-            torch_version: "2.3.1"
+            torch_version: "2.4.1"
             pypi_index: "https://download.pytorch.org/whl/cu118"
           - cuda_version: "12.8.1"
             torch_version: "2.9.1"
diff --git a/README.md b/README.md
index b4fd29b3a..beabca3e2 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,7 @@ The library includes quantization primitives for 8-bit & 4-bit operations, throu
 bitsandbytes has the following minimum requirements for all platforms:
 
 * Python 3.10+
-* [PyTorch](https://pytorch.org/get-started/locally/) 2.3+
+* [PyTorch](https://pytorch.org/get-started/locally/) 2.4+
   * _Note: While we aim to provide wide backwards compatibility, we recommend using the latest version of PyTorch for the best experience._
 
 #### Accelerator support:
diff --git a/bitsandbytes/_ops.py b/bitsandbytes/_ops.py
index 532fe7afa..3bb7a2810 100644
--- a/bitsandbytes/_ops.py
+++ b/bitsandbytes/_ops.py
@@ -4,16 +4,8 @@
 
 import torch
 
-_IS_TORCH_GTE_24 = False
-
-if hasattr(torch.library, "register_fake"):
-    _IS_TORCH_GTE_24 = True
-    register_fake = torch.library.register_fake
-    register_kernel = torch.library.register_kernel
-else:
-    # PyTorch <= 2.3
-    register_fake = torch.library.impl_abstract
-    register_kernel = torch.library.impl
+register_fake = torch.library.register_fake
+register_kernel = torch.library.register_kernel
 
 # Int8 mixed precision matmul + dequant + bias
 torch.library.define(
diff --git a/docs/source/installation.mdx b/docs/source/installation.mdx
index 5fcbea288..ecd3be8bc 100644
--- a/docs/source/installation.mdx
+++ b/docs/source/installation.mdx
@@ -27,7 +27,7 @@ We provide official support for NVIDIA GPUs, CPUs, Intel XPUs, and Intel Gaudi.
 These are the minimum requirements for `bitsandbytes` across all platforms. Please be aware that some compute platforms may impose more strict requirements.
 
 * Python >= 3.10
-* PyTorch >= 2.3
+* PyTorch >= 2.4
 
 ## NVIDIA CUDA[[cuda]]
 
diff --git a/docs/source/quickstart.mdx b/docs/source/quickstart.mdx
index 7ce93e282..e24f6a261 100644
--- a/docs/source/quickstart.mdx
+++ b/docs/source/quickstart.mdx
@@ -8,7 +8,7 @@ Welcome to bitsandbytes! This library enables accessible large language models v
 pip install bitsandbytes
 ```
 
-**Requirements:** Python 3.10+, PyTorch 2.3+
+**Requirements:** Python 3.10+, PyTorch 2.4+
 
 For detailed installation instructions, see the [Installation Guide](./installation).
 
diff --git a/pyproject.toml b/pyproject.toml
index f448a079e..745f74df4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,7 +43,7 @@ classifiers = [
     "Topic :: Scientific/Engineering :: Artificial Intelligence"
 ]
 dependencies = [
-    "torch>=2.3,<3",
+    "torch>=2.4,<3",
     "numpy>=1.17",
     "packaging>=20.9",
 ]
diff --git a/tests/test_linear4bit.py b/tests/test_linear4bit.py
index d9a25c90e..57f3d4891 100644
--- a/tests/test_linear4bit.py
+++ b/tests/test_linear4bit.py
@@ -355,7 +355,6 @@ def test_params4bit_real_serialization(device, quant_type, blocksize, compress_s
 @pytest.mark.parametrize("bias", TRUE_FALSE, ids=id_formatter("bias"))
 @pytest.mark.parametrize("fullgraph", TRUE_FALSE, ids=id_formatter("fullgraph"))
 @pytest.mark.parametrize("mode", ["default", "reduce-overhead"], ids=id_formatter("mode"))
-@pytest.mark.skipif(torch.__version__ < (2, 4), reason="Not supported in torch < 2.4")
 @pytest.mark.skipif(
     torch.__version__ < (2, 10) and sys.version_info >= (3, 14), reason="Not supported in Python 3.14 until torch 2.10"
 )
diff --git a/tests/test_linear8bitlt.py b/tests/test_linear8bitlt.py
index 410961e0b..314eb1829 100644
--- a/tests/test_linear8bitlt.py
+++ b/tests/test_linear8bitlt.py
@@ -253,7 +253,6 @@ def test_linear8bit_load_state_dict_raises_runtime_for_tied_weight():
 @pytest.mark.parametrize("bias", TRUE_FALSE, ids=id_formatter("bias"))
 @pytest.mark.parametrize("fullgraph", TRUE_FALSE, ids=id_formatter("fullgraph"))
 @pytest.mark.parametrize("mode", ["default", "reduce-overhead"], ids=id_formatter("mode"))
-@pytest.mark.skipif(torch.__version__ < (2, 4), reason="Not supported in torch < 2.4")
 @pytest.mark.skipif(
     torch.__version__ < (2, 10) and sys.version_info >= (3, 14), reason="Not supported in Python 3.14 until torch 2.10"
 )
diff --git a/tests/test_ops.py b/tests/test_ops.py
index 1dbeb0a53..3d8461f0d 100644
--- a/tests/test_ops.py
+++ b/tests/test_ops.py
@@ -6,12 +6,7 @@
 import bitsandbytes
 from tests.helpers import TRUE_FALSE, get_available_devices, id_formatter, is_supported_on_hpu
 
-# torch.library.opcheck is only available in torch 2.4 and later.
-# When testing with older versions, we will skip it as a no-op.
-if torch.__version__ >= (2, 4):
-    opcheck = torch.library.opcheck
-else:
-    opcheck = lambda *args, **kwargs: None
+opcheck = torch.library.opcheck
 
 
 class TestLLMInt8Ops:

From 5a91ad2f9f281fe2d35d90f51556431652096b9a Mon Sep 17 00:00:00 2001
From: Titus von Koeller <9048635+Titus-von-Koeller@users.noreply.github.com>
Date: Thu, 16 Apr 2026 15:23:34 +0200
Subject: [PATCH 2/3] test: skip linear8bitlt fullgraph compile test on torch <
 2.8

fullgraph mode for torch.compile requires torch 2.8+, matching the
existing guard in test_linear4bit_torch_compile. The failure was
previously masked by the torch < 2.4 skipif, which skipped this test
entirely on the old minimum-version CI job (torch 2.3.1).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 tests/test_linear8bitlt.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/test_linear8bitlt.py b/tests/test_linear8bitlt.py
index 314eb1829..8fce7cef3 100644
--- a/tests/test_linear8bitlt.py
+++ b/tests/test_linear8bitlt.py
@@ -257,6 +257,9 @@ def test_linear8bit_load_state_dict_raises_runtime_for_tied_weight():
     torch.__version__ < (2, 10) and sys.version_info >= (3, 14), reason="Not supported in Python 3.14 until torch 2.10"
 )
 def test_linear8bitlt_torch_compile(device, threshold, bias, fullgraph, mode):
+    if fullgraph and torch.__version__ < (2, 8, 0, "dev"):
+        pytest.skip("fullgraph mode requires torch 2.8 or higher")
+
     if device == "cuda" and platform.system() == "Windows":
         pytest.skip("Triton is not officially supported on Windows")
 

From 33d812118322ebbfe286efc57161596a5f14234a Mon Sep 17 00:00:00 2001
From: Titus von Koeller <9048635+Titus-von-Koeller@users.noreply.github.com>
Date: Fri, 17 Apr 2026 10:30:10 +0200
Subject: [PATCH 3/3] test: tighten 8bitlt fullgraph skip to torch < 2.5

The failure on torch 2.4.1 is dynamo failing to trace MatmulLtState
(UserDefinedObject `__bool__`), which is a different root cause from
the Params4bit `.t()` issue that needs torch 2.8+ in the 4bit test.
Torch 2.5+ has the dynamo UserDefinedObject (UDO) improvements needed
for this to work.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 tests/test_linear8bitlt.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_linear8bitlt.py b/tests/test_linear8bitlt.py
index 8fce7cef3..ab2baf0f8 100644
--- a/tests/test_linear8bitlt.py
+++ b/tests/test_linear8bitlt.py
@@ -257,8 +257,8 @@ def test_linear8bit_load_state_dict_raises_runtime_for_tied_weight():
     torch.__version__ < (2, 10) and sys.version_info >= (3, 14), reason="Not supported in Python 3.14 until torch 2.10"
 )
 def test_linear8bitlt_torch_compile(device, threshold, bias, fullgraph, mode):
-    if fullgraph and torch.__version__ < (2, 8, 0, "dev"):
-        pytest.skip("fullgraph mode requires torch 2.8 or higher")
+    if fullgraph and torch.__version__ < (2, 5):
+        pytest.skip("fullgraph tracing of MatmulLtState requires torch >= 2.5")
 
     if device == "cuda" and platform.system() == "Windows":
         pytest.skip("Triton is not officially supported on Windows")