From 3353751aaa35db35baa5c96afa4d7080759f90a9 Mon Sep 17 00:00:00 2001 From: Titus von Koeller <9048635+Titus-von-Koeller@users.noreply.github.com> Date: Thu, 16 Apr 2026 14:48:59 +0200 Subject: [PATCH 1/3] chore: bump minimum PyTorch version from 2.3 to 2.4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Update dependency spec in pyproject.toml - Update docs (README, installation, quickstart) - Remove _IS_TORCH_GTE_24 compat shim in _ops.py (register_fake/register_kernel are always available in torch 2.4+) - Remove torch < 2.4 skipif guards in tests - Remove NumPy < 2 downgrade workaround for torch 2.3 on Windows - Update CI test matrices: 2.3.1 → 2.4.1 Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/test-runner.yml | 6 ------ .github/workflows/tests-nightly.yml | 8 ++++---- .github/workflows/tests-pr.yml | 8 ++++---- README.md | 2 +- bitsandbytes/_ops.py | 12 ++---------- docs/source/installation.mdx | 2 +- docs/source/quickstart.mdx | 2 +- pyproject.toml | 2 +- tests/test_linear4bit.py | 1 - tests/test_linear8bitlt.py | 1 - tests/test_ops.py | 7 +------ 11 files changed, 15 insertions(+), 36 deletions(-) diff --git a/.github/workflows/test-runner.yml b/.github/workflows/test-runner.yml index 69f618eb6..0c04e3f8b 100644 --- a/.github/workflows/test-runner.yml +++ b/.github/workflows/test-runner.yml @@ -221,12 +221,6 @@ jobs: pip install pytest-cov shell: bash - # Windows: Downgrade NumPy for torch<2.4.1 compatibility - # See: https://github.com/pytorch/pytorch/issues/131668 - - name: Downgrade NumPy - if: inputs.platform == 'windows' && startsWith(inputs.torch_version, '2.3.') - run: pip install "numpy<2" - - name: Show installed packages run: pip list diff --git a/.github/workflows/tests-nightly.yml b/.github/workflows/tests-nightly.yml index 24eb4e0d4..12d0059af 100644 --- a/.github/workflows/tests-nightly.yml +++ b/.github/workflows/tests-nightly.yml @@ -20,12 +20,12 @@ jobs: platform: [linux-x64, linux-aarch64, macos, windows] # default runners don't have AVX-512 support, but icelake does cpu_type: ["", icelake] - torch_version: ["2.3.1", "2.10.0", "2.11.0"] + torch_version: ["2.4.1", "2.10.0", "2.11.0"] exclude: # aarch64 minimum torch version is 2.5.1 - platform: linux-aarch64 - torch_version: "2.3.1" + torch_version: "2.4.1" # icelake only applies to linux-x64 - platform: linux-aarch64 cpu_type: icelake @@ -62,7 +62,7 @@ jobs: include: # Map CUDA version to torch version and PyPI index - cuda_version: "11.8.0" - torch_version: "2.3.1" + torch_version: "2.4.1" pypi_index: "https://download.pytorch.org/whl/cu118" - cuda_version: "12.6.3" torch_version: "2.8.0" @@ -82,7 +82,7 @@ jobs: - platform: windows gpu_type: T4 cuda_version: "11.8.0" - torch_version: "2.3.1" + torch_version: "2.4.1" pypi_index: "https://download.pytorch.org/whl/cu118" - platform: windows gpu_type: T4 diff --git a/.github/workflows/tests-pr.yml b/.github/workflows/tests-pr.yml index cafa1a9b5..c188f6b49 100644 --- a/.github/workflows/tests-pr.yml +++ b/.github/workflows/tests-pr.yml @@ -31,12 +31,12 @@ jobs: platform: [linux-x64, linux-aarch64, macos] # default runners don't have AVX-512 support, but icelake does cpu_type: ["", icelake] - torch_version: ["2.3.1", "2.11.0"] + torch_version: ["2.4.1", "2.11.0"] exclude: # aarch64 minimum torch version is 2.5.1 - platform: linux-aarch64 - torch_version: "2.3.1" + torch_version: "2.4.1" # icelake only applies to linux-x64 - platform: linux-aarch64 cpu_type: icelake @@ -44,7 +44,7 @@ jobs: cpu_type: icelake include: - # Add aarch64 with torch 2.5.1 instead of 2.3.1 + # Add aarch64 with torch 2.5.1 instead of 2.4.1 - platform: linux-aarch64 cpu_type: "" torch_version: "2.5.1" @@ -70,7 +70,7 @@ jobs: include: # Map CUDA version to torch version and PyPI index - cuda_version: "11.8.0" - torch_version: "2.3.1" + torch_version: "2.4.1" pypi_index: "https://download.pytorch.org/whl/cu118" - cuda_version: "12.8.1" torch_version: "2.9.1" diff --git a/README.md b/README.md index b4fd29b3a..beabca3e2 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ The library includes quantization primitives for 8-bit & 4-bit operations, throu bitsandbytes has the following minimum requirements for all platforms: * Python 3.10+ -* [PyTorch](https://pytorch.org/get-started/locally/) 2.3+ +* [PyTorch](https://pytorch.org/get-started/locally/) 2.4+ * _Note: While we aim to provide wide backwards compatibility, we recommend using the latest version of PyTorch for the best experience._ #### Accelerator support: diff --git a/bitsandbytes/_ops.py b/bitsandbytes/_ops.py index 532fe7afa..3bb7a2810 100644 --- a/bitsandbytes/_ops.py +++ b/bitsandbytes/_ops.py @@ -4,16 +4,8 @@ import torch -_IS_TORCH_GTE_24 = False - -if hasattr(torch.library, "register_fake"): - _IS_TORCH_GTE_24 = True - register_fake = torch.library.register_fake - register_kernel = torch.library.register_kernel -else: - # PyTorch <= 2.3 - register_fake = torch.library.impl_abstract - register_kernel = torch.library.impl +register_fake = torch.library.register_fake +register_kernel = torch.library.register_kernel # Int8 mixed precision matmul + dequant + bias torch.library.define( diff --git a/docs/source/installation.mdx b/docs/source/installation.mdx index 5fcbea288..ecd3be8bc 100644 --- a/docs/source/installation.mdx +++ b/docs/source/installation.mdx @@ -27,7 +27,7 @@ We provide official support for NVIDIA GPUs, CPUs, Intel XPUs, and Intel Gaudi. These are the minimum requirements for `bitsandbytes` across all platforms. Please be aware that some compute platforms may impose more strict requirements. * Python >= 3.10 -* PyTorch >= 2.3 +* PyTorch >= 2.4 ## NVIDIA CUDA[[cuda]] diff --git a/docs/source/quickstart.mdx b/docs/source/quickstart.mdx index 7ce93e282..e24f6a261 100644 --- a/docs/source/quickstart.mdx +++ b/docs/source/quickstart.mdx @@ -8,7 +8,7 @@ Welcome to bitsandbytes! This library enables accessible large language models v pip install bitsandbytes ``` -**Requirements:** Python 3.10+, PyTorch 2.3+ +**Requirements:** Python 3.10+, PyTorch 2.4+ For detailed installation instructions, see the [Installation Guide](./installation). diff --git a/pyproject.toml b/pyproject.toml index f448a079e..745f74df4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,7 @@ classifiers = [ "Topic :: Scientific/Engineering :: Artificial Intelligence" ] dependencies = [ - "torch>=2.3,<3", + "torch>=2.4,<3", "numpy>=1.17", "packaging>=20.9", ] diff --git a/tests/test_linear4bit.py b/tests/test_linear4bit.py index d9a25c90e..57f3d4891 100644 --- a/tests/test_linear4bit.py +++ b/tests/test_linear4bit.py @@ -355,7 +355,6 @@ def test_params4bit_real_serialization(device, quant_type, blocksize, compress_s @pytest.mark.parametrize("bias", TRUE_FALSE, ids=id_formatter("bias")) @pytest.mark.parametrize("fullgraph", TRUE_FALSE, ids=id_formatter("fullgraph")) @pytest.mark.parametrize("mode", ["default", "reduce-overhead"], ids=id_formatter("mode")) -@pytest.mark.skipif(torch.__version__ < (2, 4), reason="Not supported in torch < 2.4") @pytest.mark.skipif( torch.__version__ < (2, 10) and sys.version_info >= (3, 14), reason="Not supported in Python 3.14 until torch 2.10" ) diff --git a/tests/test_linear8bitlt.py b/tests/test_linear8bitlt.py index 410961e0b..314eb1829 100644 --- a/tests/test_linear8bitlt.py +++ b/tests/test_linear8bitlt.py @@ -253,7 +253,6 @@ def test_linear8bit_load_state_dict_raises_runtime_for_tied_weight(): @pytest.mark.parametrize("bias", TRUE_FALSE, ids=id_formatter("bias")) @pytest.mark.parametrize("fullgraph", TRUE_FALSE, ids=id_formatter("fullgraph")) @pytest.mark.parametrize("mode", ["default", "reduce-overhead"], ids=id_formatter("mode")) -@pytest.mark.skipif(torch.__version__ < (2, 4), reason="Not supported in torch < 2.4") @pytest.mark.skipif( torch.__version__ < (2, 10) and sys.version_info >= (3, 14), reason="Not supported in Python 3.14 until torch 2.10" ) diff --git a/tests/test_ops.py b/tests/test_ops.py index 1dbeb0a53..3d8461f0d 100644 --- a/tests/test_ops.py +++ b/tests/test_ops.py @@ -6,12 +6,7 @@ import bitsandbytes from tests.helpers import TRUE_FALSE, get_available_devices, id_formatter, is_supported_on_hpu -# torch.library.opcheck is only available in torch 2.4 and later. -# When testing with older versions, we will skip it as a no-op. -if torch.__version__ >= (2, 4): - opcheck = torch.library.opcheck -else: - opcheck = lambda *args, **kwargs: None +opcheck = torch.library.opcheck class TestLLMInt8Ops: From 5a91ad2f9f281fe2d35d90f51556431652096b9a Mon Sep 17 00:00:00 2001 From: Titus von Koeller <9048635+Titus-von-Koeller@users.noreply.github.com> Date: Thu, 16 Apr 2026 15:23:34 +0200 Subject: [PATCH 2/3] test: skip linear8bitlt fullgraph compile test on torch < 2.8 fullgraph mode for torch.compile requires torch 2.8+, matching the existing guard in test_linear4bit_torch_compile. Previously masked by the torch < 2.4 skipif. Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/test_linear8bitlt.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_linear8bitlt.py b/tests/test_linear8bitlt.py index 314eb1829..8fce7cef3 100644 --- a/tests/test_linear8bitlt.py +++ b/tests/test_linear8bitlt.py @@ -257,6 +257,9 @@ def test_linear8bit_load_state_dict_raises_runtime_for_tied_weight(): torch.__version__ < (2, 10) and sys.version_info >= (3, 14), reason="Not supported in Python 3.14 until torch 2.10" ) def test_linear8bitlt_torch_compile(device, threshold, bias, fullgraph, mode): + if fullgraph and torch.__version__ < (2, 8, 0, "dev"): + pytest.skip("fullgraph mode requires torch 2.8 or higher") + if device == "cuda" and platform.system() == "Windows": pytest.skip("Triton is not officially supported on Windows") From 33d812118322ebbfe286efc57161596a5f14234a Mon Sep 17 00:00:00 2001 From: Titus von Koeller <9048635+Titus-von-Koeller@users.noreply.github.com> Date: Fri, 17 Apr 2026 10:30:10 +0200 Subject: [PATCH 3/3] test: tighten 8bitlt fullgraph skip to torch < 2.5 The failure on torch 2.4.1 is dynamo failing to trace MatmulLtState (UserDefinedObject `__bool__`), which is a different root cause than the Params4bit `.t()` issue that needs torch 2.8+ in the 4bit test. Torch 2.5+ has the dynamo UDO improvements needed for this to work. Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/test_linear8bitlt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_linear8bitlt.py b/tests/test_linear8bitlt.py index 8fce7cef3..ab2baf0f8 100644 --- a/tests/test_linear8bitlt.py +++ b/tests/test_linear8bitlt.py @@ -257,8 +257,8 @@ def test_linear8bit_load_state_dict_raises_runtime_for_tied_weight(): torch.__version__ < (2, 10) and sys.version_info >= (3, 14), reason="Not supported in Python 3.14 until torch 2.10" ) def test_linear8bitlt_torch_compile(device, threshold, bias, fullgraph, mode): - if fullgraph and torch.__version__ < (2, 8, 0, "dev"): - pytest.skip("fullgraph mode requires torch 2.8 or higher") + if fullgraph and torch.__version__ < (2, 5): + pytest.skip("fullgraph tracing of MatmulLtState requires torch >= 2.5") if device == "cuda" and platform.system() == "Windows": pytest.skip("Triton is not officially supported on Windows")