diff --git a/.github/workflows/test-runner.yml b/.github/workflows/test-runner.yml index 69f618eb6..0c04e3f8b 100644 --- a/.github/workflows/test-runner.yml +++ b/.github/workflows/test-runner.yml @@ -221,12 +221,6 @@ jobs: pip install pytest-cov shell: bash - # Windows: Downgrade NumPy for torch<2.4.1 compatibility - # See: https://github.com/pytorch/pytorch/issues/131668 - - name: Downgrade NumPy - if: inputs.platform == 'windows' && startsWith(inputs.torch_version, '2.3.') - run: pip install "numpy<2" - - name: Show installed packages run: pip list diff --git a/.github/workflows/tests-nightly.yml b/.github/workflows/tests-nightly.yml index 24eb4e0d4..12d0059af 100644 --- a/.github/workflows/tests-nightly.yml +++ b/.github/workflows/tests-nightly.yml @@ -20,12 +20,12 @@ jobs: platform: [linux-x64, linux-aarch64, macos, windows] # default runners don't have AVX-512 support, but icelake does cpu_type: ["", icelake] - torch_version: ["2.3.1", "2.10.0", "2.11.0"] + torch_version: ["2.4.1", "2.10.0", "2.11.0"] exclude: # aarch64 minimum torch version is 2.5.1 - platform: linux-aarch64 - torch_version: "2.3.1" + torch_version: "2.4.1" # icelake only applies to linux-x64 - platform: linux-aarch64 cpu_type: icelake @@ -62,7 +62,7 @@ jobs: include: # Map CUDA version to torch version and PyPI index - cuda_version: "11.8.0" - torch_version: "2.3.1" + torch_version: "2.4.1" pypi_index: "https://download.pytorch.org/whl/cu118" - cuda_version: "12.6.3" torch_version: "2.8.0" @@ -82,7 +82,7 @@ jobs: - platform: windows gpu_type: T4 cuda_version: "11.8.0" - torch_version: "2.3.1" + torch_version: "2.4.1" pypi_index: "https://download.pytorch.org/whl/cu118" - platform: windows gpu_type: T4 diff --git a/.github/workflows/tests-pr.yml b/.github/workflows/tests-pr.yml index cafa1a9b5..c188f6b49 100644 --- a/.github/workflows/tests-pr.yml +++ b/.github/workflows/tests-pr.yml @@ -31,12 +31,12 @@ jobs: platform: [linux-x64, linux-aarch64, macos] # default runners don't have AVX-512 support, but icelake does cpu_type: ["", icelake] - torch_version: ["2.3.1", "2.11.0"] + torch_version: ["2.4.1", "2.11.0"] exclude: # aarch64 minimum torch version is 2.5.1 - platform: linux-aarch64 - torch_version: "2.3.1" + torch_version: "2.4.1" # icelake only applies to linux-x64 - platform: linux-aarch64 cpu_type: icelake @@ -44,7 +44,7 @@ jobs: cpu_type: icelake include: - # Add aarch64 with torch 2.5.1 instead of 2.3.1 + # Add aarch64 with torch 2.5.1 instead of 2.4.1 - platform: linux-aarch64 cpu_type: "" torch_version: "2.5.1" @@ -70,7 +70,7 @@ jobs: include: # Map CUDA version to torch version and PyPI index - cuda_version: "11.8.0" - torch_version: "2.3.1" + torch_version: "2.4.1" pypi_index: "https://download.pytorch.org/whl/cu118" - cuda_version: "12.8.1" torch_version: "2.9.1" diff --git a/README.md b/README.md index b4fd29b3a..beabca3e2 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ The library includes quantization primitives for 8-bit & 4-bit operations, throu bitsandbytes has the following minimum requirements for all platforms: * Python 3.10+ -* [PyTorch](https://pytorch.org/get-started/locally/) 2.3+ +* [PyTorch](https://pytorch.org/get-started/locally/) 2.4+ * _Note: While we aim to provide wide backwards compatibility, we recommend using the latest version of PyTorch for the best experience._ #### Accelerator support: diff --git a/bitsandbytes/_ops.py b/bitsandbytes/_ops.py index 532fe7afa..3bb7a2810 100644 --- a/bitsandbytes/_ops.py +++ b/bitsandbytes/_ops.py @@ -4,16 +4,8 @@ import torch -_IS_TORCH_GTE_24 = False - -if hasattr(torch.library, "register_fake"): - _IS_TORCH_GTE_24 = True - register_fake = torch.library.register_fake - register_kernel = torch.library.register_kernel -else: - # PyTorch <= 2.3 - register_fake = torch.library.impl_abstract - register_kernel = torch.library.impl +register_fake = torch.library.register_fake +register_kernel = torch.library.register_kernel # Int8 mixed precision matmul + dequant + bias torch.library.define( diff --git a/docs/source/installation.mdx b/docs/source/installation.mdx index 5fcbea288..ecd3be8bc 100644 --- a/docs/source/installation.mdx +++ b/docs/source/installation.mdx @@ -27,7 +27,7 @@ We provide official support for NVIDIA GPUs, CPUs, Intel XPUs, and Intel Gaudi. These are the minimum requirements for `bitsandbytes` across all platforms. Please be aware that some compute platforms may impose more strict requirements. * Python >= 3.10 -* PyTorch >= 2.3 +* PyTorch >= 2.4 ## NVIDIA CUDA[[cuda]] diff --git a/docs/source/quickstart.mdx b/docs/source/quickstart.mdx index 7ce93e282..e24f6a261 100644 --- a/docs/source/quickstart.mdx +++ b/docs/source/quickstart.mdx @@ -8,7 +8,7 @@ Welcome to bitsandbytes! This library enables accessible large language models v pip install bitsandbytes ``` -**Requirements:** Python 3.10+, PyTorch 2.3+ +**Requirements:** Python 3.10+, PyTorch 2.4+ For detailed installation instructions, see the [Installation Guide](./installation). diff --git a/pyproject.toml b/pyproject.toml index f448a079e..745f74df4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,7 @@ classifiers = [ "Topic :: Scientific/Engineering :: Artificial Intelligence" ] dependencies = [ - "torch>=2.3,<3", + "torch>=2.4,<3", "numpy>=1.17", "packaging>=20.9", ] diff --git a/tests/test_linear4bit.py b/tests/test_linear4bit.py index d9a25c90e..57f3d4891 100644 --- a/tests/test_linear4bit.py +++ b/tests/test_linear4bit.py @@ -355,7 +355,6 @@ def test_params4bit_real_serialization(device, quant_type, blocksize, compress_s @pytest.mark.parametrize("bias", TRUE_FALSE, ids=id_formatter("bias")) @pytest.mark.parametrize("fullgraph", TRUE_FALSE, ids=id_formatter("fullgraph")) @pytest.mark.parametrize("mode", ["default", "reduce-overhead"], ids=id_formatter("mode")) -@pytest.mark.skipif(torch.__version__ < (2, 4), reason="Not supported in torch < 2.4") @pytest.mark.skipif( torch.__version__ < (2, 10) and sys.version_info >= (3, 14), reason="Not supported in Python 3.14 until torch 2.10" ) diff --git a/tests/test_linear8bitlt.py b/tests/test_linear8bitlt.py index 410961e0b..ab2baf0f8 100644 --- a/tests/test_linear8bitlt.py +++ b/tests/test_linear8bitlt.py @@ -253,11 +253,13 @@ def test_linear8bit_load_state_dict_raises_runtime_for_tied_weight(): @pytest.mark.parametrize("bias", TRUE_FALSE, ids=id_formatter("bias")) @pytest.mark.parametrize("fullgraph", TRUE_FALSE, ids=id_formatter("fullgraph")) @pytest.mark.parametrize("mode", ["default", "reduce-overhead"], ids=id_formatter("mode")) -@pytest.mark.skipif(torch.__version__ < (2, 4), reason="Not supported in torch < 2.4") @pytest.mark.skipif( torch.__version__ < (2, 10) and sys.version_info >= (3, 14), reason="Not supported in Python 3.14 until torch 2.10" ) def test_linear8bitlt_torch_compile(device, threshold, bias, fullgraph, mode): + if fullgraph and torch.__version__ < (2, 5): + pytest.skip("fullgraph tracing of MatmulLtState requires torch >= 2.5") + if device == "cuda" and platform.system() == "Windows": pytest.skip("Triton is not officially supported on Windows") diff --git a/tests/test_ops.py b/tests/test_ops.py index 1dbeb0a53..3d8461f0d 100644 --- a/tests/test_ops.py +++ b/tests/test_ops.py @@ -6,12 +6,7 @@ import bitsandbytes from tests.helpers import TRUE_FALSE, get_available_devices, id_formatter, is_supported_on_hpu -# torch.library.opcheck is only available in torch 2.4 and later. -# When testing with older versions, we will skip it as a no-op. -if torch.__version__ >= (2, 4): - opcheck = torch.library.opcheck -else: - opcheck = lambda *args, **kwargs: None +opcheck = torch.library.opcheck class TestLLMInt8Ops: