
log = logging.getLogger(__name__)

-# TODO: unpin after resolving the `quant_state` format breaking changes
-_BITSANDBYTES_AVAILABLE = RequirementCache("bitsandbytes==0.41.0")
+_BITSANDBYTES_AVAILABLE = RequirementCache("bitsandbytes>=0.42.0")


class BitsandbytesPrecision(Precision):
@@ -344,7 +343,7 @@ def quantize(
    def to_empty(self, *, device: _DEVICE, recurse: bool = True) -> Self:
        if self.weight.dtype == torch.uint8:  # was quantized
            # cannot init the quantized params directly
-            weight = torch.empty(self.weight.quant_state[1], device=device, dtype=torch.half)
+            weight = torch.empty(self.weight.quant_state.shape, device=device, dtype=torch.half)
        else:
            weight = torch.empty_like(self.weight.data, device=device)
        device = torch.device(device)
@@ -366,7 +365,7 @@ def reset_parameters(self) -> None:
        linear_init_finished = isinstance(self.weight, bnb.nn.Params4bit)
        if linear_init_finished and self.weight.dtype == torch.uint8:  # was quantized
            # cannot init the quantized params directly
-            weight = torch.empty(self.weight.quant_state[1], device=self.weight.device, dtype=torch.half)
+            weight = torch.empty(self.weight.quant_state.shape, device=self.weight.device, dtype=torch.half)
        else:
            weight = self.weight.data
        torch.nn.init.kaiming_uniform_(weight, a=math.sqrt(5))
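Note on the change: bitsandbytes 0.42 replaced the sequence-style `quant_state` (where the original tensor shape was stored at index 1) with a `QuantState` object that exposes the shape as an attribute, which is why both lookups above move from `quant_state[1]` to `quant_state.shape` alongside the requirement bump. A minimal sketch of the same pattern, assuming bitsandbytes >= 0.42; the helper name `empty_like_maybe_quantized` is hypothetical and not part of this codebase:

import torch

def empty_like_maybe_quantized(weight: torch.nn.Parameter, device: torch.device) -> torch.Tensor:
    # Hypothetical helper mirroring the `to_empty` / `reset_parameters` pattern above.
    if weight.dtype == torch.uint8:  # uint8 storage means the weight was 4-bit quantized
        # The quantized storage is flattened, so the original shape has to come from the
        # attached quant state (`.shape` on the QuantState object in bitsandbytes >= 0.42).
        return torch.empty(weight.quant_state.shape, device=device, dtype=torch.half)
    # Not quantized: the stored data already has the right shape and dtype.
    return torch.empty_like(weight.data, device=device)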