speed up a bit: cast bias/weight to the input dtype once in forward and store the result back on the module

2026-05-11 08:20:27 +00:00 · 2024-08-19 05:06:46 -07:00
parent d38e560e42
commit d03fc5c2b1
1 changed file with 8 additions and 2 deletions
--- a/backend/operations.py
+++ b/backend/operations.py
@@ -354,7 +354,7 @@ try:
                if self.bias is not None and self.bias.dtype != x.dtype:
                    # Maybe this can also be set to all non-bnb ops since the cost is very low.
                    # And it only invokes one time, and most linear does not have bias
-                    self.bias.data = self.bias.data.to(x.dtype)
+                    self.bias = utils.tensor2parameter(self.bias.to(x.dtype))

                if not self.parameters_manual_cast:
                    return functional_linear_4bits(x, self.weight, self.bias)
@@ -409,7 +409,13 @@ class ForgeOperationsGGUF(ForgeOperations):
            return self

        def forward(self, x):
-            weight, bias, signal = weights_manual_cast(self, x, weight_fn=dequantize_tensor, bias_fn=dequantize_tensor)
+            if self.bias is not None and self.bias.dtype != x.dtype:
+                self.bias = utils.tensor2parameter(dequantize_tensor(self.bias).to(x.dtype))
+
+            if self.weight is not None and self.weight.dtype != x.dtype and getattr(self.weight, 'gguf_cls', None) is None:
+                self.weight = utils.tensor2parameter(self.weight.to(x.dtype))
+
+            weight, bias, signal = weights_manual_cast(self, x, weight_fn=dequantize_tensor, bias_fn=None, skip_bias_dtype=True)
            with main_stream_worker(weight, bias, signal):
                return torch.nn.functional.linear(x, weight, bias)