Fix cublas ops on dynamic vram. (#12776)

This commit is contained in:
comfyanonymous
2026-03-04 22:21:55 -08:00
committed by GitHub
parent 43c64b6308
commit f2ee7f2d36

View File

@@ -660,23 +660,29 @@ class fp8_ops(manual_cast):
 CUBLAS_IS_AVAILABLE = False
 try:
-    from cublas_ops import CublasLinear
+    from cublas_ops import CublasLinear, cublas_half_matmul
     CUBLAS_IS_AVAILABLE = True
 except ImportError:
     pass

 if CUBLAS_IS_AVAILABLE:
-    class cublas_ops(disable_weight_init):
-        class Linear(CublasLinear, disable_weight_init.Linear):
+    class cublas_ops(manual_cast):
+        class Linear(CublasLinear, manual_cast.Linear):
             def reset_parameters(self):
                 return None

             def forward_comfy_cast_weights(self, input):
-                return super().forward(input)
+                weight, bias, offload_stream = cast_bias_weight(self, input, offloadable=True)
+                x = cublas_half_matmul(input, weight, bias, self._epilogue_str, self.has_bias)
+                uncast_bias_weight(self, weight, bias, offload_stream)
+                return x

             def forward(self, *args, **kwargs):
-                return super().forward(*args, **kwargs)
+                run_every_op()
+                if self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
+                    return self.forward_comfy_cast_weights(*args, **kwargs)
+                else:
+                    return super().forward(*args, **kwargs)

 # ==============================================================================
 # Mixed Precision Operations