Make loras work on nvfp4 models. (#11837)

Applying a LoRA for the first time is a bit slow, but this will probably be sped up in the future.
2026-02-27 10:24:06 +00:00 · 2026-01-12 19:33:54 -08:00
parent ecaeeb990d
commit b3c0e4de57
4 changed files with 150 additions and 4 deletions
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@@ -7,7 +7,7 @@ try:
        QuantizedTensor,
        QuantizedLayout,
        TensorCoreFP8Layout as _CKFp8Layout,
-        TensorCoreNVFP4Layout,  # Direct import, no wrapper needed
+        TensorCoreNVFP4Layout as _CKNvfp4Layout,
        register_layout_op,
        register_layout_class,
        get_layout_class,
@@ -34,7 +34,7 @@ except ImportError as e:
    class _CKFp8Layout:
        pass

-    class TensorCoreNVFP4Layout:
+    class _CKNvfp4Layout:
        pass

    def register_layout_class(name, cls):
@@ -84,6 +84,39 @@ class _TensorCoreFP8LayoutBase(_CKFp8Layout):
        return qdata, params


+class TensorCoreNVFP4Layout(_CKNvfp4Layout):
+    """NVFP4 layout wrapper that overrides ``quantize`` on top of ``_CKNvfp4Layout``.
+
+    The override adds an optional stochastic-rounding path and computes a
+    per-tensor scale when none is supplied (or when recalculation is requested).
+    """
+
+    @classmethod
+    def quantize(cls, tensor, scale=None, stochastic_rounding=0, inplace_ops=False):
+        """Quantize a 2D tensor to NVFP4 and return ``(qdata, params)``.
+
+        Args:
+            tensor: Input tensor; must be exactly 2-dimensional.
+            scale: Per-tensor scale. ``None`` or the string ``"recalculate"``
+                makes the scale be derived from the tensor's absolute maximum.
+                Non-tensor values are wrapped with ``torch.tensor``.
+            stochastic_rounding: When greater than 0, the stochastic-rounding
+                quantizer is used and this value is passed to it as ``seed``;
+                otherwise ``ck.quantize_nvfp4`` is used.
+            inplace_ops: Accepted in the signature but not read anywhere in
+                this method body.
+
+        Returns:
+            A tuple ``(qdata, params)`` where ``params`` is a ``cls.Params``
+            holding ``scale``, ``orig_dtype``, ``orig_shape`` and ``block_scale``.
+
+        Raises:
+            ValueError: If ``tensor`` is not 2D.
+        """
+        if tensor.dim() != 2:
+            raise ValueError(f"NVFP4 requires 2D tensor, got {tensor.dim()}D")
+
+        # Remember the input's dtype/shape so they can be stored in the params.
+        orig_dtype = tensor.dtype
+        orig_shape = tuple(tensor.shape)
+
+        # No usable scale given: derive one from the tensor's absolute maximum,
+        # divided by the product of the FP8 E4M3 and FP4 E2M1 maxima.
+        # NOTE(review): an all-zero tensor yields scale == 0 here — confirm the
+        # downstream quantizers tolerate that.
+        if scale is None or (isinstance(scale, str) and scale == "recalculate"):
+            scale = torch.amax(tensor.abs()) / (ck.float_utils.F8_E4M3_MAX * ck.float_utils.F4_E2M1_MAX)
+
+        # Normalize the scale to a float32 tensor on the same device as the input.
+        if not isinstance(scale, torch.Tensor):
+            scale = torch.tensor(scale)
+        scale = scale.to(device=tensor.device, dtype=torch.float32)
+
+        # Padding is requested only when the layout's padded shape differs from
+        # the original shape (presumably rounding up to multiples of 16, given
+        # the `pad_16x` argument name — TODO confirm against get_padded_shape).
+        padded_shape = cls.get_padded_shape(orig_shape)
+        needs_padding = padded_shape != orig_shape
+
+        # The value of `stochastic_rounding` doubles as the seed for the
+        # stochastic quantizer; 0 (or less) selects the regular quantizer.
+        if stochastic_rounding > 0:
+            qdata, block_scale = comfy.float.stochastic_round_quantize_nvfp4(tensor, scale, pad_16x=needs_padding, seed=stochastic_rounding)
+        else:
+            qdata, block_scale = ck.quantize_nvfp4(tensor, scale, pad_16x=needs_padding)
+
+        # Bundle everything recorded about this quantization alongside qdata.
+        params = cls.Params(
+            scale=scale,
+            orig_dtype=orig_dtype,
+            orig_shape=orig_shape,
+            block_scale=block_scale,
+        )
+        return qdata, params
+
+
 class TensorCoreFP8E4M3Layout(_TensorCoreFP8LayoutBase):
    FP8_DTYPE = torch.float8_e4m3fn