Mirror of https://github.com/comfyanonymous/ComfyUI.git, synced 2026-02-03 06:57:34 +00:00
fix pinning with model defined dtype (#12208)
Pinned memory was converted back to pinning the CPU-side weight without any changes, so fix the pinner to size the pin from the CPU weight and not from the model-defined geometry. This will either save RAM or stop buffer overruns when the dtypes mismatch. Fix the model-defined weight caster to use the [ s.weight, s.bias ] interpretation, as xfer_dest might now be the flattened pin. Fix the detection of needing to cast so it is no longer conditional on !pin.
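For context, a minimal sketch of the size mismatch the pinner fix addresses. aligned_size is a hypothetical stand-in for comfy.memory_management.vram_aligned_size, and the dtypes are illustrative:

    import torch

    def aligned_size(tensors, alignment=512):
        # Hypothetical stand-in for comfy.memory_management.vram_aligned_size:
        # sum of byte sizes, each rounded up to an assumed alignment boundary.
        total = 0
        for t in tensors:
            if t is None:
                continue
            nbytes = t.numel() * t.element_size()
            total += (nbytes + alignment - 1) // alignment * alignment
        return total

    # A module whose CPU-side weight is stored in fp32 but whose
    # model-defined dtype is fp16: the two byte sizes differ by 2x.
    cpu_weight = torch.empty(1024, 1024, dtype=torch.float32)
    model_geometry = torch.empty(1024, 1024, dtype=torch.float16)

    print(aligned_size([cpu_weight]))      # 4194304: what the pin must hold
    print(aligned_size([model_geometry]))  # 2097152: too small for a byte copy

Sizing the pin from the model-defined geometry therefore either wastes RAM (model dtype wider than storage) or overruns the buffer when the CPU-side bytes are copied in (model dtype narrower, as above).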
comfy/ops.py (22 changed lines)
@@ -96,16 +96,16 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu
     pin = comfy.pinned_memory.get_pin(s)
     if pin is not None:
         xfer_source = [ pin ]
     else:
-        for data, geometry in zip([ s.weight, s.bias ], cast_geometry):
-            if data is None:
-                continue
-            if data.dtype != geometry.dtype:
-                cast_dest = xfer_dest
-                if cast_dest is None:
-                    cast_dest = torch.empty((comfy.memory_management.vram_aligned_size(cast_geometry),), dtype=torch.uint8, device=device)
-                xfer_dest = None
-                break
+    for data, geometry in zip([ s.weight, s.bias ], cast_geometry):
+        if data is None:
+            continue
+        if data.dtype != geometry.dtype:
+            cast_dest = xfer_dest
+            if cast_dest is None:
+                cast_dest = torch.empty((comfy.memory_management.vram_aligned_size(cast_geometry),), dtype=torch.uint8, device=device)
+            xfer_dest = None
+            break

     dest_size = comfy.memory_management.vram_aligned_size(xfer_source)
     offload_stream = comfy.model_management.get_offload_stream(device)
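The dedented loop amounts to the following detection, shown as a standalone sketch (Geometry is a hypothetical record standing in for a cast_geometry entry). The point of the fix is that this now runs on the pinned path too, since a pin holds a raw byte copy of the CPU weights and still carries the storage dtype:

    import torch
    from dataclasses import dataclass

    @dataclass
    class Geometry:
        # Hypothetical stand-in for a cast_geometry entry: the shape and
        # dtype a parameter should end up with on the compute device.
        shape: tuple
        dtype: torch.dtype

    def needs_cast_buffer(tensors, cast_geometry):
        # A staging cast buffer is needed as soon as any parameter's
        # storage dtype differs from its model-defined target dtype.
        for data, geometry in zip(tensors, cast_geometry):
            if data is None:
                continue
            if data.dtype != geometry.dtype:
                return True
        return False

    weight = torch.empty(4, 4, dtype=torch.float32)
    geometry = [Geometry((4, 4), torch.float16), None]   # no bias
    print(needs_cast_buffer([weight, None], geometry))   # True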
@@ -132,7 +132,7 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu
     comfy.model_management.sync_stream(device, offload_stream)

     if cast_dest is not None:
-        for pre_cast, post_cast in zip(comfy.memory_management.interpret_gathered_like(xfer_source, xfer_dest),
+        for pre_cast, post_cast in zip(comfy.memory_management.interpret_gathered_like([ s.weight, s.bias ], xfer_dest),
                                        comfy.memory_management.interpret_gathered_like(cast_geometry, cast_dest)):
             if post_cast is not None:
                 post_cast.copy_(pre_cast)
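The hunk does not show interpret_gathered_like itself; the sketch below assumes a helper of that shape to illustrate why the change matters. When a pin is in use, xfer_source is the single flat uint8 pin, so interpreting the transferred bytes like xfer_source would presumably yield one untyped view; interpreting them like [ s.weight, s.bias ] recovers per-parameter views with the real storage dtypes, which copy_ can then cast into the cast_geometry views. Offsets and alignment here are assumptions:

    import torch

    def interpret_like(templates, flat, alignment=512):
        # Sketch of an interpret_gathered_like-style helper: carve typed
        # views for each template tensor out of a flat uint8 buffer.
        views, offset = [], 0
        for t in templates:
            if t is None:
                views.append(None)
                continue
            nbytes = t.numel() * t.element_size()
            chunk = flat[offset:offset + nbytes]
            views.append(chunk.view(t.dtype).view(t.shape))
            offset += (nbytes + alignment - 1) // alignment * alignment
        return views

    # A flat buffer holding an fp32 weight's bytes is recovered as fp32,
    # then cast element-wise into an fp16 destination view via copy_().
    src = torch.arange(16, dtype=torch.float32).reshape(4, 4)
    flat = torch.empty(512, dtype=torch.uint8)
    flat[:src.nbytes].view(torch.float32).view(4, 4).copy_(src)
    (view,) = interpret_like([src], flat)
    dst = torch.empty(4, 4, dtype=torch.float16)
    dst.copy_(view)  # the dtype cast happens here, as in the fixed loop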
comfy/pinned_memory.py
@@ -11,8 +11,7 @@ def pin_memory(module):
     if module.pin_failed or args.disable_pinned_memory or get_pin(module) is not None:
         return
     #FIXME: This is a RAM cache trigger event
-    params = comfy.memory_management.tensors_to_geometries([ module.weight, module.bias ])
-    size = comfy.memory_management.vram_aligned_size(params)
+    size = comfy.memory_management.vram_aligned_size([ module.weight, module.bias ])
     pin = torch.empty((size,), dtype=torch.uint8)
     if comfy.model_management.pin_memory(pin):
         module._pin = pin
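A sketch of the corrected sizing rule in isolation: the pin is one flat byte buffer sized from the CPU-side tensors themselves. The alignment value is an assumption, and torch.empty(..., pin_memory=True) stands in for comfy.model_management.pin_memory, which pins an ordinary buffer after allocation instead:

    import torch

    def allocate_pin(module, alignment=512):
        # Size the pin from the tensors' actual storage, never from the
        # model-defined dtype (assumed alignment boundary of 512 bytes).
        size = 0
        for t in (module.weight, module.bias):
            if t is None:
                continue
            nbytes = t.numel() * t.element_size()
            size += (nbytes + alignment - 1) // alignment * alignment
        # pin_memory=True allocates page-locked host memory (needs a CUDA
        # build); the real code pins a plain buffer after the fact instead.
        return torch.empty((size,), dtype=torch.uint8, pin_memory=True)

    lin = torch.nn.Linear(256, 256)  # fp32 storage: (256*256 + 256) * 4 bytes
    pin = allocate_pin(lin)
    print(pin.numel())  # 263168: weight (262144) + bias (1024, aligned)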