From 361b9a82a3e2445fc22e352df187138b0c8f67fb Mon Sep 17 00:00:00 2001
From: rattus <46076784+rattus128@users.noreply.github.com>
Date: Sun, 1 Feb 2026 08:42:32 -0800
Subject: [PATCH] fix pinning with model defined dtype (#12208)

Pinned memory was converted back to pinning the CPU-side weight
without any changes. Fix the pinner to size the pin from the CPU
weight rather than from the model-defined geometry; this either
saves RAM or prevents buffer overruns when the dtypes mismatch.

Fix the model-defined weight caster to use the [ s.weight, s.bias ]
interpretation, since xfer_dest may now be the flattened pin.

Fix the cast-needed detection so it is no longer conditional on the
absence of a pin.
---
 comfy/ops.py           | 22 +++++++++++-----------
 comfy/pinned_memory.py |  3 +--
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/comfy/ops.py b/comfy/ops.py
index c3a1825ce..53c5e4dc3 100644
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -96,16 +96,16 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu
     pin = comfy.pinned_memory.get_pin(s)
     if pin is not None:
         xfer_source = [ pin ]
-    else:
-        for data, geometry in zip([ s.weight, s.bias ], cast_geometry):
-            if data is None:
-                continue
-            if data.dtype != geometry.dtype:
-                cast_dest = xfer_dest
-                if cast_dest is None:
-                    cast_dest = torch.empty((comfy.memory_management.vram_aligned_size(cast_geometry),), dtype=torch.uint8, device=device)
-                xfer_dest = None
-                break
+
+    for data, geometry in zip([ s.weight, s.bias ], cast_geometry):
+        if data is None:
+            continue
+        if data.dtype != geometry.dtype:
+            cast_dest = xfer_dest
+            if cast_dest is None:
+                cast_dest = torch.empty((comfy.memory_management.vram_aligned_size(cast_geometry),), dtype=torch.uint8, device=device)
+            xfer_dest = None
+            break
 
     dest_size = comfy.memory_management.vram_aligned_size(xfer_source)
     offload_stream = comfy.model_management.get_offload_stream(device)
@@ -132,7 +132,7 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu
     comfy.model_management.sync_stream(device, offload_stream)
 
     if cast_dest is not None:
-        for pre_cast, post_cast in zip(comfy.memory_management.interpret_gathered_like(xfer_source, xfer_dest),
+        for pre_cast, post_cast in zip(comfy.memory_management.interpret_gathered_like([s.weight, s.bias ], xfer_dest),
                                        comfy.memory_management.interpret_gathered_like(cast_geometry, cast_dest)):
             if post_cast is not None:
                 post_cast.copy_(pre_cast)
diff --git a/comfy/pinned_memory.py b/comfy/pinned_memory.py
index 0650e4d1a..8acc327a7 100644
--- a/comfy/pinned_memory.py
+++ b/comfy/pinned_memory.py
@@ -11,8 +11,7 @@ def pin_memory(module):
     if module.pin_failed or args.disable_pinned_memory or get_pin(module) is not None:
         return
     #FIXME: This is a RAM cache trigger event
-    params = comfy.memory_management.tensors_to_geometries([ module.weight, module.bias ])
-    size = comfy.memory_management.vram_aligned_size(params)
+    size = comfy.memory_management.vram_aligned_size([ module.weight, module.bias ])
     pin = torch.empty((size,), dtype=torch.uint8)
    if comfy.model_management.pin_memory(pin):
         module._pin = pin
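
Illustration: a minimal sketch of the sizing mismatch this patch fixes,
assuming vram_aligned_size sums the tensors' byte sizes and rounds up to
an alignment boundary. The aligned_size helper, the 256-byte alignment,
and the fp16/fp32 mismatch below are illustrative assumptions, not
ComfyUI's actual API:

    import torch

    # Illustrative stand-in (an assumption, not the real implementation)
    # for comfy.memory_management.vram_aligned_size: total byte size of
    # the given tensors, rounded up to an alignment boundary.
    def aligned_size(tensors, alignment=256):
        total = sum(t.numel() * t.element_size() for t in tensors if t is not None)
        return -(-total // alignment) * alignment  # ceil to a multiple of alignment

    # Hypothetical layer: CPU-side weights stored in fp16 while the
    # model-defined compute dtype is fp32.
    weight = torch.zeros(1024, 1024, dtype=torch.float16)
    bias = torch.zeros(1024, dtype=torch.float16)

    cpu_bytes = aligned_size([weight, bias])                    # what the fixed pinner allocates
    model_bytes = aligned_size([weight.float(), bias.float()])  # the old model-defined sizing

    # Here the old sizing wastes RAM (model_bytes == 2 * cpu_bytes); with
    # the mismatch reversed (fp32 on the CPU, fp16 model dtype) the pin
    # would be too small and the flat copy into it would overrun it.
    print(cpu_bytes, model_bytes)

    pin = torch.empty((cpu_bytes,), dtype=torch.uint8)
    if torch.cuda.is_available():
        pin = pin.pin_memory()  # page-lock for fast async host-to-device copies

Sizing the pin from the CPU tensors matches what is actually copied into
the flat pinned buffer, which is why the patch drops the
tensors_to_geometries() indirection in pin_memory().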