From ecd2a19661ecccd96e26f111af21781f3e613f59 Mon Sep 17 00:00:00 2001
From: rattus <46076784+rattus128@users.noreply.github.com>
Date: Sun, 15 Feb 2026 17:28:51 -0800
Subject: [PATCH] Fix LoRA extraction in offload conditions (+ dynamic_vram
 mode) (#12479)

* lora_extract: Add a trange

If you bite off more than your GPU can chew, this kinda just hangs.
Give a rough indication of progress by counting the weights in a trange.

* lora_extract: Support on-the-fly patching

Use the on-the-fly approach from the regular model saving logic for
LoRA extraction too. Switch off force_patch_weights accordingly.

This gets extraction working in dynamic_vram mode while also supporting
extraction when the model is offloaded from the GPU.
---
 comfy_extras/nodes_lora_extract.py | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/comfy_extras/nodes_lora_extract.py b/comfy_extras/nodes_lora_extract.py
index fb89e03f4..1542d0a88 100644
--- a/comfy_extras/nodes_lora_extract.py
+++ b/comfy_extras/nodes_lora_extract.py
@@ -7,6 +7,7 @@ import logging
 from enum import Enum
 from typing_extensions import override
 from comfy_api.latest import ComfyExtension, io
+from tqdm.auto import trange
 
 CLAMP_QUANTILE = 0.99
 
@@ -49,12 +50,22 @@ LORA_TYPES = {"standard": LORAType.STANDARD,
               "full_diff": LORAType.FULL_DIFF}
 
 def calc_lora_model(model_diff, rank, prefix_model, prefix_lora, output_sd, lora_type, bias_diff=False):
-    comfy.model_management.load_models_gpu([model_diff], force_patch_weights=True)
+    comfy.model_management.load_models_gpu([model_diff])
     sd = model_diff.model_state_dict(filter_prefix=prefix_model)
 
-    for k in sd:
-        if k.endswith(".weight"):
+    sd_keys = list(sd.keys())
+    for index in trange(len(sd_keys), unit="weight"):
+        k = sd_keys[index]
+        op_keys = sd_keys[index].rsplit('.', 1)
+        if len(op_keys) < 2 or op_keys[1] not in ["weight", "bias"] or (op_keys[1] == "bias" and not bias_diff):
+            continue
+        op = comfy.utils.get_attr(model_diff.model, op_keys[0])
+        if hasattr(op, "comfy_cast_weights") and not getattr(op, "comfy_patched_weights", False):
+            weight_diff = model_diff.patch_weight_to_device(k, model_diff.load_device, return_weight=True)
+        else:
             weight_diff = sd[k]
+
+        if op_keys[1] == "weight":
             if lora_type == LORAType.STANDARD:
                 if weight_diff.ndim < 2:
                     if bias_diff:
@@ -69,8 +80,8 @@ def calc_lora_model(model_diff, rank, prefix_model, prefix_lora, output_sd, lora
             elif lora_type == LORAType.FULL_DIFF:
                 output_sd["{}{}.diff".format(prefix_lora, k[len(prefix_model):-7])] = weight_diff.contiguous().half().cpu()
 
-        elif bias_diff and k.endswith(".bias"):
-            output_sd["{}{}.diff_b".format(prefix_lora, k[len(prefix_model):-5])] = sd[k].contiguous().half().cpu()
+        elif bias_diff and op_keys[1] == "bias":
+            output_sd["{}{}.diff_b".format(prefix_lora, k[len(prefix_model):-5])] = weight_diff.contiguous().half().cpu()
     return output_sd
 
 class LoraSave(io.ComfyNode):
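
Note (not part of the patch): below is a minimal standalone sketch of the loop
structure the patch introduces, i.e. walking the state-dict keys with a trange
progress bar and dispatching on the trailing ".weight"/".bias" component. It is
not ComfyUI code: a plain torch.nn module stands in for model_diff.model, and
the hypothetical resolve_weight helper stands in for the on-the-fly
patch_weight_to_device path used when an op has comfy_cast_weights and its
weights are not yet patched.

    import torch.nn as nn
    from tqdm.auto import trange

    def resolve_weight(sd, k):
        # Hypothetical stand-in for the patch's on-the-fly path; the real code
        # calls model_diff.patch_weight_to_device(k, model_diff.load_device,
        # return_weight=True) for unpatched cast-on-the-fly ops.
        return sd[k]

    def collect_diffs(model: nn.Module, bias_diff: bool = True):
        sd = model.state_dict()
        sd_keys = list(sd.keys())
        out = {}
        # trange gives a rough per-weight progress indication, as in the patch.
        for index in trange(len(sd_keys), unit="weight"):
            k = sd_keys[index]
            op_keys = k.rsplit(".", 1)
            # Skip keys that are not .weight/.bias, and biases when disabled.
            if len(op_keys) < 2 or op_keys[1] not in ("weight", "bias"):
                continue
            if op_keys[1] == "bias" and not bias_diff:
                continue
            w = resolve_weight(sd, k)
            suffix = "diff" if op_keys[1] == "weight" else "diff_b"
            out["{}.{}".format(op_keys[0], suffix)] = w.contiguous().half().cpu()
        return out

    if __name__ == "__main__":
        # Prints ['0.diff', '0.diff_b'] for a one-layer stand-in model.
        print(list(collect_diffs(nn.Sequential(nn.Linear(4, 4))).keys()))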