From f4b99bc62389af315013dda85f24f2bbd262b686 Mon Sep 17 00:00:00 2001
From: Jedrzej Kosinski
Date: Tue, 17 Feb 2026 04:55:00 -0800
Subject: [PATCH] Made multigpu deepclone load model from disk to avoid needing to deepclone actual model object, fixed issues with merge, turn off cuda backend as it causes device mismatch issue with rope (and potentially other ops), will investigate

---
 comfy/model_patcher.py | 11 ++++++++++-
 comfy/quant_ops.py     |  2 +-
 comfy/samplers.py      |  4 ++--
 comfy/sd.py            |  2 ++
 4 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py
index d0110c7c6..aa7b862e7 100644
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -23,6 +23,7 @@ import inspect
 import logging
 import math
 import uuid
+import copy
 from typing import Callable, Optional
 
 import torch
@@ -274,6 +275,7 @@ class ModelPatcher:
         self.is_clip = False
         self.hook_mode = comfy.hooks.EnumHookMode.MaxSpeed
 
+        self.cached_patcher_init: tuple[Callable, tuple] | tuple[Callable, tuple, int] | None = None
         self.is_multigpu_base_clone = False
         self.clone_base_uuid = uuid.uuid4()
 
@@ -368,6 +370,7 @@ class ModelPatcher:
         n.is_clip = self.is_clip
         n.hook_mode = self.hook_mode
 
+        n.cached_patcher_init = self.cached_patcher_init
         n.is_multigpu_base_clone = self.is_multigpu_base_clone
         n.clone_base_uuid = self.clone_base_uuid
 
@@ -382,12 +385,18 @@ class ModelPatcher:
         # set load device, if present
         if new_load_device is not None:
             n.load_device = new_load_device
+        if self.cached_patcher_init is not None:
+            temp_model_patcher: ModelPatcher | list[ModelPatcher] = self.cached_patcher_init[0](*self.cached_patcher_init[1])
+            if len(self.cached_patcher_init) > 2:
+                temp_model_patcher = temp_model_patcher[self.cached_patcher_init[2]]
+            n.model = temp_model_patcher.model
+        else:
+            n.model = copy.deepcopy(n.model)
         # unlike for normal clone, backup dicts that shared same ref should not;
         # otherwise, patchers that have deep copies of base models will erroneously influence each other.
         n.backup = copy.deepcopy(n.backup)
         n.object_patches_backup = copy.deepcopy(n.object_patches_backup)
         n.hook_backup = copy.deepcopy(n.hook_backup)
-        n.model = copy.deepcopy(n.model)
         # multigpu clone should not have multigpu additional_models entry
         n.remove_additional_models("multigpu")
         # multigpu_clone all stored additional_models; make sure circular references are properly handled
diff --git a/comfy/quant_ops.py b/comfy/quant_ops.py
index 15a4f457b..d8addefd8 100644
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@@ -20,7 +20,7 @@ try:
     if cuda_version < (13,):
         ck.registry.disable("cuda")
         logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.")
-
+    ck.registry.disable("cuda") # multigpu will not work rn with comfy-kitchen on cuda backend
     ck.registry.disable("triton")
     for k, v in ck.list_backends().items():
         logging.info(f"Found comfy_kitchen backend {k}: {v}")
diff --git a/comfy/samplers.py b/comfy/samplers.py
index 3f5a699d9..5dee49e7e 100755
--- a/comfy/samplers.py
+++ b/comfy/samplers.py
@@ -418,7 +418,7 @@ def _calc_cond_batch_multigpu(model: BaseModel, conds: list[list[dict]], x_in: t
             to_batch_temp.reverse()
             to_batch = to_batch_temp[:1]
 
-            free_memory = model_management.get_free_memory(current_device)
+            free_memory = comfy.model_management.get_free_memory(current_device)
             for i in range(1, len(to_batch_temp) + 1):
                 batch_amount = to_batch_temp[:len(to_batch_temp)//i]
                 input_shape = [len(batch_amount) * first_shape[0]] + list(first_shape)[1:]
@@ -487,7 +487,7 @@ def _calc_cond_batch_multigpu(model: BaseModel, conds: list[list[dict]], x_in: t
            transformer_options["cond_or_uncond"] = cond_or_uncond[:]
            transformer_options["uuids"] = uuids[:]
 
-           transformer_options["sigmas"] = timestep
+           transformer_options["sigmas"] = timestep.to(device)
            transformer_options["sample_sigmas"] = transformer_options["sample_sigmas"].to(device)
            transformer_options["multigpu_thread_device"] = device
 
diff --git a/comfy/sd.py b/comfy/sd.py
index f65e7cadd..2643de26d 100644
--- a/comfy/sd.py
+++ b/comfy/sd.py
@@ -1510,6 +1510,7 @@ def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, o
     out = load_state_dict_guess_config(sd, output_vae, output_clip, output_clipvision, embedding_directory, output_model, model_options, te_model_options=te_model_options, metadata=metadata)
     if out is None:
         raise RuntimeError("ERROR: Could not detect model type of: {}\n{}".format(ckpt_path, model_detection_error_hint(ckpt_path, sd)))
+    out[0].cached_patcher_init = (load_checkpoint_guess_config, (ckpt_path, False, False, False, embedding_directory, output_model, model_options, te_model_options), 0)
     return out
 
 def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True, model_options={}, te_model_options={}, metadata=None):
@@ -1711,6 +1712,7 @@ def load_diffusion_model(unet_path, model_options={}):
     if model is None:
         logging.error("ERROR UNSUPPORTED DIFFUSION MODEL {}".format(unet_path))
         raise RuntimeError("ERROR: Could not detect model type of: {}\n{}".format(unet_path, model_detection_error_hint(unet_path, sd)))
+    model.cached_patcher_init = (load_diffusion_model, (unet_path, model_options))
     return model
 
 def load_unet(unet_path, dtype=None):
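
Notes (not part of the patch): the sketch below illustrates how the new cached_patcher_init field is meant to be exercised. It is a rough usage sketch, not documented API: the name of the patched clone method (assumed here to be ModelPatcher.deepclone_multigpu with a new_load_device keyword), the checkpoint path, and the device string are all assumptions for illustration.

    import torch
    import comfy.sd

    # Loading a checkpoint now records how the patcher was created, so a later
    # multigpu deep clone can rebuild the model by re-running the loader
    # instead of copy.deepcopy-ing the in-memory model object.
    model_patcher, clip, vae, _ = comfy.sd.load_checkpoint_guess_config(
        "models/checkpoints/example.safetensors")  # hypothetical path
    # model_patcher.cached_patcher_init is now
    # (load_checkpoint_guess_config, (ckpt_path, False, False, False, ...), 0)

    # Assumed entry point: the method patched above falls back to
    # copy.deepcopy(n.model) when cached_patcher_init is None.
    clone = model_patcher.deepclone_multigpu(new_load_device=torch.device("cuda:1"))
    assert clone.model is not model_patcher.model  # separate model object per device

The index stored as the third tuple element (0 for load_checkpoint_guess_config, absent for load_diffusion_model) selects which patcher to take from a loader that returns a tuple, which is why the clone path checks len(self.cached_patcher_init) > 2.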