feat(isolation): DynamicVRAM compatibility for process isolation

DynamicVRAM's on-demand model loading/offloading conflicted with process isolation in three ways: RPC tensor transport stalls from mid-call GPU offload, race conditions between model lifecycle and active RPC operations, and false positive memory leak detection from changed finalizer patterns. - Marshal CUDA tensors to CPU before RPC transport for dynamic models - Add operation state tracking + quiescence waits at workflow boundaries - Distinguish proxy reference release from actual leaks in cleanup_models_gc - Fix init order: DynamicVRAM must initialize before isolation proxies - Add RPC timeouts to prevent indefinite hangs on model unavailability - Prevent proxy-of-proxy chains from DynamicVRAM model reload cycles - Add torch.device/torch.dtype serializers for new DynamicVRAM RPC paths - Guard isolation overhead so non-isolated workflows are unaffected - Migrate env var to PYISOLATE_CHILD
2026-03-13 00:59:59 +00:00 · 2026-03-04 23:48:02 -06:00
parent a0f8784e9f
commit 9250191c65
38 changed files with 94595 additions and 307 deletions
--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@@ -146,6 +146,7 @@ parser.add_argument("--reserve-vram", type=float, default=None, help="Set the am

 parser.add_argument("--async-offload", nargs='?', const=2, type=int, default=None, metavar="NUM_STREAMS", help="Use async weight offloading. An optional argument controls the amount of offload streams. Default is 2. Enabled by default on Nvidia.")
 parser.add_argument("--disable-async-offload", action="store_true", help="Disable async weight offloading.")
+parser.add_argument("--disable-dynamic-vram", action="store_true", help="Disable dynamic VRAM and use estimate based model loading.")

 parser.add_argument("--force-non-blocking", action="store_true", help="Force ComfyUI to use non-blocking operations for all applicable tensors. This may improve performance on some non-Nvidia systems but can cause issues with some workflows.")

@@ -159,7 +160,6 @@ class PerformanceFeature(enum.Enum):
    Fp8MatrixMultiplication = "fp8_matrix_mult"
    CublasOps = "cublas_ops"
    AutoTune = "autotune"
-    DynamicVRAM = "dynamic_vram"

 parser.add_argument("--fast", nargs="*", type=PerformanceFeature, help="Enable some untested and potentially quality deteriorating optimizations. This is used to test new features so using it might crash your comfyui. --fast with no arguments enables everything. You can pass a list specific optimizations if you only want to enable specific ones. Current valid optimizations: {}".format(" ".join(map(lambda c: c.value, PerformanceFeature))))

@@ -262,4 +262,4 @@ else:
    args.fast = set(args.fast)

 def enables_dynamic_vram():
-    return PerformanceFeature.DynamicVRAM in args.fast and not args.highvram and not args.gpu_only
+    return not args.disable_dynamic_vram and not args.highvram and not args.gpu_only and not args.novram and not args.cpu