feat: add essential subgraph blueprints

Add 24 non-cloud essential blueprints from comfyui-wiki/Subgraph-Blueprints. These cover common workflows: text/image/video generation, editing, inpainting, outpainting, upscaling, depth maps, pose, captioning, and more. The 5 cloud-only blueprints are excluded and will be added once client-side distribution filtering lands.

Amp-Thread-ID: https://ampcode.com/threads/T-019c6f43-6212-7308-bea6-bfc35a486cbf
2026-04-22 15:39:08 +00:00 · 2026-02-20 23:21:47 -08:00
11 changed files with 40 additions and 26 deletions
--- a/app/subgraph_manager.py
+++ b/app/subgraph_manager.py
@@ -53,7 +53,7 @@ class SubgraphManager:
        return entry_id, entry

    async def load_entry_data(self, entry: SubgraphEntry):
-        with open(entry['path'], 'r', encoding='utf-8') as f:
+        with open(entry['path'], 'r') as f:
            entry['data'] = f.read()
        return entry

--- a/comfy/memory_management.py
+++ b/comfy/memory_management.py
@@ -78,4 +78,4 @@ def interpret_gathered_like(tensors, gathered):

    return dest_views

-aimdo_enabled = False
+aimdo_allocator = None
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -836,7 +836,7 @@ def unet_inital_load_device(parameters, dtype):

    mem_dev = get_free_memory(torch_dev)
    mem_cpu = get_free_memory(cpu_dev)
-    if mem_dev > mem_cpu and model_size < mem_dev and comfy.memory_management.aimdo_enabled:
+    if mem_dev > mem_cpu and model_size < mem_dev and comfy.memory_management.aimdo_allocator is None:
        return torch_dev
    else:
        return cpu_dev
@@ -1121,6 +1121,7 @@ def get_cast_buffer(offload_stream, device, size, ref):
            synchronize()
            del STREAM_CAST_BUFFERS[offload_stream]
            del cast_buffer
+            #FIXME: This doesn't work in Aimdo because mempool can't clear cache
            soft_empty_cache()
        with wf_context:
            cast_buffer = torch.empty((size), dtype=torch.int8, device=device)
--- a/comfy/utils.py
+++ b/comfy/utils.py
@@ -1154,7 +1154,7 @@ def tiled_scale(samples, function, tile_x=64, tile_y=64, overlap = 8, upscale_am
    return tiled_scale_multidim(samples, function, (tile_y, tile_x), overlap=overlap, upscale_amount=upscale_amount, out_channels=out_channels, output_device=output_device, pbar=pbar)

 def model_trange(*args, **kwargs):
-    if not comfy.memory_management.aimdo_enabled:
+    if comfy.memory_management.aimdo_allocator is None:
        return trange(*args, **kwargs)

    pbar = trange(*args, **kwargs, smoothing=1.0)
--- a/comfy_extras/nodes_glsl.py
+++ b/comfy_extras/nodes_glsl.py
@@ -716,12 +716,12 @@ def _render_shader_batch(
        gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, 0)
        gl.glUseProgram(0)

-        for tex in input_textures:
-            gl.glDeleteTextures(tex)
-        for tex in output_textures:
-            gl.glDeleteTextures(tex)
-        for tex in ping_pong_textures:
-            gl.glDeleteTextures(tex)
+        if input_textures:
+            gl.glDeleteTextures(len(input_textures), input_textures)
+        if output_textures:
+            gl.glDeleteTextures(len(output_textures), output_textures)
+        if ping_pong_textures:
+            gl.glDeleteTextures(len(ping_pong_textures), ping_pong_textures)
        if fbo is not None:
            gl.glDeleteFramebuffers(1, [fbo])
        for pp_fbo in ping_pong_fbos:
--- a/comfy_extras/nodes_primitive.py
+++ b/comfy_extras/nodes_primitive.py
@@ -29,7 +29,6 @@ class StringMultiline(io.ComfyNode):
            node_id="PrimitiveStringMultiline",
            display_name="String (Multiline)",
            category="utils/primitive",
-            essentials_category="Basics",
            inputs=[
                io.String.Input("value", multiline=True),
            ],
--- a/cuda_malloc.py
+++ b/cuda_malloc.py
@@ -1,8 +1,10 @@
 import os
 import importlib.util
-from comfy.cli_args import args, PerformanceFeature
+from comfy.cli_args import args, PerformanceFeature, enables_dynamic_vram
 import subprocess

+import comfy_aimdo.control
+
 #Can't use pytorch to get the GPU names because the cuda malloc has to be set before the first import.
 def get_gpu_names():
    if os.name == 'nt':
@@ -85,6 +87,10 @@ if not args.cuda_malloc:
    except:
        pass

+if enables_dynamic_vram() and comfy_aimdo.control.init():
+    args.cuda_malloc = False
+    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = ""
+
 if args.disable_cuda_malloc:
    args.cuda_malloc = False

--- a/execution.py
+++ b/execution.py
@@ -9,6 +9,7 @@ import traceback
 from enum import Enum
 from typing import List, Literal, NamedTuple, Optional, Union
 import asyncio
+from contextlib import nullcontext

 import torch

@@ -520,14 +521,19 @@ async def execute(server, dynprompt, caches, current_item, extra_data, executed,
                # TODO - How to handle this with async functions without contextvars (which requires Python 3.12)?
                GraphBuilder.set_default_prefix(unique_id, call_index, 0)

-            try:
-                output_data, output_ui, has_subgraph, has_pending_tasks = await get_output_data(prompt_id, unique_id, obj, input_data_all, execution_block_cb=execution_block_cb, pre_execute_cb=pre_execute_cb, v3_data=v3_data)
-            finally:
-                if comfy.memory_management.aimdo_enabled:
-                    if args.verbose == "DEBUG":
-                        comfy_aimdo.control.analyze()
-                    comfy.model_management.reset_cast_buffers()
-                    comfy_aimdo.model_vbar.vbars_reset_watermark_limits()
+            #Do comfy_aimdo mempool chunking here at the per-node level. Multi-model workflows
+            #cause all sorts of incompatibly shaped allocations to fragment the pytorch allocator,
+            #and we just want to cull those out after each model run.
+            allocator = comfy.memory_management.aimdo_allocator
+            with nullcontext() if allocator is None else torch.cuda.use_mem_pool(torch.cuda.MemPool(allocator.allocator())):
+                try:
+                    output_data, output_ui, has_subgraph, has_pending_tasks = await get_output_data(prompt_id, unique_id, obj, input_data_all, execution_block_cb=execution_block_cb, pre_execute_cb=pre_execute_cb, v3_data=v3_data)
+                finally:
+                    if allocator is not None:
+                        if args.verbose == "DEBUG":
+                            comfy_aimdo.model_vbar.vbars_analyze()
+                        comfy.model_management.reset_cast_buffers()
+                        comfy_aimdo.model_vbar.vbars_reset_watermark_limits()

            if has_pending_tasks:
                pending_async_nodes[unique_id] = output_data
--- a/main.py
+++ b/main.py
@@ -173,10 +173,6 @@ import gc
 if 'torch' in sys.modules:
    logging.warning("WARNING: Potential Error in code: Torch already imported, torch should never be imported before this point.")

-import comfy_aimdo.control
-
-if enables_dynamic_vram():
-    comfy_aimdo.control.init()

 import comfy.utils

@@ -192,9 +188,13 @@ import hook_breaker_ac10a0
 import comfy.memory_management
 import comfy.model_patcher

+import comfy_aimdo.control
+import comfy_aimdo.torch
+
 if enables_dynamic_vram():
    if comfy.model_management.torch_version_numeric < (2, 8):
        logging.warning("Unsupported Pytorch detected. DynamicVRAM support requires Pytorch version 2.8 or later. Falling back to legacy ModelPatcher. VRAM estimates may be unreliable especially on Windows")
+        comfy.memory_management.aimdo_allocator = None
    elif comfy_aimdo.control.init_device(comfy.model_management.get_torch_device().index):
        if args.verbose == 'DEBUG':
            comfy_aimdo.control.set_log_debug()
@@ -208,10 +208,11 @@ if enables_dynamic_vram():
            comfy_aimdo.control.set_log_info()

        comfy.model_patcher.CoreModelPatcher = comfy.model_patcher.ModelPatcherDynamic
-        comfy.memory_management.aimdo_enabled = True
+        comfy.memory_management.aimdo_allocator = comfy_aimdo.torch.get_torch_allocator()
        logging.info("DynamicVRAM support detected and enabled")
    else:
        logging.warning("No working comfy-aimdo install detected. DynamicVRAM support disabled. Falling back to legacy ModelPatcher. VRAM estimates may be unreliable especially on Windows")
+        comfy.memory_management.aimdo_allocator = None


 def cuda_malloc_warning():
--- a/nodes.py
+++ b/nodes.py
@@ -70,6 +70,7 @@ class CLIPTextEncode(ComfyNodeABC):
    FUNCTION = "encode"

    CATEGORY = "conditioning"
+    ESSENTIALS_CATEGORY = "Basics"
    DESCRIPTION = "Encodes a text prompt using a CLIP model into an embedding that can be used to guide the diffusion model towards generating specific images."
    SEARCH_ALIASES = ["text", "prompt", "text prompt", "positive prompt", "negative prompt", "encode text", "text encoder", "encode prompt"]

--- a/requirements.txt
+++ b/requirements.txt
@@ -22,7 +22,7 @@ alembic
 SQLAlchemy
 av>=14.2.0
 comfy-kitchen>=0.2.7
-comfy-aimdo>=0.2.0
+comfy-aimdo>=0.1.8
 requests

 #non essential dependencies: