Compare commits

..

1 Commits

Author SHA1 Message Date
bymyself
fcc84a8ead feat: add essential subgraph blueprints
Add 24 non-cloud essential blueprints from comfyui-wiki/Subgraph-Blueprints.
These cover common workflows: text/image/video generation, editing,
inpainting, outpainting, upscaling, depth maps, pose, captioning, and more.

Cloud-only blueprints (5) are excluded and will be added once
client-side distribution filtering lands.

Amp-Thread-ID: https://ampcode.com/threads/T-019c6f43-6212-7308-bea6-bfc35a486cbf
2026-02-20 23:21:47 -08:00
11 changed files with 40 additions and 26 deletions

View File

@@ -53,7 +53,7 @@ class SubgraphManager:
return entry_id, entry
async def load_entry_data(self, entry: SubgraphEntry):
with open(entry['path'], 'r', encoding='utf-8') as f:
with open(entry['path'], 'r') as f:
entry['data'] = f.read()
return entry

View File

@@ -78,4 +78,4 @@ def interpret_gathered_like(tensors, gathered):
return dest_views
aimdo_enabled = False
aimdo_allocator = None

View File

@@ -836,7 +836,7 @@ def unet_inital_load_device(parameters, dtype):
mem_dev = get_free_memory(torch_dev)
mem_cpu = get_free_memory(cpu_dev)
if mem_dev > mem_cpu and model_size < mem_dev and comfy.memory_management.aimdo_enabled:
if mem_dev > mem_cpu and model_size < mem_dev and comfy.memory_management.aimdo_allocator is None:
return torch_dev
else:
return cpu_dev
@@ -1121,6 +1121,7 @@ def get_cast_buffer(offload_stream, device, size, ref):
synchronize()
del STREAM_CAST_BUFFERS[offload_stream]
del cast_buffer
#FIXME: This doesn't work in Aimdo because mempool cant clear cache
soft_empty_cache()
with wf_context:
cast_buffer = torch.empty((size), dtype=torch.int8, device=device)

View File

@@ -1154,7 +1154,7 @@ def tiled_scale(samples, function, tile_x=64, tile_y=64, overlap = 8, upscale_am
return tiled_scale_multidim(samples, function, (tile_y, tile_x), overlap=overlap, upscale_amount=upscale_amount, out_channels=out_channels, output_device=output_device, pbar=pbar)
def model_trange(*args, **kwargs):
if not comfy.memory_management.aimdo_enabled:
if comfy.memory_management.aimdo_allocator is None:
return trange(*args, **kwargs)
pbar = trange(*args, **kwargs, smoothing=1.0)

View File

@@ -716,12 +716,12 @@ def _render_shader_batch(
gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, 0)
gl.glUseProgram(0)
for tex in input_textures:
gl.glDeleteTextures(tex)
for tex in output_textures:
gl.glDeleteTextures(tex)
for tex in ping_pong_textures:
gl.glDeleteTextures(tex)
if input_textures:
gl.glDeleteTextures(len(input_textures), input_textures)
if output_textures:
gl.glDeleteTextures(len(output_textures), output_textures)
if ping_pong_textures:
gl.glDeleteTextures(len(ping_pong_textures), ping_pong_textures)
if fbo is not None:
gl.glDeleteFramebuffers(1, [fbo])
for pp_fbo in ping_pong_fbos:

View File

@@ -29,7 +29,6 @@ class StringMultiline(io.ComfyNode):
node_id="PrimitiveStringMultiline",
display_name="String (Multiline)",
category="utils/primitive",
essentials_category="Basics",
inputs=[
io.String.Input("value", multiline=True),
],

View File

@@ -1,8 +1,10 @@
import os
import importlib.util
from comfy.cli_args import args, PerformanceFeature
from comfy.cli_args import args, PerformanceFeature, enables_dynamic_vram
import subprocess
import comfy_aimdo.control
#Can't use pytorch to get the GPU names because the cuda malloc has to be set before the first import.
def get_gpu_names():
if os.name == 'nt':
@@ -85,6 +87,10 @@ if not args.cuda_malloc:
except:
pass
if enables_dynamic_vram() and comfy_aimdo.control.init():
args.cuda_malloc = False
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = ""
if args.disable_cuda_malloc:
args.cuda_malloc = False

View File

@@ -9,6 +9,7 @@ import traceback
from enum import Enum
from typing import List, Literal, NamedTuple, Optional, Union
import asyncio
from contextlib import nullcontext
import torch
@@ -520,14 +521,19 @@ async def execute(server, dynprompt, caches, current_item, extra_data, executed,
# TODO - How to handle this with async functions without contextvars (which requires Python 3.12)?
GraphBuilder.set_default_prefix(unique_id, call_index, 0)
try:
output_data, output_ui, has_subgraph, has_pending_tasks = await get_output_data(prompt_id, unique_id, obj, input_data_all, execution_block_cb=execution_block_cb, pre_execute_cb=pre_execute_cb, v3_data=v3_data)
finally:
if comfy.memory_management.aimdo_enabled:
if args.verbose == "DEBUG":
comfy_aimdo.control.analyze()
comfy.model_management.reset_cast_buffers()
comfy_aimdo.model_vbar.vbars_reset_watermark_limits()
#Do comfy_aimdo mempool chunking here on the per-node level. Multi-model workflows
#will cause all sorts of incompatible memory shapes to fragment the pytorch alloc
#that we just want to cull out each model run.
allocator = comfy.memory_management.aimdo_allocator
with nullcontext() if allocator is None else torch.cuda.use_mem_pool(torch.cuda.MemPool(allocator.allocator())):
try:
output_data, output_ui, has_subgraph, has_pending_tasks = await get_output_data(prompt_id, unique_id, obj, input_data_all, execution_block_cb=execution_block_cb, pre_execute_cb=pre_execute_cb, v3_data=v3_data)
finally:
if allocator is not None:
if args.verbose == "DEBUG":
comfy_aimdo.model_vbar.vbars_analyze()
comfy.model_management.reset_cast_buffers()
comfy_aimdo.model_vbar.vbars_reset_watermark_limits()
if has_pending_tasks:
pending_async_nodes[unique_id] = output_data

11
main.py
View File

@@ -173,10 +173,6 @@ import gc
if 'torch' in sys.modules:
logging.warning("WARNING: Potential Error in code: Torch already imported, torch should never be imported before this point.")
import comfy_aimdo.control
if enables_dynamic_vram():
comfy_aimdo.control.init()
import comfy.utils
@@ -192,9 +188,13 @@ import hook_breaker_ac10a0
import comfy.memory_management
import comfy.model_patcher
import comfy_aimdo.control
import comfy_aimdo.torch
if enables_dynamic_vram():
if comfy.model_management.torch_version_numeric < (2, 8):
logging.warning("Unsupported Pytorch detected. DynamicVRAM support requires Pytorch version 2.8 or later. Falling back to legacy ModelPatcher. VRAM estimates may be unreliable especially on Windows")
comfy.memory_management.aimdo_allocator = None
elif comfy_aimdo.control.init_device(comfy.model_management.get_torch_device().index):
if args.verbose == 'DEBUG':
comfy_aimdo.control.set_log_debug()
@@ -208,10 +208,11 @@ if enables_dynamic_vram():
comfy_aimdo.control.set_log_info()
comfy.model_patcher.CoreModelPatcher = comfy.model_patcher.ModelPatcherDynamic
comfy.memory_management.aimdo_enabled = True
comfy.memory_management.aimdo_allocator = comfy_aimdo.torch.get_torch_allocator()
logging.info("DynamicVRAM support detected and enabled")
else:
logging.warning("No working comfy-aimdo install detected. DynamicVRAM support disabled. Falling back to legacy ModelPatcher. VRAM estimates may be unreliable especially on Windows")
comfy.memory_management.aimdo_allocator = None
def cuda_malloc_warning():

View File

@@ -70,6 +70,7 @@ class CLIPTextEncode(ComfyNodeABC):
FUNCTION = "encode"
CATEGORY = "conditioning"
ESSENTIALS_CATEGORY = "Basics"
DESCRIPTION = "Encodes a text prompt using a CLIP model into an embedding that can be used to guide the diffusion model towards generating specific images."
SEARCH_ALIASES = ["text", "prompt", "text prompt", "positive prompt", "negative prompt", "encode text", "text encoder", "encode prompt"]

View File

@@ -22,7 +22,7 @@ alembic
SQLAlchemy
av>=14.2.0
comfy-kitchen>=0.2.7
comfy-aimdo>=0.2.0
comfy-aimdo>=0.1.8
requests
#non essential dependencies: