fix(assets): recognize temp directory in asset category resolution

Files written to the temp directory (e.g. GLSLShader_output_00004_.png) caused a ValueError in get_asset_category_and_relative_path when --enable-assets was active, because the temp directory was not a recognized asset root. Add 'temp' as a category alongside input/output/models so temp outputs can be registered as assets.

Amp-Thread-ID: https://ampcode.com/threads/T-019d2800-6944-7039-b2c4-53f809bc7f4f
Co-authored-by: Amp <amp@ampcode.com>
2026-04-29 19:01:27 +00:00 · 2026-03-25 19:53:15 -07:00
27 changed files with 60 additions and 348 deletions
--- a/blueprints/.glsl/Color_Balance_15.frag
+++ b/blueprints/.glsl/Color_Balance_15.frag
@@ -1,90 +0,0 @@
-#version 300 es
-precision highp float;
-
-uniform sampler2D u_image0;
-uniform float u_float0;
-uniform float u_float1;
-uniform float u_float2;
-uniform float u_float3;
-uniform float u_float4;
-uniform float u_float5;
-uniform float u_float6;
-uniform float u_float7;
-uniform float u_float8;
-uniform bool u_bool0;
-
-in vec2 v_texCoord;
-out vec4 fragColor;
-
-vec3 rgb2hsl(vec3 c) {
-    float maxC = max(c.r, max(c.g, c.b));
-    float minC = min(c.r, min(c.g, c.b));
-    float l = (maxC + minC) * 0.5;
-    if (maxC == minC) return vec3(0.0, 0.0, l);
-    float d = maxC - minC;
-    float s = l > 0.5 ? d / (2.0 - maxC - minC) : d / (maxC + minC);
-    float h;
-    if (maxC == c.r) {
-        h = (c.g - c.b) / d + (c.g < c.b ? 6.0 : 0.0);
-    } else if (maxC == c.g) {
-        h = (c.b - c.r) / d + 2.0;
-    } else {
-        h = (c.r - c.g) / d + 4.0;
-    }
-    h /= 6.0;
-    return vec3(h, s, l);
-}
-
-float hue2rgb(float p, float q, float t) {
-    if (t < 0.0) t += 1.0;
-    if (t > 1.0) t -= 1.0;
-    if (t < 1.0 / 6.0) return p + (q - p) * 6.0 * t;
-    if (t < 1.0 / 2.0) return q;
-    if (t < 2.0 / 3.0) return p + (q - p) * (2.0 / 3.0 - t) * 6.0;
-    return p;
-}
-
-vec3 hsl2rgb(vec3 hsl) {
-    float h = hsl.x, s = hsl.y, l = hsl.z;
-    if (s == 0.0) return vec3(l);
-    float q = l < 0.5 ? l * (1.0 + s) : l + s - l * s;
-    float p = 2.0 * l - q;
-    return vec3(
-        hue2rgb(p, q, h + 1.0 / 3.0),
-        hue2rgb(p, q, h),
-        hue2rgb(p, q, h - 1.0 / 3.0)
-    );
-}
-
-void main() {
-    vec4 tex = texture(u_image0, v_texCoord);
-    vec3 color = tex.rgb;
-
-    vec3 shadows = vec3(u_float0, u_float1, u_float2) * 0.01;
-    vec3 midtones = vec3(u_float3, u_float4, u_float5) * 0.01;
-    vec3 highlights = vec3(u_float6, u_float7, u_float8) * 0.01;
-
-    float maxC = max(color.r, max(color.g, color.b));
-    float minC = min(color.r, min(color.g, color.b));
-    float lightness = (maxC + minC) * 0.5;
-
-    // GIMP weight curves: linear ramps with constants a=0.25, b=0.333, scale=0.7
-    const float a = 0.25;
-    const float b = 0.333;
-    const float scale = 0.7;
-
-    float sw = clamp((lightness - b) / -a + 0.5, 0.0, 1.0) * scale;
-    float mw = clamp((lightness - b) / a + 0.5, 0.0, 1.0) *
-               clamp((lightness + b - 1.0) / -a + 0.5, 0.0, 1.0) * scale;
-    float hw = clamp((lightness + b - 1.0) / a + 0.5, 0.0, 1.0) * scale;
-
-    color += sw * shadows + mw * midtones + hw * highlights;
-
-    if (u_bool0) {
-        vec3 hsl = rgb2hsl(clamp(color, 0.0, 1.0));
-        hsl.z = lightness;
-        color = hsl2rgb(hsl);
-    }
-
-    fragColor = vec4(clamp(color, 0.0, 1.0), tex.a);
-}
--- a/blueprints/.glsl/Color_Curves_8.frag
+++ b/blueprints/.glsl/Color_Curves_8.frag
@@ -1,49 +0,0 @@
-#version 300 es
-precision highp float;
-
-uniform sampler2D u_image0;
-uniform sampler2D u_curve0;  // RGB master curve (256x1 LUT)
-uniform sampler2D u_curve1;  // Red channel curve
-uniform sampler2D u_curve2;  // Green channel curve
-uniform sampler2D u_curve3;  // Blue channel curve
-
-in vec2 v_texCoord;
-layout(location = 0) out vec4 fragColor0;
-
-// GIMP-compatible curve lookup with manual linear interpolation.
-// Matches gimp_curve_map_value_inline() from gimpcurve-map.c:
-//   index = value * (n_samples - 1)
-//   f = fract(index)
-//   result = (1-f) * samples[floor] + f * samples[ceil]
-//
-// Uses texelFetch (NEAREST) to avoid GPU half-texel offset issues
-// that occur with texture() + GL_LINEAR on small 256x1 LUTs.
-float applyCurve(sampler2D curve, float value) {
-    value = clamp(value, 0.0, 1.0);
-
-    float pos = value * 255.0;
-    int lo = int(floor(pos));
-    int hi = min(lo + 1, 255);
-    float f = pos - float(lo);
-
-    float a = texelFetch(curve, ivec2(lo, 0), 0).r;
-    float b = texelFetch(curve, ivec2(hi, 0), 0).r;
-
-    return a + f * (b - a);
-}
-
-void main() {
-    vec4 color = texture(u_image0, v_texCoord);
-
-    // GIMP order: per-channel curves first, then RGB master curve.
-    // See gimp_curve_map_pixels() default case in gimpcurve-map.c:
-    //   dest = colors_curve( channel_curve( src ) )
-    float tmp_r = applyCurve(u_curve1, color.r);
-    float tmp_g = applyCurve(u_curve2, color.g);
-    float tmp_b = applyCurve(u_curve3, color.b);
-    color.r = applyCurve(u_curve0, tmp_r);
-    color.g = applyCurve(u_curve0, tmp_g);
-    color.b = applyCurve(u_curve0, tmp_b);
-
-    fragColor0 = vec4(color.rgb, color.a);
-}
--- a/blueprints/Color
+++ b/blueprints/Color
--- a/blueprints/Color
+++ b/blueprints/Color
--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@@ -110,13 +110,11 @@ parser.add_argument("--preview-method", type=LatentPreviewMethod, default=Latent

 parser.add_argument("--preview-size", type=int, default=512, help="Sets the maximum preview size for sampler nodes.")

-CACHE_RAM_AUTO_GB = -1.0
-
 cache_group = parser.add_mutually_exclusive_group()
 cache_group.add_argument("--cache-classic", action="store_true", help="Use the old style (aggressive) caching.")
 cache_group.add_argument("--cache-lru", type=int, default=0, help="Use LRU caching with a maximum of N node results cached. May use more RAM/VRAM.")
 cache_group.add_argument("--cache-none", action="store_true", help="Reduced RAM/VRAM usage at the expense of executing every node for each run.")
-cache_group.add_argument("--cache-ram", nargs='?', const=CACHE_RAM_AUTO_GB, type=float, default=0, help="Use RAM pressure caching with the specified headroom threshold. If available RAM drops below the threshold the cache removes large items to free RAM. Default (when no value is provided): 25%% of system RAM (min 4GB, max 32GB).")
+cache_group.add_argument("--cache-ram", nargs='?', const=4.0, type=float, default=0, help="Use RAM pressure caching with the specified headroom threshold. If available RAM drops below the threhold the cache remove large items to free RAM. Default 4GB")

 attn_group = parser.add_mutually_exclusive_group()
 attn_group.add_argument("--use-split-cross-attention", action="store_true", help="Use the split cross attention optimization. Ignored when xformers is used.")
--- a/comfy/memory_management.py
+++ b/comfy/memory_management.py
@@ -141,17 +141,3 @@ def interpret_gathered_like(tensors, gathered):
    return dest_views

 aimdo_enabled = False
-
-extra_ram_release_callback = None
-RAM_CACHE_HEADROOM = 0
-
-def set_ram_cache_release_state(callback, headroom):
-    global extra_ram_release_callback
-    global RAM_CACHE_HEADROOM
-    extra_ram_release_callback = callback
-    RAM_CACHE_HEADROOM = max(0, int(headroom))
-
-def extra_ram_release(target):
-    if extra_ram_release_callback is None:
-        return 0
-    return extra_ram_release_callback(target)
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@@ -890,7 +890,7 @@ class Flux(BaseModel):
        return torch.cat((image, mask), dim=1)

    def encode_adm(self, **kwargs):
-        return kwargs.get("pooled_output", None)
+        return kwargs["pooled_output"]

    def extra_conds(self, **kwargs):
        out = super().extra_conds(**kwargs)
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -669,7 +669,7 @@ def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, pins

    for i in range(len(current_loaded_models) -1, -1, -1):
        shift_model = current_loaded_models[i]
-        if device is None or shift_model.device == device:
+        if shift_model.device == device:
            if shift_model not in keep_loaded and not shift_model.is_dead():
                can_unload.append((-shift_model.model_offloaded_memory(), sys.getrefcount(shift_model.model), shift_model.model_memory(), i))
                shift_model.currently_used = False
@@ -679,8 +679,8 @@ def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, pins
        i = x[-1]
        memory_to_free = 1e32
        pins_to_free = 1e32
-        if not DISABLE_SMART_MEMORY or device is None:
-            memory_to_free = 0 if device is None else memory_required - get_free_memory(device)
+        if not DISABLE_SMART_MEMORY:
+            memory_to_free = memory_required - get_free_memory(device)
            pins_to_free = pins_required - get_free_ram()
            if current_loaded_models[i].model.is_dynamic() and for_dynamic:
                #don't actually unload dynamic models for the sake of other dynamic models
@@ -708,7 +708,7 @@ def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, pins

    if len(unloaded_model) > 0:
        soft_empty_cache()
-    elif device is not None:
+    else:
        if vram_state != VRAMState.HIGH_VRAM:
            mem_free_total, mem_free_torch = get_free_memory(device, torch_free_too=True)
            if mem_free_torch > mem_free_total * 0.25:
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -300,6 +300,9 @@ class ModelPatcher:
    def model_mmap_residency(self, free=False):
        return comfy.model_management.module_mmap_residency(self.model, free=free)

+    def get_ram_usage(self):
+        return self.model_size()
+
    def loaded_size(self):
        return self.model.model_loaded_weight_memory

--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -928,7 +928,6 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
                weight = state_dict.pop(weight_key, None)
                if weight is None:
                    logging.warning(f"Missing weight for layer {layer_name}")
-                    self.weight = None
                    return

                manually_loaded_keys = [weight_key]
@@ -1035,9 +1034,6 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
                if self.bias is not None:
                    sd["{}bias".format(prefix)] = self.bias

-                if self.weight is None:
-                    return sd
-
                if isinstance(self.weight, QuantizedTensor):
                    sd_out = self.weight.state_dict("{}weight".format(prefix))
                    for k in sd_out:
--- a/comfy/pinned_memory.py
+++ b/comfy/pinned_memory.py
@@ -2,7 +2,6 @@ import comfy.model_management
 import comfy.memory_management
 import comfy_aimdo.host_buffer
 import comfy_aimdo.torch
-import psutil

 from comfy.cli_args import args

@@ -13,11 +12,6 @@ def pin_memory(module):
    if module.pin_failed or args.disable_pinned_memory or get_pin(module) is not None:
        return
    #FIXME: This is a RAM cache trigger event
-    ram_headroom = comfy.memory_management.RAM_CACHE_HEADROOM
-    #we split the difference and assume half the RAM cache headroom is for us
-    if ram_headroom > 0 and psutil.virtual_memory().available < (ram_headroom * 0.5):
-        comfy.memory_management.extra_ram_release(ram_headroom)
-
    size = comfy.memory_management.vram_aligned_size([ module.weight, module.bias ])

    if comfy.model_management.MAX_PINNED_MEMORY <= 0 or (comfy.model_management.TOTAL_PINNED_MEMORY + size) > comfy.model_management.MAX_PINNED_MEMORY:
--- a/comfy/sd.py
+++ b/comfy/sd.py
@@ -280,6 +280,9 @@ class CLIP:
        n.apply_hooks_to_conds = self.apply_hooks_to_conds
        return n

+    def get_ram_usage(self):
+        return self.patcher.get_ram_usage()
+
    def add_patches(self, patches, strength_patch=1.0, strength_model=1.0):
        return self.patcher.add_patches(patches, strength_patch, strength_model)

@@ -837,6 +840,9 @@ class VAE:
        self.size = comfy.model_management.module_size(self.first_stage_model)
        return self.size

+    def get_ram_usage(self):
+        return self.model_size()
+
    def throw_exception_if_invalid(self):
        if self.first_stage_model is None:
            raise RuntimeError("ERROR: VAE is invalid: None\n\nIf the VAE is from a checkpoint loader node your checkpoint does not contain a valid VAE.")
--- a/comfy/text_encoders/llama.py
+++ b/comfy/text_encoders/llama.py
@@ -224,7 +224,7 @@ class Qwen3_8BConfig:
    k_norm = "gemma3"
    rope_scale = None
    final_norm: bool = True
-    lm_head: bool = True
+    lm_head: bool = False
    stop_tokens = [151643, 151645]

@dataclass
@@ -912,9 +912,6 @@ class BaseGenerate:
 class BaseQwen3:
    def logits(self, x):
        input = x[:, -1:]
-        if self.model.config.lm_head:
-            return self.model.lm_head(input)
-
        module = self.model.embed_tokens

        offload_stream = None
--- a/comfy/text_encoders/lt.py
+++ b/comfy/text_encoders/lt.py
@@ -91,11 +91,11 @@ class Gemma3_12BModel(sd1_clip.SDClipModel):
        self.dtypes.add(dtype)
        super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"start": 2, "pad": 0}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Gemma3_12B, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options)

-    def generate(self, tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed, presence_penalty):
+    def generate(self, tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed):
        tokens_only = [[t[0] for t in b] for b in tokens]
        embeds, _, _, embeds_info = self.process_tokens(tokens_only, self.execution_device)
        comfy.utils.normalize_image_embeddings(embeds, embeds_info, self.transformer.model.config.hidden_size ** 0.5)
-        return self.transformer.generate(embeds, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed, stop_tokens=[106], presence_penalty=presence_penalty)  # 106 is <end_of_turn>
+        return self.transformer.generate(embeds, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed, stop_tokens=[106])  # 106 is <end_of_turn>

 class DualLinearProjection(torch.nn.Module):
    def __init__(self, in_dim, out_dim_video, out_dim_audio, dtype=None, device=None, operations=None):
@@ -189,8 +189,8 @@ class LTXAVTEModel(torch.nn.Module):

        return out.to(device=out_device, dtype=torch.float), pooled, extra

-    def generate(self, tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed, presence_penalty):
-        return self.gemma3_12b.generate(tokens["gemma3_12b"], do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed, presence_penalty)
+    def generate(self, tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed):
+        return self.gemma3_12b.generate(tokens["gemma3_12b"], do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed)

    def load_sd(self, sd):
        if "model.layers.47.self_attn.q_norm.weight" in sd:
--- a/comfy_api/latest/_io.py
+++ b/comfy_api/latest/_io.py
@@ -1373,7 +1373,6 @@ class NodeInfoV1:
    price_badge: dict | None = None
    search_aliases: list[str]=None
    essentials_category: str=None
-    has_intermediate_output: bool=None


@dataclass
@@ -1497,16 +1496,6 @@ class Schema:
    """When True, all inputs from the prompt will be passed to the node as kwargs, even if not defined in the schema."""
    essentials_category: str | None = None
    """Optional category for the Essentials tab. Path-based like category field (e.g., 'Basic', 'Image Tools/Editing')."""
-    has_intermediate_output: bool=False
-    """Flags this node as having intermediate output that should persist across page refreshes.
-
-    Nodes with this flag behave like output nodes (their UI results are cached and resent
-    to the frontend) but do NOT automatically get added to the execution list. This means
-    they will only execute if they are on the dependency path of a real output node.
-
-    Use this for nodes with interactive/operable UI regions that produce intermediate outputs
-    (e.g., Image Crop, Painter) rather than final outputs (e.g., Save Image).
-    """

    def validate(self):
        '''Validate the schema:
@@ -1606,7 +1595,6 @@ class Schema:
            category=self.category,
            description=self.description,
            output_node=self.is_output_node,
-            has_intermediate_output=self.has_intermediate_output,
            deprecated=self.is_deprecated,
            experimental=self.is_experimental,
            dev_only=self.is_dev_only,
@@ -1898,14 +1886,6 @@ class _ComfyNodeBaseInternal(_ComfyNodeInternal):
            cls.GET_SCHEMA()
        return cls._OUTPUT_NODE

-    _HAS_INTERMEDIATE_OUTPUT = None
-    @final
-    @classproperty
-    def HAS_INTERMEDIATE_OUTPUT(cls):  # noqa
-        if cls._HAS_INTERMEDIATE_OUTPUT is None:
-            cls.GET_SCHEMA()
-        return cls._HAS_INTERMEDIATE_OUTPUT
-
    _INPUT_IS_LIST = None
    @final
    @classproperty
@@ -1998,8 +1978,6 @@ class _ComfyNodeBaseInternal(_ComfyNodeInternal):
            cls._API_NODE = schema.is_api_node
        if cls._OUTPUT_NODE is None:
            cls._OUTPUT_NODE = schema.is_output_node
-        if cls._HAS_INTERMEDIATE_OUTPUT is None:
-            cls._HAS_INTERMEDIATE_OUTPUT = schema.has_intermediate_output
        if cls._INPUT_IS_LIST is None:
            cls._INPUT_IS_LIST = schema.is_input_list
        if cls._NOT_IDEMPOTENT is None:
--- a/comfy_api_nodes/nodes_gemini.py
+++ b/comfy_api_nodes/nodes_gemini.py
@@ -201,16 +201,6 @@ async def get_image_from_response(response: GeminiGenerateContentResponse, thoug
            returned_image = await download_url_to_image_tensor(part.fileData.fileUri)
        image_tensors.append(returned_image)
    if len(image_tensors) == 0:
-        if not thought:
-            # No images generated --> extract text response for a meaningful error
-            model_message = get_text_from_response(response).strip()
-            if model_message:
-                raise ValueError(f"Gemini did not generate an image. Model response: {model_message}")
-            raise ValueError(
-                "Gemini did not generate an image. "
-                "Try rephrasing your prompt or changing the response modality to 'IMAGE+TEXT' "
-                "to see the model's reasoning."
-            )
        return torch.zeros((1, 1024, 1024, 4))
    return torch.cat(image_tensors, dim=0)

--- a/comfy_api_nodes/nodes_topaz.py
+++ b/comfy_api_nodes/nodes_topaz.py
@@ -38,7 +38,6 @@ from comfy_api_nodes.util import (
 UPSCALER_MODELS_MAP = {
    "Starlight (Astra) Fast": "slf-1",
    "Starlight (Astra) Creative": "slc-1",
-    "Starlight Precise 2.5": "slp-2.5",
 }


--- a/comfy_execution/caching.py
+++ b/comfy_execution/caching.py
@@ -1,5 +1,6 @@
 import asyncio
 import bisect
+import gc
 import itertools
 import psutil
 import time
@@ -474,10 +475,6 @@ class LRUCache(BasicCache):
        self._mark_used(node_id)
        return await self._set_immediate(node_id, value)

-    def set_local(self, node_id, value):
-        self._mark_used(node_id)
-        BasicCache.set_local(self, node_id, value)
-
    async def ensure_subcache_for(self, node_id, children_ids):
        # Just uses subcaches for tracking 'live' nodes
        await super()._ensure_subcache(node_id, children_ids)
@@ -492,10 +489,15 @@ class LRUCache(BasicCache):
        return self


-#Small baseline weight used when a cache entry has no measurable CPU tensors.
-#Keeps unknown-sized entries in eviction scoring without dominating tensor-backed entries.
+#Iterating the cache for usage analysis might be expensive, so if we trigger make sure
+#to take a chunk out to give breathing space on high-node / low-ram-per-node flows.

-RAM_CACHE_DEFAULT_RAM_USAGE = 0.05
+RAM_CACHE_HYSTERESIS = 1.1
+
+#This is kinda in GB but not really. It needs to be non-zero for the below heuristic
+#and as long as Multi GB models dwarf this it will approximate OOM scoring OK
+
+RAM_CACHE_DEFAULT_RAM_USAGE = 0.1

 #Exponential bias towards evicting older workflows so garbage will be taken out
 #in constantly changing setups.
@@ -519,17 +521,19 @@ class RAMPressureCache(LRUCache):
        self.timestamps[self.cache_key_set.get_data_key(node_id)] = time.time()
        return await super().get(node_id)

-    def set_local(self, node_id, value):
-        self.timestamps[self.cache_key_set.get_data_key(node_id)] = time.time()
-        super().set_local(node_id, value)
+    def poll(self, ram_headroom):
+        def _ram_gb():
+            return psutil.virtual_memory().available / (1024**3)

-    def ram_release(self, target):
-        if psutil.virtual_memory().available >= target:
+        if _ram_gb() > ram_headroom:
+            return
+        gc.collect()
+        if _ram_gb() > ram_headroom:
            return

        clean_list = []

-        for key, cache_entry in self.cache.items():
+        for key, (outputs, _), in self.cache.items():
            oom_score =  RAM_CACHE_OLD_WORKFLOW_OOM_MULTIPLIER ** (self.generation - self.used_generation[key])

            ram_usage = RAM_CACHE_DEFAULT_RAM_USAGE
@@ -538,20 +542,22 @@ class RAMPressureCache(LRUCache):
                if outputs is None:
                    return
                for output in outputs:
-                    if isinstance(output, (list, tuple)):
+                    if isinstance(output, list):
                        scan_list_for_ram_usage(output)
                    elif isinstance(output, torch.Tensor) and output.device.type == 'cpu':
-                        ram_usage += output.numel() * output.element_size()
-            scan_list_for_ram_usage(cache_entry.outputs)
+                        #score Tensors at a 50% discount for RAM usage as they are likely to
+                        #be high value intermediates
+                        ram_usage += (output.numel() * output.element_size()) * 0.5
+                    elif hasattr(output, "get_ram_usage"):
+                        ram_usage += output.get_ram_usage()
+            scan_list_for_ram_usage(outputs)

            oom_score *= ram_usage
            #In the case where we have no information on the node ram usage at all,
            #break OOM score ties on the last touch timestamp (pure LRU)
            bisect.insort(clean_list, (oom_score, self.timestamps[key], key))

-        while psutil.virtual_memory().available < target and clean_list:
+        while _ram_gb() < ram_headroom * RAM_CACHE_HYSTERESIS and clean_list:
            _, _, key = clean_list.pop()
            del self.cache[key]
-            self.used_generation.pop(key, None)
-            self.timestamps.pop(key, None)
-            self.children.pop(key, None)
+            gc.collect()
--- a/comfy_extras/nodes_glsl.py
+++ b/comfy_extras/nodes_glsl.py
@@ -87,9 +87,7 @@ class SizeModeInput(TypedDict):


 MAX_IMAGES = 5      # u_image0-4
-MAX_UNIFORMS = 20   # u_float0-19, u_int0-19
-MAX_BOOLS = 10      # u_bool0-9
-MAX_CURVES = 4      # u_curve0-3 (1D LUT textures)
+MAX_UNIFORMS = 5    # u_float0-4, u_int0-4
 MAX_OUTPUTS = 4     # fragColor0-3 (MRT)

 # Vertex shader using gl_VertexID trick - no VBO needed.
@@ -499,8 +497,6 @@ def _render_shader_batch(
    image_batches: list[list[np.ndarray]],
    floats: list[float],
    ints: list[int],
-    bools: list[bool] | None = None,
-    curves: list[np.ndarray] | None = None,
 ) -> list[list[np.ndarray]]:
    """
    Render a fragment shader for multiple batches efficiently.
@@ -515,8 +511,6 @@ def _render_shader_batch(
        image_batches: List of batches, each batch is a list of input images (H, W, C) float32 [0,1]
        floats: List of float uniforms
        ints: List of int uniforms
-        bools: List of bool uniforms (passed as int 0/1 to GLSL bool uniforms)
-        curves: List of 1D LUT arrays (float32) of arbitrary size for u_curve0-N

    Returns:
        List of batch outputs, each is a list of output images (H, W, 4) float32 [0,1]
@@ -539,17 +533,11 @@ def _render_shader_batch(
    # Detect multi-pass rendering
    num_passes = _detect_pass_count(fragment_code)

-    if bools is None:
-        bools = []
-    if curves is None:
-        curves = []
-
    # Track resources for cleanup
    program = None
    fbo = None
    output_textures = []
    input_textures = []
-    curve_textures = []
    ping_pong_textures = []
    ping_pong_fbos = []

@@ -636,28 +624,6 @@ def _render_shader_batch(
            if loc >= 0:
                gl.glUniform1i(loc, v)

-        for i, v in enumerate(bools):
-            loc = gl.glGetUniformLocation(program, f"u_bool{i}")
-            if loc >= 0:
-                gl.glUniform1i(loc, 1 if v else 0)
-
-        # Create 1D LUT textures for curves (bound after image texture units)
-        for i, lut in enumerate(curves):
-            tex = gl.glGenTextures(1)
-            curve_textures.append(tex)
-            unit = MAX_IMAGES + i
-            gl.glActiveTexture(gl.GL_TEXTURE0 + unit)
-            gl.glBindTexture(gl.GL_TEXTURE_2D, tex)
-            gl.glTexImage2D(gl.GL_TEXTURE_2D, 0, gl.GL_R32F, len(lut), 1, 0, gl.GL_RED, gl.GL_FLOAT, lut)
-            gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MIN_FILTER, gl.GL_LINEAR)
-            gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MAG_FILTER, gl.GL_LINEAR)
-            gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_WRAP_S, gl.GL_CLAMP_TO_EDGE)
-            gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_WRAP_T, gl.GL_CLAMP_TO_EDGE)
-
-            loc = gl.glGetUniformLocation(program, f"u_curve{i}")
-            if loc >= 0:
-                gl.glUniform1i(loc, unit)
-
        # Get u_pass uniform location for multi-pass
        pass_loc = gl.glGetUniformLocation(program, "u_pass")

@@ -752,8 +718,6 @@ def _render_shader_batch(

        for tex in input_textures:
            gl.glDeleteTextures(int(tex))
-        for tex in curve_textures:
-            gl.glDeleteTextures(int(tex))
        for tex in output_textures:
            gl.glDeleteTextures(int(tex))
        for tex in ping_pong_textures:
@@ -790,20 +754,6 @@ class GLSLShader(io.ComfyNode):
            max=MAX_UNIFORMS,
        )

-        bool_template = io.Autogrow.TemplatePrefix(
-            io.Boolean.Input("bool", default=False),
-            prefix="u_bool",
-            min=0,
-            max=MAX_BOOLS,
-        )
-
-        curve_template = io.Autogrow.TemplatePrefix(
-            io.Curve.Input("curve"),
-            prefix="u_curve",
-            min=0,
-            max=MAX_CURVES,
-        )
-
        return io.Schema(
            node_id="GLSLShader",
            display_name="GLSL Shader",
@@ -812,8 +762,6 @@ class GLSLShader(io.ComfyNode):
                "Apply GLSL ES fragment shaders to images. "
                "u_resolution (vec2) is always available."
            ),
-            is_experimental=True,
-            has_intermediate_output=True,
            inputs=[
                io.String.Input(
                    "fragment_shader",
@@ -848,8 +796,6 @@ class GLSLShader(io.ComfyNode):
                io.Autogrow.Input("images", template=image_template, tooltip=f"Images are available as u_image0-{MAX_IMAGES-1} (sampler2D) in the shader code"),
                io.Autogrow.Input("floats", template=float_template, tooltip=f"Floats are available as u_float0-{MAX_UNIFORMS-1} in the shader code"),
                io.Autogrow.Input("ints", template=int_template, tooltip=f"Ints are available as u_int0-{MAX_UNIFORMS-1} in the shader code"),
-                io.Autogrow.Input("bools", template=bool_template, tooltip=f"Booleans are available as u_bool0-{MAX_BOOLS-1} (bool) in the shader code"),
-                io.Autogrow.Input("curves", template=curve_template, tooltip=f"Curves are available as u_curve0-{MAX_CURVES-1} (sampler2D, 1D LUT) in the shader code. Sample with texture(u_curve0, vec2(x, 0.5)).r"),
            ],
            outputs=[
                io.Image.Output(display_name="IMAGE0", tooltip="Available via layout(location = 0) out vec4 fragColor0 in the shader code"),
@@ -867,19 +813,13 @@ class GLSLShader(io.ComfyNode):
        images: io.Autogrow.Type,
        floats: io.Autogrow.Type = None,
        ints: io.Autogrow.Type = None,
-        bools: io.Autogrow.Type = None,
-        curves: io.Autogrow.Type = None,
        **kwargs,
    ) -> io.NodeOutput:
-
        image_list = [v for v in images.values() if v is not None]
        float_list = (
            [v if v is not None else 0.0 for v in floats.values()] if floats else []
        )
        int_list = [v if v is not None else 0 for v in ints.values()] if ints else []
-        bool_list = [v if v is not None else False for v in bools.values()] if bools else []
-
-        curve_luts = [v.to_lut().astype(np.float32) for v in curves.values() if v is not None] if curves else []

        if not image_list:
            raise ValueError("At least one input image is required")
@@ -906,8 +846,6 @@ class GLSLShader(io.ComfyNode):
            image_batches,
            float_list,
            int_list,
-            bool_list,
-            curve_luts,
        )

        # Collect outputs into tensors
--- a/comfy_extras/nodes_images.py
+++ b/comfy_extras/nodes_images.py
@@ -59,7 +59,6 @@ class ImageCropV2(IO.ComfyNode):
            display_name="Image Crop",
            category="image/transform",
            essentials_category="Image Tools",
-            has_intermediate_output=True,
            inputs=[
                IO.Image.Input("image"),
                IO.BoundingBox.Input("crop_region", component="ImageCrop"),
--- a/comfy_extras/nodes_painter.py
+++ b/comfy_extras/nodes_painter.py
@@ -30,7 +30,6 @@ class PainterNode(io.ComfyNode):
            node_id="Painter",
            display_name="Painter",
            category="image",
-            has_intermediate_output=True,
            inputs=[
                io.Image.Input(
                    "image",
--- a/comfy_extras/nodes_post_processing.py
+++ b/comfy_extras/nodes_post_processing.py
@@ -67,11 +67,11 @@ class Blend(io.ComfyNode):
    def g(cls, x):
        return torch.where(x <= 0.25, ((16 * x - 12) * x + 4) * x, torch.sqrt(x))

-def gaussian_kernel(kernel_size: int, sigma: float, device=None, dtype=torch.float32):
+def gaussian_kernel(kernel_size: int, sigma: float, device=None):
    x, y = torch.meshgrid(torch.linspace(-1, 1, kernel_size, device=device), torch.linspace(-1, 1, kernel_size, device=device), indexing="ij")
    d = torch.sqrt(x * x + y * y)
    g = torch.exp(-(d * d) / (2.0 * sigma * sigma))
-    return (g / g.sum()).to(dtype)
+    return g / g.sum()

 class Blur(io.ComfyNode):
    @classmethod
@@ -99,7 +99,7 @@ class Blur(io.ComfyNode):
        batch_size, height, width, channels = image.shape

        kernel_size = blur_radius * 2 + 1
-        kernel = gaussian_kernel(kernel_size, sigma, device=image.device, dtype=image.dtype).repeat(channels, 1, 1).unsqueeze(1)
+        kernel = gaussian_kernel(kernel_size, sigma, device=image.device).repeat(channels, 1, 1).unsqueeze(1)

        image = image.permute(0, 3, 1, 2) # Torch wants (B, C, H, W) we use (B, H, W, C)
        padded_image = F.pad(image, (blur_radius,blur_radius,blur_radius,blur_radius), 'reflect')
@@ -200,7 +200,7 @@ class Sharpen(io.ComfyNode):
        image = image.to(comfy.model_management.get_torch_device())

        kernel_size = sharpen_radius * 2 + 1
-        kernel = gaussian_kernel(kernel_size, sigma, device=image.device, dtype=image.dtype) * -(alpha*10)
+        kernel = gaussian_kernel(kernel_size, sigma, device=image.device) * -(alpha*10)
        kernel = kernel.to(dtype=image.dtype)
        center = kernel_size // 2
        kernel[center, center] = kernel[center, center] - kernel.sum() + 1.0
--- a/execution.py
+++ b/execution.py
@@ -411,19 +411,6 @@ def format_value(x):
    else:
        return str(x)

-def _is_intermediate_output(dynprompt, node_id):
-    class_type = dynprompt.get_node(node_id)["class_type"]
-    class_def = nodes.NODE_CLASS_MAPPINGS[class_type]
-    return getattr(class_def, 'HAS_INTERMEDIATE_OUTPUT', False)
-
-def _send_cached_ui(server, node_id, display_node_id, cached, prompt_id, ui_outputs):
-    if server.client_id is None:
-        return
-    cached_ui = cached.ui or {}
-    server.send_sync("executed", { "node": node_id, "display_node": display_node_id, "output": cached_ui.get("output", None), "prompt_id": prompt_id }, server.client_id)
-    if cached.ui is not None:
-        ui_outputs[node_id] = cached.ui
-
 async def execute(server, dynprompt, caches, current_item, extra_data, executed, prompt_id, execution_list, pending_subgraph_results, pending_async_nodes, ui_outputs):
    unique_id = current_item
    real_node_id = dynprompt.get_real_node_id(unique_id)
@@ -434,7 +421,11 @@ async def execute(server, dynprompt, caches, current_item, extra_data, executed,
    class_def = nodes.NODE_CLASS_MAPPINGS[class_type]
    cached = await caches.outputs.get(unique_id)
    if cached is not None:
-        _send_cached_ui(server, unique_id, display_node_id, cached, prompt_id, ui_outputs)
+        if server.client_id is not None:
+            cached_ui = cached.ui or {}
+            server.send_sync("executed", { "node": unique_id, "display_node": display_node_id, "output": cached_ui.get("output",None), "prompt_id": prompt_id }, server.client_id)
+            if cached.ui is not None:
+                ui_outputs[unique_id] = cached.ui
        get_progress_state().finish_progress(unique_id)
        execution_list.cache_update(unique_id, cached)
        return (ExecutionResult.SUCCESS, None, None)
@@ -724,9 +715,6 @@ class PromptExecutor:
        self.add_message("execution_start", { "prompt_id": prompt_id}, broadcast=False)

        self._notify_prompt_lifecycle("start", prompt_id)
-        ram_headroom = int(self.cache_args["ram"] * (1024 ** 3))
-        ram_release_callback = self.caches.outputs.ram_release if self.cache_type == CacheType.RAM_PRESSURE else None
-        comfy.memory_management.set_ram_cache_release_state(ram_release_callback, ram_headroom)

        try:
            with torch.inference_mode():
@@ -776,22 +764,9 @@ class PromptExecutor:
                        execution_list.unstage_node_execution()
                    else: # result == ExecutionResult.SUCCESS:
                        execution_list.complete_node_execution()
-
-                    if self.cache_type == CacheType.RAM_PRESSURE:
-                        comfy.model_management.free_memory(0, None, pins_required=ram_headroom, ram_required=ram_headroom)
-                        comfy.memory_management.extra_ram_release(ram_headroom)
+                    self.caches.outputs.poll(ram_headroom=self.cache_args["ram"])
                else:
                    # Only execute when the while-loop ends without break
-                    # Send cached UI for intermediate output nodes that weren't executed
-                    for node_id in dynamic_prompt.all_node_ids():
-                        if node_id in executed:
-                            continue
-                        if not _is_intermediate_output(dynamic_prompt, node_id):
-                            continue
-                        cached = await self.caches.outputs.get(node_id)
-                        if cached is not None:
-                            display_node_id = dynamic_prompt.get_display_node_id(node_id)
-                            _send_cached_ui(self.server, node_id, display_node_id, cached, prompt_id, ui_node_outputs)
                    self.add_message("execution_success", { "prompt_id": prompt_id }, broadcast=False)

                ui_outputs = {}
@@ -807,7 +782,6 @@ class PromptExecutor:
                if comfy.model_management.DISABLE_SMART_MEMORY:
                    comfy.model_management.unload_all_models()
        finally:
-            comfy.memory_management.set_ram_cache_release_state(None, 0)
            self._notify_prompt_lifecycle("end", prompt_id)


--- a/main.py
+++ b/main.py
@@ -275,19 +275,15 @@ def _collect_output_absolute_paths(history_result: dict) -> list[str]:

 def prompt_worker(q, server_instance):
    current_time: float = 0.0
-    cache_ram = args.cache_ram
-    if cache_ram < 0:
-        cache_ram = min(32.0, max(4.0, comfy.model_management.total_ram * 0.25 / 1024.0))
-
    cache_type = execution.CacheType.CLASSIC
    if args.cache_lru > 0:
        cache_type = execution.CacheType.LRU
-    elif cache_ram > 0:
+    elif args.cache_ram > 0:
        cache_type = execution.CacheType.RAM_PRESSURE
    elif args.cache_none:
        cache_type = execution.CacheType.NONE

-    e = execution.PromptExecutor(server_instance, cache_type=cache_type, cache_args={ "lru" : args.cache_lru, "ram" : cache_ram } )
+    e = execution.PromptExecutor(server_instance, cache_type=cache_type, cache_args={ "lru" : args.cache_lru, "ram" : args.cache_ram } )
    last_gc_collect = 0
    need_gc = False
    gc_collect_interval = 10.0
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 comfyui-frontend-package==1.42.8
-comfyui-workflow-templates==0.9.39
+comfyui-workflow-templates==0.9.36
 comfyui-embedded-docs==0.4.3
 torch
 torchsde
--- a/server.py
+++ b/server.py
@@ -709,11 +709,6 @@ class PromptServer():
            else:
                info['output_node'] = False

-            if hasattr(obj_class, 'HAS_INTERMEDIATE_OUTPUT') and obj_class.HAS_INTERMEDIATE_OUTPUT == True:
-                info['has_intermediate_output'] = True
-            else:
-                info['has_intermediate_output'] = False
-
            if hasattr(obj_class, 'CATEGORY'):
                info['category'] = obj_class.CATEGORY

--- a/utils/mime_types.py
+++ b/utils/mime_types.py
@@ -24,7 +24,6 @@ def init_mime_types():
    # Web types (used by server.py for static file serving)
    mimetypes.add_type('application/javascript; charset=utf-8', '.js')
    mimetypes.add_type('image/webp', '.webp')
-    mimetypes.add_type('image/svg+xml', '.svg')

    # Model and data file types (used by asset scanning / metadata extraction)
    mimetypes.add_type("application/safetensors", ".safetensors")