Merge remote-tracking branch 'comfy-org/master' into test-glsl-nodes

Remove unsafe pickle loading code that was used on pytorch older than 2.4 (#12473 )
ComfyUI hasn't started on pytorch 2.4 since last month.
2026-02-15 04:30:02 +00:00 · 2026-02-14 20:20:56 -08:00 · 2026-02-14 22:53:52 -05:00 · 2026-02-14 11:01:10 -08:00 · 2026-02-13 22:23:52 -05:00 · 2026-02-13 21:56:54 -05:00
70 changed files with 3600 additions and 509 deletions
--- a/.github/workflows/release-webhook.yml
+++ b/.github/workflows/release-webhook.yml
@@ -7,6 +7,8 @@ on:
 jobs:
  send-webhook:
    runs-on: ubuntu-latest
+    env:
+      DESKTOP_REPO_DISPATCH_TOKEN: ${{ secrets.DESKTOP_REPO_DISPATCH_TOKEN }}
    steps:
      - name: Send release webhook
        env:
@@ -106,3 +108,37 @@ jobs:
            --fail --silent --show-error
          
          echo "✅ Release webhook sent successfully"
+
+      # Notifies the Comfy-Org/desktop repository of the published release by
+      # POSTing a "comfyui_release_published" event to its /dispatches endpoint.
+      - name: Send repository dispatch to desktop
+        env:
+          # Token comes from the job-level DESKTOP_REPO_DISPATCH_TOKEN env entry.
+          DISPATCH_TOKEN: ${{ env.DESKTOP_REPO_DISPATCH_TOKEN }}
+          # Tag name and HTML URL of the release that triggered this workflow.
+          RELEASE_TAG: ${{ github.event.release.tag_name }}
+          RELEASE_URL: ${{ github.event.release.html_url }}
+        # The script aborts when the token is empty, builds the JSON payload
+        # with jq, and sends it with curl (which fails the step on HTTP errors).
+        run: |
+          set -euo pipefail
+
+          if [ -z "${DISPATCH_TOKEN:-}" ]; then
+            echo "::error::DESKTOP_REPO_DISPATCH_TOKEN is required but not set."
+            exit 1
+          fi
+
+          PAYLOAD="$(jq -n \
+            --arg release_tag "$RELEASE_TAG" \
+            --arg release_url "$RELEASE_URL" \
+            '{
+              event_type: "comfyui_release_published",
+              client_payload: {
+                release_tag: $release_tag,
+                release_url: $release_url
+              }
+            }')"
+
+          curl -fsSL \
+            -X POST \
+            -H "Accept: application/vnd.github+json" \
+            -H "Content-Type: application/json" \
+            -H "Authorization: Bearer ${DISPATCH_TOKEN}" \
+            https://api.github.com/repos/Comfy-Org/desktop/dispatches \
+            -d "$PAYLOAD"
+
+          echo "✅ Dispatched ComfyUI release ${RELEASE_TAG} to Comfy-Org/desktop"
--- a/README.md
+++ b/README.md
@@ -227,7 +227,7 @@ Put your VAE in: models/vae

 AMD users can install rocm and pytorch with pip if you don't have it already installed, this is the command to install the stable version:

-```pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.4```
+```pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm7.1```

 This is the command to install the nightly with ROCm 7.1 which might have some performance improvements:

--- a/blueprints/.glsl/Brightness_and_Contrast_1.frag
+++ b/blueprints/.glsl/Brightness_and_Contrast_1.frag
@@ -0,0 +1,44 @@
+#version 300 es
+precision highp float;
+
+uniform sampler2D u_image0;
+uniform float u_float0; // Brightness slider -100..100
+uniform float u_float1; // Contrast slider -100..100
+
+in vec2 v_texCoord;
+out vec4 fragColor;
+
+const float MID_GRAY = 0.18;  // 18% reflectance
+
+// Decode sRGB to linear light with a plain 2.2 power curve (an approximation
+// of the sRGB transfer function). Negative inputs are treated as 0.
+vec3 srgbToLinear(vec3 c) {
+    vec3 nonNegative = max(c, 0.0);
+    return pow(nonNegative, vec3(2.2));
+}
+
+// Encode linear light back to sRGB with the inverse 2.2 power curve.
+// Negative inputs are treated as 0.
+vec3 linearToSrgb(vec3 c) {
+    vec3 nonNegative = max(c, 0.0);
+    return pow(nonNegative, vec3(1.0/2.2));
+}
+
+// Map the brightness slider (-100..100) to an additive offset in -1..1.
+float mapBrightness(float b) {
+    float scaled = b / 100.0;
+    return clamp(scaled, -1.0, 1.0);
+}
+
+// Map the contrast slider (-100..100) to a gain in 0..2 (1 = unchanged).
+float mapContrast(float c) {
+    float gain = c / 100.0 + 1.0;
+    return clamp(gain, 0.0, 2.0);
+}
+
+// Applies brightness (additive) and contrast (gain around mid gray) in
+// linear light, then re-encodes to sRGB. Alpha is passed through.
+void main() {
+    vec4 src = texture(u_image0, v_texCoord);
+
+    // Slider values mapped to an offset (-1..1) and a gain (0..2)
+    float offset = mapBrightness(u_float0);
+    float gain   = mapContrast(u_float1);
+
+    // Contrast pivots around MID_GRAY so mid tones stay put
+    vec3 linearRgb = srgbToLinear(src.rgb);
+    linearRgb = (linearRgb - MID_GRAY) * gain + offset + MID_GRAY;
+
+    // Clamp to the displayable range before converting back to sRGB
+    vec3 encoded = linearToSrgb(clamp(linearRgb, 0.0, 1.0));
+
+    fragColor = vec4(encoded, src.a);
+}
--- a/blueprints/.glsl/Chromatic_Aberration_16.frag
+++ b/blueprints/.glsl/Chromatic_Aberration_16.frag
@@ -0,0 +1,72 @@
+#version 300 es
+precision highp float;
+
+uniform sampler2D u_image0;
+uniform vec2 u_resolution;
+uniform int u_int0;      // Mode
+uniform float u_float0;  // Amount (0 to 100)
+
+in vec2 v_texCoord;
+out vec4 fragColor;
+
+const int MODE_LINEAR   = 0;
+const int MODE_RADIAL   = 1;
+const int MODE_BARREL   = 2;
+const int MODE_SWIRL    = 3;
+const int MODE_DIAGONAL = 4;
+
+const float AMOUNT_SCALE = 0.0005;
+const float RADIAL_MULT = 4.0;
+const float BARREL_MULT = 8.0;
+const float INV_SQRT2 = 0.70710678118;
+
+// Chromatic aberration: red is sampled at +shift, blue at -shift, green and
+// alpha come from the unshifted pixel. The shift depends on the mode (u_int0).
+void main() {
+    vec2 uv = v_texCoord;
+    vec4 src = texture(u_image0, uv);
+
+    float strength = u_float0 * AMOUNT_SCALE;
+
+    // Effectively zero amount: pass the pixel through untouched
+    if (strength < 0.000001) {
+        fragColor = src;
+        return;
+    }
+
+    // Center-relative position, stretched by the aspect ratio so radial
+    // distances are measured on a circle rather than an ellipse
+    float aspect = u_resolution.x / u_resolution.y;
+    vec2 fromCenter = uv - 0.5;
+    vec2 pos = vec2(fromCenter.x * aspect, fromCenter.y);
+    float r = length(pos);
+
+    // Unit vector pointing away from the center (zero at the center itself)
+    vec2 outward = vec2(0.0);
+    if (r > 0.0001) {
+        outward = pos / r;
+    }
+
+    // Unknown modes leave the shift at zero
+    vec2 shift = vec2(0.0);
+
+    if (u_int0 == MODE_LINEAR) {
+        // Pure horizontal shift, already in UV space
+        shift = vec2(strength, 0.0);
+    }
+    else if (u_int0 == MODE_DIAGONAL) {
+        // 45 degree shift, already in UV space
+        shift = vec2(strength, strength) * INV_SQRT2;
+    }
+    else if (u_int0 == MODE_RADIAL) {
+        // Away from the center, growing linearly with distance
+        shift = outward * r * strength * RADIAL_MULT;
+        shift.x /= aspect;  // back to UV space
+    }
+    else if (u_int0 == MODE_BARREL) {
+        // Away from the center, growing with the square of the distance
+        shift = outward * r * r * strength * BARREL_MULT;
+        shift.x /= aspect;  // back to UV space
+    }
+    else if (u_int0 == MODE_SWIRL) {
+        // Perpendicular to the radial direction (rotational aberration)
+        vec2 tangent = vec2(-outward.y, outward.x);
+        shift = tangent * r * strength * RADIAL_MULT;
+        shift.x /= aspect;  // back to UV space
+    }
+
+    float redShifted  = texture(u_image0, uv + shift).r;
+    float blueShifted = texture(u_image0, uv - shift).b;
+
+    fragColor = vec4(redShifted, src.g, blueShifted, src.a);
+}
--- a/blueprints/.glsl/Color_Adjustment_15.frag
+++ b/blueprints/.glsl/Color_Adjustment_15.frag
@@ -0,0 +1,78 @@
+#version 300 es
+precision highp float;
+
+uniform sampler2D u_image0;
+uniform float u_float0; // temperature (-100 to 100)
+uniform float u_float1; // tint (-100 to 100)
+uniform float u_float2; // vibrance (-100 to 100)
+uniform float u_float3; // saturation (-100 to 100)
+
+in vec2 v_texCoord;
+out vec4 fragColor;
+
+const float INPUT_SCALE = 0.01;
+const float TEMP_TINT_PRIMARY = 0.3;
+const float TEMP_TINT_SECONDARY = 0.15;
+const float VIBRANCE_BOOST = 2.0;
+const float SATURATION_BOOST = 2.0;
+const float SKIN_PROTECTION = 0.5;
+const float EPSILON = 0.001;
+const vec3 LUMA_WEIGHTS = vec3(0.299, 0.587, 0.114);
+
+// Temperature / tint / vibrance / saturation adjustment. All four sliders
+// arrive as -100..100; alpha is passed through.
+void main() {
+    vec4 src = texture(u_image0, v_texCoord);
+    vec3 rgb = src.rgb;
+
+    // Bring the sliders to -1..1
+    float temperature = u_float0 * INPUT_SCALE;
+    float tint        = u_float1 * INPUT_SCALE;
+    float vibrance    = u_float2 * INPUT_SCALE;
+    float saturation  = u_float3 * INPUT_SCALE;
+
+    // Temperature: positive warms (more red, less blue), negative cools
+    rgb.r += temperature * TEMP_TINT_PRIMARY;
+    rgb.b -= temperature * TEMP_TINT_PRIMARY;
+
+    // Tint: positive pushes green, negative pushes magenta
+    rgb.g += tint * TEMP_TINT_PRIMARY;
+    rgb.r -= tint * TEMP_TINT_SECONDARY;
+    rgb.b -= tint * TEMP_TINT_SECONDARY;
+
+    // One clamp covers both temperature and tint
+    rgb = clamp(rgb, 0.0, 1.0);
+
+    // Vibrance (with protection for warm, skin-like tones)
+    if (vibrance != 0.0) {
+        float hi = max(rgb.r, max(rgb.g, rgb.b));
+        float lo = min(rgb.r, min(rgb.g, rgb.b));
+        float chroma = hi - lo;
+        float luma = dot(rgb, LUMA_WEIGHTS);
+
+        if (vibrance < 0.0) {
+            // Plain desaturation; -100 reaches gray
+            rgb = mix(vec3(luma), rgb, 1.0 + vibrance);
+        } else {
+            // The less saturated a pixel is, the more it gets boosted
+            float boost = vibrance * (1.0 - chroma);
+
+            // Branchless warm-tone test: r >= g >= b
+            float warmOrder = step(rgb.b, rgb.g) * step(rgb.g, rgb.r);
+            float warmth = (rgb.r - rgb.b) / max(hi, EPSILON);
+            float skin = warmOrder * warmth * chroma * (1.0 - chroma);
+            boost *= (1.0 - skin * SKIN_PROTECTION);
+
+            rgb = mix(vec3(luma), rgb, 1.0 + boost * VIBRANCE_BOOST);
+        }
+    }
+
+    // Saturation: -100 reaches gray, +100 is a 3x boost
+    if (saturation != 0.0) {
+        float luma = dot(rgb, LUMA_WEIGHTS);
+        float factor;
+        if (saturation < 0.0) {
+            factor = 1.0 + saturation;
+        } else {
+            factor = 1.0 + saturation * SATURATION_BOOST;
+        }
+        rgb = mix(vec3(luma), rgb, factor);
+    }
+
+    fragColor = vec4(clamp(rgb, 0.0, 1.0), src.a);
+}
--- a/blueprints/.glsl/Edge-Preserving_Blur_128.frag
+++ b/blueprints/.glsl/Edge-Preserving_Blur_128.frag
@@ -0,0 +1,94 @@
+#version 300 es
+precision highp float;
+
+uniform sampler2D u_image0;
+uniform float u_float0;   // Blur radius (0–20, default ~5)
+uniform float u_float1;   // Edge threshold (0–100, default ~30)
+uniform int u_int0;       // Step size (0/1 = every pixel, 2+ = skip pixels)
+
+in vec2 v_texCoord;
+out vec4 fragColor;
+
+const int MAX_RADIUS = 20;
+const float EPSILON = 0.0001;
+
+// Perceptual luminance: weighted sum of R, G and B (Rec. 601 luma weights)
+float getLuminance(vec3 rgb) {
+    const vec3 weights = vec3(0.299, 0.587, 0.114);
+    return dot(rgb, weights);
+}
+
+// Bilateral (edge-preserving) blur around one pixel.
+// Neighbours inside a circular window are averaged, each weighted by a
+// spatial Gaussian and by how close its colour is to the centre colour.
+//   uv           - texture coordinate of the centre pixel
+//   texelSize    - size of one texel in UV units
+//   radius       - window radius in texels (the window never exceeds MAX_RADIUS)
+//   sigmaSpatial - sigma of the spatial Gaussian, in texels
+//   sigmaColor   - sigma of the colour-similarity Gaussian
+// Only offsets that are multiples of the step size (u_int0, minimum 1) are
+// sampled. Returns the filtered RGB with the centre pixel's alpha.
+vec4 bilateralFilter(vec2 uv, vec2 texelSize, int radius,
+                     float sigmaSpatial, float sigmaColor)
+{
+    vec4 center = texture(u_image0, uv);
+    vec3 centerRGB = center.rgb;
+
+    // Precomputed exponents: weight = exp(squaredDistance * inv)
+    float invSpatial2 = -0.5 / (sigmaSpatial * sigmaSpatial);
+    float invColor2   = -0.5 / (sigmaColor * sigmaColor + EPSILON);
+
+    vec3 sumRGB = vec3(0.0);
+    float sumWeight = 0.0;
+
+    // Called "stride" rather than "step" so the built-in step() is not shadowed
+    int stride = max(u_int0, 1);
+    float radius2 = float(radius * radius);
+
+    // Walk only the offsets that can contribute: multiples of stride within
+    // [-radius, radius], capped at MAX_RADIUS. This visits the same samples in
+    // the same order as scanning the full MAX_RADIUS window and skipping.
+    int reach = clamp(radius, 0, MAX_RADIUS);
+    int limit = (reach / stride) * stride;
+
+    for (int dy = -limit; dy <= limit; dy += stride) {
+        for (int dx = -limit; dx <= limit; dx += stride) {
+            vec2 offset = vec2(float(dx), float(dy));
+            float dist2 = dot(offset, offset);
+            // Keep the window circular
+            if (dist2 > radius2) continue;
+
+            vec3 sampleRGB = texture(u_image0, uv + offset * texelSize).rgb;
+
+            // Spatial Gaussian
+            float spatialWeight = exp(dist2 * invSpatial2);
+
+            // Perceptual color distance (weighted RGB)
+            vec3 diff = sampleRGB - centerRGB;
+            float colorDist = dot(diff * diff, vec3(0.299, 0.587, 0.114));
+            float colorWeight = exp(colorDist * invColor2);
+
+            float w = spatialWeight * colorWeight;
+            sumRGB += sampleRGB * w;
+            sumWeight += w;
+        }
+    }
+
+    vec3 resultRGB = sumRGB / max(sumWeight, EPSILON);
+    return vec4(resultRGB, center.a); // preserve center alpha
+}
+
+// Derives the filter parameters from the uniforms and runs the bilateral
+// filter; a radius that rounds to zero is a pass-through.
+void main() {
+    // Radius slider limited to the supported window, rounded to whole texels
+    float radiusF = clamp(u_float0, 0.0, float(MAX_RADIUS));
+    int radius = int(radiusF + 0.5);
+
+    if (radius == 0) {
+        fragColor = texture(u_image0, v_texCoord);
+        return;
+    }
+
+    // Size of one texel in UV units (mip level 0)
+    vec2 texelSize = 1.0 / vec2(textureSize(u_image0, 0));
+
+    // Edge threshold (0..100) -> color sigma; the squared curve gives finer
+    // control at the low end
+    float edge = clamp(u_float1, 0.0, 100.0) / 100.0;
+    edge *= edge;
+    float sigmaColor = mix(0.01, 0.5, edge);
+
+    // Spatial sigma follows the unrounded radius, with a floor of 0.5
+    float sigmaSpatial = max(radiusF * 0.75, 0.5);
+
+    fragColor = bilateralFilter(v_texCoord, texelSize, radius, sigmaSpatial, sigmaColor);
+}
--- a/blueprints/.glsl/Film_Grain_15.frag
+++ b/blueprints/.glsl/Film_Grain_15.frag
@@ -0,0 +1,124 @@
+#version 300 es
+precision highp float;
+
+uniform sampler2D u_image0;
+uniform vec2 u_resolution;
+uniform float u_float0; // grain amount      [0.0 – 1.0]   typical: 0.2–0.8
+uniform float u_float1; // grain size        [0.3 – 3.0]   lower = finer grain
+uniform float u_float2; // color amount      [0.0 – 1.0]   0 = monochrome, 1 = RGB grain
+uniform float u_float3; // luminance bias    [0.0 – 1.0]   0 = uniform, 1 = shadows only
+uniform int   u_int0;   // noise mode        [0 or 1]      0 = smooth, 1 = grainy
+
+in vec2 v_texCoord;
+layout(location = 0) out vec4 fragColor0;
+
+// Integer hash (PCG-style): scrambles one 32-bit value into another
+uint pcg(uint v) {
+    uint s = v * 747796405u + 2891336453u;
+    uint shift = (s >> 28u) + 4u;
+    uint w = ((s >> shift) ^ s) * 277803737u;
+    return (w >> 22u) ^ w;
+}
+
+// Hash a 2D integer coordinate: y is hashed first, then folded into x
+uint hash2d(uvec2 p) {
+    uint rowHash = pcg(p.y);
+    return pcg(p.x + rowHash);
+}
+
+// Hash a 2D integer coordinate to a float in [0, 1]
+float hashf(uvec2 p) {
+    uint bits = hash2d(p);
+    return float(bits) / float(0xffffffffu);
+}
+
+// Same as hashf(p), but re-hashed with an offset so that different offsets
+// give independent values for the same coordinate (used for RGB channels)
+float hashf(uvec2 p, uint offset) {
+    uint bits = pcg(hash2d(p) + offset);
+    return float(bits) / float(0xffffffffu);
+}
+
+// Roughly Gaussian noise for a coordinate: four uniform [0,1] hashes are
+// summed, the sum is centered on zero (mean is 2.0) and scaled by 0.7
+float toGaussian(uvec2 p) {
+    float sum = hashf(p, 0u);
+    sum += hashf(p, 1u);
+    sum += hashf(p, 2u);
+    sum += hashf(p, 3u);
+    return (sum - 2.0) * 0.7;
+}
+
+// Offset variant of toGaussian(p): uses hashes offset .. offset+3, so
+// different offsets give independent noise for the same coordinate
+float toGaussian(uvec2 p, uint offset) {
+    float sum = 0.0;
+    for (uint k = 0u; k < 4u; k++) {
+        sum += hashf(p, offset + k);
+    }
+    return (sum - 2.0) * 0.7;
+}
+
+// Value noise: Gaussian-ish values at the four surrounding lattice points,
+// blended with a quintic fade curve (less banding than cubic)
+float smoothNoise(vec2 p) {
+    vec2 cell = floor(p);
+    vec2 t = fract(p);
+
+    // Quintic fade: 6t^5 - 15t^4 + 10t^3
+    t = t * t * t * (t * (t * 6.0 - 15.0) + 10.0);
+
+    uvec2 corner = uvec2(cell);
+    float n00 = toGaussian(corner);
+    float n10 = toGaussian(corner + uvec2(1u, 0u));
+    float n01 = toGaussian(corner + uvec2(0u, 1u));
+    float n11 = toGaussian(corner + uvec2(1u, 1u));
+
+    float bottom = mix(n00, n10, t.x);
+    float top    = mix(n01, n11, t.x);
+    return mix(bottom, top, t.y);
+}
+
+// Offset variant of smoothNoise(p): same interpolation, but the lattice
+// values come from toGaussian(.., offset)
+float smoothNoise(vec2 p, uint offset) {
+    vec2 cell = floor(p);
+    vec2 t = fract(p);
+
+    // Quintic fade: 6t^5 - 15t^4 + 10t^3
+    t = t * t * t * (t * (t * 6.0 - 15.0) + 10.0);
+
+    uvec2 corner = uvec2(cell);
+    float n00 = toGaussian(corner, offset);
+    float n10 = toGaussian(corner + uvec2(1u, 0u), offset);
+    float n01 = toGaussian(corner + uvec2(0u, 1u), offset);
+    float n11 = toGaussian(corner + uvec2(1u, 1u), offset);
+
+    float bottom = mix(n00, n10, t.x);
+    float top    = mix(n01, n11, t.x);
+    return mix(bottom, top, t.y);
+}
+
+// Adds film grain to the image. Grain is a blend of monochrome and per-channel
+// noise, attenuated in bright areas according to the luminance bias.
+void main() {
+    vec4 src = texture(u_image0, v_texCoord);
+
+    // Grain lattice coordinate: pixel position divided by the grain size
+    // (guarded against a zero size)
+    vec2 grainUV = v_texCoord * u_resolution / max(u_float1, 0.01);
+
+    float mono;
+    vec3 perChannel;
+
+    if (u_int0 == 1) {
+        // Grainy mode: one raw hash value per lattice cell, no interpolation
+        uvec2 cell = uvec2(grainUV);
+        mono = toGaussian(cell);
+        perChannel = vec3(
+            toGaussian(cell, 100u),
+            toGaussian(cell, 200u),
+            toGaussian(cell, 300u)
+        );
+    } else {
+        // Smooth mode: quintic-interpolated noise
+        mono = smoothNoise(grainUV);
+        perChannel = vec3(
+            smoothNoise(grainUV, 100u),
+            smoothNoise(grainUV, 200u),
+            smoothNoise(grainUV, 300u)
+        );
+    }
+
+    // Luminance (Rec.709) drives how much grain highlights receive
+    float luma = dot(src.rgb, vec3(0.2126, 0.7152, 0.0722));
+    float lumWeight = mix(1.0, 1.0 - luma, clamp(u_float3, 0.0, 1.0));
+
+    float strength = u_float0 * 0.15;
+
+    // 0 = monochrome grain, 1 = independent RGB grain
+    vec3 grain = mix(vec3(mono), perChannel, clamp(u_float2, 0.0, 1.0));
+
+    vec3 grained = src.rgb + grain * strength * lumWeight;
+    fragColor0 = vec4(clamp(grained, 0.0, 1.0), src.a);
+}
--- a/blueprints/.glsl/Glow_30.frag
+++ b/blueprints/.glsl/Glow_30.frag
@@ -0,0 +1,133 @@
+#version 300 es
+// highp is required here: hash() multiplies pixel coordinates by factors of
+// several hundred, which exceeds the range mediump is guaranteed to cover.
+precision highp float;
+
+uniform sampler2D u_image0;
+uniform vec2 u_resolution;
+uniform int u_int0;      // Blend mode
+uniform int u_int1;      // Color tint
+uniform float u_float0;  // Intensity
+uniform float u_float1;  // Radius
+uniform float u_float2;  // Threshold
+
+in vec2 v_texCoord;
+out vec4 fragColor;
+
+const int BLEND_ADD      = 0;
+const int BLEND_SCREEN   = 1;
+const int BLEND_SOFT     = 2;
+const int BLEND_OVERLAY  = 3;
+const int BLEND_LIGHTEN  = 4;
+
+const float GOLDEN_ANGLE = 2.39996323;
+const int MAX_SAMPLES = 48;
+const vec3 LUMA = vec3(0.299, 0.587, 0.114);
+
+// Cheap pseudo-random value in [0, 1) derived from a 2D position
+float hash(vec2 p) {
+    vec2 q = fract(p * vec2(123.34, 456.21));
+    q += dot(q, q + 45.32);
+    return fract(q.x * q.y);
+}
+
+// Unpack a 0xRRGGBB integer into an RGB color with components in 0..1
+vec3 hexToRgb(int h) {
+    int r = (h >> 16) & 255;
+    int g = (h >> 8) & 255;
+    int b = h & 255;
+    return vec3(float(r), float(g), float(b)) * (1.0 / 255.0);
+}
+
+// Combine the glow with the base color using the selected blend mode.
+// BLEND_ADD and any unrecognised mode fall through to a plain add.
+vec3 blend(vec3 base, vec3 glow, int mode) {
+    if (mode == BLEND_LIGHTEN) {
+        return max(base, glow);
+    } else if (mode == BLEND_SCREEN) {
+        return 1.0 - (1.0 - base) * (1.0 - glow);
+    } else if (mode == BLEND_OVERLAY) {
+        // Multiply in the dark half of base, screen in the bright half
+        vec3 darkHalf   = 2.0 * base * glow;
+        vec3 brightHalf = 1.0 - 2.0 * (1.0 - base) * (1.0 - glow);
+        return mix(darkHalf, brightHalf, step(0.5, base));
+    } else if (mode == BLEND_SOFT) {
+        // Soft light: darkening formula below glow 0.5, lightening above
+        vec3 darken  = base - (1.0 - 2.0 * glow) * base * (1.0 - base);
+        vec3 lighten = base + (2.0 * glow - 1.0) * (sqrt(base) - base);
+        return mix(darken, lighten, step(0.5, glow));
+    }
+    return base + glow;
+}
+
+// Glow: gathers bright pixels around the current one on a jittered
+// golden-angle spiral, optionally tints the result, and blends it onto the
+// original color. Alpha is passed through.
+void main() {
+    vec4 src = texture(u_image0, v_texCoord);
+
+    float intensity = u_float0 * 0.05;
+    float radius = u_float1 * u_float1 * 0.012;
+
+    // Nothing visible to add: pass-through
+    if (intensity < 0.001 || radius < 0.1) {
+        fragColor = src;
+        return;
+    }
+
+    // Soft luminance threshold: a 0.3 wide ramp centered on the threshold
+    float threshold = 1.0 - u_float2 * 0.01;
+    float rampLo = threshold - 0.15;
+    float rampHi = threshold + 0.15;
+
+    vec2 texelSize = 1.0 / u_resolution;
+    float radius2 = radius * radius;
+
+    // Small radii use fewer taps (between 35% and 100% of MAX_SAMPLES)
+    float sampleScale = clamp(radius * 0.75, 0.35, 1.0);
+    int samples = int(float(MAX_SAMPLES) * sampleScale);
+
+    // Per-pixel noise rotates and scales the spiral to hide its pattern
+    float noise = hash(gl_FragCoord.xy);
+    float startAngle = noise * GOLDEN_ANGLE;
+    float radiusJitter = 0.85 + noise * 0.3;
+
+    // Rotation by one golden angle, applied incrementally per tap
+    float rotCos = cos(GOLDEN_ANGLE);
+    float rotSin = sin(GOLDEN_ANGLE);
+    vec2 tapDir = vec2(cos(startAngle), sin(startAngle));
+
+    vec3 glow = vec3(0.0);
+    float totalWeight = 0.0;
+
+    // Center tap, weighted double
+    float centerMask = smoothstep(rampLo, rampHi, dot(src.rgb, LUMA));
+    glow += src.rgb * centerMask * 2.0;
+    totalWeight += 2.0;
+
+    for (int i = 1; i < MAX_SAMPLES; i++) {
+        if (i >= samples) break;
+
+        // sqrt spacing spreads the taps evenly over the disc area
+        float fi = float(i);
+        float dist = sqrt(fi / float(samples)) * radius * radiusJitter;
+
+        vec2 offset = tapDir * dist * texelSize;
+        vec3 tap = texture(u_image0, v_texCoord + offset).rgb;
+        float mask = smoothstep(rampLo, rampHi, dot(tap, LUMA));
+
+        // Squared falloff that reaches zero at dist^2 = 1.5 * radius^2
+        float w = 1.0 - (dist * dist) / (radius2 * 1.5);
+        w = max(w, 0.0);
+        w *= w;
+
+        glow += tap * mask * w;
+        totalWeight += w;
+
+        tapDir = vec2(
+            tapDir.x * rotCos - tapDir.y * rotSin,
+            tapDir.x * rotSin + tapDir.y * rotCos
+        );
+    }
+
+    glow *= intensity / max(totalWeight, 0.001);
+
+    // A tint of 0 (or less) means "no tint"
+    if (u_int1 > 0) {
+        glow *= hexToRgb(u_int1);
+    }
+
+    vec3 result = blend(src.rgb, glow, u_int0);
+    // One 8-bit step of dither, reusing the per-pixel noise
+    result += (noise - 0.5) * (1.0 / 255.0);
+
+    fragColor = vec4(clamp(result, 0.0, 1.0), src.a);
+}
--- a/blueprints/.glsl/Hue_and_Saturation_1.frag
+++ b/blueprints/.glsl/Hue_and_Saturation_1.frag
@@ -0,0 +1,222 @@
+#version 300 es
+precision highp float;
+
+uniform sampler2D u_image0;
+uniform int u_int0;      // Mode: 0=Master, 1=Reds, 2=Yellows, 3=Greens, 4=Cyans, 5=Blues, 6=Magentas, 7=Colorize
+uniform int u_int1;      // Color Space: 0=HSL, 1=HSB/HSV
+uniform float u_float0;  // Hue (-180 to 180)
+uniform float u_float1;  // Saturation (-100 to 100)
+uniform float u_float2;  // Lightness/Brightness (-100 to 100)
+uniform float u_float3;  // Overlap (0 to 100) - feathering between adjacent color ranges
+
+in vec2 v_texCoord;
+out vec4 fragColor;
+
+// Color range modes
+const int MODE_MASTER   = 0;
+const int MODE_RED      = 1;
+const int MODE_YELLOW   = 2;
+const int MODE_GREEN    = 3;
+const int MODE_CYAN     = 4;
+const int MODE_BLUE     = 5;
+const int MODE_MAGENTA  = 6;
+const int MODE_COLORIZE = 7;
+
+// Color space modes
+const int COLORSPACE_HSL = 0;
+const int COLORSPACE_HSB = 1;
+
+const float EPSILON = 0.0001;
+
+//=============================================================================
+// RGB <-> HSL Conversions
+//=============================================================================
+
+// RGB -> HSL, all components in 0..1. Colors whose max-min spread is at
+// most EPSILON are treated as gray: hue and saturation are 0.
+vec3 rgb2hsl(vec3 c) {
+    float hi = max(max(c.r, c.g), c.b);
+    float lo = min(min(c.r, c.g), c.b);
+    float spread = hi - lo;
+
+    float hue = 0.0;
+    float sat = 0.0;
+    float light = (hi + lo) * 0.5;
+
+    if (spread > EPSILON) {
+        if (light < 0.5) {
+            sat = spread / (hi + lo);
+        } else {
+            sat = spread / (2.0 - hi - lo);
+        }
+
+        // Hue sector depends on which channel is the largest
+        if (hi == c.r) {
+            hue = (c.g - c.b) / spread + (c.g < c.b ? 6.0 : 0.0);
+        } else if (hi == c.g) {
+            hue = (c.b - c.r) / spread + 2.0;
+        } else {
+            hue = (c.r - c.g) / spread + 4.0;
+        }
+        hue /= 6.0;
+    }
+
+    return vec3(hue, sat, light);
+}
+
+// Piecewise-linear helper for HSL -> RGB: evaluates one channel for the
+// hue position t (wrapped into 0..1) between the levels p and q.
+float hue2rgb(float p, float q, float t) {
+    float h = fract(t);
+    if (h < 1.0/6.0) {
+        return p + (q - p) * 6.0 * h;      // rising edge
+    }
+    if (h < 0.5) {
+        return q;                          // plateau
+    }
+    if (h < 2.0/3.0) {
+        return p + (q - p) * (2.0/3.0 - h) * 6.0;  // falling edge
+    }
+    return p;
+}
+
+// HSL -> RGB. Saturation below EPSILON yields a gray of the given lightness.
+vec3 hsl2rgb(vec3 hsl) {
+    float hue = hsl.x;
+    float sat = hsl.y;
+    float light = hsl.z;
+
+    if (sat < EPSILON) {
+        return vec3(light);
+    }
+
+    float q;
+    if (light < 0.5) {
+        q = light * (1.0 + sat);
+    } else {
+        q = light + sat - light * sat;
+    }
+    float p = 2.0 * light - q;
+
+    // R, G and B are the same curve, a third of a turn apart
+    float r = hue2rgb(p, q, hue + 1.0/3.0);
+    float g = hue2rgb(p, q, hue);
+    float b = hue2rgb(p, q, hue - 1.0/3.0);
+    return vec3(r, g, b);
+}
+
+// RGB -> HSB/HSV, all components in 0..1. Hue is 0 when the max-min spread
+// is at most EPSILON; saturation is 0 when the max is at most EPSILON.
+vec3 rgb2hsb(vec3 c) {
+    float hi = max(max(c.r, c.g), c.b);
+    float lo = min(min(c.r, c.g), c.b);
+    float spread = hi - lo;
+
+    float hue = 0.0;
+    float sat = 0.0;
+    if (hi > EPSILON) {
+        sat = spread / hi;
+    }
+
+    if (spread > EPSILON) {
+        // Hue sector depends on which channel is the largest
+        if (hi == c.r) {
+            hue = (c.g - c.b) / spread + (c.g < c.b ? 6.0 : 0.0);
+        } else if (hi == c.g) {
+            hue = (c.b - c.r) / spread + 2.0;
+        } else {
+            hue = (c.r - c.g) / spread + 4.0;
+        }
+        hue /= 6.0;
+    }
+
+    return vec3(hue, sat, hi);
+}
+
+// HSB/HSV -> RGB using the branch-free triangle-wave formulation
+vec3 hsb2rgb(vec3 hsb) {
+    vec3 sector = mod(hsb.x * 6.0 + vec3(0.0, 4.0, 2.0), 6.0);
+    vec3 pureHue = clamp(abs(sector - 3.0) - 1.0, 0.0, 1.0);
+    // Saturation fades the pure hue toward white, brightness scales the result
+    return hsb.z * mix(vec3(1.0), pureHue, hsb.y);
+}
+
+//=============================================================================
+// Color Range Weight Calculation
+//=============================================================================
+
+// Shortest distance between two hues on the 0..1 hue circle (wraps around)
+float hueDistance(float a, float b) {
+    float direct = abs(a - b);
+    float wrapped = 1.0 - direct;
+    return min(direct, wrapped);
+}
+
+// Weight (0..1) of a hue for the color range centered on `center`.
+//   hue     - hue to test, 0..1
+//   center  - center of the range, 0..1
+//   overlap - feather amount, 0..1, as a fraction of the 1/6-wide base range
+// The weight is 1 inside the inner half-width of the range and falls off
+// smoothly to 0 across the feather zone.
+float getHueWeight(float hue, float center, float overlap) {
+    float baseWidth = 1.0 / 6.0;
+    float feather = baseWidth * overlap;
+
+    float d = hueDistance(hue, center);
+
+    float inner = baseWidth * 0.5;
+    float outer = inner + feather;
+
+    // smoothstep() is undefined when edge0 >= edge1, which happens when the
+    // overlap is zero (or negative). Use a hard edge in that case.
+    if (outer <= inner) {
+        return d <= inner ? 1.0 : 0.0;
+    }
+
+    return 1.0 - smoothstep(inner, outer, d);
+}
+
+// Weight of a hue for the selected mode. Master and Colorize affect every
+// hue; the six color modes are centered at multiples of 1/6 starting at red.
+float getModeWeight(float hue, int mode, float overlap) {
+    bool affectsAllHues = (mode == MODE_MASTER || mode == MODE_COLORIZE);
+    if (affectsAllHues) {
+        return 1.0;
+    }
+
+    if (mode == MODE_RED) {
+        // Red sits on the wrap-around point, so both ends are checked
+        float nearZero = getHueWeight(hue, 0.0, overlap);
+        float nearOne  = getHueWeight(hue, 1.0, overlap);
+        return max(nearZero, nearOne);
+    }
+
+    float center = float(mode - 1) / 6.0;
+    return getHueWeight(hue, center, overlap);
+}
+
+//=============================================================================
+// Adjustment Functions
+//=============================================================================
+
+// Positive amounts move lightness toward 1, negative amounts toward 0,
+// both proportionally to the remaining headroom.
+float adjustLightness(float l, float amount) {
+    if (amount > 0.0) {
+        return l + (1.0 - l) * amount;
+    }
+    return l + l * amount;
+}
+
+// Plain additive brightness change, clamped to 0..1
+float adjustBrightness(float b, float amount) {
+    float shifted = b + amount;
+    return clamp(shifted, 0.0, 1.0);
+}
+
+// Positive amounts move saturation toward 1, negative amounts toward 0,
+// both proportionally to the remaining headroom.
+float adjustSaturation(float s, float amount) {
+    if (amount > 0.0) {
+        return s + (1.0 - s) * amount;
+    }
+    return s + s * amount;
+}
+
+// Colorize: keeps only the luminance of the input and rebuilds the color
+// from the given hue and saturation (the sign of sat is ignored).
+vec3 colorize(vec3 rgb, float hue, float sat, float light) {
+    float lum = dot(rgb, vec3(0.299, 0.587, 0.114));
+    float adjusted = adjustLightness(lum, light);
+
+    float h = fract(hue);
+    float s = clamp(abs(sat), 0.0, 1.0);
+    float l = clamp(adjusted, 0.0, 1.0);
+    return hsl2rgb(vec3(h, s, l));
+}
+
+//=============================================================================
+// Main
+//=============================================================================
+
+// Hue / saturation / lightness adjustment, either for all hues, for one of
+// six color ranges, or as a colorize effect. Alpha is passed through.
+void main() {
+    vec4 src = texture(u_image0, v_texCoord);
+
+    float hueShift    = u_float0 / 360.0;   // -180..180 -> -0.5..0.5
+    float satAmount   = u_float1 / 100.0;   // -100..100 -> -1..1
+    float lightAmount = u_float2 / 100.0;   // -100..100 -> -1..1
+    float overlap     = u_float3 / 100.0;   // 0..100 -> 0..1
+
+    // Colorize replaces the color outright and ignores the color space
+    if (u_int0 == MODE_COLORIZE) {
+        vec3 tinted = colorize(src.rgb, hueShift, satAmount, lightAmount);
+        fragColor = vec4(tinted, src.a);
+        return;
+    }
+
+    bool useHsl = (u_int1 == COLORSPACE_HSL);
+
+    vec3 hsx;
+    if (useHsl) {
+        hsx = rgb2hsl(src.rgb);
+    } else {
+        hsx = rgb2hsb(src.rgb);
+    }
+
+    float weight = getModeWeight(hsx.x, u_int0, overlap);
+
+    // Unsaturated pixels have no meaningful hue, so range modes skip them
+    if (u_int0 != MODE_MASTER && hsx.y < EPSILON) {
+        weight = 0.0;
+    }
+
+    vec3 result = src.rgb;
+
+    if (weight > EPSILON) {
+        float h = fract(hsx.x + hueShift * weight);
+        float s = clamp(adjustSaturation(hsx.y, satAmount * weight), 0.0, 1.0);
+
+        if (useHsl) {
+            float l = clamp(adjustLightness(hsx.z, lightAmount * weight), 0.0, 1.0);
+            result = hsl2rgb(vec3(h, s, l));
+        } else {
+            float b = clamp(adjustBrightness(hsx.z, lightAmount * weight), 0.0, 1.0);
+            result = hsb2rgb(vec3(h, s, b));
+        }
+    }
+
+    fragColor = vec4(result, src.a);
+}
--- a/blueprints/.glsl/Image_Blur_1.frag
+++ b/blueprints/.glsl/Image_Blur_1.frag
@@ -0,0 +1,111 @@
+#version 300 es
+#pragma passes 2
+precision highp float;
+
+// Blur type constants
+const int BLUR_GAUSSIAN = 0;
+const int BLUR_BOX = 1;
+const int BLUR_RADIAL = 2;
+
+// Radial blur config
+const int RADIAL_SAMPLES = 12;
+const float RADIAL_STRENGTH = 0.0003;
+
+uniform sampler2D u_image0;
+uniform vec2 u_resolution;
+uniform int u_int0;      // Blur type (BLUR_GAUSSIAN, BLUR_BOX, BLUR_RADIAL)
+uniform float u_float0;  // Blur radius/amount
+uniform int u_pass;      // Pass index (0 = horizontal, 1 = vertical)
+
+in vec2 v_texCoord;
+layout(location = 0) out vec4 fragColor0;
+
+// Unnormalized Gaussian weight exp(-x^2 / (2 sigma^2)); equals 1 at x = 0
+float gaussian(float x, float sigma) {
+    float twoSigma2 = 2.0 * sigma * sigma;
+    return exp(-(x * x) / twoSigma2);
+}
+
+// Blur entry point, run once per pass (u_pass).
+// - Radial: all work happens in pass 0; later passes copy the input through.
+// - Gaussian / Box: separable blur, horizontal in pass 0, vertical otherwise.
+// Negative radii are treated as 0; a radius of 0 is a pass-through for the
+// separable modes.
+void main() {
+    vec2 texelSize = 1.0 / u_resolution;
+    float radius = max(u_float0, 0.0);
+
+    // Radial (angular) blur - single pass, doesn't use separable
+    if (u_int0 == BLUR_RADIAL) {
+        // Only execute on first pass
+        if (u_pass > 0) {
+            fragColor0 = texture(u_image0, v_texCoord);
+            return;
+        }
+
+        vec2 center = vec2(0.5);
+        vec2 dir = v_texCoord - center;
+        float dist = length(dir);
+
+        // At the center there is no direction to rotate; copy the pixel
+        if (dist < 1e-4) {
+            fragColor0 = texture(u_image0, v_texCoord);
+            return;
+        }
+
+        vec4 sum = vec4(0.0);
+        float totalWeight = 0.0;
+        float angleStep = radius * RADIAL_STRENGTH;
+
+        dir /= dist;
+
+        float cosStep = cos(angleStep);
+        float sinStep = sin(angleStep);
+
+        // Start at the most negative angle, then rotate forward one step per sample
+        float negAngle = -float(RADIAL_SAMPLES) * angleStep;
+        vec2 rotDir = vec2(
+            dir.x * cos(negAngle) - dir.y * sin(negAngle),
+            dir.x * sin(negAngle) + dir.y * cos(negAngle)
+        );
+
+        for (int i = -RADIAL_SAMPLES; i <= RADIAL_SAMPLES; i++) {
+            vec2 uv = center + rotDir * dist;
+            // Triangular weights: 1 at the middle sample, 0 at both ends
+            float w = 1.0 - abs(float(i)) / float(RADIAL_SAMPLES);
+            sum += texture(u_image0, uv) * w;
+            totalWeight += w;
+
+            rotDir = vec2(
+                rotDir.x * cosStep - rotDir.y * sinStep,
+                rotDir.x * sinStep + rotDir.y * cosStep
+            );
+        }
+
+        fragColor0 = sum / max(totalWeight, 0.001);
+        return;
+    }
+
+    // Separable Gaussian / Box blur
+    int samples = int(ceil(radius));
+
+    if (samples == 0) {
+        fragColor0 = texture(u_image0, v_texCoord);
+        return;
+    }
+
+    // Direction: pass 0 = horizontal, pass 1 = vertical
+    vec2 dir = (u_pass == 0) ? vec2(1.0, 0.0) : vec2(0.0, 1.0);
+
+    vec4 color = vec4(0.0);
+    float totalWeight = 0.0;
+    // sigma is positive here because radius == 0 returned above
+    float sigma = radius / 2.0;
+
+    for (int i = -samples; i <= samples; i++) {
+        vec2 offset = dir * float(i) * texelSize;
+        vec4 sample_color = texture(u_image0, v_texCoord + offset);
+
+        float weight;
+        if (u_int0 == BLUR_GAUSSIAN) {
+            weight = gaussian(float(i), sigma);
+        } else {
+            // BLUR_BOX
+            weight = 1.0;
+        }
+
+        color += sample_color * weight;
+        totalWeight += weight;
+    }
+
+    // The center tap always has weight 1, so totalWeight is never zero
+    fragColor0 = color / totalWeight;
+}
--- a/blueprints/.glsl/Image_Channels_23.frag
+++ b/blueprints/.glsl/Image_Channels_23.frag
@@ -0,0 +1,19 @@
+#version 300 es
+precision highp float;
+
+uniform sampler2D u_image0;
+
+in vec2 v_texCoord;
+layout(location = 0) out vec4 fragColor0;
+layout(location = 1) out vec4 fragColor1;
+layout(location = 2) out vec4 fragColor2;
+layout(location = 3) out vec4 fragColor3;
+
+// Splits the image into four opaque grayscale images, one per channel,
+// written to render targets 0..3 (R, G, B, A).
+void main() {
+  vec4 src = texture(u_image0, v_texCoord);
+
+  fragColor0 = vec4(src.r, src.r, src.r, 1.0);  // red
+  fragColor1 = vec4(src.g, src.g, src.g, 1.0);  // green
+  fragColor2 = vec4(src.b, src.b, src.b, 1.0);  // blue
+  fragColor3 = vec4(src.a, src.a, src.a, 1.0);  // alpha
+}
--- a/blueprints/.glsl/Image_Levels_1.frag
+++ b/blueprints/.glsl/Image_Levels_1.frag
@@ -0,0 +1,71 @@
+#version 300 es
+precision highp float;
+
+// Levels Adjustment
+// u_int0:   channel      (0=RGB, 1=R, 2=G, 3=B)         default: 0
+// u_float0: input black  (0-255)                        default: 0
+// u_float1: input white  (0-255)                        default: 255
+// u_float2: gamma        (0.01-9.99)                    default: 1.0
+// u_float3: output black (0-255)                        default: 0
+// u_float4: output white (0-255)                        default: 255
+
+uniform sampler2D u_image0;
+uniform int u_int0;
+uniform float u_float0;
+uniform float u_float1;
+uniform float u_float2;
+uniform float u_float3;
+uniform float u_float4;
+
+in vec2 v_texCoord;
+out vec4 fragColor;
+
+// Levels remap applied to all three channels.
+//   color             - input color
+//   inBlack, inWhite  - input range; it is stretched to 0..1 and clamped
+//   gamma             - midtone exponent; values below 0.01 are raised to 0.01
+//   outBlack, outWhite- output range the 0..1 result is mapped onto
+// Returns the remapped color.
+vec3 applyLevels(vec3 color, float inBlack, float inWhite, float gamma, float outBlack, float outWhite) {
+    // Avoid dividing by zero when inWhite <= inBlack
+    float inRange = max(inWhite - inBlack, 0.0001);
+    vec3 result = clamp((color - inBlack) / inRange, 0.0, 1.0);
+    // gamma <= 0 would divide by zero or give pow() an undefined exponent
+    float safeGamma = max(gamma, 0.01);
+    result = pow(result, vec3(1.0 / safeGamma));
+    result = mix(vec3(outBlack), vec3(outWhite), result);
+    return result;
+}
+
+// Levels remap applied to a single channel value.
+//   value             - input channel value
+//   inBlack, inWhite  - input range; it is stretched to 0..1 and clamped
+//   gamma             - midtone exponent; values below 0.01 are raised to 0.01
+//   outBlack, outWhite- output range the 0..1 result is mapped onto
+// Returns the remapped value.
+float applySingleChannel(float value, float inBlack, float inWhite, float gamma, float outBlack, float outWhite) {
+    // Avoid dividing by zero when inWhite <= inBlack
+    float inRange = max(inWhite - inBlack, 0.0001);
+    float result = clamp((value - inBlack) / inRange, 0.0, 1.0);
+    // gamma <= 0 would divide by zero or give pow() an undefined exponent
+    float safeGamma = max(gamma, 0.01);
+    result = pow(result, 1.0 / safeGamma);
+    result = mix(outBlack, outWhite, result);
+    return result;
+}
+
+// Applies the levels remap to RGB (channel 0) or to one channel (1=R, 2=G,
+// 3=B). Any other channel value leaves the color unchanged. Alpha is kept.
+void main() {
+    vec4 src = texture(u_image0, v_texCoord);
+
+    // 0..255 sliders -> 0..1; gamma is used as given
+    float inBlack  = u_float0 / 255.0;
+    float inWhite  = u_float1 / 255.0;
+    float gamma    = u_float2;
+    float outBlack = u_float3 / 255.0;
+    float outWhite = u_float4 / 255.0;
+
+    // Start from the untouched color; only the selected channel(s) change
+    vec3 result = src.rgb;
+
+    if (u_int0 == 0) {
+        result = applyLevels(src.rgb, inBlack, inWhite, gamma, outBlack, outWhite);
+    } else if (u_int0 == 1) {
+        result.r = applySingleChannel(src.r, inBlack, inWhite, gamma, outBlack, outWhite);
+    } else if (u_int0 == 2) {
+        result.g = applySingleChannel(src.g, inBlack, inWhite, gamma, outBlack, outWhite);
+    } else if (u_int0 == 3) {
+        result.b = applySingleChannel(src.b, inBlack, inWhite, gamma, outBlack, outWhite);
+    }
+
+    fragColor = vec4(result, src.a);
+}
--- a/blueprints/.glsl/README.md
+++ b/blueprints/.glsl/README.md
@@ -0,0 +1,28 @@
+# GLSL Shader Sources
+
+This folder contains the GLSL fragment shaders extracted from blueprint JSON files for easier editing and version control.
+
+## File Naming Convention
+
+`{Blueprint_Name}_{node_id}.frag`
+
+- **Blueprint_Name**: The JSON filename (without the `.json` extension) with every character other than letters, digits, `_` and `-` replaced by an underscore
+- **node_id**: The GLSLShader node ID within the subgraph
+
+## Usage
+
+```bash
+# Extract shaders from blueprint JSONs to this folder
+python update_blueprints.py extract
+
+# Patch edited shaders back into blueprint JSONs
+python update_blueprints.py patch
+```
+
+## Workflow
+
+1. Run `extract` to pull current shaders from JSONs
+2. Edit `.frag` files
+3. Run `patch` to update the blueprint JSONs
+4. Test
+5. Commit both `.frag` files and updated JSONs
--- a/blueprints/.glsl/Sharpen_23.frag
+++ b/blueprints/.glsl/Sharpen_23.frag
@@ -0,0 +1,28 @@
+#version 300 es
+precision highp float;
+
+uniform sampler2D u_image0;
+uniform vec2 u_resolution;
+uniform float u_float0;  // strength [0.0 – 2.0] typical: 0.3–1.0
+
+in vec2 v_texCoord;
+layout(location = 0) out vec4 fragColor0;
+
+void main() {
+    vec2 texel = 1.0 / u_resolution;
+    
+    // Sample center and neighbors
+    vec4 center = texture(u_image0, v_texCoord);
+    vec4 top    = texture(u_image0, v_texCoord + vec2( 0.0, -texel.y));
+    vec4 bottom = texture(u_image0, v_texCoord + vec2( 0.0,  texel.y));
+    vec4 left   = texture(u_image0, v_texCoord + vec2(-texel.x,  0.0));
+    vec4 right  = texture(u_image0, v_texCoord + vec2( texel.x,  0.0));
+    
+    // Edge enhancement (Laplacian)
+    vec4 edges = center * 4.0 - top - bottom - left - right;
+    
+    // Add edges back scaled by strength
+    vec4 sharpened = center + edges * u_float0;
+    
+    fragColor0 = vec4(clamp(sharpened.rgb, 0.0, 1.0), center.a);
+}
--- a/blueprints/.glsl/Unsharp_Mask_26.frag
+++ b/blueprints/.glsl/Unsharp_Mask_26.frag
@@ -0,0 +1,61 @@
+#version 300 es
+precision highp float;
+
+uniform sampler2D u_image0;
+uniform vec2 u_resolution;
+uniform float u_float0;  // amount    [0.0 - 3.0]  typical: 0.5-1.5
+uniform float u_float1;  // radius    [0.5 - 10.0] blur radius in pixels
+uniform float u_float2;  // threshold [0.0 - 0.1]  min difference to sharpen
+
+in vec2 v_texCoord;
+layout(location = 0) out vec4 fragColor0;
+
+float gaussian(float x, float sigma) {
+    return exp(-(x * x) / (2.0 * sigma * sigma));
+}
+
+float getLuminance(vec3 color) {
+    return dot(color, vec3(0.2126, 0.7152, 0.0722));
+}
+
+void main() {
+    vec2 texel = 1.0 / u_resolution;
+    float radius = max(u_float1, 0.5);  // radius >= 0.5 keeps at least a 3x3 kernel
+    float amount = u_float0;
+    float threshold = u_float2;
+
+    vec4 original = texture(u_image0, v_texCoord);
+
+    // Gaussian blur for the "unsharp" mask
+    int samples = int(ceil(radius));
+    float sigma = radius / 2.0;
+
+    vec4 blurred = vec4(0.0);
+    float totalWeight = 0.0;
+
+    for (int x = -samples; x <= samples; x++) {
+        for (int y = -samples; y <= samples; y++) {
+            vec2 offset = vec2(float(x), float(y)) * texel;
+            vec4 sample_color = texture(u_image0, v_texCoord + offset);
+
+            float dist = length(vec2(float(x), float(y)));
+            float weight = gaussian(dist, sigma);
+            blurred += sample_color * weight;
+            totalWeight += weight;
+        }
+    }
+    blurred /= totalWeight;
+
+    // Unsharp mask = original - blurred
+    vec3 mask = original.rgb - blurred.rgb;
+
+    // Luminance-based threshold with smooth falloff. smoothstep() is undefined when edge0 >= edge1, so threshold <= 0 disables thresholding.
+    float lumaDelta = abs(getLuminance(original.rgb) - getLuminance(blurred.rgb));
+    float thresholdScale = threshold > 0.0 ? smoothstep(0.0, threshold, lumaDelta) : 1.0;
+    mask *= thresholdScale;
+
+    // Sharpen: original + mask * amount
+    vec3 sharpened = original.rgb + mask * amount;
+
+    fragColor0 = vec4(clamp(sharpened, 0.0, 1.0), original.a);
+}
--- a/blueprints/.glsl/update_blueprints.py
+++ b/blueprints/.glsl/update_blueprints.py
@@ -0,0 +1,159 @@
+#!/usr/bin/env python3
+"""
+Shader Blueprint Updater
+
+Syncs GLSL shader files between this folder and blueprint JSON files.
+
+File naming convention:
+    {Blueprint_Name}_{node_id}.frag   (JSON filename stem with every character other than word characters and "-" replaced by "_")
+
+Usage:
+    python update_blueprints.py extract   # Extract shaders from JSONs to here
+    python update_blueprints.py patch     # Patch shaders back into JSONs
+    python update_blueprints.py           # Same as patch (default)
+"""
+
+import json
+import logging
+import sys
+import re
+from pathlib import Path
+
+logging.basicConfig(level=logging.INFO, format='%(message)s')
+logger = logging.getLogger(__name__)
+
+GLSL_DIR = Path(__file__).parent
+BLUEPRINTS_DIR = GLSL_DIR.parent
+
+
+def get_blueprint_files():
+    """Return the paths of the *.json files directly inside BLUEPRINTS_DIR, sorted."""
+    return sorted(path for path in BLUEPRINTS_DIR.glob("*.json"))
+
+
+def sanitize_filename(name):
+    """Return name with every character other than a word character or '-' replaced by '_'."""
+    return re.compile(r'[^\w\-]').sub('_', name)
+
+
+def extract_shaders():
+    """Write the first '#version' string widget of each GLSLShader subgraph node to a .frag file in GLSL_DIR."""
+    extracted = 0
+    for json_path in get_blueprint_files():
+        blueprint_name = json_path.stem
+        # Always use UTF-8: shaders may contain non-ASCII text and the platform default encoding varies.
+        try:
+            with open(json_path, 'r', encoding='utf-8') as f:
+                data = json.load(f)
+        except (json.JSONDecodeError, UnicodeDecodeError, IOError) as e:
+            logger.warning("Skipping %s: %s", json_path.name, e)
+            continue
+
+        # Find GLSLShader nodes in subgraphs
+        for subgraph in data.get('definitions', {}).get('subgraphs', []):
+            for node in subgraph.get('nodes', []):
+                if node.get('type') == 'GLSLShader':
+                    node_id = node.get('id')
+                    widgets = node.get('widgets_values', [])
+
+                    # Find shader code (first string that looks like GLSL)
+                    for widget in widgets:
+                        if isinstance(widget, str) and widget.startswith('#version'):
+                            safe_name = sanitize_filename(blueprint_name)
+                            frag_name = f"{safe_name}_{node_id}.frag"
+                            frag_path = GLSL_DIR / frag_name
+
+                            with open(frag_path, 'w', encoding='utf-8') as f:
+                                f.write(widget)
+
+                            logger.info("  Extracted: %s", frag_name)
+                            extracted += 1
+                            break  # only the first matching widget of a node is extracted
+
+    logger.info("\nExtracted %d shader(s)", extracted)
+
+
+def patch_shaders():
+    """Patch shaders from this folder back into blueprint JSONs (all file I/O is UTF-8)."""
+    # Build lookup: blueprint_name -> [(node_id, shader_code), ...]
+    shader_updates = {}
+
+    for frag_path in sorted(GLSL_DIR.glob("*.frag")):
+        # Parse filename: {blueprint_name}_{node_id}.frag
+        parts = frag_path.stem.rsplit('_', 1)
+        if len(parts) != 2:
+            logger.warning("Skipping %s: invalid filename format", frag_path.name)
+            continue
+
+        blueprint_name, node_id_str = parts
+
+        try:
+            node_id = int(node_id_str)
+        except ValueError:
+            logger.warning("Skipping %s: invalid node_id", frag_path.name)
+            continue
+
+        # Explicit UTF-8: shaders may hold non-ASCII text and the platform default encoding varies.
+        with open(frag_path, 'r', encoding='utf-8') as f:
+            shader_code = f.read()
+
+        shader_updates.setdefault(blueprint_name, []).append((node_id, shader_code))
+
+    # Apply updates to JSON files
+    patched = 0
+    for json_path in get_blueprint_files():
+        blueprint_name = sanitize_filename(json_path.stem)
+
+        if blueprint_name not in shader_updates:
+            continue
+
+        try:
+            with open(json_path, 'r', encoding='utf-8') as f:
+                data = json.load(f)
+        except (json.JSONDecodeError, UnicodeDecodeError, IOError) as e:
+            logger.error("Error reading %s: %s", json_path.name, e)
+            continue
+
+        modified = False
+        for node_id, shader_code in shader_updates[blueprint_name]:
+            # Find the node and update
+            for subgraph in data.get('definitions', {}).get('subgraphs', []):
+                for node in subgraph.get('nodes', []):
+                    if node.get('id') == node_id and node.get('type') == 'GLSLShader':
+                        widgets = node.get('widgets_values', [])
+                        # widgets[0] is treated as the shader source; it is rewritten only when it changed.
+                        if len(widgets) > 0 and widgets[0] != shader_code:
+                            widgets[0] = shader_code
+                            modified = True
+                            logger.info("  Patched: %s (node %d)", json_path.name, node_id)
+                            patched += 1
+
+        if modified:
+            with open(json_path, 'w', encoding='utf-8') as f:
+                json.dump(data, f)
+
+    if patched == 0:
+        logger.info("No changes to apply.")
+    else:
+        logger.info("\nPatched %d shader(s)", patched)
+
+
+def main():
+    """Dispatch on the first CLI argument (case-insensitive); no argument means "patch"."""
+    command = sys.argv[1].lower() if len(sys.argv) >= 2 else "patch"
+    if command in ("patch", "update", "apply"):
+        logger.info("Patching shaders into blueprints...")
+        patch_shaders()
+        return
+    if command == "extract":
+        logger.info("Extracting shaders from blueprints...")
+        extract_shaders()
+        return
+
+    # Unknown command: log the module docstring as usage text and exit with status 1.
+    logger.info(__doc__)
+    sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
--- a/blueprints/Brightness
+++ b/blueprints/Brightness
--- a/blueprints/Chromatic
+++ b/blueprints/Chromatic
--- a/blueprints/Color
+++ b/blueprints/Color
--- a/blueprints/Edge-Preserving
+++ b/blueprints/Edge-Preserving
--- a/blueprints/Film
+++ b/blueprints/Film
--- a/blueprints/Glow.json
+++ b/blueprints/Glow.json
--- a/Saturation.json
+++ b/Saturation.json
--- a/blueprints/Image
+++ b/blueprints/Image
--- a/blueprints/Image
+++ b/blueprints/Image
@@ -0,0 +1 @@
+{"revision": 0, "last_node_id": 29, "last_link_id": 0, "nodes": [{"id": 29, "type": "4c9d6ea4-b912-40e5-8766-6793a9758c53", "pos": [1970, -230], "size": [180, 86], "flags": {}, "order": 5, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "R", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}, {"label": "G", "localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": []}, {"label": "B", "localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": []}, {"label": "A", "localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": []}], "title": "Image Channels", "properties": {"proxyWidgets": []}, "widgets_values": []}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "4c9d6ea4-b912-40e5-8766-6793a9758c53", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 28, "lastLinkId": 39, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Image Channels", "inputNode": {"id": -10, "bounding": [1820, -185, 120, 60]}, "outputNode": {"id": -20, "bounding": [2460, -215, 120, 120]}, "inputs": [{"id": "3522932b-2d86-4a1f-a02a-cb29f3a9d7fe", "name": "images.image0", "type": "IMAGE", "linkIds": [39], "localized_name": "images.image0", "label": "image", "pos": [1920, -165]}], "outputs": [{"id": "605cb9c3-b065-4d9b-81d2-3ec331889b2b", "name": "IMAGE0", "type": "IMAGE", "linkIds": [26], "localized_name": "IMAGE0", "label": "R", "pos": [2480, -195]}, {"id": "fb44a77e-0522-43e9-9527-82e7465b3596", "name": "IMAGE1", "type": "IMAGE", "linkIds": [27], "localized_name": "IMAGE1", "label": "G", "pos": [2480, -175]}, {"id": "81460ee6-0131-402a-874f-6bf3001fc4ff", "name": "IMAGE2", "type": "IMAGE", "linkIds": [28], "localized_name": "IMAGE2", "label": "B", "pos": [2480, -155]}, {"id": "ae690246-80d4-4951-b1d9-9306d8a77417", "name": "IMAGE3", "type": "IMAGE", "linkIds": [29], "localized_name": "IMAGE3", 
"label": "A", "pos": [2480, -135]}], "widgets": [], "nodes": [{"id": 23, "type": "GLSLShader", "pos": [2000, -330], "size": [400, 172], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 39}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}], "outputs": [{"label": "R", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [26]}, {"label": "G", "localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": [27]}, {"label": "B", "localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": [28]}, {"label": "A", "localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": [29]}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\nlayout(location = 1) out vec4 fragColor1;\nlayout(location = 2) out vec4 fragColor2;\nlayout(location = 3) out vec4 fragColor3;\n\nvoid main() {\n  vec4 color = texture(u_image0, v_texCoord);\n  // Output each channel as grayscale to separate render targets\n  fragColor0 = vec4(vec3(color.r), 1.0);  // Red channel\n  fragColor1 = vec4(vec3(color.g), 1.0);  // Green channel\n  fragColor2 = vec4(vec3(color.b), 1.0);  // Blue channel\n  fragColor3 = vec4(vec3(color.a), 1.0);  // Alpha channel\n}\n", "from_input"]}], "groups": [], "links": [{"id": 39, "origin_id": -10, "origin_slot": 0, "target_id": 23, "target_slot": 0, "type": "IMAGE"}, {"id": 26, "origin_id": 23, "origin_slot": 0, "target_id": -20, 
"target_slot": 0, "type": "IMAGE"}, {"id": 27, "origin_id": 23, "origin_slot": 1, "target_id": -20, "target_slot": 1, "type": "IMAGE"}, {"id": 28, "origin_id": 23, "origin_slot": 2, "target_id": -20, "target_slot": 2, "type": "IMAGE"}, {"id": 29, "origin_id": 23, "origin_slot": 3, "target_id": -20, "target_slot": 3, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}}]}}
--- a/blueprints/Image
+++ b/blueprints/Image
--- a/blueprints/Sharpen.json
+++ b/blueprints/Sharpen.json
@@ -0,0 +1 @@
+{"revision":0,"last_node_id":25,"last_link_id":0,"nodes":[{"id":25,"type":"621ba4e2-22a8-482d-a369-023753198b7b","pos":[4610,-790],"size":[230,58],"flags":{},"order":4,"mode":0,"inputs":[{"label":"image","localized_name":"images.image0","name":"images.image0","type":"IMAGE","link":null}],"outputs":[{"label":"IMAGE","localized_name":"IMAGE0","name":"IMAGE0","type":"IMAGE","links":[]}],"title":"Sharpen","properties":{"proxyWidgets":[["24","value"]]},"widgets_values":[]}],"links":[],"version":0.4,"definitions":{"subgraphs":[{"id":"621ba4e2-22a8-482d-a369-023753198b7b","version":1,"state":{"lastGroupId":0,"lastNodeId":24,"lastLinkId":36,"lastRerouteId":0},"revision":0,"config":{},"name":"Sharpen","inputNode":{"id":-10,"bounding":[4090,-825,120,60]},"outputNode":{"id":-20,"bounding":[5150,-825,120,60]},"inputs":[{"id":"37011fb7-14b7-4e0e-b1a0-6a02e8da1fd7","name":"images.image0","type":"IMAGE","linkIds":[34],"localized_name":"images.image0","label":"image","pos":[4190,-805]}],"outputs":[{"id":"e9182b3f-635c-4cd4-a152-4b4be17ae4b9","name":"IMAGE0","type":"IMAGE","linkIds":[35],"localized_name":"IMAGE0","label":"IMAGE","pos":[5170,-805]}],"widgets":[],"nodes":[{"id":24,"type":"PrimitiveFloat","pos":[4280,-1240],"size":[270,58],"flags":{},"order":0,"mode":0,"inputs":[{"label":"strength","localized_name":"value","name":"value","type":"FLOAT","widget":{"name":"value"},"link":null}],"outputs":[{"localized_name":"FLOAT","name":"FLOAT","type":"FLOAT","links":[36]}],"properties":{"Node name for 
S&R":"PrimitiveFloat","min":0,"max":3,"precision":2,"step":0.05},"widgets_values":[0.5]},{"id":23,"type":"GLSLShader","pos":[4570,-1240],"size":[370,192],"flags":{},"order":1,"mode":0,"inputs":[{"label":"image0","localized_name":"images.image0","name":"images.image0","type":"IMAGE","link":34},{"label":"image1","localized_name":"images.image1","name":"images.image1","shape":7,"type":"IMAGE","link":null},{"label":"u_float0","localized_name":"floats.u_float0","name":"floats.u_float0","shape":7,"type":"FLOAT","link":36},{"label":"u_float1","localized_name":"floats.u_float1","name":"floats.u_float1","shape":7,"type":"FLOAT","link":null},{"label":"u_int0","localized_name":"ints.u_int0","name":"ints.u_int0","shape":7,"type":"INT","link":null},{"localized_name":"fragment_shader","name":"fragment_shader","type":"STRING","widget":{"name":"fragment_shader"},"link":null},{"localized_name":"size_mode","name":"size_mode","type":"COMFY_DYNAMICCOMBO_V3","widget":{"name":"size_mode"},"link":null}],"outputs":[{"localized_name":"IMAGE0","name":"IMAGE0","type":"IMAGE","links":[35]},{"localized_name":"IMAGE1","name":"IMAGE1","type":"IMAGE","links":null},{"localized_name":"IMAGE2","name":"IMAGE2","type":"IMAGE","links":null},{"localized_name":"IMAGE3","name":"IMAGE3","type":"IMAGE","links":null}],"properties":{"Node name for S&R":"GLSLShader"},"widgets_values":["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform float u_float0;  // strength [0.0 – 2.0] typical: 0.3–1.0\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nvoid main() {\n    vec2 texel = 1.0 / u_resolution;\n    \n    // Sample center and neighbors\n    vec4 center = texture(u_image0, v_texCoord);\n    vec4 top    = texture(u_image0, v_texCoord + vec2( 0.0, -texel.y));\n    vec4 bottom = texture(u_image0, v_texCoord + vec2( 0.0,  texel.y));\n    vec4 left   = texture(u_image0, v_texCoord + vec2(-texel.x,  0.0));\n    vec4 right  = texture(u_image0, 
v_texCoord + vec2( texel.x,  0.0));\n    \n    // Edge enhancement (Laplacian)\n    vec4 edges = center * 4.0 - top - bottom - left - right;\n    \n    // Add edges back scaled by strength\n    vec4 sharpened = center + edges * u_float0;\n    \n    fragColor0 = vec4(clamp(sharpened.rgb, 0.0, 1.0), center.a);\n}","from_input"]}],"groups":[],"links":[{"id":36,"origin_id":24,"origin_slot":0,"target_id":23,"target_slot":2,"type":"FLOAT"},{"id":34,"origin_id":-10,"origin_slot":0,"target_id":23,"target_slot":0,"type":"IMAGE"},{"id":35,"origin_id":23,"origin_slot":0,"target_id":-20,"target_slot":0,"type":"IMAGE"}],"extra":{"workflowRendererVersion":"LG"}}]}}
--- a/blueprints/Unsharp
+++ b/blueprints/Unsharp
--- a/comfy/checkpoint_pickle.py
+++ b/comfy/checkpoint_pickle.py
@@ -1,13 +0,0 @@
-import pickle
-
-load = pickle.load
-
-class Empty:
-    pass
-
-class Unpickler(pickle.Unpickler):
-    def find_class(self, module, name):
-        #TODO: safe unpickle
-        if module.startswith("pytorch_lightning"):
-            return Empty
-        return super().find_class(module, name)
--- a/comfy/controlnet.py
+++ b/comfy/controlnet.py
@@ -297,6 +297,30 @@ class ControlNet(ControlBase):
        self.model_sampling_current = None
        super().cleanup()

+
+class QwenFunControlNet(ControlNet):
+    def get_control(self, x_noisy, t, cond, batched_number, transformer_options):
+        # Fun checkpoints are more sensitive to high strengths in the generic
+        # ControlNet merge path, so the strength is passed through sqrt(): 1.0 stays
+        # unchanged, >1 grows more gently, 0..1 is raised, negatives are clamped to 0.
+        original_strength = self.strength
+        self.strength = math.sqrt(max(self.strength, 0.0))
+        try:
+            return super().get_control(x_noisy, t, cond, batched_number, transformer_options)
+        finally:
+            self.strength = original_strength  # restore the configured strength even if get_control raises
+
+    def pre_run(self, model, percent_to_timestep_function):
+        super().pre_run(model, percent_to_timestep_function)
+        self.set_extra_arg("base_model", model.diffusion_model)  # stores the base diffusion model under "base_model" (consumer not visible here)
+
+    def copy(self):
+        c = QwenFunControlNet(None, global_average_pooling=self.global_average_pooling, load_device=self.load_device, manual_cast_dtype=self.manual_cast_dtype)
+        c.control_model = self.control_model  # the copy references the same control model objects
+        c.control_model_wrapped = self.control_model_wrapped
+        self.copy_to(c)
+        return c
+
 class ControlLoraOps:
    class Linear(torch.nn.Module, comfy.ops.CastWeightBiasOp):
        def __init__(self, in_features: int, out_features: int, bias: bool = True,
@@ -560,6 +584,7 @@ def load_controlnet_hunyuandit(controlnet_data, model_options={}):
 def load_controlnet_flux_xlabs_mistoline(sd, mistoline=False, model_options={}):
    model_config, operations, load_device, unet_dtype, manual_cast_dtype, offload_device = controlnet_config(sd, model_options=model_options)
    control_model = comfy.ldm.flux.controlnet.ControlNetFlux(mistoline=mistoline, operations=operations, device=offload_device, dtype=unet_dtype, **model_config.unet_config)
+    sd = model_config.process_unet_state_dict(sd)
    control_model = controlnet_load_state_dict(control_model, sd)
    extra_conds = ['y', 'guidance']
    control = ControlNet(control_model, load_device=load_device, manual_cast_dtype=manual_cast_dtype, extra_conds=extra_conds)
@@ -605,6 +630,53 @@ def load_controlnet_qwen_instantx(sd, model_options={}):
    control = ControlNet(control_model, compression_ratio=1, latent_format=latent_format, concat_mask=concat_mask, load_device=load_device, manual_cast_dtype=manual_cast_dtype, extra_conds=extra_conds)
    return control

+
+def load_controlnet_qwen_fun(sd, model_options={}):
+    """Build a QwenFunControlNet from state dict `sd`, inferring layer sizes from its tensor shapes."""
+    load_device = comfy.model_management.get_torch_device()
+    weight_dtype = comfy.utils.weight_dtype(sd)
+    unet_dtype = model_options.get("dtype", weight_dtype)  # an explicit "dtype" option overrides the checkpoint's weight dtype
+    manual_cast_dtype = comfy.model_management.unet_manual_cast(unet_dtype, load_device)
+
+    operations = model_options.get("custom_operations", None)
+    if operations is None:
+        operations = comfy.ops.pick_operations(unet_dtype, manual_cast_dtype, disable_fast_fp8=True)
+
+    in_features = sd["control_img_in.weight"].shape[1]  # second dim of control_img_in.weight
+    inner_dim = sd["control_img_in.weight"].shape[0]  # first dim of control_img_in.weight
+
+    block_weight = sd["control_blocks.0.attn.to_q.weight"]
+    attention_head_dim = sd["control_blocks.0.attn.norm_q.weight"].shape[0]
+    num_attention_heads = max(1, block_weight.shape[0] // max(1, attention_head_dim))  # to_q rows / head dim, never below 1
+
+    model = comfy.ldm.qwen_image.controlnet.QwenImageFunControlNetModel(
+        control_in_features=in_features,
+        inner_dim=inner_dim,
+        num_attention_heads=num_attention_heads,
+        attention_head_dim=attention_head_dim,
+        num_control_blocks=5,  # the next three values are fixed constants, not read from the checkpoint
+        main_model_double=60,
+        injection_layers=(0, 12, 24, 36, 48),
+        operations=operations,
+        device=comfy.model_management.unet_offload_device(),
+        dtype=unet_dtype,
+    )
+    model = controlnet_load_state_dict(model, sd)
+
+    latent_format = comfy.latent_formats.Wan21()
+    control = QwenFunControlNet(
+        model,
+        compression_ratio=1,
+        latent_format=latent_format,
+        # Fun checkpoints already expect their own 33-channel context handling; enabling generic concat_mask
+        # injects an extra mask channel at apply-time and breaks the intended fallback packing path.
+        concat_mask=False,
+        load_device=load_device,
+        manual_cast_dtype=manual_cast_dtype,
+        extra_conds=[],
+    )
+    return control
+
 def convert_mistoline(sd):
    return comfy.utils.state_dict_prefix_replace(sd, {"single_controlnet_blocks.": "controlnet_single_blocks."})

@@ -682,6 +754,8 @@ def load_controlnet_state_dict(state_dict, model=None, model_options={}):
            return load_controlnet_qwen_instantx(controlnet_data, model_options=model_options)
        elif "controlnet_x_embedder.weight" in controlnet_data:
            return load_controlnet_flux_instantx(controlnet_data, model_options=model_options)
+    elif "control_blocks.0.after_proj.weight" in controlnet_data and "control_img_in.weight" in controlnet_data:
+        return load_controlnet_qwen_fun(controlnet_data, model_options=model_options)

    elif "controlnet_blocks.0.linear.weight" in controlnet_data: #mistoline flux
        return load_controlnet_flux_xlabs_mistoline(convert_mistoline(controlnet_data), mistoline=True, model_options=model_options)
--- a/comfy/k_diffusion/sampling.py
+++ b/comfy/k_diffusion/sampling.py
@@ -1,12 +1,11 @@
 import math
-import time
 from functools import partial

 from scipy import integrate
 import torch
 from torch import nn
 import torchsde
-from tqdm.auto import trange as trange_, tqdm
+from tqdm.auto import tqdm

 from . import utils
 from . import deis
@@ -15,34 +14,7 @@ import comfy.model_patcher
 import comfy.model_sampling

 import comfy.memory_management
-
-
-def trange(*args, **kwargs):
-    if comfy.memory_management.aimdo_allocator is None:
-        return trange_(*args, **kwargs)
-
-    pbar = trange_(*args, **kwargs, smoothing=1.0)
-    pbar._i = 0
-    pbar.set_postfix_str("  Model Initializing ...  ")
-
-    _update = pbar.update
-
-    def warmup_update(n=1):
-        pbar._i += 1
-        if pbar._i == 1:
-            pbar.i1_time = time.time()
-            pbar.set_postfix_str(" Model Initialization complete!  ")
-        elif pbar._i == 2:
-            #bring forward the effective start time based the the diff between first and second iteration
-            #to attempt to remove load overhead from the final step rate estimate.
-            pbar.start_t = pbar.i1_time - (time.time() - pbar.i1_time)
-            pbar.set_postfix_str("")
-
-        _update(n)
-
-    pbar.update = warmup_update
-    return pbar
-
+from comfy.utils import model_trange as trange

 def append_zero(x):
    return torch.cat([x, x.new_zeros([1])])
--- a/comfy/ldm/anima/model.py
+++ b/comfy/ldm/anima/model.py
@@ -195,8 +195,20 @@ class Anima(MiniTrainDIT):
        super().__init__(*args, **kwargs)
        self.llm_adapter = LLMAdapter(device=kwargs.get("device"), dtype=kwargs.get("dtype"), operations=kwargs.get("operations"))

-    def preprocess_text_embeds(self, text_embeds, text_ids):
+    def preprocess_text_embeds(self, text_embeds, text_ids, t5xxl_weights=None):
        if text_ids is not None:
-            return self.llm_adapter(text_embeds, text_ids)
+            out = self.llm_adapter(text_embeds, text_ids)
+            if t5xxl_weights is not None:
+                out = out * t5xxl_weights
+
+            if out.shape[1] < 512:
+                out = torch.nn.functional.pad(out, (0, 0, 0, 512 - out.shape[1]))
+            return out
        else:
            return text_embeds
+
+    def forward(self, x, timesteps, context, **kwargs):
+        t5xxl_ids = kwargs.pop("t5xxl_ids", None)  # popped so it is never forwarded to super().forward
+        if t5xxl_ids is not None:
+            context = self.preprocess_text_embeds(context, t5xxl_ids, t5xxl_weights=kwargs.pop("t5xxl_weights", None))  # NOTE: "t5xxl_weights" is only popped on this path; otherwise it stays in kwargs
+        return super().forward(x, timesteps, context, **kwargs)
--- a/comfy/ldm/chroma/layers.py
+++ b/comfy/ldm/chroma/layers.py
@@ -3,7 +3,6 @@ from torch import Tensor, nn

 from comfy.ldm.flux.layers import (
    MLPEmbedder,
-    RMSNorm,
    ModulationOut,
 )

@@ -29,7 +28,7 @@ class Approximator(nn.Module):
        super().__init__()
        self.in_proj = operations.Linear(in_dim, hidden_dim, bias=True, dtype=dtype, device=device)
        self.layers = nn.ModuleList([MLPEmbedder(hidden_dim, hidden_dim, dtype=dtype, device=device, operations=operations) for x in range( n_layers)])
-        self.norms = nn.ModuleList([RMSNorm(hidden_dim, dtype=dtype, device=device, operations=operations) for x in range( n_layers)])
+        self.norms = nn.ModuleList([operations.RMSNorm(hidden_dim, dtype=dtype, device=device) for x in range( n_layers)])
        self.out_proj = operations.Linear(hidden_dim, out_dim, dtype=dtype, device=device)

    @property
--- a/comfy/ldm/chroma_radiance/layers.py
+++ b/comfy/ldm/chroma_radiance/layers.py
@@ -4,8 +4,6 @@ from functools import lru_cache
 import torch
 from torch import nn

-from comfy.ldm.flux.layers import RMSNorm
-

 class NerfEmbedder(nn.Module):
    """
@@ -145,7 +143,7 @@ class NerfGLUBlock(nn.Module):
        # We now need to generate parameters for 3 matrices.
        total_params = 3 * hidden_size_x**2 * mlp_ratio
        self.param_generator = operations.Linear(hidden_size_s, total_params, dtype=dtype, device=device)
-        self.norm = RMSNorm(hidden_size_x, dtype=dtype, device=device, operations=operations)
+        self.norm = operations.RMSNorm(hidden_size_x, dtype=dtype, device=device)
        self.mlp_ratio = mlp_ratio


@@ -178,7 +176,7 @@ class NerfGLUBlock(nn.Module):
 class NerfFinalLayer(nn.Module):
    def __init__(self, hidden_size, out_channels, dtype=None, device=None, operations=None):
        super().__init__()
-        self.norm = RMSNorm(hidden_size, dtype=dtype, device=device, operations=operations)
+        self.norm = operations.RMSNorm(hidden_size, dtype=dtype, device=device)
        self.linear = operations.Linear(hidden_size, out_channels, dtype=dtype, device=device)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
@@ -190,7 +188,7 @@ class NerfFinalLayer(nn.Module):
 class NerfFinalLayerConv(nn.Module):
    def __init__(self, hidden_size: int, out_channels: int, dtype=None, device=None, operations=None):
        super().__init__()
-        self.norm = RMSNorm(hidden_size, dtype=dtype, device=device, operations=operations)
+        self.norm = operations.RMSNorm(hidden_size, dtype=dtype, device=device)
        self.conv = operations.Conv2d(
            in_channels=hidden_size,
            out_channels=out_channels,
--- a/comfy/ldm/flux/layers.py
+++ b/comfy/ldm/flux/layers.py
@@ -5,9 +5,9 @@ import torch
 from torch import Tensor, nn

 from .math import attention, rope
-import comfy.ops
-import comfy.ldm.common_dit

+# Fix import for some custom nodes, TODO: delete eventually.
+RMSNorm = None

 class EmbedND(nn.Module):
    def __init__(self, dim: int, theta: int, axes_dim: list):
@@ -87,20 +87,12 @@ def build_mlp(hidden_size, mlp_hidden_dim, mlp_silu_act=False, yak_mlp=False, dt
            operations.Linear(mlp_hidden_dim, hidden_size, bias=True, dtype=dtype, device=device),
        )

-class RMSNorm(torch.nn.Module):
-    def __init__(self, dim: int, dtype=None, device=None, operations=None):
-        super().__init__()
-        self.scale = nn.Parameter(torch.empty((dim), dtype=dtype, device=device))
-
-    def forward(self, x: Tensor):
-        return comfy.ldm.common_dit.rms_norm(x, self.scale, 1e-6)
-

 class QKNorm(torch.nn.Module):
    def __init__(self, dim: int, dtype=None, device=None, operations=None):
        super().__init__()
-        self.query_norm = RMSNorm(dim, dtype=dtype, device=device, operations=operations)
-        self.key_norm = RMSNorm(dim, dtype=dtype, device=device, operations=operations)
+        self.query_norm = operations.RMSNorm(dim, dtype=dtype, device=device)
+        self.key_norm = operations.RMSNorm(dim, dtype=dtype, device=device)

    def forward(self, q: Tensor, k: Tensor, v: Tensor) -> tuple:
        q = self.query_norm(q)
@@ -169,7 +161,7 @@ class SiLUActivation(nn.Module):


 class DoubleStreamBlock(nn.Module):
-    def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float, qkv_bias: bool = False, flipped_img_txt=False, modulation=True, mlp_silu_act=False, proj_bias=True, yak_mlp=False, dtype=None, device=None, operations=None):
+    def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float, qkv_bias: bool = False, modulation=True, mlp_silu_act=False, proj_bias=True, yak_mlp=False, dtype=None, device=None, operations=None):
        super().__init__()

        mlp_hidden_dim = int(hidden_size * mlp_ratio)
@@ -197,8 +189,6 @@ class DoubleStreamBlock(nn.Module):

        self.txt_mlp = build_mlp(hidden_size, mlp_hidden_dim, mlp_silu_act=mlp_silu_act, yak_mlp=yak_mlp, dtype=dtype, device=device, operations=operations)

-        self.flipped_img_txt = flipped_img_txt
-
    def forward(self, img: Tensor, txt: Tensor, vec: Tensor, pe: Tensor, attn_mask=None, modulation_dims_img=None, modulation_dims_txt=None, transformer_options={}):
        if self.modulation:
            img_mod1, img_mod2 = self.img_mod(vec)
@@ -224,32 +214,17 @@ class DoubleStreamBlock(nn.Module):
        del txt_qkv
        txt_q, txt_k = self.txt_attn.norm(txt_q, txt_k, txt_v)

-        if self.flipped_img_txt:
-            q = torch.cat((img_q, txt_q), dim=2)
-            del img_q, txt_q
-            k = torch.cat((img_k, txt_k), dim=2)
-            del img_k, txt_k
-            v = torch.cat((img_v, txt_v), dim=2)
-            del img_v, txt_v
-            # run actual attention
-            attn = attention(q, k, v,
-                             pe=pe, mask=attn_mask, transformer_options=transformer_options)
-            del q, k, v
+        q = torch.cat((txt_q, img_q), dim=2)
+        del txt_q, img_q
+        k = torch.cat((txt_k, img_k), dim=2)
+        del txt_k, img_k
+        v = torch.cat((txt_v, img_v), dim=2)
+        del txt_v, img_v
+        # run actual attention
+        attn = attention(q, k, v, pe=pe, mask=attn_mask, transformer_options=transformer_options)
+        del q, k, v

-            img_attn, txt_attn = attn[:, : img.shape[1]], attn[:, img.shape[1]:]
-        else:
-            q = torch.cat((txt_q, img_q), dim=2)
-            del txt_q, img_q
-            k = torch.cat((txt_k, img_k), dim=2)
-            del txt_k, img_k
-            v = torch.cat((txt_v, img_v), dim=2)
-            del txt_v, img_v
-            # run actual attention
-            attn = attention(q, k, v,
-                             pe=pe, mask=attn_mask, transformer_options=transformer_options)
-            del q, k, v
-
-            txt_attn, img_attn = attn[:, : txt.shape[1]], attn[:, txt.shape[1]:]
+        txt_attn, img_attn = attn[:, : txt.shape[1]], attn[:, txt.shape[1]:]

        # calculate the img bloks
        img += apply_mod(self.img_attn.proj(img_attn), img_mod1.gate, None, modulation_dims_img)
--- a/comfy/ldm/flux/math.py
+++ b/comfy/ldm/flux/math.py
@@ -29,19 +29,34 @@ def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
    return out.to(dtype=torch.float32, device=pos.device)


+def _apply_rope1(x: Tensor, freqs_cis: Tensor):
+    """Pure-PyTorch rotary position embedding for a single tensor.
+
+    The last dimension of `x` is split into pairs, each pair is combined with
+    the two trailing-axis slices of `freqs_cis`, and the result is returned
+    with the shape and dtype of `x`.
+
+    NOTE(review): `freqs_cis` is presumably laid out as (..., pairs, 2, 2) so
+    that it broadcasts against the (..., pairs, 1, 2) view of `x` - confirm
+    against `rope()`.
+    """
+    input_shape = x.shape
+    # Compute in the dtype of freqs_cis; the result is cast back at the end.
+    pairs = x.to(dtype=freqs_cis.dtype).reshape(*input_shape[:-1], -1, 1, 2)
+    first, second = pairs[..., 0], pairs[..., 1]
+
+    rotated = freqs_cis[..., 0] * first
+    # In-place accumulate: rotated += freqs_cis[..., 1] * second
+    rotated.addcmul_(freqs_cis[..., 1], second)
+
+    return rotated.reshape(*input_shape).type_as(x)
+
+
+def _apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor):
+    """Pure-PyTorch rotary position embedding for a query/key pair.
+
+    Calls `_apply_rope1` directly rather than the module-level `apply_rope1`
+    dispatcher, so this fallback always stays on the PyTorch path no matter
+    which implementation `apply_rope1` is bound to.
+
+    Returns:
+        Tuple `(xq_out, xk_out)`, both rotated with the same `freqs_cis`.
+    """
+    return _apply_rope1(xq, freqs_cis), _apply_rope1(xk, freqs_cis)
+
+
 try:
    import comfy.quant_ops
-    apply_rope = comfy.quant_ops.ck.apply_rope
-    apply_rope1 = comfy.quant_ops.ck.apply_rope1
+    q_apply_rope = comfy.quant_ops.ck.apply_rope
+    q_apply_rope1 = comfy.quant_ops.ck.apply_rope1
+    def apply_rope(xq, xk, freqs_cis):
+        if comfy.model_management.in_training:
+            return _apply_rope(xq, xk, freqs_cis)
+        else:
+            return apply_rope1(xq, freqs_cis), apply_rope1(xk, freqs_cis)
+    def apply_rope1(x, freqs_cis):
+        if comfy.model_management.in_training:
+            return _apply_rope1(x, freqs_cis)
+        else:
+            return q_apply_rope1(x, freqs_cis)
 except:
    logging.warning("No comfy kitchen, using old apply_rope functions.")
-    def apply_rope1(x: Tensor, freqs_cis: Tensor):
-        x_ = x.to(dtype=freqs_cis.dtype).reshape(*x.shape[:-1], -1, 1, 2)
-
-        x_out = freqs_cis[..., 0] * x_[..., 0]
-        x_out.addcmul_(freqs_cis[..., 1], x_[..., 1])
-
-        return x_out.reshape(*x.shape).type_as(x)
-
-    def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor):
-        return apply_rope1(xq, freqs_cis), apply_rope1(xk, freqs_cis)
+    apply_rope = _apply_rope
+    apply_rope1 = _apply_rope1
--- a/comfy/ldm/flux/model.py
+++ b/comfy/ldm/flux/model.py
@@ -16,7 +16,6 @@ from .layers import (
    SingleStreamBlock,
    timestep_embedding,
    Modulation,
-    RMSNorm
 )

@dataclass
@@ -81,7 +80,7 @@ class Flux(nn.Module):
        self.txt_in = operations.Linear(params.context_in_dim, self.hidden_size, bias=params.ops_bias, dtype=dtype, device=device)

        if params.txt_norm:
-            self.txt_norm = RMSNorm(params.context_in_dim, dtype=dtype, device=device, operations=operations)
+            self.txt_norm = operations.RMSNorm(params.context_in_dim, dtype=dtype, device=device)
        else:
            self.txt_norm = None

--- a/comfy/ldm/hunyuan_video/model.py
+++ b/comfy/ldm/hunyuan_video/model.py
@@ -241,7 +241,6 @@ class HunyuanVideo(nn.Module):
                    self.num_heads,
                    mlp_ratio=params.mlp_ratio,
                    qkv_bias=params.qkv_bias,
-                    flipped_img_txt=True,
                    dtype=dtype, device=device, operations=operations
                )
                for _ in range(params.depth)
@@ -378,14 +377,14 @@ class HunyuanVideo(nn.Module):
            extra_txt_ids = torch.zeros((txt_ids.shape[0], txt_vision_states.shape[1], txt_ids.shape[-1]), device=txt_ids.device, dtype=txt_ids.dtype)
            txt_ids = torch.cat((txt_ids, extra_txt_ids), dim=1)

-        ids = torch.cat((img_ids, txt_ids), dim=1)
+        ids = torch.cat((txt_ids, img_ids), dim=1)
        pe = self.pe_embedder(ids)

        img_len = img.shape[1]
        if txt_mask is not None:
            attn_mask_len = img_len + txt.shape[1]
            attn_mask = torch.zeros((1, 1, attn_mask_len), dtype=img.dtype, device=img.device)
-            attn_mask[:, 0, img_len:] = txt_mask
+            attn_mask[:, 0, :txt.shape[1]] = txt_mask
        else:
            attn_mask = None

@@ -413,7 +412,7 @@ class HunyuanVideo(nn.Module):
                    if add is not None:
                        img += add

-        img = torch.cat((img, txt), 1)
+        img = torch.cat((txt, img), 1)

        transformer_options["total_blocks"] = len(self.single_blocks)
        transformer_options["block_type"] = "single"
@@ -435,9 +434,9 @@ class HunyuanVideo(nn.Module):
                if i < len(control_o):
                    add = control_o[i]
                    if add is not None:
-                        img[:, : img_len] += add
+                        img[:, txt.shape[1]: img_len + txt.shape[1]] += add

-        img = img[:, : img_len]
+        img = img[:, txt.shape[1]: img_len + txt.shape[1]]
        if ref_latent is not None:
            img = img[:, ref_latent.shape[1]:]

--- a/comfy/ldm/qwen_image/controlnet.py
+++ b/comfy/ldm/qwen_image/controlnet.py
@@ -2,6 +2,196 @@ import torch
 import math

 from .model import QwenImageTransformer2DModel
+from .model import QwenImageTransformerBlock
+
+
+class QwenImageFunControlBlock(QwenImageTransformerBlock):
+    """QwenImageTransformerBlock extended with the projections used by the Fun ControlNet.
+
+    `after_proj` is always created. `before_proj` exists only when
+    `has_before_proj` is true, so callers must not access it on other blocks.
+    """
+    def __init__(self, dim, num_attention_heads, attention_head_dim, has_before_proj=False, dtype=None, device=None, operations=None):
+        super().__init__(
+            dim=dim,
+            num_attention_heads=num_attention_heads,
+            attention_head_dim=attention_head_dim,
+            dtype=dtype,
+            device=device,
+            operations=operations,
+        )
+        self.has_before_proj = has_before_proj
+        if has_before_proj:
+            # dim -> dim projection of the control tokens, applied before the block runs.
+            self.before_proj = operations.Linear(dim, dim, device=device, dtype=dtype)
+        # dim -> dim projection of the block output, used to build the hint for the base model.
+        self.after_proj = operations.Linear(dim, dim, device=device, dtype=dtype)
+
+
+class QwenImageFunControlNetModel(torch.nn.Module):
+    """ControlNet for Qwen-Image "Fun" checkpoints.
+
+    The model owns only the control input projection and the control blocks.
+    Image/text embedding, timestep embedding and rotary embeddings are taken
+    from the `base_model` passed to `forward` at runtime.
+    """
+    def __init__(
+        self,
+        control_in_features=132,
+        inner_dim=3072,
+        num_attention_heads=24,
+        attention_head_dim=128,
+        num_control_blocks=5,
+        main_model_double=60,
+        injection_layers=(0, 12, 24, 36, 48),
+        dtype=None,
+        device=None,
+        operations=None,
+    ):
+        """Build the control input projection and `num_control_blocks` control blocks.
+
+        `main_model_double` is the length of the per-block output list returned
+        by `forward`; `injection_layers` are the indices of that list which
+        receive a hint. Only the first control block gets a `before_proj`.
+        """
+        super().__init__()
+        self.dtype = dtype
+        self.main_model_double = main_model_double
+        self.injection_layers = tuple(injection_layers)
+        # Keep base hint scaling at 1.0 so user-facing strength behaves similarly
+        # to the reference Gen2/VideoX implementation around strength=1.
+        self.hint_scale = 1.0
+        self.control_img_in = operations.Linear(control_in_features, inner_dim, device=device, dtype=dtype)
+
+        self.control_blocks = torch.nn.ModuleList([])
+        for i in range(num_control_blocks):
+            self.control_blocks.append(
+                QwenImageFunControlBlock(
+                    dim=inner_dim,
+                    num_attention_heads=num_attention_heads,
+                    attention_head_dim=attention_head_dim,
+                    has_before_proj=(i == 0),
+                    dtype=dtype,
+                    device=device,
+                    operations=operations,
+                )
+            )
+
+    def _process_hint_tokens(self, hint):
+        """Pack a control hint into a token sequence for `control_img_in`.
+
+        Returns None when `hint` is None. Otherwise returns a tensor of shape
+        (batch, t * ceil(h / 2) * ceil(w / 2), features), where the feature
+        dimension is zero-padded or truncated to the in-features of
+        `control_img_in`.
+        """
+        if hint is None:
+            return None
+        if hint.ndim == 4:
+            # Insert a singleton axis at index 2 so the hint is 5-D (b, c, t, h, w).
+            hint = hint.unsqueeze(2)
+
+        # Fun checkpoints are trained with 33 latent channels before 2x2 packing:
+        # [control_latent(16), mask(1), inpaint_latent(16)] -> 132 features.
+        # Default behavior (no inpaint input in stock Apply ControlNet) should use
+        # zeros for mask/inpaint branches, matching VideoX fallback semantics.
+        expected_c = self.control_img_in.weight.shape[1] // 4
+        if hint.shape[1] == 16 and expected_c == 33:
+            zeros_mask = torch.zeros_like(hint[:, :1])
+            zeros_inpaint = torch.zeros_like(hint)
+            hint = torch.cat([hint, zeros_mask, zeros_inpaint], dim=1)
+
+        bs, c, t, h, w = hint.shape
+        # Pad the right/bottom edge by one when w/h is odd so both are divisible by 2.
+        hidden_states = torch.nn.functional.pad(hint, (0, w % 2, 0, h % 2))
+        orig_shape = hidden_states.shape
+        # Split h and w into 2x2 patches: (b, c, t, h/2, 2, w/2, 2).
+        hidden_states = hidden_states.view(
+            orig_shape[0],
+            orig_shape[1],
+            orig_shape[-3],
+            orig_shape[-2] // 2,
+            2,
+            orig_shape[-1] // 2,
+            2,
+        )
+        # Reorder to (b, t, h/2, w/2, c, 2, 2) so each patch's c*4 values are adjacent.
+        hidden_states = hidden_states.permute(0, 2, 3, 5, 1, 4, 6)
+        hidden_states = hidden_states.reshape(
+            bs,
+            t * ((h + 1) // 2) * ((w + 1) // 2),
+            c * 4,
+        )
+
+        # Match the feature width expected by control_img_in: zero-pad if short, truncate if long.
+        expected_in = self.control_img_in.weight.shape[1]
+        cur_in = hidden_states.shape[-1]
+        if cur_in < expected_in:
+            pad = torch.zeros(
+                (hidden_states.shape[0], hidden_states.shape[1], expected_in - cur_in),
+                device=hidden_states.device,
+                dtype=hidden_states.dtype,
+            )
+            hidden_states = torch.cat([hidden_states, pad], dim=-1)
+        elif cur_in > expected_in:
+            hidden_states = hidden_states[:, :, :expected_in]
+
+        return hidden_states
+
+    def forward(
+        self,
+        x,
+        timesteps,
+        context,
+        attention_mask=None,
+        guidance: torch.Tensor = None,
+        hint=None,
+        transformer_options={},
+        base_model=None,
+        **kwargs,
+    ):
+        """Run the control blocks and return per-layer hints for the base model.
+
+        Returns:
+            `{"input": samples}` where `samples` is a list of length
+            `main_model_double` holding a hint tensor at each index listed in
+            `injection_layers` and None elsewhere.
+
+        Raises:
+            RuntimeError: if `base_model` or `hint` is None.
+        """
+        if base_model is None:
+            raise RuntimeError("Qwen Fun ControlNet requires a QwenImage base model at runtime.")
+
+        encoder_hidden_states_mask = attention_mask
+        # Keep attention mask disabled inside Fun control blocks to mirror
+        # VideoX behavior (they rely on seq lengths for RoPE, not masked attention).
+        encoder_hidden_states_mask = None
+
+        hidden_states, img_ids, _ = base_model.process_img(x)
+        hint_tokens = self._process_hint_tokens(hint)
+        if hint_tokens is None:
+            raise RuntimeError("Qwen Fun ControlNet requires a control hint image.")
+
+        # If hint and image token counts differ, truncate both (and the image ids) to the shorter one.
+        if hint_tokens.shape[1] != hidden_states.shape[1]:
+            max_tokens = min(hint_tokens.shape[1], hidden_states.shape[1])
+            hint_tokens = hint_tokens[:, :max_tokens]
+            hidden_states = hidden_states[:, :max_tokens]
+            img_ids = img_ids[:, :max_tokens]
+
+        # First text position id, derived from the larger of the two spatial extents of x.
+        txt_start = round(
+            max(
+                ((x.shape[-1] + (base_model.patch_size // 2)) // base_model.patch_size) // 2,
+                ((x.shape[-2] + (base_model.patch_size // 2)) // base_model.patch_size) // 2,
+            )
+        )
+        txt_ids = torch.arange(txt_start, txt_start + context.shape[1], device=x.device).reshape(1, -1, 1).repeat(x.shape[0], 1, 3)
+        # Text ids come first, then image ids.
+        ids = torch.cat((txt_ids, img_ids), dim=1)
+        image_rotary_emb = base_model.pe_embedder(ids).to(x.dtype).contiguous()
+
+        # Embed image and text tokens with the base model's own input layers.
+        hidden_states = base_model.img_in(hidden_states)
+        encoder_hidden_states = base_model.txt_norm(context)
+        encoder_hidden_states = base_model.txt_in(encoder_hidden_states)
+
+        if guidance is not None:
+            guidance = guidance * 1000
+
+        temb = (
+            base_model.time_text_embed(timesteps, hidden_states)
+            if guidance is None
+            else base_model.time_text_embed(timesteps, guidance, hidden_states)
+        )
+
+        c = self.control_img_in(hint_tokens)
+
+        # `c` doubles as a stack: after each block it holds every hint produced so far
+        # followed by the latest raw block output, which feeds the next block.
+        for i, block in enumerate(self.control_blocks):
+            if i == 0:
+                # First block: mix the projected control tokens into the image tokens.
+                c_in = block.before_proj(c) + hidden_states
+                all_c = []
+            else:
+                # Later blocks: pop the previous raw output off the end of the stack.
+                all_c = list(torch.unbind(c, dim=0))
+                c_in = all_c.pop(-1)
+
+            encoder_hidden_states, c_out = block(
+                hidden_states=c_in,
+                encoder_hidden_states=encoder_hidden_states,
+                encoder_hidden_states_mask=encoder_hidden_states_mask,
+                temb=temb,
+                image_rotary_emb=image_rotary_emb,
+                transformer_options=transformer_options,
+            )
+
+            c_skip = block.after_proj(c_out) * self.hint_scale
+            all_c += [c_skip, c_out]
+            c = torch.stack(all_c, dim=0)
+
+        # Drop the trailing raw output; what remains is one hint per control block.
+        hints = torch.unbind(c, dim=0)[:-1]
+
+        # Place each hint at its configured index; out-of-range indices and missing hints are skipped.
+        controlnet_block_samples = [None] * self.main_model_double
+        for local_idx, base_idx in enumerate(self.injection_layers):
+            if local_idx < len(hints) and base_idx < len(controlnet_block_samples):
+                controlnet_block_samples[base_idx] = hints[local_idx]
+
+        return {"input": controlnet_block_samples}


 class QwenImageControlNetModel(QwenImageTransformer2DModel):
--- a/comfy/lora_convert.py
+++ b/comfy/lora_convert.py
@@ -5,7 +5,7 @@ import comfy.utils
 def convert_lora_bfl_control(sd): #BFL loras for Flux
    sd_out = {}
    for k in sd:
-        k_to = "diffusion_model.{}".format(k.replace(".lora_B.bias", ".diff_b").replace("_norm.scale", "_norm.scale.set_weight"))
+        k_to = "diffusion_model.{}".format(k.replace(".lora_B.bias", ".diff_b").replace("_norm.scale", "_norm.set_weight"))
        sd_out[k_to] = sd[k]

    sd_out["diffusion_model.img_in.reshape_weight"] = torch.tensor([sd["img_in.lora_B.weight"].shape[0], sd["img_in.lora_A.weight"].shape[1]])
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@@ -1160,12 +1160,16 @@ class Anima(BaseModel):
        device = kwargs["device"]
        if cross_attn is not None:
            if t5xxl_ids is not None:
-                cross_attn = self.diffusion_model.preprocess_text_embeds(cross_attn.to(device=device, dtype=self.get_dtype()), t5xxl_ids.unsqueeze(0).to(device=device))
                if t5xxl_weights is not None:
-                    cross_attn *= t5xxl_weights.unsqueeze(0).unsqueeze(-1).to(cross_attn)
+                    t5xxl_weights = t5xxl_weights.unsqueeze(0).unsqueeze(-1).to(cross_attn)
+                t5xxl_ids = t5xxl_ids.unsqueeze(0)
+
+                if torch.is_inference_mode_enabled():  # if not we are training
+                    cross_attn = self.diffusion_model.preprocess_text_embeds(cross_attn.to(device=device, dtype=self.get_dtype()), t5xxl_ids.to(device=device), t5xxl_weights=t5xxl_weights.to(device=device, dtype=self.get_dtype()))
+                else:
+                    out['t5xxl_ids'] = comfy.conds.CONDRegular(t5xxl_ids)
+                    out['t5xxl_weights'] = comfy.conds.CONDRegular(t5xxl_weights)

-                if cross_attn.shape[1] < 512:
-                    cross_attn = torch.nn.functional.pad(cross_attn, (0, 0, 0, 512 - cross_attn.shape[1]))
            out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
        return out

--- a/comfy/model_detection.py
+++ b/comfy/model_detection.py
@@ -19,6 +19,12 @@ def count_blocks(state_dict_keys, prefix_string):
        count += 1
    return count

+def any_suffix_in(keys, prefix, main, suffix_list=[]):
+    """Return True if `prefix + main + suffix` is in `keys` for any suffix in `suffix_list`.
+
+    An empty `suffix_list` always yields False.
+    """
+    stem = "{}{}".format(prefix, main)
+    return any("{}{}".format(stem, suffix) in keys for suffix in suffix_list)
+
 def calculate_transformer_depth(prefix, state_dict_keys, state_dict):
    context_dim = None
    use_linear_in_transformer = False
@@ -186,7 +192,7 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
            dit_config["meanflow_sum"] = False
        return dit_config

-    if '{}double_blocks.0.img_attn.norm.key_norm.scale'.format(key_prefix) in state_dict_keys and ('{}img_in.weight'.format(key_prefix) in state_dict_keys or f"{key_prefix}distilled_guidance_layer.norms.0.scale" in state_dict_keys): #Flux, Chroma or Chroma Radiance (has no img_in.weight)
+    if any_suffix_in(state_dict_keys, key_prefix, 'double_blocks.0.img_attn.norm.key_norm.', ["weight", "scale"]) and ('{}img_in.weight'.format(key_prefix) in state_dict_keys or any_suffix_in(state_dict_keys, key_prefix, 'distilled_guidance_layer.norms.0.', ["weight", "scale"])): #Flux, Chroma or Chroma Radiance (has no img_in.weight)
        dit_config = {}
        if '{}double_stream_modulation_img.lin.weight'.format(key_prefix) in state_dict_keys:
            dit_config["image_model"] = "flux2"
@@ -241,7 +247,8 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):

        dit_config["depth"] = count_blocks(state_dict_keys, '{}double_blocks.'.format(key_prefix) + '{}.')
        dit_config["depth_single_blocks"] = count_blocks(state_dict_keys, '{}single_blocks.'.format(key_prefix) + '{}.')
-        if '{}distilled_guidance_layer.0.norms.0.scale'.format(key_prefix) in state_dict_keys or '{}distilled_guidance_layer.norms.0.scale'.format(key_prefix) in state_dict_keys: #Chroma
+
+        if any_suffix_in(state_dict_keys, key_prefix, 'distilled_guidance_layer.0.norms.0.', ["weight", "scale"]) or any_suffix_in(state_dict_keys, key_prefix, 'distilled_guidance_layer.norms.0.', ["weight", "scale"]): #Chroma
            dit_config["image_model"] = "chroma"
            dit_config["in_channels"] = 64
            dit_config["out_channels"] = 64
@@ -249,7 +256,8 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
            dit_config["out_dim"] = 3072
            dit_config["hidden_dim"] = 5120
            dit_config["n_layers"] = 5
-            if f"{key_prefix}nerf_blocks.0.norm.scale" in state_dict_keys: #Chroma Radiance
+
+            if any_suffix_in(state_dict_keys, key_prefix, 'nerf_blocks.0.norm.', ["weight", "scale"]): #Chroma Radiance
                dit_config["image_model"] = "chroma_radiance"
                dit_config["in_channels"] = 3
                dit_config["out_channels"] = 3
@@ -259,7 +267,7 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
                dit_config["nerf_depth"] = 4
                dit_config["nerf_max_freqs"] = 8
                dit_config["nerf_tile_size"] = 512
-                dit_config["nerf_final_head_type"] = "conv" if f"{key_prefix}nerf_final_layer_conv.norm.scale" in state_dict_keys else "linear"
+                dit_config["nerf_final_head_type"] = "conv" if any_suffix_in(state_dict_keys, key_prefix, 'nerf_final_layer_conv.norm.', ["weight", "scale"]) else "linear"
                dit_config["nerf_embedder_dtype"] = torch.float32
                if "{}__x0__".format(key_prefix) in state_dict_keys: # x0 pred
                    dit_config["use_x0"] = True
@@ -268,7 +276,7 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
        else:
            dit_config["guidance_embed"] = "{}guidance_in.in_layer.weight".format(key_prefix) in state_dict_keys
            dit_config["yak_mlp"] = '{}double_blocks.0.img_mlp.gate_proj.weight'.format(key_prefix) in state_dict_keys
-            dit_config["txt_norm"] = "{}txt_norm.scale".format(key_prefix) in state_dict_keys
+            dit_config["txt_norm"] = any_suffix_in(state_dict_keys, key_prefix, 'txt_norm.', ["weight", "scale"])
            if dit_config["yak_mlp"] and dit_config["txt_norm"]:  # Ovis model
                dit_config["txt_ids_dims"] = [1, 2]

--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -55,6 +55,11 @@ cpu_state = CPUState.GPU

 total_vram = 0

+
+# Training Related State
+in_training = False
+
+
 def get_supported_float8_types():
    float8_types = []
    try:
@@ -1208,8 +1213,12 @@ def cast_to(weight, dtype=None, device=None, non_blocking=False, copy=False, str

        signature = comfy_aimdo.model_vbar.vbar_fault(weight._v)
        if signature is not None:
-            v_tensor = comfy.memory_management.interpret_gathered_like(cast_geometry, weight._v_tensor)[0]
-            if not comfy_aimdo.model_vbar.vbar_signature_compare(signature, weight._v_signature):
+            if comfy_aimdo.model_vbar.vbar_signature_compare(signature, weight._v_signature):
+                v_tensor = weight._v_tensor
+            else:
+                raw_tensor = comfy_aimdo.torch.aimdo_to_tensor(weight._v, device)
+                v_tensor = comfy.memory_management.interpret_gathered_like(cast_geometry, raw_tensor)[0]
+                weight._v_tensor = v_tensor
                weight._v_signature = signature
                #Send it over
                v_tensor.copy_(weight, non_blocking=non_blocking)
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -679,18 +679,19 @@ class ModelPatcher:
        for key in list(self.pinned):
            self.unpin_weight(key)

-    def _load_list(self, prio_comfy_cast_weights=False):
+    def _load_list(self, prio_comfy_cast_weights=False, default_device=None):
        loading = []
        for n, m in self.model.named_modules():
-            params = []
-            skip = False
-            for name, param in m.named_parameters(recurse=False):
-                params.append(name)
+            default = False
+            params = { name: param for name, param in m.named_parameters(recurse=False) }
            for name, param in m.named_parameters(recurse=True):
                if name not in params:
-                    skip = True # skip random weights in non leaf modules
+                    default = True # default random weights in non leaf modules
                    break
-            if not skip and (hasattr(m, "comfy_cast_weights") or len(params) > 0):
+            if default and default_device is not None:
+                for param in params.values():
+                    param.data = param.data.to(device=default_device)
+            if not default and (hasattr(m, "comfy_cast_weights") or len(params) > 0):
                module_mem = comfy.model_management.module_size(m)
                module_offload_mem = module_mem
                if hasattr(m, "comfy_cast_weights"):
@@ -1495,7 +1496,7 @@ class ModelPatcherDynamic(ModelPatcher):
            #with pin and unpin syncrhonization which can be expensive for small weights
            #with a high layer rate (e.g. autoregressive LLMs).
            #prioritize the non-comfy weights (note the order reverse).
-            loading = self._load_list(prio_comfy_cast_weights=True)
+            loading = self._load_list(prio_comfy_cast_weights=True, default_device=device_to)
            loading.sort(reverse=True)

            for x in loading:
@@ -1525,7 +1526,7 @@ class ModelPatcherDynamic(ModelPatcher):
                    setattr(m, param_key + "_function", weight_function)
                    geometry = weight
                    if not isinstance(weight, QuantizedTensor):
-                        model_dtype = getattr(m, param_key + "_comfy_model_dtype", weight.dtype)
+                        model_dtype = getattr(m, param_key + "_comfy_model_dtype", None) or weight.dtype
                        weight._model_dtype = model_dtype
                        geometry = comfy.memory_management.TensorGeometry(shape=weight.shape, dtype=model_dtype)
                    return comfy.memory_management.vram_aligned_size(geometry)
@@ -1542,7 +1543,6 @@ class ModelPatcherDynamic(ModelPatcher):

                    if vbar is not None and not hasattr(m, "_v"):
                        m._v = vbar.alloc(v_weight_size)
-                        m._v_tensor = comfy_aimdo.torch.aimdo_to_tensor(m._v, device_to)
                    allocated_size += v_weight_size

                else:
@@ -1552,16 +1552,17 @@ class ModelPatcherDynamic(ModelPatcher):
                        weight.seed_key = key
                        set_dirty(weight, dirty)
                        geometry = weight
-                        model_dtype = getattr(m, param + "_comfy_model_dtype", weight.dtype)
+                        model_dtype = getattr(m, param + "_comfy_model_dtype", None) or weight.dtype
                        geometry = comfy.memory_management.TensorGeometry(shape=weight.shape, dtype=model_dtype)
                        weight_size = geometry.numel() * geometry.element_size()
                        if vbar is not None and not hasattr(weight, "_v"):
                            weight._v = vbar.alloc(weight_size)
-                            weight._v_tensor = comfy_aimdo.torch.aimdo_to_tensor(weight._v, device_to)
                            weight._model_dtype = model_dtype
                        allocated_size += weight_size
                    vbar.set_watermark_limit(allocated_size)

+                move_weight_functions(m, device_to)
+
            logging.info(f"Model {self.model.__class__.__name__} prepared for dynamic VRAM loading. {allocated_size // (1024 ** 2)}MB Staged. {num_patches} patches attached.")

            self.model.device = device_to
@@ -1581,7 +1582,7 @@ class ModelPatcherDynamic(ModelPatcher):
        return 0 if vbar is None else vbar.free_memory(memory_to_free)

    def partially_unload_ram(self, ram_to_unload):
-        loading = self._load_list(prio_comfy_cast_weights=True)
+        loading = self._load_list(prio_comfy_cast_weights=True, default_device=self.offload_device)
        for x in loading:
            _, _, _, _, m, _ = x
            ram_to_unload -= comfy.pinned_memory.unpin_memory(m)
@@ -1602,6 +1603,8 @@ class ModelPatcherDynamic(ModelPatcher):
        if unpatch_weights:
            self.partially_unload_ram(1e32)
            self.partially_unload(None, 1e32)
+            for m in self.model.modules():
+                move_weight_functions(m, device_to)

    def partially_load(self, device_to, extra_memory=0, force_patch_weights=False):
        assert not force_patch_weights #See above
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -83,14 +83,18 @@ def cast_to_input(weight, input, non_blocking=False, copy=True):
 def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compute_dtype):
    offload_stream = None
    xfer_dest = None
-    cast_geometry = comfy.memory_management.tensors_to_geometries([ s.weight, s.bias ])

    signature = comfy_aimdo.model_vbar.vbar_fault(s._v)
-    if signature is not None:
-        xfer_dest = s._v_tensor
    resident = comfy_aimdo.model_vbar.vbar_signature_compare(signature, s._v_signature)
+    if signature is not None:
+        if resident:
+            weight = s._v_weight
+            bias = s._v_bias
+        else:
+            xfer_dest = comfy_aimdo.torch.aimdo_to_tensor(s._v, device)

    if not resident:
+        cast_geometry = comfy.memory_management.tensors_to_geometries([ s.weight, s.bias ])
        cast_dest = None

        xfer_source = [ s.weight, s.bias ]
@@ -140,9 +144,13 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu
                    post_cast.copy_(pre_cast)
            xfer_dest = cast_dest

-    params = comfy.memory_management.interpret_gathered_like(cast_geometry, xfer_dest)
-    weight = params[0]
-    bias = params[1]
+        params = comfy.memory_management.interpret_gathered_like(cast_geometry, xfer_dest)
+        weight = params[0]
+        bias = params[1]
+        if signature is not None:
+            s._v_weight = weight
+            s._v_bias = bias
+        s._v_signature=signature

    def post_cast(s, param_key, x, dtype, resident, update_weight):
        lowvram_fn = getattr(s, param_key + "_lowvram_function", None)
@@ -182,7 +190,6 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu
    weight = post_cast(s, "weight", weight, dtype, resident, update_weight)
    if s.bias is not None:
        bias = post_cast(s, "bias", bias, bias_dtype, resident, update_weight)
-    s._v_signature=signature

    #FIXME: weird offload return protocol
    return weight, bias, (offload_stream, device if signature is not None else None, None)
--- a/comfy/sampler_helpers.py
+++ b/comfy/sampler_helpers.py
@@ -122,20 +122,26 @@ def estimate_memory(model, noise_shape, conds):
    minimum_memory_required = model.model.memory_required([noise_shape[0]] + list(noise_shape[1:]), cond_shapes=cond_shapes_min)
    return memory_required, minimum_memory_required

-def prepare_sampling(model: ModelPatcher, noise_shape, conds, model_options=None, force_full_load=False):
+def prepare_sampling(model: ModelPatcher, noise_shape, conds, model_options=None, force_full_load=False, force_offload=False):
    executor = comfy.patcher_extension.WrapperExecutor.new_executor(
        _prepare_sampling,
        comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.PREPARE_SAMPLING, model_options, is_model_options=True)
    )
-    return executor.execute(model, noise_shape, conds, model_options=model_options, force_full_load=force_full_load)
+    return executor.execute(model, noise_shape, conds, model_options=model_options, force_full_load=force_full_load, force_offload=force_offload)

-def _prepare_sampling(model: ModelPatcher, noise_shape, conds, model_options=None, force_full_load=False):
+def _prepare_sampling(model: ModelPatcher, noise_shape, conds, model_options=None, force_full_load=False, force_offload=False):
    real_model: BaseModel = None
    models, inference_memory = get_additional_models(conds, model.model_dtype())
    models += get_additional_models_from_model_options(model_options)
    models += model.get_nested_additional_models()  # TODO: does this require inference_memory update?
-    memory_required, minimum_memory_required = estimate_memory(model, noise_shape, conds)
-    comfy.model_management.load_models_gpu([model] + models, memory_required=memory_required + inference_memory, minimum_memory_required=minimum_memory_required + inference_memory, force_full_load=force_full_load)
+    if force_offload: # In training + offload enabled, we want to force prepare sampling to trigger partial load
+        memory_required = 1e20
+        minimum_memory_required = None
+    else:
+        memory_required, minimum_memory_required = estimate_memory(model, noise_shape, conds)
+        memory_required += inference_memory
+        minimum_memory_required += inference_memory
+    comfy.model_management.load_models_gpu([model] + models, memory_required=memory_required, minimum_memory_required=minimum_memory_required, force_full_load=force_full_load)
    real_model = model.model

    return real_model, conds, models
--- a/comfy/sd1_clip.py
+++ b/comfy/sd1_clip.py
@@ -171,8 +171,9 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):

    def process_tokens(self, tokens, device):
        end_token = self.special_tokens.get("end", None)
+        pad_token = self.special_tokens.get("pad", -1)
        if end_token is None:
-            cmp_token = self.special_tokens.get("pad", -1)
+            cmp_token = pad_token
        else:
            cmp_token = end_token

@@ -186,15 +187,21 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
            other_embeds = []
            eos = False
            index = 0
+            left_pad = False
            for y in x:
                if isinstance(y, numbers.Integral):
-                    if eos:
+                    token = int(y)
+                    if index == 0 and token == pad_token:
+                        left_pad = True
+
+                    if eos or (left_pad and token == pad_token):
                        attention_mask.append(0)
                    else:
                        attention_mask.append(1)
-                    token = int(y)
+                        left_pad = False
+
                    tokens_temp += [token]
-                    if not eos and token == cmp_token:
+                    if not eos and token == cmp_token and not left_pad:
                        if end_token is None:
                            attention_mask[-1] = 0
                        eos = True
--- a/comfy/supported_models.py
+++ b/comfy/supported_models.py
@@ -710,6 +710,15 @@ class Flux(supported_models_base.BASE):

    supported_inference_dtypes = [torch.bfloat16, torch.float16, torch.float32]

+    def process_unet_state_dict(self, state_dict):
+        out_sd = {}
+        for k in list(state_dict.keys()):
+            key_out = k
+            if key_out.endswith("_norm.scale"):
+                key_out = "{}.weight".format(key_out[:-len(".scale")])
+            out_sd[key_out] = state_dict[k]
+        return out_sd
+
    vae_key_prefix = ["vae."]
    text_encoder_key_prefix = ["text_encoders."]

@@ -898,11 +907,13 @@ class HunyuanVideo(supported_models_base.BASE):
            key_out = key_out.replace("txt_in.c_embedder.linear_1.", "txt_in.c_embedder.in_layer.").replace("txt_in.c_embedder.linear_2.", "txt_in.c_embedder.out_layer.")
            key_out = key_out.replace("_mod.linear.", "_mod.lin.").replace("_attn_qkv.", "_attn.qkv.")
            key_out = key_out.replace("mlp.fc1.", "mlp.0.").replace("mlp.fc2.", "mlp.2.")
-            key_out = key_out.replace("_attn_q_norm.weight", "_attn.norm.query_norm.scale").replace("_attn_k_norm.weight", "_attn.norm.key_norm.scale")
-            key_out = key_out.replace(".q_norm.weight", ".norm.query_norm.scale").replace(".k_norm.weight", ".norm.key_norm.scale")
+            key_out = key_out.replace("_attn_q_norm.weight", "_attn.norm.query_norm.weight").replace("_attn_k_norm.weight", "_attn.norm.key_norm.weight")
+            key_out = key_out.replace(".q_norm.weight", ".norm.query_norm.weight").replace(".k_norm.weight", ".norm.key_norm.weight")
            key_out = key_out.replace("_attn_proj.", "_attn.proj.")
            key_out = key_out.replace(".modulation.linear.", ".modulation.lin.")
            key_out = key_out.replace("_in.mlp.2.", "_in.out_layer.").replace("_in.mlp.0.", "_in.in_layer.")
+            if key_out.endswith(".scale"):
+                key_out = "{}.weight".format(key_out[:-len(".scale")])
            out_sd[key_out] = state_dict[k]
        return out_sd

@@ -1264,6 +1275,15 @@ class Hunyuan3Dv2(supported_models_base.BASE):

    latent_format = latent_formats.Hunyuan3Dv2

+    def process_unet_state_dict(self, state_dict):
+        out_sd = {}
+        for k in list(state_dict.keys()):
+            key_out = k
+            if key_out.endswith(".scale"):
+                key_out = "{}.weight".format(key_out[:-len(".scale")])
+            out_sd[key_out] = state_dict[k]
+        return out_sd
+
    def process_unet_state_dict_for_saving(self, state_dict):
        replace_prefix = {"": "model."}
        return utils.state_dict_prefix_replace(state_dict, replace_prefix)
@@ -1341,6 +1361,14 @@ class Chroma(supported_models_base.BASE):

    supported_inference_dtypes = [torch.bfloat16, torch.float16, torch.float32]

+    def process_unet_state_dict(self, state_dict):
+        out_sd = {}
+        for k in list(state_dict.keys()):
+            key_out = k
+            if key_out.endswith(".scale"):
+                key_out = "{}.weight".format(key_out[:-len(".scale")])
+            out_sd[key_out] = state_dict[k]
+        return out_sd

    def get_model(self, state_dict, prefix="", device=None):
        out = model_base.Chroma(self, device=device)
--- a/comfy/text_encoders/ace15.py
+++ b/comfy/text_encoders/ace15.py
@@ -3,7 +3,6 @@ import comfy.text_encoders.llama
 from comfy import sd1_clip
 import torch
 import math
-from tqdm.auto import trange
 import yaml
 import comfy.utils

@@ -11,12 +10,12 @@ import comfy.utils
 def sample_manual_loop_no_classes(
    model,
    ids=None,
-    paddings=[],
    execution_dtype=None,
    cfg_scale: float = 2.0,
    temperature: float = 0.85,
    top_p: float = 0.9,
    top_k: int = None,
+    min_p: float = 0.000,
    seed: int = 1,
    min_tokens: int = 1,
    max_new_tokens: int = 2048,
@@ -36,9 +35,6 @@ def sample_manual_loop_no_classes(

    embeds, attention_mask, num_tokens, embeds_info = model.process_tokens(ids, device)
    embeds_batch = embeds.shape[0]
-    for i, t in enumerate(paddings):
-        attention_mask[i, :t] = 0
-        attention_mask[i, t:] = 1

    output_audio_codes = []
    past_key_values = []
@@ -52,7 +48,7 @@ def sample_manual_loop_no_classes(

    progress_bar = comfy.utils.ProgressBar(max_new_tokens)

-    for step in trange(max_new_tokens, desc="LM sampling"):
+    for step in comfy.utils.model_trange(max_new_tokens, desc="LM sampling"):
        outputs = model.transformer(None, attention_mask, embeds=embeds.to(execution_dtype), num_tokens=num_tokens, intermediate_output=None, dtype=execution_dtype, embeds_info=embeds_info, past_key_values=past_key_values)
        next_token_logits = model.transformer.logits(outputs[0])[:, -1]
        past_key_values = outputs[2]
@@ -81,6 +77,12 @@ def sample_manual_loop_no_classes(
            min_val = top_k_vals[..., -1, None]
            cfg_logits[cfg_logits < min_val] = remove_logit_value

+        if min_p is not None and min_p > 0:
+            probs = torch.softmax(cfg_logits, dim=-1)
+            p_max = probs.max(dim=-1, keepdim=True).values
+            indices_to_remove = probs < (min_p * p_max)
+            cfg_logits[indices_to_remove] = remove_logit_value
+
        if top_p is not None and top_p < 1.0:
            sorted_logits, sorted_indices = torch.sort(cfg_logits, descending=True)
            cumulative_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1)
@@ -111,7 +113,7 @@ def sample_manual_loop_no_classes(
    return output_audio_codes


-def generate_audio_codes(model, positive, negative, min_tokens=1, max_tokens=1024, seed=0, cfg_scale=2.0, temperature=0.85, top_p=0.9, top_k=0):
+def generate_audio_codes(model, positive, negative, min_tokens=1, max_tokens=1024, seed=0, cfg_scale=2.0, temperature=0.85, top_p=0.9, top_k=0, min_p=0.000):
    positive = [[token for token, _ in inner_list] for inner_list in positive]
    positive = positive[0]

@@ -129,13 +131,11 @@ def generate_audio_codes(model, positive, negative, min_tokens=1, max_tokens=102
            pos_pad = (len(negative) - len(positive))
            positive = [model.special_tokens["pad"]] * pos_pad + positive

-        paddings = [pos_pad, neg_pad]
        ids = [positive, negative]
    else:
-        paddings = []
        ids = [positive]

-    return sample_manual_loop_no_classes(model, ids, paddings, cfg_scale=cfg_scale, temperature=temperature, top_p=top_p, top_k=top_k, seed=seed, min_tokens=min_tokens, max_new_tokens=max_tokens)
+    return sample_manual_loop_no_classes(model, ids, cfg_scale=cfg_scale, temperature=temperature, top_p=top_p, top_k=top_k, min_p=min_p, seed=seed, min_tokens=min_tokens, max_new_tokens=max_tokens)


 class ACE15Tokenizer(sd1_clip.SD1Tokenizer):
@@ -193,6 +193,7 @@ class ACE15Tokenizer(sd1_clip.SD1Tokenizer):
        temperature = kwargs.get("temperature", 0.85)
        top_p = kwargs.get("top_p", 0.9)
        top_k = kwargs.get("top_k", 0.0)
+        min_p = kwargs.get("min_p", 0.000)

        duration = math.ceil(duration)
        kwargs["duration"] = duration
@@ -240,6 +241,7 @@ class ACE15Tokenizer(sd1_clip.SD1Tokenizer):
                              "temperature": temperature,
                              "top_p": top_p,
                              "top_k": top_k,
+                              "min_p": min_p,
                              }
        return out

@@ -300,7 +302,7 @@ class ACE15TEModel(torch.nn.Module):

        lm_metadata = token_weight_pairs["lm_metadata"]
        if lm_metadata["generate_audio_codes"]:
-            audio_codes = generate_audio_codes(getattr(self, self.lm_model, self.qwen3_06b), token_weight_pairs["lm_prompt"], token_weight_pairs["lm_prompt_negative"], min_tokens=lm_metadata["min_tokens"], max_tokens=lm_metadata["max_tokens"], seed=lm_metadata["seed"], cfg_scale=lm_metadata["cfg_scale"], temperature=lm_metadata["temperature"], top_p=lm_metadata["top_p"], top_k=lm_metadata["top_k"])
+            audio_codes = generate_audio_codes(getattr(self, self.lm_model, self.qwen3_06b), token_weight_pairs["lm_prompt"], token_weight_pairs["lm_prompt_negative"], min_tokens=lm_metadata["min_tokens"], max_tokens=lm_metadata["max_tokens"], seed=lm_metadata["seed"], cfg_scale=lm_metadata["cfg_scale"], temperature=lm_metadata["temperature"], top_p=lm_metadata["top_p"], top_k=lm_metadata["top_k"], min_p=lm_metadata["min_p"])
            out["audio_codes"] = [audio_codes]

        return base_out, None, out
--- a/comfy/text_encoders/llama.py
+++ b/comfy/text_encoders/llama.py
@@ -355,13 +355,6 @@ class RMSNorm(nn.Module):



-def rotate_half(x):
-    """Rotates half the hidden dims of the input."""
-    x1 = x[..., : x.shape[-1] // 2]
-    x2 = x[..., x.shape[-1] // 2 :]
-    return torch.cat((-x2, x1), dim=-1)
-
-
 def precompute_freqs_cis(head_dim, position_ids, theta, rope_scale=None, rope_dims=None, device=None):
    if not isinstance(theta, list):
        theta = [theta]
@@ -390,20 +383,30 @@ def precompute_freqs_cis(head_dim, position_ids, theta, rope_scale=None, rope_di
        else:
            cos = cos.unsqueeze(1)
            sin = sin.unsqueeze(1)
-        out.append((cos, sin))
+        sin_split = sin.shape[-1] // 2
+        out.append((cos, sin[..., : sin_split], -sin[..., sin_split :]))

    if len(out) == 1:
        return out[0]

    return out

-
 def apply_rope(xq, xk, freqs_cis):
    org_dtype = xq.dtype
    cos = freqs_cis[0]
    sin = freqs_cis[1]
-    q_embed = (xq * cos) + (rotate_half(xq) * sin)
-    k_embed = (xk * cos) + (rotate_half(xk) * sin)
+    nsin = freqs_cis[2]
+
+    q_embed = (xq * cos)
+    q_split = q_embed.shape[-1] // 2
+    q_embed[..., : q_split].addcmul_(xq[..., q_split :], nsin)
+    q_embed[..., q_split :].addcmul_(xq[..., : q_split], sin)
+
+    k_embed = (xk * cos)
+    k_split = k_embed.shape[-1] // 2
+    k_embed[..., : k_split].addcmul_(xk[..., k_split :], nsin)
+    k_embed[..., k_split :].addcmul_(xk[..., : k_split], sin)
+
    return q_embed.to(org_dtype), k_embed.to(org_dtype)


--- a/comfy/text_encoders/lt.py
+++ b/comfy/text_encoders/lt.py
@@ -25,7 +25,7 @@ def ltxv_te(*args, **kwargs):
 class Gemma3_12BTokenizer(sd1_clip.SDTokenizer):
    def __init__(self, embedding_directory=None, tokenizer_data={}):
        tokenizer = tokenizer_data.get("spiece_model", None)
-        super().__init__(tokenizer, pad_with_end=False, embedding_size=3840, embedding_key='gemma3_12b', tokenizer_class=SPieceTokenizer, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, disable_weights=True, tokenizer_args={"add_bos": True, "add_eos": False}, tokenizer_data=tokenizer_data)
+        super().__init__(tokenizer, pad_with_end=False, embedding_size=3840, embedding_key='gemma3_12b', tokenizer_class=SPieceTokenizer, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=512, pad_left=True, disable_weights=True, tokenizer_args={"add_bos": True, "add_eos": False}, tokenizer_data=tokenizer_data)

    def state_dict(self):
        return {"spiece_model": self.tokenizer.serialize_model()}
@@ -97,6 +97,7 @@ class LTXAVTEModel(torch.nn.Module):
        token_weight_pairs = token_weight_pairs["gemma3_12b"]

        out, pooled, extra = self.gemma3_12b.encode_token_weights(token_weight_pairs)
+        out = out[:, :, -torch.sum(extra["attention_mask"]).item():]
        out_device = out.device
        if comfy.model_management.should_use_bf16(self.execution_device):
            out = out.to(device=self.execution_device, dtype=torch.bfloat16)
@@ -138,6 +139,7 @@ class LTXAVTEModel(torch.nn.Module):

        token_weight_pairs = token_weight_pairs.get("gemma3_12b", [])
        num_tokens = sum(map(lambda a: len(a), token_weight_pairs))
+        num_tokens = max(num_tokens, 64)
        return num_tokens * constant * 1024 * 1024

 def ltxav_te(dtype_llama=None, llama_quantization_metadata=None):
--- a/comfy/utils.py
+++ b/comfy/utils.py
@@ -20,13 +20,14 @@
 import torch
 import math
 import struct
-import comfy.checkpoint_pickle
+import comfy.memory_management
 import safetensors.torch
 import numpy as np
 from PIL import Image
 import logging
 import itertools
 from torch.nn.functional import interpolate
+from tqdm.auto import trange
 from einops import rearrange
 from comfy.cli_args import args, enables_dynamic_vram
 import json
@@ -37,26 +38,26 @@ import warnings
 MMAP_TORCH_FILES = args.mmap_torch_files
 DISABLE_MMAP = args.disable_mmap

-ALWAYS_SAFE_LOAD = False
-if hasattr(torch.serialization, "add_safe_globals"):  # TODO: this was added in pytorch 2.4, the unsafe path should be removed once earlier versions are deprecated
+
+if True:  # ckpt/pt file whitelist for safe loading of old sd files
    class ModelCheckpoint:
        pass
    ModelCheckpoint.__module__ = "pytorch_lightning.callbacks.model_checkpoint"

    def scalar(*args, **kwargs):
-        from numpy.core.multiarray import scalar as sc
-        return sc(*args, **kwargs)
+        return None
    scalar.__module__ = "numpy.core.multiarray"

    from numpy import dtype
    from numpy.dtypes import Float64DType
-    from _codecs import encode
+
+    def encode(*args, **kwargs):  # no longer necessary on newer torch
+        return None
+    encode.__module__ = "_codecs"

    torch.serialization.add_safe_globals([ModelCheckpoint, scalar, dtype, Float64DType, encode])
-    ALWAYS_SAFE_LOAD = True
    logging.info("Checkpoint files will always be loaded safely.")
-else:
-    logging.warning("Warning, you are using an old pytorch version and some ckpt/pt files might be loaded unsafely. Upgrading to 2.4 or above is recommended as older versions of pytorch are no longer supported.")
+

 # Current as of safetensors 0.7.0
 _TYPES = {
@@ -139,11 +140,8 @@ def load_torch_file(ckpt, safe_load=False, device=None, return_metadata=False):
        if MMAP_TORCH_FILES:
            torch_args["mmap"] = True

-        if safe_load or ALWAYS_SAFE_LOAD:
-            pl_sd = torch.load(ckpt, map_location=device, weights_only=True, **torch_args)
-        else:
-            logging.warning("WARNING: loading {} unsafely, upgrade your pytorch to 2.4 or newer to load this file safely.".format(ckpt))
-            pl_sd = torch.load(ckpt, map_location=device, pickle_module=comfy.checkpoint_pickle)
+        pl_sd = torch.load(ckpt, map_location=device, weights_only=True, **torch_args)
+
        if "state_dict" in pl_sd:
            sd = pl_sd["state_dict"]
        else:
@@ -674,10 +672,10 @@ def flux_to_diffusers(mmdit_config, output_prefix=""):
                        "ff_context.linear_in.bias": "txt_mlp.0.bias",
                        "ff_context.linear_out.weight": "txt_mlp.2.weight",
                        "ff_context.linear_out.bias": "txt_mlp.2.bias",
-                        "attn.norm_q.weight": "img_attn.norm.query_norm.scale",
-                        "attn.norm_k.weight": "img_attn.norm.key_norm.scale",
-                        "attn.norm_added_q.weight": "txt_attn.norm.query_norm.scale",
-                        "attn.norm_added_k.weight": "txt_attn.norm.key_norm.scale",
+                        "attn.norm_q.weight": "img_attn.norm.query_norm.weight",
+                        "attn.norm_k.weight": "img_attn.norm.key_norm.weight",
+                        "attn.norm_added_q.weight": "txt_attn.norm.query_norm.weight",
+                        "attn.norm_added_k.weight": "txt_attn.norm.key_norm.weight",
                    }

        for k in block_map:
@@ -700,8 +698,8 @@ def flux_to_diffusers(mmdit_config, output_prefix=""):
                        "norm.linear.bias": "modulation.lin.bias",
                        "proj_out.weight": "linear2.weight",
                        "proj_out.bias": "linear2.bias",
-                        "attn.norm_q.weight": "norm.query_norm.scale",
-                        "attn.norm_k.weight": "norm.key_norm.scale",
+                        "attn.norm_q.weight": "norm.query_norm.weight",
+                        "attn.norm_k.weight": "norm.key_norm.weight",
                        "attn.to_qkv_mlp_proj.weight": "linear1.weight", # Flux 2
                        "attn.to_out.weight": "linear2.weight", # Flux 2
                    }
@@ -1155,6 +1153,32 @@ def tiled_scale_multidim(samples, function, tile=(64, 64), overlap=8, upscale_am
 def tiled_scale(samples, function, tile_x=64, tile_y=64, overlap = 8, upscale_amount = 4, out_channels = 3, output_device="cpu", pbar = None):
    return tiled_scale_multidim(samples, function, (tile_y, tile_x), overlap=overlap, upscale_amount=upscale_amount, out_channels=out_channels, output_device=output_device, pbar=pbar)

+def model_trange(*args, **kwargs):
+    if comfy.memory_management.aimdo_allocator is None:
+        return trange(*args, **kwargs)
+
+    pbar = trange(*args, **kwargs, smoothing=1.0)
+    pbar._i = 0
+    pbar.set_postfix_str("  Model Initializing ...  ")
+
+    _update = pbar.update
+
+    def warmup_update(n=1):
+        pbar._i += 1
+        if pbar._i == 1:
+            pbar.i1_time = time.time()
+            pbar.set_postfix_str(" Model Initialization complete!  ")
+        elif pbar._i == 2:
+            #bring forward the effective start time based on the diff between first and second iteration
+            #to attempt to remove load overhead from the final step rate estimate.
+            pbar.start_t = pbar.i1_time - (time.time() - pbar.i1_time)
+            pbar.set_postfix_str("")
+
+        _update(n)
+
+    pbar.update = warmup_update
+    return pbar
+
 PROGRESS_BAR_ENABLED = True
 def set_progress_bar_enabled(enabled):
    global PROGRESS_BAR_ENABLED
--- a/comfy/weight_adapter/bypass.py
+++ b/comfy/weight_adapter/bypass.py
@@ -21,6 +21,7 @@ from typing import Optional, Union
 import torch
 import torch.nn as nn

+import comfy.model_management
 from .base import WeightAdapterBase, WeightAdapterTrainBase
 from comfy.patcher_extension import PatcherInjection

@@ -181,18 +182,21 @@ class BypassForwardHook:
            )
            return  # Already injected

-        # Move adapter weights to module's device to avoid CPU-GPU transfer on every forward
-        device = None
+        # Move adapter weights to compute device (GPU)
+        # Use get_torch_device() instead of module.weight.device because
+        # with offloading, module weights may be on CPU while compute happens on GPU
+        device = comfy.model_management.get_torch_device()
+
+        # Get dtype from module weight if available
        dtype = None
        if hasattr(self.module, "weight") and self.module.weight is not None:
-            device = self.module.weight.device
            dtype = self.module.weight.dtype
-        elif hasattr(self.module, "W_q"):  # Quantized layers might use different attr
-            device = self.module.W_q.device
-            dtype = self.module.W_q.dtype

-        if device is not None:
-            self._move_adapter_weights_to_device(device, dtype)
+        # Only use dtype if it's a standard float type, not quantized
+        if dtype is not None and dtype not in (torch.float32, torch.float16, torch.bfloat16):
+            dtype = None
+
+        self._move_adapter_weights_to_device(device, dtype)

        self.original_forward = self.module.forward
        self.module.forward = self._bypass_forward
--- a/comfy_api/latest/_input/video_types.py
+++ b/comfy_api/latest/_input/video_types.py
@@ -34,6 +34,21 @@ class VideoInput(ABC):
        """
        pass

+    @abstractmethod
+    def as_trimmed(
+        self,
+        start_time: float | None = None,
+        duration: float | None = None,
+        strict_duration: bool = False,
+    ) -> VideoInput | None:
+        """
+        Create a new VideoInput which is trimmed to have the corresponding start_time and duration
+
+        Returns:
+            A new VideoInput, or None if the result would have negative duration
+        """
+        pass
+
    def get_stream_source(self) -> Union[str, io.BytesIO]:
        """
        Get a streamable source for the video. This allows processing without
--- a/comfy_api/latest/_input_impl/video_types.py
+++ b/comfy_api/latest/_input_impl/video_types.py
@@ -6,6 +6,7 @@ from typing import Optional
 from .._input import AudioInput, VideoInput
 import av
 import io
+import itertools
 import json
 import numpy as np
 import math
@@ -29,7 +30,6 @@ def container_to_output_format(container_format: str | None) -> str | None:
    formats = container_format.split(",")
    return formats[0]

-
 def get_open_write_kwargs(
    dest: str | io.BytesIO, container_format: str, to_format: str | None
 ) -> dict:
@@ -57,12 +57,14 @@ class VideoFromFile(VideoInput):
    Class representing video input from a file.
    """

-    def __init__(self, file: str | io.BytesIO):
+    def __init__(self, file: str | io.BytesIO, *, start_time: float=0, duration: float=0):
        """
        Initialize the VideoFromFile object based off of either a path on disk or a BytesIO object
        containing the file contents.
        """
        self.__file = file
+        self.__start_time = start_time
+        self.__duration = duration

    def get_stream_source(self) -> str | io.BytesIO:
        """
@@ -96,6 +98,16 @@ class VideoFromFile(VideoInput):
        Returns:
            Duration in seconds
        """
+        raw_duration = self._get_raw_duration()
+        if self.__start_time < 0:
+            duration_from_start = min(raw_duration, -self.__start_time)
+        else:
+            duration_from_start = raw_duration - self.__start_time
+        if self.__duration:
+            return min(self.__duration, duration_from_start)
+        return duration_from_start
+
+    def _get_raw_duration(self) -> float:
        if isinstance(self.__file, io.BytesIO):
            self.__file.seek(0)
        with av.open(self.__file, mode="r") as container:
@@ -113,9 +125,13 @@ class VideoFromFile(VideoInput):
            if video_stream and video_stream.average_rate:
                frame_count = 0
                container.seek(0)
-                for packet in container.demux(video_stream):
-                    for _ in packet.decode():
-                        frame_count += 1
+                frame_iterator = (
+                    container.decode(video_stream)
+                    if video_stream.codec.capabilities & 0x100
+                    else container.demux(video_stream)
+                )
+                for packet in frame_iterator:
+                    frame_count += 1
                if frame_count > 0:
                    return float(frame_count / video_stream.average_rate)

@@ -131,36 +147,54 @@ class VideoFromFile(VideoInput):

        with av.open(self.__file, mode="r") as container:
            video_stream = self._get_first_video_stream(container)
-            # 1. Prefer the frames field if available
-            if video_stream.frames and video_stream.frames > 0:
+            # 1. Prefer the frames field if available and usable
+            if (
+                video_stream.frames
+                and video_stream.frames > 0
+                and not self.__start_time
+                and not self.__duration
+            ):
                return int(video_stream.frames)

            # 2. Try to estimate from duration and average_rate using only metadata
-            if container.duration is not None and video_stream.average_rate:
-                duration_seconds = float(container.duration / av.time_base)
-                estimated_frames = int(round(duration_seconds * float(video_stream.average_rate)))
-                if estimated_frames > 0:
-                    return estimated_frames
-
            if (
                getattr(video_stream, "duration", None) is not None
                and getattr(video_stream, "time_base", None) is not None
                and video_stream.average_rate
            ):
-                duration_seconds = float(video_stream.duration * video_stream.time_base)
+                raw_duration = float(video_stream.duration * video_stream.time_base)
+                if self.__start_time < 0:
+                    duration_from_start = min(raw_duration, -self.__start_time)
+                else:
+                    duration_from_start = raw_duration - self.__start_time
+                duration_seconds = min(self.__duration, duration_from_start) if self.__duration else duration_from_start
                estimated_frames = int(round(duration_seconds * float(video_stream.average_rate)))
                if estimated_frames > 0:
                    return estimated_frames

            # 3. Last resort: decode frames and count them (streaming)
-            frame_count = 0
-            container.seek(0)
-            for packet in container.demux(video_stream):
-                for _ in packet.decode():
-                    frame_count += 1
-
-            if frame_count == 0:
-                raise ValueError(f"Could not determine frame count for file '{self.__file}'")
+            if self.__start_time < 0:
+                start_time = max(self._get_raw_duration() + self.__start_time, 0)
+            else:
+                start_time = self.__start_time
+            frame_count = 1
+            start_pts = int(start_time / video_stream.time_base)
+            end_pts = int((start_time + self.__duration) / video_stream.time_base)
+            container.seek(start_pts, stream=video_stream)
+            frame_iterator = (
+                container.decode(video_stream)
+                if video_stream.codec.capabilities & 0x100
+                else container.demux(video_stream)
+            )
+            for frame in frame_iterator:
+                if frame.pts >= start_pts:
+                    break
+            else:
+                raise ValueError(f"Could not determine frame count for file '{self.__file}'\nNo frames exist for start_time {self.__start_time}")
+            for frame in frame_iterator:
+                if self.__duration and frame.pts >= end_pts:
+                    break
+                frame_count += 1
            return frame_count

    def get_frame_rate(self) -> Fraction:
@@ -199,9 +233,21 @@ class VideoFromFile(VideoInput):
            return container.format.name

    def get_components_internal(self, container: InputContainer) -> VideoComponents:
+        video_stream = self._get_first_video_stream(container)
+        if self.__start_time < 0:
+            start_time = max(self._get_raw_duration() + self.__start_time, 0)
+        else:
+            start_time = self.__start_time
        # Get video frames
        frames = []
-        for frame in container.decode(video=0):
+        start_pts = int(start_time / video_stream.time_base)
+        end_pts = int((start_time + self.__duration) / video_stream.time_base)
+        container.seek(start_pts, stream=video_stream)
+        for frame in container.decode(video_stream):
+            if frame.pts < start_pts:
+                continue
+            if self.__duration and frame.pts >= end_pts:
+                break
            img = frame.to_ndarray(format='rgb24')  # shape: (H, W, 3)
            img = torch.from_numpy(img) / 255.0  # shape: (H, W, 3)
            frames.append(img)
@@ -209,31 +255,44 @@ class VideoFromFile(VideoInput):
        images = torch.stack(frames) if len(frames) > 0 else torch.zeros(0, 3, 0, 0)

        # Get frame rate
-        video_stream = next(s for s in container.streams if s.type == 'video')
-        frame_rate = Fraction(video_stream.average_rate) if video_stream and video_stream.average_rate else Fraction(1)
+        frame_rate = Fraction(video_stream.average_rate) if video_stream.average_rate else Fraction(1)

        # Get audio if available
        audio = None
-        try:
-            container.seek(0)  # Reset the container to the beginning
-            for stream in container.streams:
-                if stream.type != 'audio':
-                    continue
-                assert isinstance(stream, av.AudioStream)
-                audio_frames = []
-                for packet in container.demux(stream):
-                    for frame in packet.decode():
-                        assert isinstance(frame, av.AudioFrame)
-                        audio_frames.append(frame.to_ndarray())  # shape: (channels, samples)
-                if len(audio_frames) > 0:
-                    audio_data = np.concatenate(audio_frames, axis=1)  # shape: (channels, total_samples)
-                    audio_tensor = torch.from_numpy(audio_data).unsqueeze(0)  # shape: (1, channels, total_samples)
-                    audio = AudioInput({
-                        "waveform": audio_tensor,
-                        "sample_rate": int(stream.sample_rate) if stream.sample_rate else 1,
-                    })
-        except StopIteration:
-            pass  # No audio stream
+        container.seek(start_pts, stream=video_stream)
+        # Use last stream for consistency
+        if len(container.streams.audio):
+            audio_stream = container.streams.audio[-1]
+            audio_frames = []
+            resample = av.audio.resampler.AudioResampler(format='fltp').resample
+            frames = itertools.chain.from_iterable(
+                map(resample, container.decode(audio_stream))
+            )
+
+            has_first_frame = False
+            for frame in frames:
+                offset_seconds = start_time - frame.pts * audio_stream.time_base
+                to_skip = max(0, int(offset_seconds * audio_stream.sample_rate))
+                if to_skip < frame.samples:
+                    has_first_frame = True
+                    break
+            if has_first_frame:
+                audio_frames.append(frame.to_ndarray()[..., to_skip:])
+
+            for frame in frames:
+                if self.__duration and frame.time > start_time + self.__duration:
+                    break
+                audio_frames.append(frame.to_ndarray())  # shape: (channels, samples)
+            if len(audio_frames) > 0:
+                audio_data = np.concatenate(audio_frames, axis=1)  # shape: (channels, total_samples)
+                if self.__duration:
+                    audio_data = audio_data[..., :int(self.__duration * audio_stream.sample_rate)]
+
+                audio_tensor = torch.from_numpy(audio_data).unsqueeze(0)  # shape: (1, channels, total_samples)
+                audio = AudioInput({
+                    "waveform": audio_tensor,
+                    "sample_rate": int(audio_stream.sample_rate) if audio_stream.sample_rate else 1,
+                })

        metadata = container.metadata
        return VideoComponents(images=images, audio=audio, frame_rate=frame_rate, metadata=metadata)
@@ -250,7 +309,7 @@ class VideoFromFile(VideoInput):
        path: str | io.BytesIO,
        format: VideoContainer = VideoContainer.AUTO,
        codec: VideoCodec = VideoCodec.AUTO,
-        metadata: Optional[dict] = None
+        metadata: Optional[dict] = None,
    ):
        if isinstance(self.__file, io.BytesIO):
            self.__file.seek(0)  # Reset the BytesIO object to the beginning
@@ -262,15 +321,14 @@ class VideoFromFile(VideoInput):
                reuse_streams = False
            if codec != VideoCodec.AUTO and codec != video_encoding and video_encoding is not None:
                reuse_streams = False
+            if self.__start_time or self.__duration:
+                reuse_streams = False

            if not reuse_streams:
                components = self.get_components_internal(container)
                video = VideoFromComponents(components)
                return video.save_to(
-                    path,
-                    format=format,
-                    codec=codec,
-                    metadata=metadata
+                    path, format=format, codec=codec, metadata=metadata
                )

            streams = container.streams
@@ -304,10 +362,21 @@ class VideoFromFile(VideoInput):
                        output_container.mux(packet)

    def _get_first_video_stream(self, container: InputContainer):
-        video_stream = next((s for s in container.streams if s.type == "video"), None)
-        if video_stream is None:
-            raise ValueError(f"No video stream found in file '{self.__file}'")
-        return video_stream
+        if len(container.streams.video):
+            return container.streams.video[0]
+        raise ValueError(f"No video stream found in file '{self.__file}'")
+
+    def as_trimmed(
+        self, start_time: float = 0, duration: float = 0, strict_duration: bool = True
+    ) -> VideoInput | None:
+        # The requested start is added to this video's own start offset before re-opening the source.
+        absolute_start = start_time + self.__start_time
+        clip = VideoFromFile(self.get_stream_source(), start_time=absolute_start, duration=duration)
+        # In strict mode a clip that comes out shorter than requested is rejected instead of returned.
+        too_short = clip.get_duration() < duration
+        if strict_duration and too_short:
+            return None
+        return clip


 class VideoFromComponents(VideoInput):
@@ -322,7 +391,7 @@ class VideoFromComponents(VideoInput):
        return VideoComponents(
            images=self.__components.images,
            audio=self.__components.audio,
-            frame_rate=self.__components.frame_rate
+            frame_rate=self.__components.frame_rate,
        )

    def save_to(
@@ -330,7 +399,7 @@ class VideoFromComponents(VideoInput):
        path: str,
        format: VideoContainer = VideoContainer.AUTO,
        codec: VideoCodec = VideoCodec.AUTO,
-        metadata: Optional[dict] = None
+        metadata: Optional[dict] = None,
    ):
        if format != VideoContainer.AUTO and format != VideoContainer.MP4:
            raise ValueError("Only MP4 format is supported for now")
@@ -357,7 +426,10 @@ class VideoFromComponents(VideoInput):
            audio_stream: Optional[av.AudioStream] = None
            if self.__components.audio:
                audio_sample_rate = int(self.__components.audio['sample_rate'])
-                audio_stream = output.add_stream('aac', rate=audio_sample_rate)
+                waveform = self.__components.audio['waveform']
+                waveform = waveform[0, :, :math.ceil((audio_sample_rate / frame_rate) * self.__components.images.shape[0])]
+                layout = {1: 'mono', 2: 'stereo', 6: '5.1'}.get(waveform.shape[0], 'stereo')
+                audio_stream = output.add_stream('aac', rate=audio_sample_rate, layout=layout)

            # Encode video
            for i, frame in enumerate(self.__components.images):
@@ -372,12 +444,21 @@ class VideoFromComponents(VideoInput):
            output.mux(packet)

            if audio_stream and self.__components.audio:
-                waveform = self.__components.audio['waveform']
-                waveform = waveform[:, :, :math.ceil((audio_sample_rate / frame_rate) * self.__components.images.shape[0])]
-                frame = av.AudioFrame.from_ndarray(waveform.movedim(2, 1).reshape(1, -1).float().cpu().numpy(), format='flt', layout='mono' if waveform.shape[1] == 1 else 'stereo')
+                frame = av.AudioFrame.from_ndarray(waveform.float().cpu().numpy(), format='fltp', layout=layout)
                frame.sample_rate = audio_sample_rate
                frame.pts = 0
                output.mux(audio_stream.encode(frame))

                # Flush encoder
                output.mux(audio_stream.encode(None))
+
+    def as_trimmed(
+        self,
+        start_time: float | None = None,
+        duration: float | None = None,
+        strict_duration: bool = True,
+    ) -> VideoInput | None:
+        start_time, duration = start_time or 0, duration or 0  # None (the default) is treated as 0 so the sum below cannot raise TypeError
+        if strict_duration and self.get_duration() < start_time + duration:
+            return None  # strict mode: the requested window runs past the end of this video
+        return VideoFromFile(self.get_stream_source(), start_time=start_time, duration=duration)  # TODO: consider tracking duration and trimming at time of save
--- a/comfy_api/latest/_io.py
+++ b/comfy_api/latest/_io.py
@@ -75,12 +75,6 @@ class NumberDisplay(str, Enum):
    slider = "slider"


-class ControlAfterGenerate(str, Enum):
-    fixed = "fixed"
-    increment = "increment"
-    decrement = "decrement"
-    randomize = "randomize"
-
 class _ComfyType(ABC):
    Type = Any
    io_type: str = None
@@ -269,7 +263,7 @@ class Int(ComfyTypeIO):
    class Input(WidgetInput):
        '''Integer input.'''
        def __init__(self, id: str, display_name: str=None, optional=False, tooltip: str=None, lazy: bool=None,
-                    default: int=None, min: int=None, max: int=None, step: int=None, control_after_generate: bool | ControlAfterGenerate=None,
+                    default: int=None, min: int=None, max: int=None, step: int=None, control_after_generate: bool=None,
                    display_mode: NumberDisplay=None, socketless: bool=None, force_input: bool=None, extra_dict=None, raw_link: bool=None, advanced: bool=None):
            super().__init__(id, display_name, optional, tooltip, lazy, default, socketless, None, force_input, extra_dict, raw_link, advanced)
            self.min = min
@@ -351,7 +345,7 @@ class Combo(ComfyTypeIO):
            tooltip: str=None,
            lazy: bool=None,
            default: str | int | Enum = None,
-            control_after_generate: bool | ControlAfterGenerate=None,
+            control_after_generate: bool=None,
            upload: UploadType=None,
            image_folder: FolderType=None,
            remote: RemoteOptions=None,
@@ -395,7 +389,7 @@ class MultiCombo(ComfyTypeI):
    Type = list[str]
    class Input(Combo.Input):
        def __init__(self, id: str, options: list[str], display_name: str=None, optional=False, tooltip: str=None, lazy: bool=None,
-                    default: list[str]=None, placeholder: str=None, chip: bool=None, control_after_generate: bool | ControlAfterGenerate=None,
+                    default: list[str]=None, placeholder: str=None, chip: bool=None, control_after_generate: bool=None,
                    socketless: bool=None, extra_dict=None, raw_link: bool=None, advanced: bool=None):
            super().__init__(id, options, display_name, optional, tooltip, lazy, default, control_after_generate, socketless=socketless, extra_dict=extra_dict, raw_link=raw_link, advanced=advanced)
            self.multiselect = True
@@ -2041,7 +2035,6 @@ __all__ = [
    "UploadType",
    "RemoteOptions",
    "NumberDisplay",
-    "ControlAfterGenerate",

    "comfytype",
    "Custom",
--- a/comfy_api_nodes/nodes_magnific.py
+++ b/comfy_api_nodes/nodes_magnific.py
@@ -30,6 +30,30 @@ from comfy_api_nodes.util import (
    validate_image_dimensions,
 )

+_EUR_TO_USD = 1.19
+
+
+def _tier_price_eur(megapixels: float) -> float:
+    """Return the EUR price of one Magnific upscaling step for an input of `megapixels`."""
+    # (inclusive upper bound in megapixels, price in EUR), checked in ascending order.
+    tiers = ((1.3, 0.143), (3.0, 0.286), (6.4, 0.429))
+    for upper_bound, price in tiers:
+        if megapixels <= upper_bound:
+            return price
+    # Anything larger than the last tier bound gets the top price.
+    return 1.716
+
+
+def _calculate_magnific_upscale_price_usd(width: int, height: int, scale: int) -> float:
+    """Return the total USD price of upscaling a width x height input by `scale`, priced per 2x step."""
+    base_pixels = width * height
+    price_eur = 0.0
+    # int(log2(scale)) successive steps; each one is priced on 4x the pixels of the previous step.
+    for step in range(int(math.log2(scale))):
+        step_megapixels = base_pixels * 4 ** step / 1_000_000
+        price_eur += _tier_price_eur(step_megapixels)
+    return round(price_eur * _EUR_TO_USD, 2)
+

 class MagnificImageUpscalerCreativeNode(IO.ComfyNode):
    @classmethod
@@ -103,11 +127,20 @@ class MagnificImageUpscalerCreativeNode(IO.ComfyNode):
            ],
            is_api_node=True,
            price_badge=IO.PriceBadge(
-                depends_on=IO.PriceBadgeDepends(widgets=["scale_factor"]),
+                depends_on=IO.PriceBadgeDepends(widgets=["scale_factor", "auto_downscale"]),
                expr="""
                (
-                  $max := widgets.scale_factor = "2x" ? 1.326 : 1.657;
-                  {"type": "range_usd", "min_usd": 0.11, "max_usd": $max}
+                  $ad := widgets.auto_downscale;
+                  $mins := $ad
+                    ? {"2x": 0.172, "4x": 0.343, "8x": 0.515, "16x": 0.515}
+                    : {"2x": 0.172, "4x": 0.343, "8x": 0.515, "16x": 0.844};
+                  $maxs := {"2x": 0.515, "4x": 0.844, "8x": 1.015, "16x": 1.187};
+                  {
+                    "type": "range_usd",
+                    "min_usd": $lookup($mins, widgets.scale_factor),
+                    "max_usd": $lookup($maxs, widgets.scale_factor),
+                    "format": { "approximate": true }
+                  }
                )
                """,
            ),
@@ -168,6 +201,10 @@ class MagnificImageUpscalerCreativeNode(IO.ComfyNode):
                    f"Use a smaller input image or lower scale factor."
                )

+        final_height, final_width = get_image_dimensions(image)
+        actual_scale = int(scale_factor.rstrip("x"))
+        price_usd = _calculate_magnific_upscale_price_usd(final_width, final_height, actual_scale)
+
        initial_res = await sync_op(
            cls,
            ApiEndpoint(path="/proxy/freepik/v1/ai/image-upscaler", method="POST"),
@@ -189,6 +226,7 @@ class MagnificImageUpscalerCreativeNode(IO.ComfyNode):
            ApiEndpoint(path=f"/proxy/freepik/v1/ai/image-upscaler/{initial_res.task_id}"),
            response_model=TaskResponse,
            status_extractor=lambda x: x.status,
+            price_extractor=lambda _: price_usd,
            poll_interval=10.0,
            max_poll_attempts=480,
        )
@@ -257,8 +295,14 @@ class MagnificImageUpscalerPreciseV2Node(IO.ComfyNode):
                depends_on=IO.PriceBadgeDepends(widgets=["scale_factor"]),
                expr="""
                (
-                  $max := widgets.scale_factor = "2x" ? 1.326 : 1.657;
-                  {"type": "range_usd", "min_usd": 0.11, "max_usd": $max}
+                  $mins := {"2x": 0.172, "4x": 0.343, "8x": 0.515, "16x": 0.844};
+                  $maxs := {"2x": 2.045, "4x": 2.545, "8x": 2.889, "16x": 3.06};
+                  {
+                    "type": "range_usd",
+                    "min_usd": $lookup($mins, widgets.scale_factor),
+                    "max_usd": $lookup($maxs, widgets.scale_factor),
+                    "format": { "approximate": true }
+                  }
                )
                """,
            ),
@@ -321,6 +365,9 @@ class MagnificImageUpscalerPreciseV2Node(IO.ComfyNode):
                    f"Use a smaller input image or lower scale factor."
                )

+        final_height, final_width = get_image_dimensions(image)
+        price_usd = _calculate_magnific_upscale_price_usd(final_width, final_height, requested_scale)
+
        initial_res = await sync_op(
            cls,
            ApiEndpoint(path="/proxy/freepik/v1/ai/image-upscaler-precision-v2", method="POST"),
@@ -339,6 +386,7 @@ class MagnificImageUpscalerPreciseV2Node(IO.ComfyNode):
            ApiEndpoint(path=f"/proxy/freepik/v1/ai/image-upscaler-precision-v2/{initial_res.task_id}"),
            response_model=TaskResponse,
            status_extractor=lambda x: x.status,
+            price_extractor=lambda _: price_usd,
            poll_interval=10.0,
            max_poll_attempts=480,
        )
@@ -877,8 +925,8 @@ class MagnificExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
        return [
-            # MagnificImageUpscalerCreativeNode,
-            # MagnificImageUpscalerPreciseV2Node,
+            MagnificImageUpscalerCreativeNode,
+            MagnificImageUpscalerPreciseV2Node,
            MagnificImageStyleTransferNode,
            MagnificImageRelightNode,
            MagnificImageSkinEnhancerNode,
--- a/comfy_api_nodes/util/client.py
+++ b/comfy_api_nodes/util/client.py
@@ -57,6 +57,7 @@ class _RequestConfig:
    files: dict[str, Any] | list[tuple[str, Any]] | None
    multipart_parser: Callable | None
    max_retries: int
+    max_retries_on_rate_limit: int
    retry_delay: float
    retry_backoff: float
    wait_label: str = "Waiting"
@@ -65,6 +66,7 @@ class _RequestConfig:
    final_label_on_success: str | None = "Completed"
    progress_origin_ts: float | None = None
    price_extractor: Callable[[dict[str, Any]], float | None] | None = None
+    is_rate_limited: Callable[[int, Any], bool] | None = None


@dataclass
@@ -78,7 +80,7 @@ class _PollUIState:
    active_since: float | None = None  # start time of current active interval (None if queued)


-_RETRY_STATUS = {408, 429, 500, 502, 503, 504}
+_RETRY_STATUS = {408, 500, 502, 503, 504}  # status 429 is handled separately
 COMPLETED_STATUSES = ["succeeded", "succeed", "success", "completed", "finished", "done", "complete"]
 FAILED_STATUSES = ["cancelled", "canceled", "canceling", "fail", "failed", "error"]
 QUEUED_STATUSES = ["created", "queued", "queueing", "submitted", "initializing"]
@@ -103,6 +105,8 @@ async def sync_op(
    final_label_on_success: str | None = "Completed",
    progress_origin_ts: float | None = None,
    monitor_progress: bool = True,
+    max_retries_on_rate_limit: int = 16,
+    is_rate_limited: Callable[[int, Any], bool] | None = None,
 ) -> M:
    raw = await sync_op_raw(
        cls,
@@ -122,6 +126,8 @@ async def sync_op(
        final_label_on_success=final_label_on_success,
        progress_origin_ts=progress_origin_ts,
        monitor_progress=monitor_progress,
+        max_retries_on_rate_limit=max_retries_on_rate_limit,
+        is_rate_limited=is_rate_limited,
    )
    if not isinstance(raw, dict):
        raise Exception("Expected JSON response to validate into a Pydantic model, got non-JSON (binary or text).")
@@ -143,9 +149,9 @@ async def poll_op(
    poll_interval: float = 5.0,
    max_poll_attempts: int = 160,
    timeout_per_poll: float = 120.0,
-    max_retries_per_poll: int = 3,
+    max_retries_per_poll: int = 10,
    retry_delay_per_poll: float = 1.0,
-    retry_backoff_per_poll: float = 2.0,
+    retry_backoff_per_poll: float = 1.4,
    estimated_duration: int | None = None,
    cancel_endpoint: ApiEndpoint | None = None,
    cancel_timeout: float = 10.0,
@@ -194,6 +200,8 @@ async def sync_op_raw(
    final_label_on_success: str | None = "Completed",
    progress_origin_ts: float | None = None,
    monitor_progress: bool = True,
+    max_retries_on_rate_limit: int = 16,
+    is_rate_limited: Callable[[int, Any], bool] | None = None,
 ) -> dict[str, Any] | bytes:
    """
    Make a single network request.
@@ -222,6 +230,8 @@ async def sync_op_raw(
        final_label_on_success=final_label_on_success,
        progress_origin_ts=progress_origin_ts,
        price_extractor=price_extractor,
+        max_retries_on_rate_limit=max_retries_on_rate_limit,
+        is_rate_limited=is_rate_limited,
    )
    return await _request_base(cfg, expect_binary=as_binary)

@@ -240,9 +250,9 @@ async def poll_op_raw(
    poll_interval: float = 5.0,
    max_poll_attempts: int = 160,
    timeout_per_poll: float = 120.0,
-    max_retries_per_poll: int = 3,
+    max_retries_per_poll: int = 10,
    retry_delay_per_poll: float = 1.0,
-    retry_backoff_per_poll: float = 2.0,
+    retry_backoff_per_poll: float = 1.4,
    estimated_duration: int | None = None,
    cancel_endpoint: ApiEndpoint | None = None,
    cancel_timeout: float = 10.0,
@@ -506,7 +516,7 @@ def _friendly_http_message(status: int, body: Any) -> str:
    if status == 409:
        return "There is a problem with your account. Please contact support@comfy.org."
    if status == 429:
-        return "Rate Limit Exceeded: Please try again later."
+        return "Rate Limit Exceeded: The server returned 429 after all retry attempts. Please wait and try again."
    try:
        if isinstance(body, dict):
            err = body.get("error")
@@ -586,6 +596,8 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool):
    start_time = cfg.progress_origin_ts if cfg.progress_origin_ts is not None else time.monotonic()
    attempt = 0
    delay = cfg.retry_delay
+    rate_limit_attempts = 0
+    rate_limit_delay = cfg.retry_delay
    operation_succeeded: bool = False
    final_elapsed_seconds: int | None = None
    extracted_price: float | None = None
@@ -653,17 +665,14 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool):
                payload_headers["Content-Type"] = "application/json"
                payload_kw["json"] = cfg.data or {}

-            try:
-                request_logger.log_request_response(
-                    operation_id=operation_id,
-                    request_method=method,
-                    request_url=url,
-                    request_headers=dict(payload_headers) if payload_headers else None,
-                    request_params=dict(params) if params else None,
-                    request_data=request_body_log,
-                )
-            except Exception as _log_e:
-                logging.debug("[DEBUG] request logging failed: %s", _log_e)
+            request_logger.log_request_response(
+                operation_id=operation_id,
+                request_method=method,
+                request_url=url,
+                request_headers=dict(payload_headers) if payload_headers else None,
+                request_params=dict(params) if params else None,
+                request_data=request_body_log,
+            )

            req_coro = sess.request(method, url, params=params, **payload_kw)
            req_task = asyncio.create_task(req_coro)
@@ -688,41 +697,33 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool):
                        body = await resp.json()
                    except (ContentTypeError, json.JSONDecodeError):
                        body = await resp.text()
-                    if resp.status in _RETRY_STATUS and attempt <= cfg.max_retries:
+                    should_retry = False
+                    wait_time = 0.0
+                    retry_label = ""
+                    is_rl = resp.status == 429 or (
+                        cfg.is_rate_limited is not None and cfg.is_rate_limited(resp.status, body)
+                    )
+                    if is_rl and rate_limit_attempts < cfg.max_retries_on_rate_limit:
+                        rate_limit_attempts += 1
+                        wait_time = min(rate_limit_delay, 30.0)
+                        rate_limit_delay *= cfg.retry_backoff
+                        retry_label = f"rate-limit retry {rate_limit_attempts} of {cfg.max_retries_on_rate_limit}"
+                        should_retry = True
+                    elif resp.status in _RETRY_STATUS and (attempt - rate_limit_attempts) <= cfg.max_retries:
+                        wait_time = delay
+                        delay *= cfg.retry_backoff
+                        retry_label = f"retry {attempt - rate_limit_attempts} of {cfg.max_retries}"
+                        should_retry = True
+
+                    if should_retry:
                        logging.warning(
-                            "HTTP %s %s -> %s. Retrying in %.2fs (retry %d of %d).",
+                            "HTTP %s %s -> %s. Waiting %.2fs (%s).",
                            method,
                            url,
                            resp.status,
-                            delay,
-                            attempt,
-                            cfg.max_retries,
+                            wait_time,
+                            retry_label,
                        )
-                        try:
-                            request_logger.log_request_response(
-                                operation_id=operation_id,
-                                request_method=method,
-                                request_url=url,
-                                response_status_code=resp.status,
-                                response_headers=dict(resp.headers),
-                                response_content=body,
-                                error_message=_friendly_http_message(resp.status, body),
-                            )
-                        except Exception as _log_e:
-                            logging.debug("[DEBUG] response logging failed: %s", _log_e)
-
-                        await sleep_with_interrupt(
-                            delay,
-                            cfg.node_cls,
-                            cfg.wait_label if cfg.monitor_progress else None,
-                            start_time if cfg.monitor_progress else None,
-                            cfg.estimated_total,
-                            display_callback=_display_time_progress if cfg.monitor_progress else None,
-                        )
-                        delay *= cfg.retry_backoff
-                        continue
-                    msg = _friendly_http_message(resp.status, body)
-                    try:
                        request_logger.log_request_response(
                            operation_id=operation_id,
                            request_method=method,
@@ -730,10 +731,27 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool):
                            response_status_code=resp.status,
                            response_headers=dict(resp.headers),
                            response_content=body,
-                            error_message=msg,
+                            error_message=f"HTTP {resp.status} ({retry_label}, will retry in {wait_time:.1f}s)",
                        )
-                    except Exception as _log_e:
-                        logging.debug("[DEBUG] response logging failed: %s", _log_e)
+                        await sleep_with_interrupt(
+                            wait_time,
+                            cfg.node_cls,
+                            cfg.wait_label if cfg.monitor_progress else None,
+                            start_time if cfg.monitor_progress else None,
+                            cfg.estimated_total,
+                            display_callback=_display_time_progress if cfg.monitor_progress else None,
+                        )
+                        continue
+                    msg = _friendly_http_message(resp.status, body)
+                    request_logger.log_request_response(
+                        operation_id=operation_id,
+                        request_method=method,
+                        request_url=url,
+                        response_status_code=resp.status,
+                        response_headers=dict(resp.headers),
+                        response_content=body,
+                        error_message=msg,
+                    )
                    raise Exception(msg)

                if expect_binary:
@@ -753,17 +771,14 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool):
                    bytes_payload = bytes(buff)
                    operation_succeeded = True
                    final_elapsed_seconds = int(time.monotonic() - start_time)
-                    try:
-                        request_logger.log_request_response(
-                            operation_id=operation_id,
-                            request_method=method,
-                            request_url=url,
-                            response_status_code=resp.status,
-                            response_headers=dict(resp.headers),
-                            response_content=bytes_payload,
-                        )
-                    except Exception as _log_e:
-                        logging.debug("[DEBUG] response logging failed: %s", _log_e)
+                    request_logger.log_request_response(
+                        operation_id=operation_id,
+                        request_method=method,
+                        request_url=url,
+                        response_status_code=resp.status,
+                        response_headers=dict(resp.headers),
+                        response_content=bytes_payload,
+                    )
                    return bytes_payload
                else:
                    try:
@@ -780,45 +795,39 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool):
                        extracted_price = cfg.price_extractor(payload) if cfg.price_extractor else None
                    operation_succeeded = True
                    final_elapsed_seconds = int(time.monotonic() - start_time)
-                    try:
-                        request_logger.log_request_response(
-                            operation_id=operation_id,
-                            request_method=method,
-                            request_url=url,
-                            response_status_code=resp.status,
-                            response_headers=dict(resp.headers),
-                            response_content=response_content_to_log,
-                        )
-                    except Exception as _log_e:
-                        logging.debug("[DEBUG] response logging failed: %s", _log_e)
+                    request_logger.log_request_response(
+                        operation_id=operation_id,
+                        request_method=method,
+                        request_url=url,
+                        response_status_code=resp.status,
+                        response_headers=dict(resp.headers),
+                        response_content=response_content_to_log,
+                    )
                    return payload

        except ProcessingInterrupted:
            logging.debug("Polling was interrupted by user")
            raise
        except (ClientError, OSError) as e:
-            if attempt <= cfg.max_retries:
+            if (attempt - rate_limit_attempts) <= cfg.max_retries:
                logging.warning(
                    "Connection error calling %s %s. Retrying in %.2fs (%d/%d): %s",
                    method,
                    url,
                    delay,
-                    attempt,
+                    attempt - rate_limit_attempts,
                    cfg.max_retries,
                    str(e),
                )
-                try:
-                    request_logger.log_request_response(
-                        operation_id=operation_id,
-                        request_method=method,
-                        request_url=url,
-                        request_headers=dict(payload_headers) if payload_headers else None,
-                        request_params=dict(params) if params else None,
-                        request_data=request_body_log,
-                        error_message=f"{type(e).__name__}: {str(e)} (will retry)",
-                    )
-                except Exception as _log_e:
-                    logging.debug("[DEBUG] request error logging failed: %s", _log_e)
+                request_logger.log_request_response(
+                    operation_id=operation_id,
+                    request_method=method,
+                    request_url=url,
+                    request_headers=dict(payload_headers) if payload_headers else None,
+                    request_params=dict(params) if params else None,
+                    request_data=request_body_log,
+                    error_message=f"{type(e).__name__}: {str(e)} (will retry)",
+                )
                await sleep_with_interrupt(
                    delay,
                    cfg.node_cls,
@@ -831,23 +840,6 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool):
                continue
            diag = await _diagnose_connectivity()
            if not diag["internet_accessible"]:
-                try:
-                    request_logger.log_request_response(
-                        operation_id=operation_id,
-                        request_method=method,
-                        request_url=url,
-                        request_headers=dict(payload_headers) if payload_headers else None,
-                        request_params=dict(params) if params else None,
-                        request_data=request_body_log,
-                        error_message=f"LocalNetworkError: {str(e)}",
-                    )
-                except Exception as _log_e:
-                    logging.debug("[DEBUG] final error logging failed: %s", _log_e)
-                raise LocalNetworkError(
-                    "Unable to connect to the API server due to local network issues. "
-                    "Please check your internet connection and try again."
-                ) from e
-            try:
                request_logger.log_request_response(
                    operation_id=operation_id,
                    request_method=method,
@@ -855,10 +847,21 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool):
                    request_headers=dict(payload_headers) if payload_headers else None,
                    request_params=dict(params) if params else None,
                    request_data=request_body_log,
-                    error_message=f"ApiServerError: {str(e)}",
+                    error_message=f"LocalNetworkError: {str(e)}",
                )
-            except Exception as _log_e:
-                logging.debug("[DEBUG] final error logging failed: %s", _log_e)
+                raise LocalNetworkError(
+                    "Unable to connect to the API server due to local network issues. "
+                    "Please check your internet connection and try again."
+                ) from e
+            request_logger.log_request_response(
+                operation_id=operation_id,
+                request_method=method,
+                request_url=url,
+                request_headers=dict(payload_headers) if payload_headers else None,
+                request_params=dict(params) if params else None,
+                request_data=request_body_log,
+                error_message=f"ApiServerError: {str(e)}",
+            )
            raise ApiServerError(
                f"The API server at {default_base_url()} is currently unreachable. "
                f"The service may be experiencing issues."
--- a/comfy_api_nodes/util/download_helpers.py
+++ b/comfy_api_nodes/util/download_helpers.py
@@ -167,27 +167,25 @@ async def download_url_to_bytesio(
                    with contextlib.suppress(Exception):
                        dest.seek(0)

-                with contextlib.suppress(Exception):
-                    request_logger.log_request_response(
-                        operation_id=op_id,
-                        request_method="GET",
-                        request_url=url,
-                        response_status_code=resp.status,
-                        response_headers=dict(resp.headers),
-                        response_content=f"[streamed {written} bytes to dest]",
-                    )
+                request_logger.log_request_response(
+                    operation_id=op_id,
+                    request_method="GET",
+                    request_url=url,
+                    response_status_code=resp.status,
+                    response_headers=dict(resp.headers),
+                    response_content=f"[streamed {written} bytes to dest]",
+                )
                return
        except asyncio.CancelledError:
            raise ProcessingInterrupted("Task cancelled") from None
        except (ClientError, OSError) as e:
            if attempt <= max_retries:
-                with contextlib.suppress(Exception):
-                    request_logger.log_request_response(
-                        operation_id=op_id,
-                        request_method="GET",
-                        request_url=url,
-                        error_message=f"{type(e).__name__}: {str(e)} (will retry)",
-                    )
+                request_logger.log_request_response(
+                    operation_id=op_id,
+                    request_method="GET",
+                    request_url=url,
+                    error_message=f"{type(e).__name__}: {str(e)} (will retry)",
+                )
                await sleep_with_interrupt(delay, cls, None, None, None)
                delay *= retry_backoff
                continue
--- a/comfy_api_nodes/util/request_logger.py
+++ b/comfy_api_nodes/util/request_logger.py
@@ -8,7 +8,6 @@ from typing import Any

 import folder_paths

-# Get the logger instance
 logger = logging.getLogger(__name__)


@@ -91,38 +90,41 @@ def log_request_response(
    Filenames are sanitized and length-limited for cross-platform safety.
    If we still fail to write, we fall back to appending into api.log.
    """
-    log_dir = get_log_directory()
-    filepath = _build_log_filepath(log_dir, operation_id, request_url)
-
-    log_content: list[str] = []
-    log_content.append(f"Timestamp: {datetime.datetime.now().isoformat()}")
-    log_content.append(f"Operation ID: {operation_id}")
-    log_content.append("-" * 30 + " REQUEST " + "-" * 30)
-    log_content.append(f"Method: {request_method}")
-    log_content.append(f"URL: {request_url}")
-    if request_headers:
-        log_content.append(f"Headers:\n{_format_data_for_logging(request_headers)}")
-    if request_params:
-        log_content.append(f"Params:\n{_format_data_for_logging(request_params)}")
-    if request_data is not None:
-        log_content.append(f"Data/Body:\n{_format_data_for_logging(request_data)}")
-
-    log_content.append("\n" + "-" * 30 + " RESPONSE " + "-" * 30)
-    if response_status_code is not None:
-        log_content.append(f"Status Code: {response_status_code}")
-    if response_headers:
-        log_content.append(f"Headers:\n{_format_data_for_logging(response_headers)}")
-    if response_content is not None:
-        log_content.append(f"Content:\n{_format_data_for_logging(response_content)}")
-    if error_message:
-        log_content.append(f"Error:\n{error_message}")
-
    try:
-        with open(filepath, "w", encoding="utf-8") as f:
-            f.write("\n".join(log_content))
-        logger.debug("API log saved to: %s", filepath)
-    except Exception as e:
-        logger.error("Error writing API log to %s: %s", filepath, str(e))
+        log_dir = get_log_directory()
+        filepath = _build_log_filepath(log_dir, operation_id, request_url)
+
+        log_content: list[str] = []
+        log_content.append(f"Timestamp: {datetime.datetime.now().isoformat()}")
+        log_content.append(f"Operation ID: {operation_id}")
+        log_content.append("-" * 30 + " REQUEST " + "-" * 30)
+        log_content.append(f"Method: {request_method}")
+        log_content.append(f"URL: {request_url}")
+        if request_headers:
+            log_content.append(f"Headers:\n{_format_data_for_logging(request_headers)}")
+        if request_params:
+            log_content.append(f"Params:\n{_format_data_for_logging(request_params)}")
+        if request_data is not None:
+            log_content.append(f"Data/Body:\n{_format_data_for_logging(request_data)}")
+
+        log_content.append("\n" + "-" * 30 + " RESPONSE " + "-" * 30)
+        if response_status_code is not None:
+            log_content.append(f"Status Code: {response_status_code}")
+        if response_headers:
+            log_content.append(f"Headers:\n{_format_data_for_logging(response_headers)}")
+        if response_content is not None:
+            log_content.append(f"Content:\n{_format_data_for_logging(response_content)}")
+        if error_message:
+            log_content.append(f"Error:\n{error_message}")
+
+        try:
+            with open(filepath, "w", encoding="utf-8") as f:
+                f.write("\n".join(log_content))
+            logger.debug("API log saved to: %s", filepath)
+        except Exception as e:
+            logger.error("Error writing API log to %s: %s", filepath, str(e))
+    except Exception as _log_e:
+        logging.debug("[DEBUG] log_request_response failed: %s", _log_e)


 if __name__ == '__main__':
--- a/comfy_api_nodes/util/upload_helpers.py
+++ b/comfy_api_nodes/util/upload_helpers.py
@@ -255,17 +255,14 @@ async def upload_file(
        monitor_task = asyncio.create_task(_monitor())
        sess: aiohttp.ClientSession | None = None
        try:
-            try:
-                request_logger.log_request_response(
-                    operation_id=operation_id,
-                    request_method="PUT",
-                    request_url=upload_url,
-                    request_headers=headers or None,
-                    request_params=None,
-                    request_data=f"[File data {len(data)} bytes]",
-                )
-            except Exception as e:
-                logging.debug("[DEBUG] upload request logging failed: %s", e)
+            request_logger.log_request_response(
+                operation_id=operation_id,
+                request_method="PUT",
+                request_url=upload_url,
+                request_headers=headers or None,
+                request_params=None,
+                request_data=f"[File data {len(data)} bytes]",
+            )

            sess = aiohttp.ClientSession(timeout=timeout)
            req = sess.put(upload_url, data=data, headers=headers, skip_auto_headers=skip_auto_headers)
@@ -311,31 +308,27 @@ async def upload_file(
                        delay *= retry_backoff
                        continue
                    raise Exception(f"Failed to upload (HTTP {resp.status}).")
-                try:
-                    request_logger.log_request_response(
-                        operation_id=operation_id,
-                        request_method="PUT",
-                        request_url=upload_url,
-                        response_status_code=resp.status,
-                        response_headers=dict(resp.headers),
-                        response_content="File uploaded successfully.",
-                    )
-                except Exception as e:
-                    logging.debug("[DEBUG] upload response logging failed: %s", e)
+                request_logger.log_request_response(
+                    operation_id=operation_id,
+                    request_method="PUT",
+                    request_url=upload_url,
+                    response_status_code=resp.status,
+                    response_headers=dict(resp.headers),
+                    response_content="File uploaded successfully.",
+                )
                return
        except asyncio.CancelledError:
            raise ProcessingInterrupted("Task cancelled") from None
        except (aiohttp.ClientError, OSError) as e:
            if attempt <= max_retries:
-                with contextlib.suppress(Exception):
-                    request_logger.log_request_response(
-                        operation_id=operation_id,
-                        request_method="PUT",
-                        request_url=upload_url,
-                        request_headers=headers or None,
-                        request_data=f"[File data {len(data)} bytes]",
-                        error_message=f"{type(e).__name__}: {str(e)} (will retry)",
-                    )
+                request_logger.log_request_response(
+                    operation_id=operation_id,
+                    request_method="PUT",
+                    request_url=upload_url,
+                    request_headers=headers or None,
+                    request_data=f"[File data {len(data)} bytes]",
+                    error_message=f"{type(e).__name__}: {str(e)} (will retry)",
+                )
                await sleep_with_interrupt(
                    delay,
                    cls,
--- a/comfy_execution/jobs.py
+++ b/comfy_execution/jobs.py
@@ -20,10 +20,60 @@ class JobStatus:


 # Media types that can be previewed in the frontend
-PREVIEWABLE_MEDIA_TYPES = frozenset({'images', 'video', 'audio'})
+PREVIEWABLE_MEDIA_TYPES = frozenset({'images', 'video', 'audio', '3d'})

 # 3D file extensions for preview fallback (no dedicated media_type exists)
-THREE_D_EXTENSIONS = frozenset({'.obj', '.fbx', '.gltf', '.glb'})
+THREE_D_EXTENSIONS = frozenset({'.obj', '.fbx', '.gltf', '.glb', '.usdz'})
+
+
+def has_3d_extension(filename: str) -> bool:
+    """Return True when ``filename`` ends with one of THREE_D_EXTENSIONS, ignoring case."""
+    # str.endswith accepts a tuple of suffixes, so one call covers every extension.
+    return filename.lower().endswith(tuple(THREE_D_EXTENSIONS))
+
+
+def normalize_output_item(item):
+    """Normalize one entry of a node's output list for the jobs API.
+
+    Dicts are returned unchanged. A string whose name has a 3D extension is
+    wrapped in a {filename, type, subfolder, mediaType} dict. Anything else
+    (None, other strings, other types) has no normalized form and yields None.
+    """
+    if isinstance(item, dict):
+        return item
+    if isinstance(item, str) and has_3d_extension(item):
+        return {'filename': item, 'type': 'output', 'subfolder': '', 'mediaType': '3d'}
+    return None
+
+
+def normalize_outputs(outputs: dict) -> dict:
+    """Build a normalized copy of the raw node outputs for the jobs API.
+
+    In every list-valued entry other than 'animated', None items are dropped
+    and 3D filename strings are replaced by file output dicts; items that
+    normalize_output_item cannot convert are kept untouched. Node outputs
+    that are not dicts, and entries that are not lists, pass through as-is.
+    """
+    result = {}
+    for node_id, node_outputs in outputs.items():
+        if not isinstance(node_outputs, dict):
+            result[node_id] = node_outputs
+            continue
+
+        cleaned = {}
+        for media_type, items in node_outputs.items():
+            if media_type != 'animated' and isinstance(items, list):
+                kept = []
+                for entry in items:
+                    if entry is None:
+                        continue
+                    converted = normalize_output_item(entry)
+                    # Fall back to the raw entry when there is no normalized form.
+                    kept.append(entry if converted is None else converted)
+                cleaned[media_type] = kept
+            else:
+                cleaned[media_type] = items
+        result[node_id] = cleaned
+    return result


 def _extract_job_metadata(extra_data: dict) -> tuple[Optional[int], Optional[str]]:
@@ -45,9 +95,9 @@ def is_previewable(media_type: str, item: dict) -> bool:
    Maintains backwards compatibility with existing logic.

    Priority:
-    1. media_type is 'images', 'video', or 'audio'
+    1. media_type is 'images', 'video', 'audio', or '3d'
    2. format field starts with 'video/' or 'audio/'
-    3. filename has a 3D extension (.obj, .fbx, .gltf, .glb)
+    3. filename has a 3D extension (.obj, .fbx, .gltf, .glb, .usdz)
    """
    if media_type in PREVIEWABLE_MEDIA_TYPES:
        return True
@@ -139,7 +189,7 @@ def normalize_history_item(prompt_id: str, history_item: dict, include_outputs:
    })

    if include_outputs:
-        job['outputs'] = outputs
+        job['outputs'] = normalize_outputs(outputs)
        job['execution_status'] = status_info
        job['workflow'] = {
            'prompt': prompt,
@@ -171,18 +221,23 @@ def get_outputs_summary(outputs: dict) -> tuple[int, Optional[dict]]:
                continue

            for item in items:
-                count += 1
-
-                if not isinstance(item, dict):
+                normalized = normalize_output_item(item)
+                if normalized is None:
                    continue

-                if preview_output is None and is_previewable(media_type, item):
+                count += 1
+
+                if preview_output is not None:
+                    continue
+
+                if isinstance(normalized, dict) and is_previewable(media_type, normalized):
                    enriched = {
-                        **item,
+                        **normalized,
                        'nodeId': node_id,
-                        'mediaType': media_type
                    }
-                    if item.get('type') == 'output':
+                    if 'mediaType' not in normalized:
+                        enriched['mediaType'] = media_type
+                    if normalized.get('type') == 'output':
                        preview_output = enriched
                    elif fallback_preview is None:
                        fallback_preview = enriched
--- a/comfy_extras/nodes_ace.py
+++ b/comfy_extras/nodes_ace.py
@@ -49,13 +49,14 @@ class TextEncodeAceStepAudio15(io.ComfyNode):
                io.Float.Input("temperature", default=0.85, min=0.0, max=2.0, step=0.01, advanced=True),
                io.Float.Input("top_p", default=0.9, min=0.0, max=2000.0, step=0.01, advanced=True),
                io.Int.Input("top_k", default=0, min=0, max=100, advanced=True),
+                io.Float.Input("min_p", default=0.000, min=0.0, max=1.0, step=0.001, advanced=True),
            ],
            outputs=[io.Conditioning.Output()],
        )

    @classmethod
-    def execute(cls, clip, tags, lyrics, seed, bpm, duration, timesignature, language, keyscale, generate_audio_codes, cfg_scale, temperature, top_p, top_k) -> io.NodeOutput:
-        tokens = clip.tokenize(tags, lyrics=lyrics, bpm=bpm, duration=duration, timesignature=int(timesignature), language=language, keyscale=keyscale, seed=seed, generate_audio_codes=generate_audio_codes, cfg_scale=cfg_scale, temperature=temperature, top_p=top_p, top_k=top_k)
+    def execute(cls, clip, tags, lyrics, seed, bpm, duration, timesignature, language, keyscale, generate_audio_codes, cfg_scale, temperature, top_p, top_k, min_p) -> io.NodeOutput:
+        tokens = clip.tokenize(tags, lyrics=lyrics, bpm=bpm, duration=duration, timesignature=int(timesignature), language=language, keyscale=keyscale, seed=seed, generate_audio_codes=generate_audio_codes, cfg_scale=cfg_scale, temperature=temperature, top_p=top_p, top_k=top_k, min_p=min_p)
        conditioning = clip.encode_from_tokens_scheduled(tokens)
        return io.NodeOutput(conditioning)

--- a/comfy_extras/nodes_glsl.py
+++ b/comfy_extras/nodes_glsl.py
@@ -0,0 +1,897 @@
+import os
+import sys
+import re
+import logging
+import ctypes.util
+import importlib.util
+from typing import TypedDict
+
+import numpy as np
+import torch
+
+import nodes
+from comfy_api.latest import ComfyExtension, io, ui
+from typing_extensions import override
+from utils.install_util import get_missing_requirements_message
+
+logger = logging.getLogger(__name__)
+
+
+def _check_opengl_availability():
+    """Early check for OpenGL availability. Raises RuntimeError if unlikely to work.
+
+    The Python packages are probed with importlib's find_spec, so neither
+    glfw nor PyOpenGL is imported here. On Linux without DISPLAY or
+    WAYLAND_DISPLAY the presence of the EGL and OSMesa libraries is only
+    logged; it is not enforced.
+
+    Raises:
+        RuntimeError: if the glfw or OpenGL package cannot be found. The
+            message names the missing package(s).
+    """
+    logger.debug("_check_opengl_availability: starting")
+    missing = []
+
+    # Check Python packages (using find_spec to avoid importing)
+    logger.debug("_check_opengl_availability: checking for glfw package")
+    if importlib.util.find_spec("glfw") is None:
+        missing.append("glfw")
+
+    logger.debug("_check_opengl_availability: checking for OpenGL package")
+    if importlib.util.find_spec("OpenGL") is None:
+        missing.append("PyOpenGL")
+
+    if missing:
+        # Name the packages that were not found so the user knows what to install.
+        raise RuntimeError(
+            f"OpenGL dependencies not available (missing: {', '.join(missing)}).\n"
+            f"{get_missing_requirements_message()}\n"
+        )
+
+    # On Linux without display, check if headless backends are available
+    logger.debug(f"_check_opengl_availability: platform={sys.platform}")
+    if sys.platform.startswith("linux"):
+        has_display = os.environ.get("DISPLAY") or os.environ.get("WAYLAND_DISPLAY")
+        logger.debug(f"_check_opengl_availability: has_display={bool(has_display)}")
+        if not has_display:
+            # Check for EGL or OSMesa libraries
+            logger.debug("_check_opengl_availability: checking for EGL library")
+            has_egl = ctypes.util.find_library("EGL")
+            logger.debug("_check_opengl_availability: checking for OSMesa library")
+            has_osmesa = ctypes.util.find_library("OSMesa")
+
+            # Error disabled for CI as it fails this check
+            # if not has_egl and not has_osmesa:
+            #     raise RuntimeError(
+            #         "GLSL Shader node: No display and no headless backend (EGL/OSMesa) found.\n"
+            #         "See error below for installation instructions."
+            #     )
+            logger.debug(f"Headless mode: EGL={'yes' if has_egl else 'no'}, OSMesa={'yes' if has_osmesa else 'no'}")
+
+    logger.debug("_check_opengl_availability: completed")
+
+
+# Run early check at import time: a RuntimeError raised by the check
+# propagates out of this module's import.
+logger.debug("nodes_glsl: running _check_opengl_availability at import time")
+_check_opengl_availability()
+
+# OpenGL modules - initialized lazily when context is created.
+# `gl` is filled in by _import_opengl(); `glfw` and `EGL` are assigned by
+# GLContext.__init__ for whichever backend it manages to start.
+gl = None
+glfw = None
+EGL = None
+
+
+def _import_opengl():
+    """Return the OpenGL.GL module, importing and caching it in ``gl`` on first use.
+
+    Called after context is created.
+    """
+    global gl
+    if gl is not None:
+        return gl
+
+    logger.debug("_import_opengl: importing OpenGL.GL")
+    import OpenGL.GL as loaded_gl
+    gl = loaded_gl
+    logger.debug("_import_opengl: import completed")
+    return gl
+
+
+class SizeModeInput(TypedDict):
+    """Typed dict holding a ``size_mode`` string plus integer ``width`` and ``height``.
+
+    NOTE(review): presumably the value of the node's output-size input; the
+    consumer is outside this part of the file - confirm.
+    """
+    size_mode: str
+    width: int
+    height: int
+
+
+# Upper bounds on the numbered shader names (sampler/uniform/output suffixes).
+MAX_IMAGES = 5      # u_image0-4
+MAX_UNIFORMS = 5    # u_float0-4, u_int0-4
+MAX_OUTPUTS = 4     # fragColor0-3 (MRT)
+
+# Vertex shader using gl_VertexID trick - no VBO needed.
+# Draws a single triangle that covers the entire screen:
+#
+# (-1,3)
+#    |\
+#    | \    <- visible area is the clip-space square from (-1,-1) to (1,1);
+#    |  \      parts outside get clipped away
+# (-1,-1)---(3,-1)
+#
+# v_texCoord is computed from clip space: * 0.5 + 0.5 maps (-1,1) -> (0,1)
+VERTEX_SHADER = """#version 330 core
+out vec2 v_texCoord;
+void main() {
+    vec2 verts[3] = vec2[](vec2(-1, -1), vec2(3, -1), vec2(-1, 3));
+    v_texCoord = verts[gl_VertexID] * 0.5 + 0.5;
+    gl_Position = vec4(verts[gl_VertexID], 0, 1);
+}
+"""
+
+# Default fragment shader, written as GLSL ES 3.00 (#version 300 es):
+# samples u_image0 at v_texCoord and writes the result to fragColor0.
+DEFAULT_FRAGMENT_SHADER = """#version 300 es
+precision highp float;
+
+uniform sampler2D u_image0;
+uniform vec2 u_resolution;
+
+in vec2 v_texCoord;
+layout(location = 0) out vec4 fragColor0;
+
+void main() {
+    fragColor0 = texture(u_image0, v_texCoord);
+}
+"""
+
+
+def _convert_es_to_desktop(source: str) -> str:
+    """Rewrite a GLSL ES (WebGL) shader so it starts with ``#version 330 core``.
+
+    Every existing ``#version`` directive and every ``precision`` statement
+    (not needed in desktop GLSL) is stripped before the desktop version line
+    is prepended.
+    """
+    version_directive = re.compile(r"#version\s+\d+(\s+es)?\s*\n?", re.IGNORECASE)
+    precision_statement = re.compile(r"precision\s+(lowp|mediump|highp)\s+\w+\s*;\s*\n?")
+
+    stripped = version_directive.sub("", source)
+    stripped = precision_statement.sub("", stripped)
+    return "#version 330 core\n" + stripped
+
+
+def _detect_output_count(source: str) -> int:
+    """Work out how many fragColorN outputs a shader references.
+
+    The result is the highest N found plus one, capped at MAX_OUTPUTS;
+    a shader with no fragColorN reference counts as one output.
+    """
+    indices = [int(digits) for digits in re.findall(r"fragColor(\d+)", source)]
+    if not indices:
+        return 1  # Default to 1 output if none found
+    return min(max(indices) + 1, MAX_OUTPUTS)
+
+
+def _detect_pass_count(source: str) -> int:
+    """Read the pass count from a ``#pragma passes N`` directive.
+
+    Returns N (never less than 1), or 1 when the directive is absent.
+    """
+    directive = re.search(r'#pragma\s+passes\s+(\d+)', source)
+    if directive is None:
+        return 1
+    requested = int(directive.group(1))
+    return max(1, requested)
+
+
+def _init_glfw():
+    """Create a hidden GLFW window with a current OpenGL 3.3 core context.
+
+    Returns:
+        (window, glfw_module) tuple.
+
+    Raises:
+        RuntimeError: on macOS (the backend is skipped there), or when
+            glfw.init() or glfw.create_window() fails.
+    """
+    logger.debug("_init_glfw: starting")
+    # On macOS, glfw.init() must be called from main thread or it hangs forever
+    if sys.platform == "darwin":
+        logger.debug("_init_glfw: skipping on macOS")
+        raise RuntimeError("GLFW backend not supported on macOS")
+
+    logger.debug("_init_glfw: importing glfw module")
+    import glfw as _glfw
+
+    logger.debug("_init_glfw: calling glfw.init()")
+    if not _glfw.init():
+        raise RuntimeError("glfw.init() failed")
+
+    try:
+        logger.debug("_init_glfw: setting window hints")
+        hints = (
+            (_glfw.VISIBLE, _glfw.FALSE),
+            (_glfw.CONTEXT_VERSION_MAJOR, 3),
+            (_glfw.CONTEXT_VERSION_MINOR, 3),
+            (_glfw.OPENGL_PROFILE, _glfw.OPENGL_CORE_PROFILE),
+        )
+        for hint, value in hints:
+            _glfw.window_hint(hint, value)
+
+        logger.debug("_init_glfw: calling create_window()")
+        window = _glfw.create_window(64, 64, "ComfyUI GLSL", None, None)
+        if not window:
+            raise RuntimeError("glfw.create_window() failed")
+
+        logger.debug("_init_glfw: calling make_context_current()")
+        _glfw.make_context_current(window)
+    except Exception:
+        # Anything failing after glfw.init() must undo it before propagating.
+        logger.debug("_init_glfw: failed, terminating glfw")
+        _glfw.terminate()
+        raise
+
+    logger.debug("_init_glfw: completed successfully")
+    return window, _glfw
+
+
+def _init_egl():
+    """Initialize EGL for headless rendering.
+
+    Creates a 64x64 pbuffer surface and an OpenGL 3.3 core profile context
+    on the default display, then makes them current.
+
+    Returns:
+        (display, context, surface, EGL_module) tuple.
+
+    Raises:
+        RuntimeError: if an EGL setup call reports failure. Handles that were
+            really created before the failure are released first; an error
+            raised by that cleanup is logged and does not replace the
+            original exception.
+    """
+    logger.debug("_init_egl: starting")
+    from OpenGL import EGL as _EGL
+    from OpenGL.EGL import (
+        eglGetDisplay, eglInitialize, eglChooseConfig, eglCreateContext,
+        eglMakeCurrent, eglCreatePbufferSurface, eglBindAPI,
+        eglTerminate, eglDestroyContext, eglDestroySurface,
+        EGL_DEFAULT_DISPLAY, EGL_NO_CONTEXT, EGL_NONE,
+        EGL_SURFACE_TYPE, EGL_PBUFFER_BIT, EGL_RENDERABLE_TYPE, EGL_OPENGL_BIT,
+        EGL_RED_SIZE, EGL_GREEN_SIZE, EGL_BLUE_SIZE, EGL_ALPHA_SIZE, EGL_DEPTH_SIZE,
+        EGL_WIDTH, EGL_HEIGHT, EGL_OPENGL_API,
+    )
+    logger.debug("_init_egl: imports completed")
+
+    # Each handle stays None unless it refers to a live object that the
+    # failure path below has to release.
+    display = None
+    context = None
+    surface = None
+
+    try:
+        logger.debug("_init_egl: calling eglGetDisplay()")
+        display = eglGetDisplay(EGL_DEFAULT_DISPLAY)
+        if display == _EGL.EGL_NO_DISPLAY:
+            display = None  # Sentinel, not a real display: don't terminate
+            raise RuntimeError("eglGetDisplay() failed")
+
+        logger.debug("_init_egl: calling eglInitialize()")
+        major, minor = _EGL.EGLint(), _EGL.EGLint()
+        if not eglInitialize(display, major, minor):
+            display = None  # Not initialized, don't terminate
+            raise RuntimeError("eglInitialize() failed")
+        logger.debug(f"_init_egl: EGL version {major.value}.{minor.value}")
+
+        config_attribs = [
+            EGL_SURFACE_TYPE, EGL_PBUFFER_BIT,
+            EGL_RENDERABLE_TYPE, EGL_OPENGL_BIT,
+            EGL_RED_SIZE, 8, EGL_GREEN_SIZE, 8, EGL_BLUE_SIZE, 8, EGL_ALPHA_SIZE, 8,
+            EGL_DEPTH_SIZE, 0, EGL_NONE
+        ]
+        configs = (_EGL.EGLConfig * 1)()
+        num_configs = _EGL.EGLint()
+        if not eglChooseConfig(display, config_attribs, configs, 1, num_configs) or num_configs.value == 0:
+            raise RuntimeError("eglChooseConfig() failed")
+        config = configs[0]
+        logger.debug(f"_init_egl: config chosen, num_configs={num_configs.value}")
+
+        if not eglBindAPI(EGL_OPENGL_API):
+            raise RuntimeError("eglBindAPI() failed")
+
+        logger.debug("_init_egl: calling eglCreateContext()")
+        context_attribs = [
+            _EGL.EGL_CONTEXT_MAJOR_VERSION, 3,
+            _EGL.EGL_CONTEXT_MINOR_VERSION, 3,
+            _EGL.EGL_CONTEXT_OPENGL_PROFILE_MASK, _EGL.EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT,
+            EGL_NONE
+        ]
+        context = eglCreateContext(display, config, EGL_NO_CONTEXT, context_attribs)
+        if context == EGL_NO_CONTEXT:
+            context = None  # Nothing was created, don't destroy
+            raise RuntimeError("eglCreateContext() failed")
+
+        logger.debug("_init_egl: calling eglCreatePbufferSurface()")
+        pbuffer_attribs = [EGL_WIDTH, 64, EGL_HEIGHT, 64, EGL_NONE]
+        surface = eglCreatePbufferSurface(display, config, pbuffer_attribs)
+        if surface == _EGL.EGL_NO_SURFACE:
+            surface = None  # Nothing was created, don't destroy
+            raise RuntimeError("eglCreatePbufferSurface() failed")
+
+        logger.debug("_init_egl: calling eglMakeCurrent()")
+        if not eglMakeCurrent(display, surface, surface, context):
+            raise RuntimeError("eglMakeCurrent() failed")
+
+        logger.debug("_init_egl: completed successfully")
+        return display, context, surface, _EGL
+
+    except Exception:
+        logger.debug("_init_egl: failed, cleaning up")
+        # Best-effort teardown in reverse creation order. A failing cleanup
+        # call is only logged so the original error is what gets re-raised.
+        for label, release, handle in (
+            ("eglDestroySurface", eglDestroySurface, surface),
+            ("eglDestroyContext", eglDestroyContext, context),
+        ):
+            if handle is None:
+                continue
+            try:
+                release(display, handle)
+            except Exception as cleanup_error:
+                logger.debug(f"_init_egl: {label}() failed during cleanup: {cleanup_error}")
+        if display is not None:
+            try:
+                eglTerminate(display)
+            except Exception as cleanup_error:
+                logger.debug(f"_init_egl: eglTerminate() failed during cleanup: {cleanup_error}")
+        raise
+
+
+def _init_osmesa():
+    """Initialize OSMesa for software rendering.
+
+    Creates an OSMesa RGBA context and binds it to a freshly allocated
+    64x64, 4-bytes-per-pixel buffer.
+
+    Returns:
+        (context, buffer) tuple; the buffer is the ctypes array the context
+        was made current on.
+
+    Raises:
+        RuntimeError: if OSMesaCreateContextExt() or OSMesaMakeCurrent()
+            fails. The context is destroyed before raising in the latter case.
+    """
+    import ctypes
+
+    logger.debug("_init_osmesa: starting")
+    # NOTE(review): PyOpenGL appears to choose its platform when OpenGL is
+    # first imported. In GLContext's fallback chain _init_egl has already
+    # imported OpenGL by the time this runs, so this assignment may come too
+    # late to take effect - confirm.
+    os.environ["PYOPENGL_PLATFORM"] = "osmesa"
+
+    logger.debug("_init_osmesa: importing OpenGL.osmesa")
+    from OpenGL import GL as _gl
+    from OpenGL.osmesa import (
+        OSMesaCreateContextExt, OSMesaMakeCurrent, OSMesaDestroyContext,
+        OSMESA_RGBA,
+    )
+    logger.debug("_init_osmesa: imports completed")
+
+    ctx = OSMesaCreateContextExt(OSMESA_RGBA, 24, 0, 0, None)
+    if not ctx:
+        raise RuntimeError("OSMesaCreateContextExt() failed")
+
+    # Same 64x64 size that GLContext.make_current passes when re-binding.
+    width, height = 64, 64
+    buffer = (ctypes.c_ubyte * (width * height * 4))()
+
+    logger.debug("_init_osmesa: calling OSMesaMakeCurrent()")
+    if not OSMesaMakeCurrent(ctx, buffer, _gl.GL_UNSIGNED_BYTE, width, height):
+        OSMesaDestroyContext(ctx)
+        raise RuntimeError("OSMesaMakeCurrent() failed")
+
+    logger.debug("_init_osmesa: completed successfully")
+    return ctx, buffer
+
+
+class GLContext:
+    """Manages OpenGL context and resources for shader execution.
+
+    Tries backends in order: GLFW (desktop) → EGL (headless GPU) → OSMesa (software).
+
+    The class is a singleton: __new__ always returns the same instance, and
+    __init__ stops doing work once one initialization has completed
+    successfully. A failed initialization leaves the class uninitialized, so
+    a later GLContext() call tries again and raises the same descriptive
+    error instead of returning a half-built object.
+    """
+
+    _instance = None
+    _initialized = False
+
+    def __new__(cls):
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+        return cls._instance
+
+    def __init__(self):
+        """Create the OpenGL context with the first backend that works.
+
+        Raises:
+            RuntimeError: if none of the GLFW, EGL and OSMesa backends can be
+                initialized. The message lists each backend's error followed
+                by platform-specific installation help.
+        """
+        if GLContext._initialized:
+            logger.debug("GLContext.__init__: already initialized, skipping")
+            return
+
+        logger.debug("GLContext.__init__: starting initialization")
+
+        global glfw, EGL
+
+        import time
+        start = time.perf_counter()
+
+        self._backend = None
+        self._window = None
+        self._egl_display = None
+        self._egl_context = None
+        self._egl_surface = None
+        self._osmesa_ctx = None
+        self._osmesa_buffer = None
+        self._vao = None
+
+        # Try backends in order: GLFW → EGL → OSMesa
+        errors = []
+
+        logger.debug("GLContext.__init__: trying GLFW backend")
+        try:
+            self._window, glfw = _init_glfw()
+            self._backend = "glfw"
+            logger.debug("GLContext.__init__: GLFW backend succeeded")
+        except Exception as e:
+            logger.debug(f"GLContext.__init__: GLFW backend failed: {e}")
+            errors.append(("GLFW", e))
+
+        if self._backend is None:
+            logger.debug("GLContext.__init__: trying EGL backend")
+            try:
+                self._egl_display, self._egl_context, self._egl_surface, EGL = _init_egl()
+                self._backend = "egl"
+                logger.debug("GLContext.__init__: EGL backend succeeded")
+            except Exception as e:
+                logger.debug(f"GLContext.__init__: EGL backend failed: {e}")
+                errors.append(("EGL", e))
+
+        if self._backend is None:
+            logger.debug("GLContext.__init__: trying OSMesa backend")
+            try:
+                self._osmesa_ctx, self._osmesa_buffer = _init_osmesa()
+                self._backend = "osmesa"
+                logger.debug("GLContext.__init__: OSMesa backend succeeded")
+            except Exception as e:
+                logger.debug(f"GLContext.__init__: OSMesa backend failed: {e}")
+                errors.append(("OSMesa", e))
+
+        if self._backend is None:
+            if sys.platform == "win32":
+                platform_help = (
+                    "Windows: Ensure GPU drivers are installed and display is available.\n"
+                    "         CPU-only/headless mode is not supported on Windows."
+                )
+            elif sys.platform == "darwin":
+                platform_help = (
+                    "macOS: GLFW is not supported.\n"
+                    "  Install OSMesa via Homebrew: brew install mesa\n"
+                    "  Then: pip install PyOpenGL PyOpenGL-accelerate"
+                )
+            else:
+                platform_help = (
+                    "Linux: Install one of these backends:\n"
+                    "  Desktop:           sudo apt install libgl1-mesa-glx libglfw3\n"
+                    "  Headless with GPU: sudo apt install libegl1-mesa libgl1-mesa-dri\n"
+                    "  Headless (CPU):    sudo apt install libosmesa6"
+                )
+
+            error_details = "\n".join(f"  {name}: {err}" for name, err in errors)
+            raise RuntimeError(
+                f"Failed to create OpenGL context.\n\n"
+                f"Backend errors:\n{error_details}\n\n"
+                f"{platform_help}"
+            )
+
+        # Now import OpenGL.GL (after context is current)
+        logger.debug("GLContext.__init__: importing OpenGL.GL")
+        _import_opengl()
+
+        # Create VAO (required for core profile, but OSMesa may use compat profile)
+        logger.debug("GLContext.__init__: creating VAO")
+        # Bound up front so the except branch can inspect it even when
+        # glGenVertexArrays itself is the call that raises.
+        vao = None
+        try:
+            vao = gl.glGenVertexArrays(1)
+            gl.glBindVertexArray(vao)
+            self._vao = vao  # Only store after successful bind
+            logger.debug("GLContext.__init__: VAO created successfully")
+        except Exception as e:
+            logger.debug(f"GLContext.__init__: VAO creation failed (may be expected for OSMesa): {e}")
+            # OSMesa with older Mesa may not support VAOs
+            # Clean up if we created but couldn't bind
+            if vao:
+                try:
+                    gl.glDeleteVertexArrays(1, [vao])
+                except Exception:
+                    pass
+
+        elapsed = (time.perf_counter() - start) * 1000
+
+        # Log device info
+        renderer = gl.glGetString(gl.GL_RENDERER)
+        vendor = gl.glGetString(gl.GL_VENDOR)
+        version = gl.glGetString(gl.GL_VERSION)
+        renderer = renderer.decode() if renderer else "Unknown"
+        vendor = vendor.decode() if vendor else "Unknown"
+        version = version.decode() if version else "Unknown"
+
+        logger.info(f"GLSL context initialized in {elapsed:.1f}ms ({self._backend}) - {renderer} ({vendor}), GL {version}")
+
+        # Mark the singleton ready only now that every step above succeeded.
+        GLContext._initialized = True
+
+    def make_current(self):
+        """Re-activate the selected backend's context and bind the VAO when one exists."""
+        if self._backend == "glfw":
+            glfw.make_context_current(self._window)
+        elif self._backend == "egl":
+            from OpenGL.EGL import eglMakeCurrent
+            eglMakeCurrent(self._egl_display, self._egl_surface, self._egl_surface, self._egl_context)
+        elif self._backend == "osmesa":
+            from OpenGL.osmesa import OSMesaMakeCurrent
+            OSMesaMakeCurrent(self._osmesa_ctx, self._osmesa_buffer, gl.GL_UNSIGNED_BYTE, 64, 64)
+
+        if self._vao is not None:
+            gl.glBindVertexArray(self._vao)
+
+
+def _compile_shader(source: str, shader_type: int) -> int:
+    """Compile ``source`` as a shader of ``shader_type`` and return the shader ID.
+
+    Raises:
+        RuntimeError: if compilation fails. The message carries the shader
+            info log, and the shader object is deleted before raising.
+    """
+    shader_id = gl.glCreateShader(shader_type)
+    gl.glShaderSource(shader_id, source)
+    gl.glCompileShader(shader_id)
+
+    compile_status = gl.glGetShaderiv(shader_id, gl.GL_COMPILE_STATUS)
+    if compile_status != gl.GL_TRUE:
+        info_log = gl.glGetShaderInfoLog(shader_id).decode()
+        gl.glDeleteShader(shader_id)
+        raise RuntimeError(f"Shader compilation failed:\n{info_log}")
+
+    return shader_id
+
+
+def _create_program(vertex_source: str, fragment_source: str) -> int:
+    """Compile both shader stages, link them, and return the program ID.
+
+    Raises:
+        RuntimeError: if either stage fails to compile (the vertex shader is
+            deleted when the fragment stage is the one that fails) or if
+            linking fails, in which case the program is deleted and the
+            message carries the program info log.
+    """
+    vs = _compile_shader(vertex_source, gl.GL_VERTEX_SHADER)
+    try:
+        fs = _compile_shader(fragment_source, gl.GL_FRAGMENT_SHADER)
+    except RuntimeError:
+        gl.glDeleteShader(vs)
+        raise
+
+    program_id = gl.glCreateProgram()
+    for stage in (vs, fs):
+        gl.glAttachShader(program_id, stage)
+    gl.glLinkProgram(program_id)
+
+    # The shader objects are deleted right after the link attempt.
+    for stage in (vs, fs):
+        gl.glDeleteShader(stage)
+
+    link_status = gl.glGetProgramiv(program_id, gl.GL_LINK_STATUS)
+    if link_status != gl.GL_TRUE:
+        info_log = gl.glGetProgramInfoLog(program_id).decode()
+        gl.glDeleteProgram(program_id)
+        raise RuntimeError(f"Program linking failed:\n{info_log}")
+
+    return program_id
+
+
+def _render_shader_batch(
+    fragment_code: str,
+    width: int,
+    height: int,
+    image_batches: list[list[np.ndarray]],
+    floats: list[float],
+    ints: list[int],
+) -> list[list[np.ndarray]]:
+    """
+    Render a fragment shader for multiple batches efficiently.
+
+    Compiles shader once, reuses framebuffer/textures across batches.
+    Supports multi-pass rendering via #pragma passes N directive.
+    Every GL object created here is deleted in the ``finally`` block, whether
+    rendering succeeds or raises.
+
+    Args:
+        fragment_code: User's fragment shader code
+        width: Output width
+        height: Output height
+        image_batches: List of batches, each batch is a list of input images (H, W, C) float32 [0,1].
+            The number of input textures is taken from the first batch.
+        floats: List of float uniforms (bound to u_float0, u_float1, ...)
+        ints: List of int uniforms (bound to u_int0, u_int1, ...)
+
+    Returns:
+        List of batch outputs, each is a list of output images (H, W, 4) float32 [0,1],
+        padded with black images up to MAX_OUTPUTS entries.
+        An empty list if image_batches is empty.
+
+    Raises:
+        RuntimeError: if the shader program fails to compile/link or a
+            framebuffer is reported incomplete.
+    """
+    import time
+    start_time = time.perf_counter()
+
+    # Nothing to render: return before touching any GL state.
+    if not image_batches:
+        return []
+
+    ctx = GLContext()
+    ctx.make_current()
+
+    # Convert from GLSL ES to desktop GLSL 330
+    fragment_source = _convert_es_to_desktop(fragment_code)
+
+    # Detect how many outputs the shader actually uses
+    # (detection runs on the user's original code, not the converted source)
+    num_outputs = _detect_output_count(fragment_code)
+
+    # Detect multi-pass rendering
+    num_passes = _detect_pass_count(fragment_code)
+
+    # Track resources for cleanup
+    # (initialised before the try so the finally block can test each one safely)
+    program = None
+    fbo = None
+    output_textures = []
+    input_textures = []
+    ping_pong_textures = []
+    ping_pong_fbos = []
+
+    # assumes every batch holds as many images as the first one — TODO confirm against caller
+    num_inputs = len(image_batches[0])
+
+    try:
+        # Compile shaders (once for all batches)
+        try:
+            program = _create_program(VERTEX_SHADER, fragment_source)
+        except RuntimeError:
+            # Log the converted source so compiler line numbers can be matched up.
+            logger.error(f"Fragment shader:\n{fragment_source}")
+            raise
+
+        gl.glUseProgram(program)
+
+        # Create framebuffer with only the needed color attachments
+        fbo = gl.glGenFramebuffers(1)
+        gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, fbo)
+
+        draw_buffers = []
+        for i in range(num_outputs):
+            tex = gl.glGenTextures(1)
+            output_textures.append(tex)
+            gl.glBindTexture(gl.GL_TEXTURE_2D, tex)
+            # Allocate float RGBA storage at the output size; no pixel data is uploaded (None).
+            gl.glTexImage2D(gl.GL_TEXTURE_2D, 0, gl.GL_RGBA32F, width, height, 0, gl.GL_RGBA, gl.GL_FLOAT, None)
+            gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MIN_FILTER, gl.GL_LINEAR)
+            gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MAG_FILTER, gl.GL_LINEAR)
+            gl.glFramebufferTexture2D(gl.GL_FRAMEBUFFER, gl.GL_COLOR_ATTACHMENT0 + i, gl.GL_TEXTURE_2D, tex, 0)
+            draw_buffers.append(gl.GL_COLOR_ATTACHMENT0 + i)
+
+        gl.glDrawBuffers(num_outputs, draw_buffers)
+
+        if gl.glCheckFramebufferStatus(gl.GL_FRAMEBUFFER) != gl.GL_FRAMEBUFFER_COMPLETE:
+            raise RuntimeError("Framebuffer is not complete")
+
+        # Create ping-pong resources for multi-pass rendering
+        # (two single-attachment FBOs; intermediate passes alternate between them)
+        if num_passes > 1:
+            for _ in range(2):
+                pp_tex = gl.glGenTextures(1)
+                ping_pong_textures.append(pp_tex)
+                gl.glBindTexture(gl.GL_TEXTURE_2D, pp_tex)
+                gl.glTexImage2D(gl.GL_TEXTURE_2D, 0, gl.GL_RGBA32F, width, height, 0, gl.GL_RGBA, gl.GL_FLOAT, None)
+                gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MIN_FILTER, gl.GL_LINEAR)
+                gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MAG_FILTER, gl.GL_LINEAR)
+                gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_WRAP_S, gl.GL_CLAMP_TO_EDGE)
+                gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_WRAP_T, gl.GL_CLAMP_TO_EDGE)
+
+                pp_fbo = gl.glGenFramebuffers(1)
+                ping_pong_fbos.append(pp_fbo)
+                gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, pp_fbo)
+                gl.glFramebufferTexture2D(gl.GL_FRAMEBUFFER, gl.GL_COLOR_ATTACHMENT0, gl.GL_TEXTURE_2D, pp_tex, 0)
+                gl.glDrawBuffers(1, [gl.GL_COLOR_ATTACHMENT0])
+
+                if gl.glCheckFramebufferStatus(gl.GL_FRAMEBUFFER) != gl.GL_FRAMEBUFFER_COMPLETE:
+                    raise RuntimeError("Ping-pong framebuffer is not complete")
+
+        # Create input textures (reused for all batches)
+        # Input i lives on texture unit i and is exposed to the shader as sampler u_image{i}.
+        for i in range(num_inputs):
+            tex = gl.glGenTextures(1)
+            input_textures.append(tex)
+            gl.glActiveTexture(gl.GL_TEXTURE0 + i)
+            gl.glBindTexture(gl.GL_TEXTURE_2D, tex)
+            gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MIN_FILTER, gl.GL_LINEAR)
+            gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MAG_FILTER, gl.GL_LINEAR)
+            gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_WRAP_S, gl.GL_CLAMP_TO_EDGE)
+            gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_WRAP_T, gl.GL_CLAMP_TO_EDGE)
+
+            # A negative location is treated as "uniform not present" and skipped.
+            loc = gl.glGetUniformLocation(program, f"u_image{i}")
+            if loc >= 0:
+                gl.glUniform1i(loc, i)
+
+        # Set static uniforms (once for all batches)
+        loc = gl.glGetUniformLocation(program, "u_resolution")
+        if loc >= 0:
+            gl.glUniform2f(loc, float(width), float(height))
+
+        for i, v in enumerate(floats):
+            loc = gl.glGetUniformLocation(program, f"u_float{i}")
+            if loc >= 0:
+                gl.glUniform1f(loc, v)
+
+        for i, v in enumerate(ints):
+            loc = gl.glGetUniformLocation(program, f"u_int{i}")
+            if loc >= 0:
+                gl.glUniform1i(loc, v)
+
+        # Get u_pass uniform location for multi-pass
+        pass_loc = gl.glGetUniformLocation(program, "u_pass")
+
+        gl.glViewport(0, 0, width, height)
+        gl.glDisable(gl.GL_BLEND)  # Ensure no alpha blending - write output directly
+
+        # Process each batch
+        all_batch_outputs = []
+        for images in image_batches:
+            # Update input textures with this batch's images
+            for i, img in enumerate(images):
+                gl.glActiveTexture(gl.GL_TEXTURE0 + i)
+                gl.glBindTexture(gl.GL_TEXTURE_2D, input_textures[i])
+
+                # Flip vertically for GL coordinates, ensure RGBA
+                h, w, c = img.shape
+                if c == 3:
+                    # RGB input: copy into an RGBA buffer with alpha fixed at 1.0.
+                    img_upload = np.empty((h, w, 4), dtype=np.float32)
+                    img_upload[:, :, :3] = img[::-1, :, :]
+                    img_upload[:, :, 3] = 1.0
+                else:
+                    # NOTE(review): every channel count other than 3 lands here and is uploaded
+                    # as GL_RGBA — presumably only 4-channel images are expected; confirm.
+                    img_upload = np.ascontiguousarray(img[::-1, :, :])
+
+                # Each input is (re)allocated at its own size (w, h), not the output size.
+                gl.glTexImage2D(gl.GL_TEXTURE_2D, 0, gl.GL_RGBA32F, w, h, 0, gl.GL_RGBA, gl.GL_FLOAT, img_upload)
+
+            if num_passes == 1:
+                # Single pass - render directly to output FBO
+                gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, fbo)
+                if pass_loc >= 0:
+                    gl.glUniform1i(pass_loc, 0)
+                gl.glClearColor(0, 0, 0, 0)
+                gl.glClear(gl.GL_COLOR_BUFFER_BIT)
+                # Three vertices, no vertex data supplied here; geometry comes from VERTEX_SHADER.
+                gl.glDrawArrays(gl.GL_TRIANGLES, 0, 3)
+            else:
+                # Multi-pass rendering with ping-pong
+                for p in range(num_passes):
+                    is_last_pass = (p == num_passes - 1)
+
+                    # Set pass uniform
+                    if pass_loc >= 0:
+                        gl.glUniform1i(pass_loc, p)
+
+                    if is_last_pass:
+                        # Last pass renders to the main output FBO
+                        gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, fbo)
+                    else:
+                        # Intermediate passes render to ping-pong FBO
+                        target_fbo = ping_pong_fbos[p % 2]
+                        gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, target_fbo)
+
+                    # Set input texture for this pass
+                    # (only unit 0 is swapped; the other units keep the textures bound during upload)
+                    gl.glActiveTexture(gl.GL_TEXTURE0)
+                    if p == 0:
+                        # First pass reads from original input
+                        gl.glBindTexture(gl.GL_TEXTURE_2D, input_textures[0])
+                    else:
+                        # Subsequent passes read from previous pass output
+                        # (pass p-1 wrote to ping-pong index (p - 1) % 2)
+                        source_tex = ping_pong_textures[(p - 1) % 2]
+                        gl.glBindTexture(gl.GL_TEXTURE_2D, source_tex)
+
+                    gl.glClearColor(0, 0, 0, 0)
+                    gl.glClear(gl.GL_COLOR_BUFFER_BIT)
+                    gl.glDrawArrays(gl.GL_TRIANGLES, 0, 3)
+
+            # Read back outputs for this batch
+            # (glGetTexImage is synchronous, implicitly waits for rendering)
+            # NOTE(review): these binds go to whichever texture unit is active at this point;
+            # the upload loop of the next batch rebinds the input textures — confirm this
+            # holds if batches ever carry fewer images than num_inputs.
+            batch_outputs = []
+            for tex in output_textures:
+                gl.glBindTexture(gl.GL_TEXTURE_2D, tex)
+                data = gl.glGetTexImage(gl.GL_TEXTURE_2D, 0, gl.GL_RGBA, gl.GL_FLOAT)
+                img = np.frombuffer(data, dtype=np.float32).reshape(height, width, 4)
+                # Undo the vertical flip applied on upload and return an owned, contiguous copy.
+                batch_outputs.append(np.ascontiguousarray(img[::-1, :, :]))
+
+            # Pad with black images for unused outputs
+            # (the same zero array object is appended for every unused slot of this batch)
+            black_img = np.zeros((height, width, 4), dtype=np.float32)
+            for _ in range(num_outputs, MAX_OUTPUTS):
+                batch_outputs.append(black_img)
+
+            all_batch_outputs.append(batch_outputs)
+
+        # Timing covers everything since function entry, including compilation and readback.
+        elapsed = (time.perf_counter() - start_time) * 1000
+        num_batches = len(image_batches)
+        pass_info = f", {num_passes} passes" if num_passes > 1 else ""
+        logger.info(f"GLSL shader executed in {elapsed:.1f}ms ({num_batches} batch{'es' if num_batches != 1 else ''}, {width}x{height}{pass_info})")
+
+        return all_batch_outputs
+
+    finally:
+        # Unbind before deleting
+        gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, 0)
+        gl.glUseProgram(0)
+
+        # Only delete what was actually created; an early failure leaves some of these empty/None.
+        if input_textures:
+            gl.glDeleteTextures(len(input_textures), input_textures)
+        if output_textures:
+            gl.glDeleteTextures(len(output_textures), output_textures)
+        if ping_pong_textures:
+            gl.glDeleteTextures(len(ping_pong_textures), ping_pong_textures)
+        if fbo is not None:
+            gl.glDeleteFramebuffers(1, [fbo])
+        for pp_fbo in ping_pong_fbos:
+            gl.glDeleteFramebuffers(1, [pp_fbo])
+        if program is not None:
+            gl.glDeleteProgram(program)
+
+class GLSLShader(io.ComfyNode):
+    """Node that runs a user-supplied GLSL fragment shader over batches of images.
+
+    Inputs are a shader source string, a size mode, one or more images and
+    optional float/int uniforms; the node always returns MAX_OUTPUTS image
+    batches (outputs the shader does not write are black).
+    """
+
+    @classmethod
+    def define_schema(cls) -> io.Schema:
+        """Declare the node's inputs (shader, size mode, autogrow images/floats/ints) and outputs."""
+        image_template = io.Autogrow.TemplatePrefix(
+            io.Image.Input("image"),
+            prefix="image",
+            min=1,
+            max=MAX_IMAGES,
+        )
+
+        float_template = io.Autogrow.TemplatePrefix(
+            io.Float.Input("float", default=0.0),
+            prefix="u_float",
+            min=0,
+            max=MAX_UNIFORMS,
+        )
+
+        int_template = io.Autogrow.TemplatePrefix(
+            io.Int.Input("int", default=0),
+            prefix="u_int",
+            min=0,
+            max=MAX_UNIFORMS,
+        )
+
+        return io.Schema(
+            node_id="GLSLShader",
+            display_name="GLSL Shader",
+            category="image/shader",
+            description=(
+                f"Apply GLSL fragment shaders to images. "
+                f"Inputs: u_image0-{MAX_IMAGES-1} (sampler2D), u_resolution (vec2), "
+                f"u_float0-{MAX_UNIFORMS-1}, u_int0-{MAX_UNIFORMS-1}. "
+                f"Outputs: layout(location = 0-{MAX_OUTPUTS-1}) out vec4 fragColor0-{MAX_OUTPUTS-1}."
+            ),
+            inputs=[
+                io.String.Input(
+                    "fragment_shader",
+                    default=DEFAULT_FRAGMENT_SHADER,
+                    multiline=True,
+                    tooltip="GLSL fragment shader source code (GLSL ES 3.00 / WebGL 2.0 compatible)",
+                ),
+                io.DynamicCombo.Input(
+                    "size_mode",
+                    options=[
+                        io.DynamicCombo.Option("from_input", []),
+                        io.DynamicCombo.Option(
+                            "custom",
+                            [
+                                io.Int.Input(
+                                    "width",
+                                    default=512,
+                                    min=1,
+                                    max=nodes.MAX_RESOLUTION,
+                                ),
+                                io.Int.Input(
+                                    "height",
+                                    default=512,
+                                    min=1,
+                                    max=nodes.MAX_RESOLUTION,
+                                ),
+                            ],
+                        ),
+                    ],
+                    tooltip="Output size: 'from_input' uses first input image dimensions, 'custom' allows manual size",
+                ),
+                io.Autogrow.Input("images", template=image_template),
+                io.Autogrow.Input("floats", template=float_template),
+                io.Autogrow.Input("ints", template=int_template),
+            ],
+            outputs=[
+                io.Image.Output(display_name="IMAGE0"),
+                io.Image.Output(display_name="IMAGE1"),
+                io.Image.Output(display_name="IMAGE2"),
+                io.Image.Output(display_name="IMAGE3"),
+            ],
+        )
+
+    @classmethod
+    def execute(
+        cls,
+        fragment_shader: str,
+        size_mode: SizeModeInput,
+        images: io.Autogrow.Type,
+        floats: io.Autogrow.Type = None,
+        ints: io.Autogrow.Type = None,
+        **kwargs,
+    ) -> io.NodeOutput:
+        """Render the shader once per batch item and return MAX_OUTPUTS image batches.
+
+        Args:
+            fragment_shader: Fragment shader source code.
+            size_mode: Mapping with key "size_mode"; when it is "custom" the
+                "width"/"height" keys give the output size, otherwise the
+                first input image's height/width are used.
+            images: Mapping of connected image tensors indexed as (batch, height, width, ...);
+                None entries are ignored.
+            floats: Optional mapping of float uniforms; None values become 0.0.
+            ints: Optional mapping of int uniforms; None values become 0.
+
+        Returns:
+            io.NodeOutput with MAX_OUTPUTS stacked tensors plus UI previews of
+            the inputs and of the first output.
+
+        Raises:
+            ValueError: if no image is connected.
+        """
+        image_list = [v for v in images.values() if v is not None]
+        float_list = (
+            [v if v is not None else 0.0 for v in floats.values()] if floats else []
+        )
+        int_list = [v if v is not None else 0 for v in ints.values()] if ints else []
+
+        if not image_list:
+            raise ValueError("At least one input image is required")
+
+        # Determine output dimensions
+        if size_mode["size_mode"] == "custom":
+            out_width = size_mode["width"]
+            out_height = size_mode["height"]
+        else:
+            out_height, out_width = image_list[0].shape[1:3]
+
+        # The first input decides how many batch items are rendered.
+        batch_size = image_list[0].shape[0]
+
+        # Prepare batches
+        image_batches = []
+        for batch_idx in range(batch_size):
+            batch_images = [
+                # Inputs with a shorter batch repeat their last frame instead of
+                # raising IndexError; inputs of equal length are indexed as before.
+                img_tensor[min(batch_idx, img_tensor.shape[0] - 1)].cpu().numpy().astype(np.float32)
+                for img_tensor in image_list
+            ]
+            image_batches.append(batch_images)
+
+        all_batch_outputs = _render_shader_batch(
+            fragment_shader,
+            out_width,
+            out_height,
+            image_batches,
+            float_list,
+            int_list,
+        )
+
+        # Collect outputs into tensors
+        all_outputs = [[] for _ in range(MAX_OUTPUTS)]
+        for batch_outputs in all_batch_outputs:
+            for i, out_img in enumerate(batch_outputs):
+                all_outputs[i].append(torch.from_numpy(out_img))
+
+        output_tensors = [torch.stack(all_outputs[i], dim=0) for i in range(MAX_OUTPUTS)]
+        return io.NodeOutput(
+            *output_tensors,
+            ui=cls._build_ui_output(image_list, output_tensors[0]),
+        )
+
+    @classmethod
+    def _build_ui_output(
+        cls, image_list: list[torch.Tensor], output_batch: torch.Tensor
+    ) -> dict[str, list]:
+        """Build UI output with input and output images for client-side shader execution.
+
+        Each input tensor is saved on its own rather than concatenated first:
+        the renderer accepts inputs of different resolutions, and torch.cat
+        would raise on such inputs after the render had already succeeded.
+        The saved entries keep the order of ``image_list``.
+        """
+        input_images_ui = []
+        for input_batch in image_list:
+            # assumes save_images returns a list of saved-file entries — TODO confirm
+            input_images_ui.extend(
+                ui.ImageSaveHelper.save_images(
+                    input_batch,
+                    filename_prefix="GLSLShader_input",
+                    folder_type=io.FolderType.temp,
+                    cls=None,
+                    compress_level=1,
+                )
+            )
+
+        output_images_ui = ui.ImageSaveHelper.save_images(
+            output_batch,
+            filename_prefix="GLSLShader_output",
+            folder_type=io.FolderType.temp,
+            cls=None,
+            compress_level=1,
+        )
+
+        return {"input_images": input_images_ui, "images": output_images_ui}
+
+
+class GLSLExtension(ComfyExtension):
+    """Extension whose node list contains the GLSL shader node."""
+
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        """Return the node classes provided by this extension."""
+        node_classes: list[type[io.ComfyNode]] = [GLSLShader]
+        return node_classes
+
+
+async def comfy_entrypoint() -> GLSLExtension:
+    """Create and return the extension object for this module."""
+    extension = GLSLExtension()
+    return extension
--- a/comfy_extras/nodes_train.py
+++ b/comfy_extras/nodes_train.py
@@ -4,6 +4,7 @@ import os
 import numpy as np
 import safetensors
 import torch
+import torch.nn as nn
 import torch.utils.checkpoint
 from tqdm.auto import trange
 from PIL import Image, ImageDraw, ImageFont
@@ -27,6 +28,11 @@ class TrainGuider(comfy_extras.nodes_custom_sampler.Guider_Basic):
    """
    CFGGuider with modifications for training specific logic
    """
+
+    def __init__(self, *args, offloading=False, **kwargs):
+        """Forward all arguments to the base guider and remember the offloading flag.
+
+        Args:
+            offloading: Keyword-only flag read by ``outer_sample``; when true
+                it passes ``force_offload=True`` and ``force_full_load=False``.
+        """
+        super().__init__(*args, **kwargs)
+        self.offloading = offloading
+
    def outer_sample(
        self,
        noise,
@@ -45,9 +51,11 @@ class TrainGuider(comfy_extras.nodes_custom_sampler.Guider_Basic):
                noise.shape,
                self.conds,
                self.model_options,
-                force_full_load=True,  # mirror behavior in TrainLoraNode.execute() to keep model loaded
+                force_full_load=not self.offloading,
+                force_offload=self.offloading,
            )
        )
+        torch.cuda.empty_cache()
        device = self.model_patcher.load_device

        if denoise_mask is not None:
@@ -404,16 +412,97 @@ def find_all_highest_child_module_with_forward(
    return result


-def patch(m):
+def find_modules_at_depth(
+    model: nn.Module, depth: int = 1, result=None, current_depth=0, name=None
+) -> list[nn.Module]:
+    """
+    Collect the modules sitting at a given nesting level, for gradient checkpointing.
+
+    Container modules (ModuleList, Sequential, ModuleDict) do not count as a
+    level; the search passes straight through them. A branch stops descending
+    as soon as it reaches a module at the target level.
+
+    Args:
+        model: Module to search (itself included in the depth count)
+        depth: Target level (1 = first non-container modules, 2 = their children, etc.)
+        result: List the matches are appended to; a new list is created when None
+        current_depth: Level reached by the caller (used by the recursion)
+        name: Dotted path of ``model`` used in the debug log; defaults to "root"
+
+    Returns:
+        The list of matching modules, in traversal order
+    """
+    collected = [] if result is None else result
+    label = name if name else "root"
+
+    container_types = (nn.ModuleList, nn.Sequential, nn.ModuleDict)
+    counts_as_level = hasattr(model, "forward") and not isinstance(model, container_types)
+    level = current_depth + 1 if counts_as_level else current_depth
+
+    if counts_as_level and level == depth:
+        collected.append(model)
+        logging.debug(f"Found module at depth {depth}: {label} ({model.__class__.__name__})")
+        return collected
+
+    # Not at the target level yet: keep descending.
+    for child_name, child in model.named_children():
+        find_modules_at_depth(child, depth, collected, level, f"{label}.{child_name}")
+
+    return collected
+
+
+class OffloadCheckpointFunction(torch.autograd.Function):
+    """
+    Gradient checkpointing that works with weight offloading.
+
+    Forward: no_grad -> compute -> weights can be freed
+    Backward: enable_grad -> recompute -> backward -> weights can be freed
+
+    For single input, single output modules (Linear, Conv*).
+
+    Only the input tensor and the callable are kept between forward and
+    backward; the forward pass is run a second time during backward.
+
+    NOTE(review): if ``x`` does not require grad, autograd presumably never
+    calls ``backward`` for this node, so parameters used inside ``forward_fn``
+    would receive no gradient — confirm for modules fed directly by the
+    model input.
+    """
+
+    @staticmethod
+    def forward(ctx, x: torch.Tensor, forward_fn):
+        """Run ``forward_fn(x)`` without recording a graph; stash ``x`` and ``forward_fn`` on ``ctx``."""
+        ctx.save_for_backward(x)
+        ctx.forward_fn = forward_fn
+        with torch.no_grad():
+            return forward_fn(x)
+
+    @staticmethod
+    def backward(ctx, grad_out: torch.Tensor):
+        """Recompute the forward pass with grad enabled and backpropagate ``grad_out`` through it.
+
+        Returns one entry per ``forward`` argument: the gradient for ``x`` and
+        ``None`` for the non-tensor ``forward_fn``.
+        """
+        x, = ctx.saved_tensors
+        forward_fn = ctx.forward_fn
+
+        # Clear context early
+        ctx.forward_fn = None
+
+        with torch.enable_grad():
+            # Detach so the recomputed graph starts at a fresh leaf whose .grad can be read back.
+            x_detached = x.detach().requires_grad_(True)
+            y = forward_fn(x_detached)
+            # Backpropagates through the recomputed graph; x_detached.grad is read right after.
+            y.backward(grad_out)
+            grad_x = x_detached.grad
+
+        # Explicit cleanup
+        del y, x_detached, forward_fn
+
+        return grad_x, None
+
+
+def patch(m, offloading=False):
    if not hasattr(m, "forward"):
        return
    org_forward = m.forward

-    def fwd(args, kwargs):
-        return org_forward(*args, **kwargs)
+    # Branch 1: Linear/Conv* -> offload-compatible checkpoint (single input/output)
+    if offloading and isinstance(m, (nn.Linear, nn.Conv1d, nn.Conv2d, nn.Conv3d)):
+        def checkpointing_fwd(x):
+            return OffloadCheckpointFunction.apply(x, org_forward)
+    # Branch 2: Others -> standard checkpoint
+    else:
+        def fwd(args, kwargs):
+            return org_forward(*args, **kwargs)

-    def checkpointing_fwd(*args, **kwargs):
-        return torch.utils.checkpoint.checkpoint(fwd, args, kwargs, use_reentrant=False)
+        def checkpointing_fwd(*args, **kwargs):
+            return torch.utils.checkpoint.checkpoint(fwd, args, kwargs, use_reentrant=False)

    m.org_forward = org_forward
    m.forward = checkpointing_fwd
@@ -936,6 +1025,18 @@ class TrainLoraNode(io.ComfyNode):
                    default=True,
                    tooltip="Use gradient checkpointing for training.",
                ),
+                io.Int.Input(
+                    "checkpoint_depth",
+                    default=1,
+                    min=1,
+                    max=5,
+                    tooltip="Depth level for gradient checkpointing.",
+                ),
+                io.Boolean.Input(
+                    "offloading",
+                    default=False,
+                    tooltip="Offload the Model to RAM. Requires Bypass Mode.",
+                ),
                io.Combo.Input(
                    "existing_lora",
                    options=folder_paths.get_filename_list("loras") + ["[None]"],
@@ -982,6 +1083,8 @@ class TrainLoraNode(io.ComfyNode):
        lora_dtype,
        algorithm,
        gradient_checkpointing,
+        checkpoint_depth,
+        offloading,
        existing_lora,
        bucket_mode,
        bypass_mode,
@@ -1000,6 +1103,8 @@ class TrainLoraNode(io.ComfyNode):
        lora_dtype = lora_dtype[0]
        algorithm = algorithm[0]
        gradient_checkpointing = gradient_checkpointing[0]
+        offloading = offloading[0]
+        checkpoint_depth = checkpoint_depth[0]
        existing_lora = existing_lora[0]
        bucket_mode = bucket_mode[0]
        bypass_mode = bypass_mode[0]
@@ -1019,6 +1124,15 @@ class TrainLoraNode(io.ComfyNode):
        lora_dtype = node_helpers.string_to_torch_dtype(lora_dtype)
        mp.set_model_compute_dtype(dtype)

+        if mp.is_dynamic():
+            if not bypass_mode:
+                logging.info("Training MP is Dynamic - forcing bypass mode. Start comfy with --highvram to force weight diff mode")
+                bypass_mode = True
+            offloading = True  # a dynamic model patcher is always trained with offloading
+        elif offloading and not bypass_mode:
+            logging.info("Training Offload selected - forcing bypass mode. Set bypass = True to remove this message")
+            bypass_mode = True  # previously only logged; the "forcing" message now matches what happens
+
        # Prepare latents and compute counts
        latents, num_images, multi_res = _prepare_latents_and_count(
            latents, dtype, bucket_mode
@@ -1054,16 +1168,18 @@ class TrainLoraNode(io.ComfyNode):

            # Setup gradient checkpointing
            if gradient_checkpointing:
-                for m in find_all_highest_child_module_with_forward(
-                    mp.model.diffusion_model
-                ):
-                    patch(m)
+                modules_to_patch = find_modules_at_depth(
+                    mp.model.diffusion_model, depth=checkpoint_depth
+                )
+                logging.info(f"Gradient checkpointing: patching {len(modules_to_patch)} modules at depth {checkpoint_depth}")
+                for m in modules_to_patch:
+                    patch(m, offloading=offloading)

            torch.cuda.empty_cache()
            # With force_full_load=False we should be able to have offloading
            # But for offloading in training we need custom AutoGrad hooks for fwd/bwd
            comfy.model_management.load_models_gpu(
-                [mp], memory_required=1e20, force_full_load=True
+                [mp], memory_required=1e20, force_full_load=not offloading
            )
            torch.cuda.empty_cache()

@@ -1100,7 +1216,7 @@ class TrainLoraNode(io.ComfyNode):
                )

            # Setup guider
-            guider = TrainGuider(mp)
+            guider = TrainGuider(mp, offloading=offloading)
            guider.set_conds(positive)

            # Inject bypass hooks if bypass mode is enabled
@@ -1113,6 +1229,7 @@ class TrainLoraNode(io.ComfyNode):

            # Run training loop
            try:
+                comfy.model_management.in_training = True
                _run_training_loop(
                    guider,
                    train_sampler,
@@ -1123,6 +1240,7 @@ class TrainLoraNode(io.ComfyNode):
                    multi_res,
                )
            finally:
+                comfy.model_management.in_training = False
                # Eject bypass hooks if they were injected
                if bypass_injections is not None:
                    for injection in bypass_injections:
@@ -1132,19 +1250,20 @@ class TrainLoraNode(io.ComfyNode):
                    unpatch(m)
            del train_sampler, optimizer

-            # Finalize adapters
+            for param in lora_sd:
+                lora_sd[param] = lora_sd[param].to(lora_dtype).detach()
+
            for adapter in all_weight_adapters:
                adapter.requires_grad_(False)
-
-            for param in lora_sd:
-                lora_sd[param] = lora_sd[param].to(lora_dtype)
+                del adapter
+            del all_weight_adapters

            # mp in train node is highly specialized for training
            # use it in inference will result in bad behavior so we don't return it
            return io.NodeOutput(lora_sd, loss_map, steps + existing_steps)


-class LoraModelLoader(io.ComfyNode):#
+class LoraModelLoader(io.ComfyNode):
    @classmethod
    def define_schema(cls):
        return io.Schema(
@@ -1166,6 +1285,11 @@ class LoraModelLoader(io.ComfyNode):#
                    max=100.0,
                    tooltip="How strongly to modify the diffusion model. This value can be negative.",
                ),
+                io.Boolean.Input(
+                    "bypass",
+                    default=False,
+                    tooltip="When enabled, applies LoRA in bypass mode without modifying base model weights. Useful for training and when model weights are offloaded.",
+                ),
            ],
            outputs=[
                io.Model.Output(
@@ -1175,13 +1299,18 @@ class LoraModelLoader(io.ComfyNode):#
        )

    @classmethod
-    def execute(cls, model, lora, strength_model):
+    def execute(cls, model, lora, strength_model, bypass=False):
        if strength_model == 0:
            return io.NodeOutput(model)

-        model_lora, _ = comfy.sd.load_lora_for_models(
-            model, None, lora, strength_model, 0
-        )
+        if bypass:
+            model_lora, _ = comfy.sd.load_bypass_lora_for_models(
+                model, None, lora, strength_model, 0
+            )
+        else:
+            model_lora, _ = comfy.sd.load_lora_for_models(
+                model, None, lora, strength_model, 0
+            )
        return io.NodeOutput(model_lora)


--- a/comfy_extras/nodes_video.py
+++ b/comfy_extras/nodes_video.py
@@ -202,6 +202,56 @@ class LoadVideo(io.ComfyNode):

        return True

+class VideoSlice(io.ComfyNode):
+    """Node that cuts a time range out of a video via ``video.as_trimmed``."""
+
+    @classmethod
+    def define_schema(cls):
+        """Declare the node: video, start time, duration and strictness flag in; one video out."""
+        video_input = io.Video.Input("video")
+        start_time_input = io.Float.Input(
+            "start_time",
+            default=0.0,
+            max=1e5,
+            min=-1e5,
+            step=0.001,
+            tooltip="Start time in seconds",
+        )
+        duration_input = io.Float.Input(
+            "duration",
+            default=0.0,
+            min=0.0,
+            step=0.001,
+            tooltip="Duration in seconds, or 0 for unlimited duration",
+        )
+        strict_duration_input = io.Boolean.Input(
+            "strict_duration",
+            default=False,
+            tooltip="If True, when the specified duration is not possible, an error will be raised.",
+        )
+        aliases = [
+            "trim video duration",
+            "skip first frames",
+            "frame load cap",
+            "start time",
+        ]
+        return io.Schema(
+            node_id="Video Slice",
+            display_name="Video Slice",
+            search_aliases=aliases,
+            category="image/video",
+            inputs=[video_input, start_time_input, duration_input, strict_duration_input],
+            outputs=[io.Video.Output()],
+        )
+
+    @classmethod
+    def execute(cls, video: io.Video.Type, start_time: float, duration: float, strict_duration: bool) -> io.NodeOutput:
+        """Return the trimmed video, or raise ValueError when ``as_trimmed`` yields None."""
+        trimmed = video.as_trimmed(start_time, duration, strict_duration=strict_duration)
+        if trimmed is None:
+            # The source duration is only queried on this failure path.
+            raise ValueError(
+                f"Failed to slice video:\nSource duration: {video.get_duration()}\nStart time: {start_time}\nTarget duration: {duration}"
+            )
+        return io.NodeOutput(trimmed)
+
+

 class VideoExtension(ComfyExtension):
    @override
@@ -212,6 +262,7 @@ class VideoExtension(ComfyExtension):
            CreateVideo,
            GetVideoComponents,
            LoadVideo,
+            VideoSlice,
        ]

 async def comfy_entrypoint() -> VideoExtension:
--- a/execution.py
+++ b/execution.py
@@ -623,6 +623,8 @@ async def execute(server, dynprompt, caches, current_item, extra_data, executed,
            logging.info("Memory summary: {}".format(comfy.model_management.debug_memory_summary()))
            logging.error("Got an OOM, unloading all loaded models.")
            comfy.model_management.unload_all_models()
+        elif isinstance(ex, RuntimeError) and ("mat1 and mat2 shapes" in str(ex)) and "Sampler" in class_type:
+            tips = "\n\nTIPS: If you have any \"Load CLIP\" or \"*CLIP Loader\" nodes in your workflow connected to this sampler node make sure the correct file(s) and type is selected."

        error_details = {
            "node_id": real_node_id,
--- a/nodes.py
+++ b/nodes.py
@@ -2432,6 +2432,7 @@ async def init_builtin_extra_nodes():
        "nodes_wanmove.py",
        "nodes_image_compare.py",
        "nodes_zimage.py",
+        "nodes_glsl.py",
        "nodes_lora_debug.py",
        "nodes_color.py",
        "nodes_toolkit.py",
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-comfyui-frontend-package==1.38.13
+comfyui-frontend-package==1.38.14
 comfyui-workflow-templates==0.8.38
 comfyui-embedded-docs==0.4.1
 torch
@@ -30,3 +30,6 @@ kornia>=0.7.1
 spandrel
 pydantic~=2.0
 pydantic-settings~=2.0
+PyOpenGL
+PyOpenGL-accelerate
+glfw
--- a/tests/execution/test_jobs.py
+++ b/tests/execution/test_jobs.py
@@ -5,8 +5,11 @@ from comfy_execution.jobs import (
    is_previewable,
    normalize_queue_item,
    normalize_history_item,
+    normalize_output_item,
+    normalize_outputs,
    get_outputs_summary,
    apply_sorting,
+    has_3d_extension,
 )


@@ -35,8 +38,8 @@ class TestIsPreviewable:
    """Unit tests for is_previewable()"""

    def test_previewable_media_types(self):
-        """Images, video, audio media types should be previewable."""
-        for media_type in ['images', 'video', 'audio']:
+        """Images, video, audio, 3d media types should be previewable."""
+        for media_type in ['images', 'video', 'audio', '3d']:
            assert is_previewable(media_type, {}) is True

    def test_non_previewable_media_types(self):
@@ -46,7 +49,7 @@ class TestIsPreviewable:

    def test_3d_extensions_previewable(self):
        """3D file extensions should be previewable regardless of media_type."""
-        for ext in ['.obj', '.fbx', '.gltf', '.glb']:
+        for ext in ['.obj', '.fbx', '.gltf', '.glb', '.usdz']:
            item = {'filename': f'model{ext}'}
            assert is_previewable('files', item) is True

@@ -160,7 +163,7 @@ class TestGetOutputsSummary:

    def test_3d_files_previewable(self):
        """3D file extensions should be previewable."""
-        for ext in ['.obj', '.fbx', '.gltf', '.glb']:
+        for ext in ['.obj', '.fbx', '.gltf', '.glb', '.usdz']:
            outputs = {
                'node1': {
                    'files': [{'filename': f'model{ext}', 'type': 'output'}]
@@ -192,6 +195,64 @@ class TestGetOutputsSummary:
        assert preview['mediaType'] == 'images'
        assert preview['subfolder'] == 'outputs'

+    def test_string_3d_filename_creates_preview(self):
+        """String items with 3D extensions should synthesize a preview (Preview3D node output).
+        Only the .glb counts — nulls and non-file strings are excluded."""
+        outputs = {
+            'node1': {
+                'result': ['preview3d_abc123.glb', None, None]
+            }
+        }
+        count, preview = get_outputs_summary(outputs)
+        assert count == 1
+        assert preview is not None
+        assert preview['filename'] == 'preview3d_abc123.glb'
+        assert preview['mediaType'] == '3d'
+        assert preview['nodeId'] == 'node1'
+        assert preview['type'] == 'output'
+
+    def test_string_non_3d_filename_no_preview(self):
+        """String items without 3D extensions should not create a preview."""
+        outputs = {
+            'node1': {
+                'result': ['data.json', None]
+            }
+        }
+        count, preview = get_outputs_summary(outputs)
+        assert count == 0
+        assert preview is None
+
+    def test_string_3d_filename_used_as_fallback(self):
+        """String 3D preview should be used when no dict items are previewable."""
+        outputs = {
+            'node1': {
+                'latents': [{'filename': 'latent.safetensors'}],
+            },
+            'node2': {
+                'result': ['model.glb', None]
+            }
+        }
+        count, preview = get_outputs_summary(outputs)
+        assert preview is not None
+        assert preview['filename'] == 'model.glb'
+        assert preview['mediaType'] == '3d'
+
+
+class TestHas3DExtension:
+    """Unit tests for has_3d_extension()"""
+
+    def test_recognized_extensions(self):
+        for ext in ['.obj', '.fbx', '.gltf', '.glb', '.usdz']:
+            assert has_3d_extension(f'model{ext}') is True
+
+    def test_case_insensitive(self):
+        assert has_3d_extension('MODEL.GLB') is True
+        assert has_3d_extension('Scene.GLTF') is True
+
+    def test_non_3d_extensions(self):
+        for name in ['photo.png', 'video.mp4', 'data.json', 'model']:
+            assert has_3d_extension(name) is False
+

 class TestApplySorting:
    """Unit tests for apply_sorting()"""
@@ -395,3 +456,142 @@ class TestNormalizeHistoryItem:
            'prompt': {'nodes': {'1': {}}},
            'extra_data': {'create_time': 1234567890, 'client_id': 'abc'},
        }
+
+    def test_include_outputs_normalizes_3d_strings(self):
+        """Detail view should transform string 3D filenames into file output dicts."""
+        history_item = {
+            'prompt': (
+                5,
+                'prompt-3d',
+                {'nodes': {}},
+                {'create_time': 1234567890},
+                ['node1'],
+            ),
+            'status': {'status_str': 'success', 'completed': True, 'messages': []},
+            'outputs': {
+                'node1': {
+                    'result': ['preview3d_abc123.glb', None, None]
+                }
+            },
+        }
+        job = normalize_history_item('prompt-3d', history_item, include_outputs=True)
+
+        assert job['outputs_count'] == 1
+        result_items = job['outputs']['node1']['result']
+        assert len(result_items) == 1
+        assert result_items[0] == {
+            'filename': 'preview3d_abc123.glb',
+            'type': 'output',
+            'subfolder': '',
+            'mediaType': '3d',
+        }
+
+    def test_include_outputs_preserves_dict_items(self):
+        """Detail view normalization should pass dict items through unchanged."""
+        history_item = {
+            'prompt': (
+                5,
+                'prompt-img',
+                {'nodes': {}},
+                {'create_time': 1234567890},
+                ['node1'],
+            ),
+            'status': {'status_str': 'success', 'completed': True, 'messages': []},
+            'outputs': {
+                'node1': {
+                    'images': [
+                        {'filename': 'photo.png', 'type': 'output', 'subfolder': ''},
+                    ]
+                }
+            },
+        }
+        job = normalize_history_item('prompt-img', history_item, include_outputs=True)
+
+        assert job['outputs_count'] == 1
+        assert job['outputs']['node1']['images'] == [
+            {'filename': 'photo.png', 'type': 'output', 'subfolder': ''},
+        ]
+
+
+class TestNormalizeOutputItem:
+    """Unit tests for normalize_output_item()"""
+
+    def test_none_returns_none(self):
+        assert normalize_output_item(None) is None
+
+    def test_string_3d_extension_synthesizes_dict(self):
+        result = normalize_output_item('model.glb')
+        assert result == {'filename': 'model.glb', 'type': 'output', 'subfolder': '', 'mediaType': '3d'}
+
+    def test_string_non_3d_extension_returns_none(self):
+        assert normalize_output_item('data.json') is None
+
+    def test_string_no_extension_returns_none(self):
+        assert normalize_output_item('camera_info_string') is None
+
+    def test_dict_passes_through(self):
+        item = {'filename': 'test.png', 'type': 'output'}
+        assert normalize_output_item(item) is item
+
+    def test_other_types_return_none(self):
+        assert normalize_output_item(42) is None
+        assert normalize_output_item(True) is None
+
+
+class TestNormalizeOutputs:
+    """Unit tests for normalize_outputs()"""
+
+    def test_empty_outputs(self):
+        assert normalize_outputs({}) == {}
+
+    def test_dict_items_pass_through(self):
+        outputs = {
+            'node1': {
+                'images': [{'filename': 'a.png', 'type': 'output'}],
+            }
+        }
+        result = normalize_outputs(outputs)
+        assert result == outputs
+
+    def test_3d_string_synthesized(self):
+        outputs = {
+            'node1': {
+                'result': ['model.glb', None, None],
+            }
+        }
+        result = normalize_outputs(outputs)
+        assert result == {
+            'node1': {
+                'result': [
+                    {'filename': 'model.glb', 'type': 'output', 'subfolder': '', 'mediaType': '3d'},
+                ],
+            }
+        }
+
+    def test_animated_key_preserved(self):
+        outputs = {
+            'node1': {
+                'images': [{'filename': 'a.png', 'type': 'output'}],
+                'animated': [True],
+            }
+        }
+        result = normalize_outputs(outputs)
+        assert result['node1']['animated'] == [True]
+
+    def test_non_dict_node_outputs_preserved(self):
+        outputs = {'node1': 'unexpected_value'}
+        result = normalize_outputs(outputs)
+        assert result == {'node1': 'unexpected_value'}
+
+    def test_none_items_filtered_but_other_types_preserved(self):
+        outputs = {
+            'node1': {
+                'result': ['data.json', None, [1, 2, 3]],
+            }
+        }
+        result = normalize_outputs(outputs)
+        assert result == {
+            'node1': {
+                'result': ['data.json', [1, 2, 3]],
+            }
+        }
Author	SHA1	Message	Date
Hunter Senft-Grupp	40b247f046	Merge remote-tracking branch 'comfy-org/master' into test-glsl-nodes	2026-02-14 20:20:56 -08:00
comfyanonymous	e1ede29d82	Remove unsafe pickle loading code that was used on pytorch older than 2.4 (#12473 ) ComfyUI hasn't started on pytorch 2.4 since last month.	2026-02-14 22:53:52 -05:00
Christian Byrne	df1e5e8514	Update frontend package to 1.38.14 (#12469 )	2026-02-14 11:01:10 -08:00
krigeta	dc9822b7df	Add working Qwen 2512 ControlNet (Fun ControlNet) support (#12359 )	2026-02-13 22:23:52 -05:00
comfyanonymous	712efb466b	Add left padding to LTXAV text encoder. (#12456 )	2026-02-13 21:56:54 -05:00
comfyanonymous	726af73867	Fix some custom nodes. (#12455 )	2026-02-13 20:21:10 -05:00
comfyanonymous	831351a29e	Support generating attention masks for left padded text encoders. (#12454 )	2026-02-13 20:15:23 -05:00
comfyanonymous	e1add563f9	Use torch RMSNorm for flux models and refactor hunyuan video code. (#12432 )	2026-02-13 15:35:13 -05:00
rattus	8902907d7a	dynamic_vram: Training fixes (#12442 )	2026-02-13 15:29:37 -05:00
comfyanonymous	e03fe8b591	Update command to install AMD stable linux pytorch. (#12437 )	2026-02-12 23:29:12 -05:00
rattus	ae79e33345	llama: use a more efficient rope implementation (#12434 ) Get rid of the cat and unary negation and inplace add-cmul the two halves of the rope. Precompute -sin once at the start of the model rather than every transformer block. This is slightly faster on both GPU and CPU bound setups.	2026-02-12 19:56:42 -05:00
rattus	117e214354	ModelPatcherDynamic: force load non leaf weights (#12433 ) The current behaviour of the default ModelPatcher is to .to a model only if it's fully loaded, which is how random non-leaf weights get loaded in non-LowVRAM conditions. This however means they never get loaded in dynamic_vram. In the dynamic_vram case, force load them to the GPU.	2026-02-12 19:51:50 -05:00
Alexander Piskun	4a93a62371	fix(api-nodes): add separate retry budget for 429 rate limit responses (#12421 )	2026-02-12 01:38:51 -08:00
comfyanonymous	66c18522fb	Add a tip for common error. (#12414 )	2026-02-11 22:12:16 -05:00
askmyteapot	e5ae670a40	Update ace15.py to allow min_p sampling (#12373 )	2026-02-11 20:28:48 -05:00
rattus	3fe61cedda	model_patcher: guard against none model_dtype (#12410 ) Handle the case where the _model_dtype exists but is none with the intended fallback.	2026-02-11 14:54:02 -05:00
rattus	2a4328d639	ace15: Use dynamic_vram friendly trange (#12409 ) Factor out the ksampler trange and use it in ACE LLM to prevent the silent stall at 0 and rate distortion due to first-step model load.	2026-02-11 14:53:42 -05:00
rattus	d297a749a2	dynamic_vram: Fix windows Aimdo crash + Fix LLM performance (#12408 ) * model_management: lazy-cache aimdo_tensor These tensors constructed from aimdo-allocations are CPU expensive to make on the pytorch side. Add a cache version that will be valid with signature match to fast path past whatever torch is doing. * dynamic_vram: Minimize fast path CPU work Move as much as possible inside the not resident if block and cache the formed weight and bias rather than the flat intermediates. In extreme layer weight rates this adds up.	2026-02-11 14:50:16 -05:00
Alexander Piskun	2b7cc7e3b6	[API Nodes] enable Magnific Upscalers (#12179 ) * feat(api-nodes): enable Magnific Upscalers * update price badges --------- Co-authored-by: Jedrzej Kosinski <kosinkadink1@gmail.com>	2026-02-11 11:30:19 -08:00
Benjamin Lu	4993411fd9	Dispatch desktop auto-bump when a ComfyUI release is published (#12398 ) * Dispatch desktop auto-bump on ComfyUI release publish * Fix release webhook secret checks in step conditions * Require desktop dispatch token in release webhook * Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: Luke Mino-Altherr <lminoaltherr@gmail.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Jedrzej Kosinski <kosinkadink1@gmail.com>	2026-02-11 11:15:13 -08:00
Alexander Piskun	2c7cef4a23	fix(api-nodes): retry on connection errors during polling instead of aborting (#12393 )	2026-02-11 10:51:49 -08:00
comfyanonymous	76a7fa96db	Make built in lora training work on anima. (#12402 )	2026-02-10 22:04:32 -05:00
Kohaku-Blueleaf	cdcf4119b3	[Trainer] training with proper offloading (#12189 ) * Fix bypass dtype/device moving * Force offloading mode for training * training context var * offloading implementation in training node * fix wrong input type * Support bypass load lora model, correct adapter/offloading handling	2026-02-10 21:45:19 -05:00
AustinMroz	dbe70b6821	Add a VideoSlice node (#12107 ) * Base TrimVideo implementation * Raise error if as_trimmed call fails * Bigger max start_time, tooltips, and formatting * Count packets unless codec has subframes * Remove incorrect nested decode * Add null check for audio streams * Support non-strict duration * Added strict_duration bool to node definition * Empty commit for approval * Fix duration * Support 5.1 audio layout on save --------- Co-authored-by: Jedrzej Kosinski <kosinkadink1@gmail.com>	2026-02-10 14:42:21 -08:00
guill	00fff6019e	feat(jobs): add 3d to PREVIEWABLE_MEDIA_TYPES for first-class 3D output support (#12381 ) Co-authored-by: Jedrzej Kosinski <kosinkadink1@gmail.com>	2026-02-10 14:37:14 -08:00
pythongosssss	c2d229a786	Add edge preserving blur	2026-02-04 10:20:24 -08:00
pythongosssss	3c40ee0f02	print -> logger	2026-01-31 16:43:57 -08:00
pythongosssss	43034b6881	rebuild blueprints	2026-01-31 16:32:00 -08:00
pythongosssss	bb048d4aaa	more fixes	2026-01-31 16:30:10 -08:00
pythongosssss	7c1f02d1fa	add multipass for faster blur	2026-01-31 16:30:00 -08:00
pythongosssss	292a5918f4	shader nit iteration	2026-01-31 16:03:47 -08:00
pythongosssss	0050b66a0b	add glsl shader update system	2026-01-31 13:48:59 -08:00
pythongosssss	0c313f5293	hsb	2026-01-31 12:25:38 -08:00
pythongosssss	1fcf9dca18	brightness/contrast	2026-01-31 12:25:34 -08:00
pythongosssss	3b790d24d6	Add glow	2026-01-31 10:16:51 -08:00
pythongosssss	92b2b7198a	Merge remote-tracking branch 'origin/master' into pysssss/glsl-blueprints	2026-01-30 16:27:23 -08:00
pythongosssss	309c3e4ec0	Add channels	2026-01-30 14:57:44 -08:00
pythongosssss	23591d4388	Add image operation blueprints	2026-01-30 14:53:39 -08:00
pythongosssss	c3d07bec6d	add diagnostics, update mac initialization	2026-01-30 12:26:04 -08:00
pythongosssss	59b955ff54	fix ci perf: only read required outputs	2026-01-29 20:14:26 -08:00
pythongosssss	1263d6fe88	add additional support for egl & osmesa backends	2026-01-29 20:07:40 -08:00
pythongosssss	23572c6314	tidy	2026-01-28 20:59:01 -08:00
pythongosssss	d809ef8fb1	remove cpu support	2026-01-28 20:58:04 -08:00
pythongosssss	a4317314d2	convert to using PyOpenGL and glfw	2026-01-28 20:48:20 -08:00
pythongosssss	aaea976f36	fix line endings	2026-01-28 11:02:17 -08:00
pythongosssss	cee092213e	Merge remote-tracking branch 'origin/master' into pysssss/basic-glsl-shader-node	2026-01-28 10:50:12 -08:00
pythongosssss	3da0e9c367	fix casing	2026-01-28 10:47:36 -08:00
pythongosssss	9fa8202620	Try fix build	2026-01-28 10:47:36 -08:00
pythongosssss	b4438c9baf	Support multiple outputs	2026-01-28 10:47:36 -08:00
pythongosssss	cc30293d65	tidy	2026-01-23 10:38:26 -08:00
pythongosssss	866d863128	adds support for executing simple glsl shaders using moderngl package	2026-01-23 10:37:52 -08:00
				`@@ -0,0 +1 @@`
				{"revision": 0, "last_node_id": 29, "last_link_id": 0, "nodes": [{"id": 29, "type": "4c9d6ea4-b912-40e5-8766-6793a9758c53", "pos": [1970, -230], "size": [180, 86], "flags": {}, "order": 5, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "R", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}, {"label": "G", "localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": []}, {"label": "B", "localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": []}, {"label": "A", "localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": []}], "title": "Image Channels", "properties": {"proxyWidgets": []}, "widgets_values": []}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "4c9d6ea4-b912-40e5-8766-6793a9758c53", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 28, "lastLinkId": 39, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Image Channels", "inputNode": {"id": -10, "bounding": [1820, -185, 120, 60]}, "outputNode": {"id": -20, "bounding": [2460, -215, 120, 120]}, "inputs": [{"id": "3522932b-2d86-4a1f-a02a-cb29f3a9d7fe", "name": "images.image0", "type": "IMAGE", "linkIds": [39], "localized_name": "images.image0", "label": "image", "pos": [1920, -165]}], "outputs": [{"id": "605cb9c3-b065-4d9b-81d2-3ec331889b2b", "name": "IMAGE0", "type": "IMAGE", "linkIds": [26], "localized_name": "IMAGE0", "label": "R", "pos": [2480, -195]}, {"id": "fb44a77e-0522-43e9-9527-82e7465b3596", "name": "IMAGE1", "type": "IMAGE", "linkIds": [27], "localized_name": "IMAGE1", "label": "G", "pos": [2480, -175]}, {"id": "81460ee6-0131-402a-874f-6bf3001fc4ff", "name": "IMAGE2", "type": "IMAGE", "linkIds": [28], "localized_name": "IMAGE2", "label": "B", "pos": [2480, -155]}, {"id": "ae690246-80d4-4951-b1d9-9306d8a77417", "name": "IMAGE3", "type": "IMAGE", "linkIds": [29], "localized_name": "IMAGE3", 
"label": "A", "pos": [2480, -135]}], "widgets": [], "nodes": [{"id": 23, "type": "GLSLShader", "pos": [2000, -330], "size": [400, 172], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 39}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}], "outputs": [{"label": "R", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [26]}, {"label": "G", "localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": [27]}, {"label": "B", "localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": [28]}, {"label": "A", "localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": [29]}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\nlayout(location = 1) out vec4 fragColor1;\nlayout(location = 2) out vec4 fragColor2;\nlayout(location = 3) out vec4 fragColor3;\n\nvoid main() {\n vec4 color = texture(u_image0, v_texCoord);\n // Output each channel as grayscale to separate render targets\n fragColor0 = vec4(vec3(color.r), 1.0); // Red channel\n fragColor1 = vec4(vec3(color.g), 1.0); // Green channel\n fragColor2 = vec4(vec3(color.b), 1.0); // Blue channel\n fragColor3 = vec4(vec3(color.a), 1.0); // Alpha channel\n}\n", "from_input"]}], "groups": [], "links": [{"id": 39, "origin_id": -10, "origin_slot": 0, "target_id": 23, "target_slot": 0, "type": "IMAGE"}, {"id": 26, "origin_id": 23, "origin_slot": 0, "target_id": -20, "target_slot": 
0, "type": "IMAGE"}, {"id": 27, "origin_id": 23, "origin_slot": 1, "target_id": -20, "target_slot": 1, "type": "IMAGE"}, {"id": 28, "origin_id": 23, "origin_slot": 2, "target_id": -20, "target_slot": 2, "type": "IMAGE"}, {"id": 29, "origin_id": 23, "origin_slot": 3, "target_id": -20, "target_slot": 3, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}}]}}
				`@@ -0,0 +1 @@`
				{"revision":0,"last_node_id":25,"last_link_id":0,"nodes":[{"id":25,"type":"621ba4e2-22a8-482d-a369-023753198b7b","pos":[4610,-790],"size":[230,58],"flags":{},"order":4,"mode":0,"inputs":[{"label":"image","localized_name":"images.image0","name":"images.image0","type":"IMAGE","link":null}],"outputs":[{"label":"IMAGE","localized_name":"IMAGE0","name":"IMAGE0","type":"IMAGE","links":[]}],"title":"Sharpen","properties":{"proxyWidgets":[["24","value"]]},"widgets_values":[]}],"links":[],"version":0.4,"definitions":{"subgraphs":[{"id":"621ba4e2-22a8-482d-a369-023753198b7b","version":1,"state":{"lastGroupId":0,"lastNodeId":24,"lastLinkId":36,"lastRerouteId":0},"revision":0,"config":{},"name":"Sharpen","inputNode":{"id":-10,"bounding":[4090,-825,120,60]},"outputNode":{"id":-20,"bounding":[5150,-825,120,60]},"inputs":[{"id":"37011fb7-14b7-4e0e-b1a0-6a02e8da1fd7","name":"images.image0","type":"IMAGE","linkIds":[34],"localized_name":"images.image0","label":"image","pos":[4190,-805]}],"outputs":[{"id":"e9182b3f-635c-4cd4-a152-4b4be17ae4b9","name":"IMAGE0","type":"IMAGE","linkIds":[35],"localized_name":"IMAGE0","label":"IMAGE","pos":[5170,-805]}],"widgets":[],"nodes":[{"id":24,"type":"PrimitiveFloat","pos":[4280,-1240],"size":[270,58],"flags":{},"order":0,"mode":0,"inputs":[{"label":"strength","localized_name":"value","name":"value","type":"FLOAT","widget":{"name":"value"},"link":null}],"outputs":[{"localized_name":"FLOAT","name":"FLOAT","type":"FLOAT","links":[36]}],"properties":{"Node name for S&R":"PrimitiveFloat","min":0,"max":3,"precision":2,"step":0.05},"widgets_values":[0.5]},{"id":23,"type":"GLSLShader","pos":[4570,-1240],"size":[370,192],"flags":{},"order":1,"mode":0,"inputs":[{"label":"image0","localized_name":"images.image0","name":"images.image0","type":"IMAGE","link":34},{"label":"image1","localized_name":"images.image1","name":"images.image1","shape":7,"type":"IMAGE","link":null},{"label":"u_float0","localized_name":"floats.u_float0","name":"floats.u_float0","shape":7,"type":"FLOAT","link":36},{"label":"u_float1","localized_name":"floats.u_float1","name":"floats.u_float1","shape":7,"type":"FLOAT","link":null},{"label":"u_int0","localized_name":"ints.u_int0","name":"ints.u_int0","shape":7,"type":"INT","link":null},{"localized_name":"fragment_shader","name":"fragment_shader","type":"STRING","widget":{"name":"fragment_shader"},"link":null},{"localized_name":"size_mode","name":"size_mode","type":"COMFY_DYNAMICCOMBO_V3","widget":{"name":"size_mode"},"link":null}],"outputs":[{"localized_name":"IMAGE0","name":"IMAGE0","type":"IMAGE","links":[35]},{"localized_name":"IMAGE1","name":"IMAGE1","type":"IMAGE","links":null},{"localized_name":"IMAGE2","name":"IMAGE2","type":"IMAGE","links":null},{"localized_name":"IMAGE3","name":"IMAGE3","type":"IMAGE","links":null}],"properties":{"Node name for S&R":"GLSLShader"},"widgets_values":["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform float u_float0; // strength [0.0 – 2.0] typical: 0.3–1.0\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nvoid main() {\n vec2 texel = 1.0 / u_resolution;\n \n // Sample center and neighbors\n vec4 center = texture(u_image0, v_texCoord);\n vec4 top = texture(u_image0, v_texCoord + vec2( 0.0, -texel.y));\n vec4 bottom = texture(u_image0, v_texCoord + vec2( 0.0, texel.y));\n vec4 left = texture(u_image0, v_texCoord + vec2(-texel.x, 0.0));\n vec4 right = texture(u_image0, v_texCoord + vec2( texel.x, 0.0));\n \n // Edge enhancement (Laplacian)\n vec4 edges = center * 4.0 - top - bottom - left - right;\n \n // Add edges back scaled by strength\n vec4 sharpened = center + edges * u_float0;\n \n fragColor0 = vec4(clamp(sharpened.rgb, 0.0, 1.0), center.a);\n}","from_input"]}],"groups":[],"links":[{"id":36,"origin_id":24,"origin_slot":0,"target_id":23,"target_slot":2,"type":"FLOAT"},{"id":34,"origin_id":-10,"origin_slot":0,"target_id":23,"target_slot":0,"type":"IMAGE"},{"id":35,"origin_id":23,"origin_slot":0,"target_id":-20,"target_slot":0,"type":"IMAGE"}],"extra":{"workflowRendererVersion":"LG"}}]}}